2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_amd64_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 /* Translates AMD64 code to IR. */
36 /* TODO:
38 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
39 to ensure a 64-bit value is being written.
41 x87 FP Limitations:
43 * all arithmetic done at 64 bits
45 * no FP exceptions, except for handling stack over/underflow
47 * FP rounding mode observed only for float->int conversions and
48 int->float conversions which could lose accuracy, and for
49 float-to-float rounding. For all other operations,
50 round-to-nearest is used, regardless.
52 * some of the FCOM cases could do with testing -- not convinced
53 that the args are the right way round.
55 * FSAVE does not re-initialise the FPU; it should do
57 * FINIT not only initialises the FPU environment, it also zeroes
58 all the FP registers. It should leave the registers unchanged.
60 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
61 per Intel docs this bit has no meaning anyway. Since PUSHF is the
62 only way to observe eflags[1], a proper fix would be to make that
63 bit be set by PUSHF.
65 This module uses global variables and so is not MT-safe (if that
66 should ever become relevant).
69 /* Notes re address size overrides (0x67).
71 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
72 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
73 and System Instructions"), Section 1.2.3 ("Address-Size Override
74 Prefix"):
76 0x67 applies to all explicit memory references, causing the top
77 32 bits of the effective address to become zero.
79 0x67 has no effect on stack references (push/pop); these always
80 use a 64-bit address.
82 0x67 changes the interpretation of instructions which implicitly
83 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
84 instead. These are:
86 cmp{s,sb,sw,sd,sq}
87 in{s,sb,sw,sd}
88 jcxz, jecxz, jrcxz
89 lod{s,sb,sw,sd,sq}
90 loop{,e,bz,be,z}
91 mov{s,sb,sw,sd,sq}
92 out{s,sb,sw,sd}
93 rep{,e,ne,nz}
94 sca{s,sb,sw,sd,sq}
95 sto{s,sb,sw,sd,sq}
96 xlat{,b} */
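/* To illustrate the 0x67 effect on effective addresses, here is a minimal
   sketch using a made-up helper name; the address-override handling later
   in this file does the equivalent at the IR level, by narrowing the
   computed address to 32 bits and zero-widening it back to 64:

      // ea is the 64-bit effective address computed from the addressing
      // mode; aso is True iff an 0x67 prefix was seen on the insn.
      static ULong apply_address_size_override ( ULong ea, Bool aso )
      {
         return aso ? (ea & 0xFFFFFFFFULL) : ea;
      }
*/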
98 /* "Special" instructions.
100 This instruction decoder can decode three special instructions
101 which mean nothing natively (are no-ops as far as regs/mem are
102 concerned) but have meaning for supporting Valgrind. A special
103 instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
104 48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
105 $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
106 Following that, one of the following 4 is allowed (standard
107 interpretation in parentheses):
109 4887DB (xchgq %rbx,%rbx) %RDX = client_request ( %RAX )
110 4887C9 (xchgq %rcx,%rcx) %RAX = guest_NRADDR
111 4887D2 (xchgq %rdx,%rdx) call-noredir *%RAX
112 4887F6 (xchgq %rsi,%rsi) IR injection
114 Any other bytes following the 16-byte preamble are illegal and
115 constitute a failure in instruction decoding. This all assumes
116 that the preamble will never occur except in specific code
117 fragments designed for Valgrind to catch.
119 No prefixes may precede a "Special" instruction.
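/* A minimal sketch of the preamble check, using a made-up helper name
   (in the real decoder the byte comparisons are written out in-line in
   the main decode routine):

      static Bool looks_like_special_preamble ( const UChar* code )
      {
         static const UChar preamble[16]
            = { 0x48,0xC1,0xC7,0x03, 0x48,0xC1,0xC7,0x0D,
                0x48,0xC1,0xC7,0x3D, 0x48,0xC1,0xC7,0x33 };
         Int i;
         for (i = 0; i < 16; i++)
            if (code[i] != preamble[i]) return False;
         return True;
      }
*/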
122 /* casLE (implementation of lock-prefixed insns) and rep-prefixed
123 insns: the side-exit back to the start of the insn is done with
124 Ijk_Boring. This is quite wrong, it should be done with
125 Ijk_NoRedir, since otherwise the side exit, which is intended to
126 restart the instruction for whatever reason, could go somewhere
127 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
128 no-redir jumps performance critical, at least for rep-prefixed
129 instructions, since all iterations thereof would involve such a
130 jump. It's not such a big deal with casLE since the side exit is
131 only taken if the CAS fails, that is, the location is contended,
132 which is relatively unlikely.
134 Note also, the test for CAS success vs failure is done using
135 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
136 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
137 shouldn't definedness-check these comparisons. See
138 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
139 background/rationale.
142 /* LOCK prefixed instructions. These are translated using IR-level
143 CAS statements (IRCAS) and are believed to preserve atomicity, even
144 from the point of view of some other process racing against a
145 simulated one (presumably they communicate via a shared memory
146 segment).
148 Handlers which are aware of LOCK prefixes are:
149 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
150 dis_cmpxchg_G_E (cmpxchg)
151 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
152 dis_Grp3 (not, neg)
153 dis_Grp4 (inc, dec)
154 dis_Grp5 (inc, dec)
155 dis_Grp8_Imm (bts, btc, btr)
156 dis_bt_G_E (bts, btc, btr)
157 dis_xadd_G_E (xadd)
161 #include "libvex_basictypes.h"
162 #include "libvex_ir.h"
163 #include "libvex.h"
164 #include "libvex_guest_amd64.h"
166 #include "main_util.h"
167 #include "main_globals.h"
168 #include "guest_generic_bb_to_IR.h"
169 #include "guest_generic_x87.h"
170 #include "guest_amd64_defs.h"
173 /*------------------------------------------------------------*/
174 /*--- Globals ---*/
175 /*------------------------------------------------------------*/
177 /* These are set at the start of the translation of an insn, right
178 down in disInstr_AMD64, so that we don't have to pass them around
179 endlessly. They are all constant during the translation of any
180 given insn. */
182 /* These are set at the start of the translation of a BB, so
183 that we don't have to pass them around endlessly. */
185 /* We need to know this to do sub-register accesses correctly. */
186 static VexEndness host_endness;
188 /* Pointer to the guest code area (points to start of BB, not to the
189 insn being processed). */
190 static const UChar* guest_code;
192 /* The guest address corresponding to guest_code[0]. */
193 static Addr64 guest_RIP_bbstart;
195 /* The guest address for the instruction currently being
196 translated. */
197 static Addr64 guest_RIP_curr_instr;
199 /* The IRSB* into which we're generating code. */
200 static IRSB* irsb;
202 /* For ensuring that %rip-relative addressing is done right. A read
203 of %rip generates the address of the next instruction. It may be
204 that we don't conveniently know that inside disAMode(). For sanity
205 checking, if the next insn %rip is needed, we make a guess at what
206 it is, record that guess here, and set the accompanying Bool to
207 indicate that -- after this insn's decode is finished -- that guess
208 needs to be checked. */
210 /* At the start of each insn decode, is set to (0, False).
211 After the decode, if _mustcheck is now True, _assumed is
212 checked. */
214 static Addr64 guest_RIP_next_assumed;
215 static Bool guest_RIP_next_mustcheck;
218 /*------------------------------------------------------------*/
219 /*--- Helpers for constructing IR. ---*/
220 /*------------------------------------------------------------*/
222 /* Generate a new temporary of the given type. */
223 static IRTemp newTemp ( IRType ty )
225 vassert(isPlausibleIRType(ty));
226 return newIRTemp( irsb->tyenv, ty );
229 /* Add a statement to the list held by "irsb". */
230 static void stmt ( IRStmt* st )
232 addStmtToIRSB( irsb, st );
235 /* Generate a statement "dst := e". */
236 static void assign ( IRTemp dst, IRExpr* e )
238 stmt( IRStmt_WrTmp(dst, e) );
241 static IRExpr* unop ( IROp op, IRExpr* a )
243 return IRExpr_Unop(op, a);
246 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
248 return IRExpr_Binop(op, a1, a2);
251 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
253 return IRExpr_Triop(op, a1, a2, a3);
256 static IRExpr* mkexpr ( IRTemp tmp )
258 return IRExpr_RdTmp(tmp);
261 static IRExpr* mkU8 ( ULong i )
263 vassert(i < 256);
264 return IRExpr_Const(IRConst_U8( (UChar)i ));
267 static IRExpr* mkU16 ( ULong i )
269 vassert(i < 0x10000ULL);
270 return IRExpr_Const(IRConst_U16( (UShort)i ));
273 static IRExpr* mkU32 ( ULong i )
275 vassert(i < 0x100000000ULL);
276 return IRExpr_Const(IRConst_U32( (UInt)i ));
279 static IRExpr* mkU64 ( ULong i )
281 return IRExpr_Const(IRConst_U64(i));
284 static IRExpr* mkU ( IRType ty, ULong i )
286 switch (ty) {
287 case Ity_I8: return mkU8(i);
288 case Ity_I16: return mkU16(i);
289 case Ity_I32: return mkU32(i);
290 case Ity_I64: return mkU64(i);
291 default: vpanic("mkU(amd64)");
295 static void storeLE ( IRExpr* addr, IRExpr* data )
297 stmt( IRStmt_Store(Iend_LE, addr, data) );
300 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
302 return IRExpr_Load(Iend_LE, ty, addr);
305 static IROp mkSizedOp ( IRType ty, IROp op8 )
307 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
308 || op8 == Iop_Mul8
309 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
310 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
311 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
312 || op8 == Iop_CasCmpNE8
313 || op8 == Iop_Not8 );
314 switch (ty) {
315 case Ity_I8: return 0 +op8;
316 case Ity_I16: return 1 +op8;
317 case Ity_I32: return 2 +op8;
318 case Ity_I64: return 3 +op8;
319 default: vpanic("mkSizedOp(amd64)");
323 static
324 IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
326 if (szSmall == 1 && szBig == 4) {
327 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
329 if (szSmall == 1 && szBig == 2) {
330 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
332 if (szSmall == 2 && szBig == 4) {
333 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
335 if (szSmall == 1 && szBig == 8 && !signd) {
336 return unop(Iop_8Uto64, src);
338 if (szSmall == 1 && szBig == 8 && signd) {
339 return unop(Iop_8Sto64, src);
341 if (szSmall == 2 && szBig == 8 && !signd) {
342 return unop(Iop_16Uto64, src);
344 if (szSmall == 2 && szBig == 8 && signd) {
345 return unop(Iop_16Sto64, src);
347 vpanic("doScalarWidening(amd64)");
350 static
351 void putGuarded ( Int gstOffB, IRExpr* guard, IRExpr* value )
353 IRType ty = typeOfIRExpr(irsb->tyenv, value);
354 stmt( IRStmt_Put(gstOffB,
355 IRExpr_ITE(guard, value, IRExpr_Get(gstOffB, ty))) );
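/* A minimal illustration of how the helpers above compose -- not used by
   the decoder, and the name is made up.  It emits IR for
   "mem64[addr] := mem64[addr] + 1", assuming 'addr' is an atom (an
   IRExpr_RdTmp or constant) so that it can safely appear twice. */
__attribute__ ((unused))
static void example_inc_mem64 ( IRExpr* addr )
{
   IRTemp t = newTemp(Ity_I64);
   assign( t, binop(Iop_Add64, loadLE(Ity_I64, addr), mkU64(1)) );
   storeLE( addr, mkexpr(t) );
}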
359 /*------------------------------------------------------------*/
360 /*--- Debugging output ---*/
361 /*------------------------------------------------------------*/
363 /* Bomb out if we can't handle something. */
364 __attribute__ ((noreturn))
365 static void unimplemented ( const HChar* str )
367 vex_printf("amd64toIR: unimplemented feature\n");
368 vpanic(str);
371 #define DIP(format, args...) \
372 if (vex_traceflags & VEX_TRACE_FE) \
373 vex_printf(format, ## args)
375 #define DIS(buf, format, args...) \
376 if (vex_traceflags & VEX_TRACE_FE) \
377 vex_sprintf(buf, format, ## args)
380 /*------------------------------------------------------------*/
381 /*--- Offsets of various parts of the amd64 guest state. ---*/
382 /*------------------------------------------------------------*/
384 #define OFFB_RAX offsetof(VexGuestAMD64State,guest_RAX)
385 #define OFFB_RBX offsetof(VexGuestAMD64State,guest_RBX)
386 #define OFFB_RCX offsetof(VexGuestAMD64State,guest_RCX)
387 #define OFFB_RDX offsetof(VexGuestAMD64State,guest_RDX)
388 #define OFFB_RSP offsetof(VexGuestAMD64State,guest_RSP)
389 #define OFFB_RBP offsetof(VexGuestAMD64State,guest_RBP)
390 #define OFFB_RSI offsetof(VexGuestAMD64State,guest_RSI)
391 #define OFFB_RDI offsetof(VexGuestAMD64State,guest_RDI)
392 #define OFFB_R8 offsetof(VexGuestAMD64State,guest_R8)
393 #define OFFB_R9 offsetof(VexGuestAMD64State,guest_R9)
394 #define OFFB_R10 offsetof(VexGuestAMD64State,guest_R10)
395 #define OFFB_R11 offsetof(VexGuestAMD64State,guest_R11)
396 #define OFFB_R12 offsetof(VexGuestAMD64State,guest_R12)
397 #define OFFB_R13 offsetof(VexGuestAMD64State,guest_R13)
398 #define OFFB_R14 offsetof(VexGuestAMD64State,guest_R14)
399 #define OFFB_R15 offsetof(VexGuestAMD64State,guest_R15)
401 #define OFFB_RIP offsetof(VexGuestAMD64State,guest_RIP)
403 #define OFFB_FS_CONST offsetof(VexGuestAMD64State,guest_FS_CONST)
404 #define OFFB_GS_CONST offsetof(VexGuestAMD64State,guest_GS_CONST)
406 #define OFFB_CC_OP offsetof(VexGuestAMD64State,guest_CC_OP)
407 #define OFFB_CC_DEP1 offsetof(VexGuestAMD64State,guest_CC_DEP1)
408 #define OFFB_CC_DEP2 offsetof(VexGuestAMD64State,guest_CC_DEP2)
409 #define OFFB_CC_NDEP offsetof(VexGuestAMD64State,guest_CC_NDEP)
411 #define OFFB_FPREGS offsetof(VexGuestAMD64State,guest_FPREG[0])
412 #define OFFB_FPTAGS offsetof(VexGuestAMD64State,guest_FPTAG[0])
413 #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
414 #define OFFB_ACFLAG offsetof(VexGuestAMD64State,guest_ACFLAG)
415 #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
416 #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
417 #define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
418 #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
420 #define OFFB_SSEROUND offsetof(VexGuestAMD64State,guest_SSEROUND)
421 #define OFFB_YMM0 offsetof(VexGuestAMD64State,guest_YMM0)
422 #define OFFB_YMM1 offsetof(VexGuestAMD64State,guest_YMM1)
423 #define OFFB_YMM2 offsetof(VexGuestAMD64State,guest_YMM2)
424 #define OFFB_YMM3 offsetof(VexGuestAMD64State,guest_YMM3)
425 #define OFFB_YMM4 offsetof(VexGuestAMD64State,guest_YMM4)
426 #define OFFB_YMM5 offsetof(VexGuestAMD64State,guest_YMM5)
427 #define OFFB_YMM6 offsetof(VexGuestAMD64State,guest_YMM6)
428 #define OFFB_YMM7 offsetof(VexGuestAMD64State,guest_YMM7)
429 #define OFFB_YMM8 offsetof(VexGuestAMD64State,guest_YMM8)
430 #define OFFB_YMM9 offsetof(VexGuestAMD64State,guest_YMM9)
431 #define OFFB_YMM10 offsetof(VexGuestAMD64State,guest_YMM10)
432 #define OFFB_YMM11 offsetof(VexGuestAMD64State,guest_YMM11)
433 #define OFFB_YMM12 offsetof(VexGuestAMD64State,guest_YMM12)
434 #define OFFB_YMM13 offsetof(VexGuestAMD64State,guest_YMM13)
435 #define OFFB_YMM14 offsetof(VexGuestAMD64State,guest_YMM14)
436 #define OFFB_YMM15 offsetof(VexGuestAMD64State,guest_YMM15)
437 #define OFFB_YMM16 offsetof(VexGuestAMD64State,guest_YMM16)
439 #define OFFB_EMNOTE offsetof(VexGuestAMD64State,guest_EMNOTE)
440 #define OFFB_CMSTART offsetof(VexGuestAMD64State,guest_CMSTART)
441 #define OFFB_CMLEN offsetof(VexGuestAMD64State,guest_CMLEN)
443 #define OFFB_NRADDR offsetof(VexGuestAMD64State,guest_NRADDR)
446 /*------------------------------------------------------------*/
447 /*--- Helper bits and pieces for deconstructing the ---*/
448 /*--- amd64 insn stream. ---*/
449 /*------------------------------------------------------------*/
451 /* This is the AMD64 register encoding -- integer regs. */
452 #define R_RAX 0
453 #define R_RCX 1
454 #define R_RDX 2
455 #define R_RBX 3
456 #define R_RSP 4
457 #define R_RBP 5
458 #define R_RSI 6
459 #define R_RDI 7
460 #define R_R8 8
461 #define R_R9 9
462 #define R_R10 10
463 #define R_R11 11
464 #define R_R12 12
465 #define R_R13 13
466 #define R_R14 14
467 #define R_R15 15
469 /* This is the Intel register encoding -- segment regs. */
470 #define R_ES 0
471 #define R_CS 1
472 #define R_SS 2
473 #define R_DS 3
474 #define R_FS 4
475 #define R_GS 5
478 /* Various simple conversions */
480 static ULong extend_s_8to64 ( UChar x )
482 return (ULong)((Long)(((ULong)x) << 56) >> 56);
485 static ULong extend_s_16to64 ( UShort x )
487 return (ULong)((Long)(((ULong)x) << 48) >> 48);
490 static ULong extend_s_32to64 ( UInt x )
492 return (ULong)((Long)(((ULong)x) << 32) >> 32);
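/* Quick illustration of the sign-extension helpers above; an unused,
   made-up checker, not part of the decoder. */
__attribute__ ((unused))
static void example_check_sign_extension ( void )
{
   vassert( extend_s_8to64 ( 0x80 )        == 0xFFFFFFFFFFFFFF80ULL );
   vassert( extend_s_8to64 ( 0x7F )        == 0x000000000000007FULL );
   vassert( extend_s_16to64( 0x8000 )      == 0xFFFFFFFFFFFF8000ULL );
   vassert( extend_s_32to64( 0x80000000U ) == 0xFFFFFFFF80000000ULL );
}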
495 /* Figure out whether the mod and rm parts of a modRM byte refer to a
496 register or memory. If so, the byte will have the form 11XXXYYY,
497 where YYY is the register number. */
498 inline
499 static Bool epartIsReg ( UChar mod_reg_rm )
501 return toBool(0xC0 == (mod_reg_rm & 0xC0));
504 /* Extract the 'g' field from a modRM byte. This only produces 3
505 bits, which is not a complete register number. You should avoid
506 this function if at all possible. */
507 inline
508 static Int gregLO3ofRM ( UChar mod_reg_rm )
510 return (Int)( (mod_reg_rm >> 3) & 7 );
513 /* Ditto the 'e' field of a modRM byte. */
514 inline
515 static Int eregLO3ofRM ( UChar mod_reg_rm )
517 return (Int)(mod_reg_rm & 0x7);
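/* Worked example (unused, name made up): the modRM byte 0xD8 is
   11 011 000, i.e. register form with g = 3 and e = 0. */
__attribute__ ((unused))
static void example_decode_modrm ( void )
{
   UChar modrm = 0xD8;
   vassert( epartIsReg(modrm) );
   vassert( gregLO3ofRM(modrm) == 3 );
   vassert( eregLO3ofRM(modrm) == 0 );
}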
520 /* Get an 8/16/32-bit unsigned value out of the insn stream. */
522 static inline UChar getUChar ( Long delta )
524 UChar v = guest_code[delta+0];
525 return v;
528 static UInt getUDisp16 ( Long delta )
530 UInt v = guest_code[delta+1]; v <<= 8;
531 v |= guest_code[delta+0];
532 return v & 0xFFFF;
535 //.. static UInt getUDisp ( Int size, Long delta )
536 //.. {
537 //.. switch (size) {
538 //.. case 4: return getUDisp32(delta);
539 //.. case 2: return getUDisp16(delta);
540 //.. case 1: return getUChar(delta);
541 //.. default: vpanic("getUDisp(x86)");
542 //.. }
543 //.. return 0; /*notreached*/
544 //.. }
547 /* Get a byte value out of the insn stream and sign-extend to 64
548 bits. */
549 static Long getSDisp8 ( Long delta )
551 return extend_s_8to64( guest_code[delta] );
554 /* Get a 16-bit value out of the insn stream and sign-extend to 64
555 bits. */
556 static Long getSDisp16 ( Long delta )
558 UInt v = guest_code[delta+1]; v <<= 8;
559 v |= guest_code[delta+0];
560 return extend_s_16to64( (UShort)v );
563 /* Get a 32-bit value out of the insn stream and sign-extend to 64
564 bits. */
565 static Long getSDisp32 ( Long delta )
567 UInt v = guest_code[delta+3]; v <<= 8;
568 v |= guest_code[delta+2]; v <<= 8;
569 v |= guest_code[delta+1]; v <<= 8;
570 v |= guest_code[delta+0];
571 return extend_s_32to64( v );
574 /* Get a 64-bit value out of the insn stream. */
575 static Long getDisp64 ( Long delta )
577 ULong v = 0;
578 v |= guest_code[delta+7]; v <<= 8;
579 v |= guest_code[delta+6]; v <<= 8;
580 v |= guest_code[delta+5]; v <<= 8;
581 v |= guest_code[delta+4]; v <<= 8;
582 v |= guest_code[delta+3]; v <<= 8;
583 v |= guest_code[delta+2]; v <<= 8;
584 v |= guest_code[delta+1]; v <<= 8;
585 v |= guest_code[delta+0];
586 return v;
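/* The readers above reassemble little-endian values byte by byte from
   guest_code.  The same idea on an arbitrary buffer, as an unused,
   made-up helper: byte 0 is the least significant. */
__attribute__ ((unused))
static ULong example_read_le64 ( const UChar* p )
{
   ULong v = 0;
   Int i;
   for (i = 7; i >= 0; i--) { v = (v << 8) | p[i]; }
   return v;
}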
589 /* Note: because AMD64 doesn't allow 64-bit literals, it is an error
590 if this is called with size==8. Should not happen. */
591 static Long getSDisp ( Int size, Long delta )
593 switch (size) {
594 case 4: return getSDisp32(delta);
595 case 2: return getSDisp16(delta);
596 case 1: return getSDisp8(delta);
597 default: vpanic("getSDisp(amd64)");
601 static ULong mkSizeMask ( Int sz )
603 switch (sz) {
604 case 1: return 0x00000000000000FFULL;
605 case 2: return 0x000000000000FFFFULL;
606 case 4: return 0x00000000FFFFFFFFULL;
607 case 8: return 0xFFFFFFFFFFFFFFFFULL;
608 default: vpanic("mkSzMask(amd64)");
612 static Int imin ( Int a, Int b )
614 return (a < b) ? a : b;
617 static IRType szToITy ( Int n )
619 switch (n) {
620 case 1: return Ity_I8;
621 case 2: return Ity_I16;
622 case 4: return Ity_I32;
623 case 8: return Ity_I64;
624 default: vex_printf("\nszToITy(%d)\n", n);
625 vpanic("szToITy(amd64)");
630 /*------------------------------------------------------------*/
631 /*--- For dealing with prefixes. ---*/
632 /*------------------------------------------------------------*/
634 /* The idea is to pass around an int holding a bitmask summarising
635 info from the prefixes seen on the current instruction, including
636 info from the REX byte. This info is used in various places, but
637 most especially when making sense of register fields in
638 instructions.
640 The top 8 bits of the prefix are 0x55, just as a hacky way to
641 ensure it really is a valid prefix.
643 Things you can safely assume about a well-formed prefix:
644 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
645 * if REX is not present then REXW,REXR,REXX,REXB will read
646 as zero.
647 * F2 and F3 will not both be 1.
650 typedef UInt Prefix;
652 #define PFX_ASO (1<<0) /* address-size override present (0x67) */
653 #define PFX_66 (1<<1) /* operand-size override-to-16 present (0x66) */
654 #define PFX_REX (1<<2) /* REX byte present (0x40 to 0x4F) */
655 #define PFX_REXW (1<<3) /* REX W bit, if REX present, else 0 */
656 #define PFX_REXR (1<<4) /* REX R bit, if REX present, else 0 */
657 #define PFX_REXX (1<<5) /* REX X bit, if REX present, else 0 */
658 #define PFX_REXB (1<<6) /* REX B bit, if REX present, else 0 */
659 #define PFX_LOCK (1<<7) /* bus LOCK prefix present (0xF0) */
660 #define PFX_F2 (1<<8) /* REPNE/REPNZ prefix present (0xF2) */
661 #define PFX_F3 (1<<9) /* REP/REPE/REPZ prefix present (0xF3) */
662 #define PFX_CS (1<<10) /* CS segment prefix present (0x2E) */
663 #define PFX_DS (1<<11) /* DS segment prefix present (0x3E) */
664 #define PFX_ES (1<<12) /* ES segment prefix present (0x26) */
665 #define PFX_FS (1<<13) /* FS segment prefix present (0x64) */
666 #define PFX_GS (1<<14) /* GS segment prefix present (0x65) */
667 #define PFX_SS (1<<15) /* SS segment prefix present (0x36) */
668 #define PFX_VEX (1<<16) /* VEX prefix present (0xC4 or 0xC5) */
669 #define PFX_VEXL (1<<17) /* VEX L bit, if VEX present, else 0 */
670 /* The extra register field VEX.vvvv is encoded (after not-ing it) as
671 PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
672 positions. */
673 #define PFX_VEXnV0 (1<<18) /* ~VEX vvvv[0], if VEX present, else 0 */
674 #define PFX_VEXnV1 (1<<19) /* ~VEX vvvv[1], if VEX present, else 0 */
675 #define PFX_VEXnV2 (1<<20) /* ~VEX vvvv[2], if VEX present, else 0 */
676 #define PFX_VEXnV3 (1<<21) /* ~VEX vvvv[3], if VEX present, else 0 */
679 #define PFX_EMPTY 0x55000000
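/* A minimal sketch (made-up helper name; the decoder's prefix-scanning
   loop does the equivalent in-line) of folding a REX byte 0x40..0x4F
   into the Prefix bitmask defined above: */
__attribute__ ((unused))
static Prefix example_fold_in_rex ( Prefix pfx, UChar rex )
{
   vassert(rex >= 0x40 && rex <= 0x4F);
   pfx |= PFX_REX;
   if (rex & 8) pfx |= PFX_REXW;
   if (rex & 4) pfx |= PFX_REXR;
   if (rex & 2) pfx |= PFX_REXX;
   if (rex & 1) pfx |= PFX_REXB;
   return pfx;
}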
681 static Bool IS_VALID_PFX ( Prefix pfx ) {
682 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
685 static Bool haveREX ( Prefix pfx ) {
686 return toBool(pfx & PFX_REX);
689 static Int getRexW ( Prefix pfx ) {
690 return (pfx & PFX_REXW) ? 1 : 0;
692 static Int getRexR ( Prefix pfx ) {
693 return (pfx & PFX_REXR) ? 1 : 0;
695 static Int getRexX ( Prefix pfx ) {
696 return (pfx & PFX_REXX) ? 1 : 0;
698 static Int getRexB ( Prefix pfx ) {
699 return (pfx & PFX_REXB) ? 1 : 0;
702 /* Check a prefix doesn't have F2 or F3 set in it, since usually that
703 completely changes what instruction it really is. */
704 static Bool haveF2orF3 ( Prefix pfx ) {
705 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
707 static Bool haveF2andF3 ( Prefix pfx ) {
708 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
710 static Bool haveF2 ( Prefix pfx ) {
711 return toBool((pfx & PFX_F2) > 0);
713 static Bool haveF3 ( Prefix pfx ) {
714 return toBool((pfx & PFX_F3) > 0);
717 static Bool have66 ( Prefix pfx ) {
718 return toBool((pfx & PFX_66) > 0);
720 static Bool haveASO ( Prefix pfx ) {
721 return toBool((pfx & PFX_ASO) > 0);
723 static Bool haveLOCK ( Prefix pfx ) {
724 return toBool((pfx & PFX_LOCK) > 0);
727 /* Return True iff pfx has 66 set and F2 and F3 clear */
728 static Bool have66noF2noF3 ( Prefix pfx )
730 return
731 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
734 /* Return True iff pfx has F2 set and 66 and F3 clear */
735 static Bool haveF2no66noF3 ( Prefix pfx )
737 return
738 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
741 /* Return True iff pfx has F3 set and 66 and F2 clear */
742 static Bool haveF3no66noF2 ( Prefix pfx )
744 return
745 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
748 /* Return True iff pfx has F3 set and F2 clear */
749 static Bool haveF3noF2 ( Prefix pfx )
751 return
752 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
755 /* Return True iff pfx has F2 set and F3 clear */
756 static Bool haveF2noF3 ( Prefix pfx )
758 return
759 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
762 /* Return True iff pfx has F2 and F3 clear */
763 static Bool haveNoF2noF3 ( Prefix pfx )
765 return
766 toBool((pfx & (PFX_F2|PFX_F3)) == 0);
769 /* Return True iff pfx has 66, F2 and F3 clear */
770 static Bool haveNo66noF2noF3 ( Prefix pfx )
772 return
773 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
776 /* Return True iff pfx has any of 66, F2 and F3 set */
777 static Bool have66orF2orF3 ( Prefix pfx )
779 return toBool( ! haveNo66noF2noF3(pfx) );
782 /* Return True iff pfx has 66 or F3 set */
783 static Bool have66orF3 ( Prefix pfx )
785 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
788 /* Clear all the segment-override bits in a prefix. */
789 static Prefix clearSegBits ( Prefix p )
791 return
792 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
795 /* Get the (inverted, hence back to "normal") VEX.vvvv field. */
796 static UInt getVexNvvvv ( Prefix pfx ) {
797 UInt r = (UInt)pfx;
798 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
799 return r & 0xF;
802 static Bool haveVEX ( Prefix pfx ) {
803 return toBool(pfx & PFX_VEX);
806 static Int getVexL ( Prefix pfx ) {
807 return (pfx & PFX_VEXL) ? 1 : 0;
811 /*------------------------------------------------------------*/
812 /*--- For dealing with escapes ---*/
813 /*------------------------------------------------------------*/
816 /* Escapes come after the prefixes, but before the primary opcode
817 byte. They escape the primary opcode byte into a bigger space.
818 The 0xF0000000 isn't significant, except so as to make it not
819 overlap valid Prefix values, for sanity checking.
822 typedef
823 enum {
824 ESC_NONE=0xF0000000, // none
825 ESC_0F, // 0F
826 ESC_0F38, // 0F 38
827 ESC_0F3A // 0F 3A
829 Escape;
832 /*------------------------------------------------------------*/
833 /*--- For dealing with integer registers ---*/
834 /*------------------------------------------------------------*/
836 /* This is somewhat complex. The rules are:
838 For 64, 32 and 16 bit register references, the e or g fields in the
839 modrm bytes supply the low 3 bits of the register number. The
840 fourth (most-significant) bit of the register number is supplied by
841 the REX byte, if it is present; else that bit is taken to be zero.
843 The REX.R bit supplies the high bit corresponding to the g register
844 field, and the REX.B bit supplies the high bit corresponding to the
845 e register field (when the mod part of modrm indicates that modrm's
846 e component refers to a register and not to memory).
848 The REX.X bit supplies a high register bit for certain registers
849 in SIB address modes, and is generally rarely used.
851 For 8 bit register references, the presence of the REX byte itself
852 has significance. If there is no REX present, then the 3-bit
853 number extracted from the modrm e or g field is treated as an index
854 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
855 old x86 encoding scheme.
857 But if there is a REX present, the register reference is
858 interpreted in the same way as for 64/32/16-bit references: a high
859 bit is extracted from REX, giving a 4-bit number, and the denoted
860 register is the lowest 8 bits of the 16 integer registers denoted
861 by the number. In particular, values 3 through 7 of this sequence
862 do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
863 %rsp %rbp %rsi %rdi.
865 The REX.W bit has no bearing at all on register numbers. Instead
866 its presence indicates that the operand size is to be overridden
867 from its default value (32 bits) to 64 bits instead. This is in
868 the same fashion that an 0x66 prefix indicates the operand size is
869 to be overridden from 32 bits down to 16 bits. When both REX.W and
870 0x66 are present there is a conflict, and REX.W takes precedence.
872 Rather than try to handle this complexity using a single huge
873 function, several smaller ones are provided. The aim is to make it
874 as difficult as possible to screw up register decoding in a subtle
875 and hard-to-track-down way.
877 Because these routines fish around in the host's memory (that is,
878 in the guest state area) for sub-parts of guest registers, their
879 correctness depends on the host's endianness. So far these
880 routines only work for little-endian hosts. Those for which
881 endianness is important have assertions to ensure sanity.
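/* Worked example (unused, name made up) of the 64/32/16-bit rule above:
   with modrm byte 0xC3 (mod=11, g=000, e=011) and REX byte 0x4C
   (W=1, R=1, X=0, B=0), the g register is 0+8 = 8 (%r8) and the
   e register is 3+0 = 3 (%rbx). */
__attribute__ ((unused))
static void example_regnos_from_modrm_and_rex ( void )
{
   UChar modrm = 0xC3;
   UChar rex   = 0x4C;
   UInt  g = ((modrm >> 3) & 7) | (((rex >> 2) & 1) << 3);
   UInt  e = ( modrm       & 7) | (((rex >> 0) & 1) << 3);
   vassert(g == 8 && e == 3);
}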
885 /* About the simplest question you can ask: where do the 64-bit
886 integer registers live (in the guest state) ? */
888 static Int integerGuestReg64Offset ( UInt reg )
890 switch (reg) {
891 case R_RAX: return OFFB_RAX;
892 case R_RCX: return OFFB_RCX;
893 case R_RDX: return OFFB_RDX;
894 case R_RBX: return OFFB_RBX;
895 case R_RSP: return OFFB_RSP;
896 case R_RBP: return OFFB_RBP;
897 case R_RSI: return OFFB_RSI;
898 case R_RDI: return OFFB_RDI;
899 case R_R8: return OFFB_R8;
900 case R_R9: return OFFB_R9;
901 case R_R10: return OFFB_R10;
902 case R_R11: return OFFB_R11;
903 case R_R12: return OFFB_R12;
904 case R_R13: return OFFB_R13;
905 case R_R14: return OFFB_R14;
906 case R_R15: return OFFB_R15;
907 default: vpanic("integerGuestReg64Offset(amd64)");
912 /* Produce the name of an integer register, for printing purposes.
913 reg is a number in the range 0 .. 15 that has been generated from a
914 3-bit reg-field number and a REX extension bit. irregular denotes
915 the case where sz==1 and no REX byte is present and where the denoted
916 sub-register is bits 15:8 of the containing 64-bit register. */
918 static
919 const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
921 static const HChar* ireg64_names[16]
922 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
923 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
924 static const HChar* ireg32_names[16]
925 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
926 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
927 static const HChar* ireg16_names[16]
928 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
929 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
930 static const HChar* ireg8_names[16]
931 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
932 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
933 static const HChar* ireg8_irregular[4]
934 = { "%ah", "%ch", "%dh", "%bh" };
936 vassert(reg < 16);
937 if (sz == 1) {
938 if (irregular)
939 vassert(reg >= 4 && reg < 8);
940 } else {
941 vassert(irregular == False);
944 switch (sz) {
945 case 8: return ireg64_names[reg];
946 case 4: return ireg32_names[reg];
947 case 2: return ireg16_names[reg];
948 case 1: if (irregular) {
949 vassert(reg >= 4 && reg < 8);
950 return ireg8_irregular[reg - 4];
951 } else {
952 return ireg8_names[reg];
954 default: vpanic("nameIReg(amd64)");
958 /* Using the same argument conventions as nameIReg, produce the
959 guest state offset of an integer register. */
961 static
962 Int offsetIReg ( Int sz, UInt reg, Bool irregular )
964 vassert(reg < 16);
965 if (sz == 1) {
966 if (irregular)
967 vassert(reg >= 4 && reg < 8);
968 } else {
969 vassert(irregular == False);
972 /* Deal with irregular case -- sz==1 and no REX present */
973 if (sz == 1 && irregular) {
974 switch (reg) {
975 case R_RSP: return 1+ OFFB_RAX;
976 case R_RBP: return 1+ OFFB_RCX;
977 case R_RSI: return 1+ OFFB_RDX;
978 case R_RDI: return 1+ OFFB_RBX;
979 default: break; /* use the normal case */
983 /* Normal case */
984 return integerGuestReg64Offset(reg);
988 /* Read the %CL register :: Ity_I8, for shift/rotate operations. */
990 static IRExpr* getIRegCL ( void )
992 vassert(host_endness == VexEndnessLE);
993 return unop(Iop_64to8, IRExpr_Get( OFFB_RCX, Ity_I64 ));
997 /* Write to the %AH register. */
999 static void putIRegAH ( IRExpr* e )
1001 vassert(host_endness == VexEndnessLE);
1002 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
1003 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
1007 /* Read/write various widths of %RAX, as it has various
1008 special-purpose uses. */
1010 static const HChar* nameIRegRAX ( Int sz )
1012 switch (sz) {
1013 case 1: return "%al";
1014 case 2: return "%ax";
1015 case 4: return "%eax";
1016 case 8: return "%rax";
1017 default: vpanic("nameIRegRAX(amd64)");
1021 static IRExpr* getIRegRAX ( Int sz )
1023 vassert(host_endness == VexEndnessLE);
1024 switch (sz) {
1025 case 1: return unop(Iop_64to8, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1026 case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1027 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
1028 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1029 default: vpanic("getIRegRAX(amd64)");
1033 static void putIRegRAX ( Int sz, IRExpr* e )
1035 IRType ty = typeOfIRExpr(irsb->tyenv, e);
1036 vassert(host_endness == VexEndnessLE);
1037 switch (sz) {
1038 case 8: vassert(ty == Ity_I64);
1039 stmt( IRStmt_Put( OFFB_RAX, e ));
1040 break;
1041 case 4: vassert(ty == Ity_I32);
1042 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1043 break;
1044 case 2: vassert(ty == Ity_I16);
1045 stmt( IRStmt_Put( OFFB_RAX, e ));
1046 break;
1047 case 1: vassert(ty == Ity_I8);
1048 stmt( IRStmt_Put( OFFB_RAX, e ));
1049 break;
1050 default: vpanic("putIRegRAX(amd64)");
1055 /* Read/write various widths of %RDX, as it has various
1056 special-purpose uses. */
1058 static const HChar* nameIRegRDX ( Int sz )
1060 switch (sz) {
1061 case 1: return "%dl";
1062 case 2: return "%dx";
1063 case 4: return "%edx";
1064 case 8: return "%rdx";
1065 default: vpanic("nameIRegRDX(amd64)");
1069 static IRExpr* getIRegRDX ( Int sz )
1071 vassert(host_endness == VexEndnessLE);
1072 switch (sz) {
1073 case 1: return unop(Iop_64to8, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1074 case 2: return unop(Iop_64to16, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1075 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
1076 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1077 default: vpanic("getIRegRDX(amd64)");
1081 static void putIRegRDX ( Int sz, IRExpr* e )
1083 vassert(host_endness == VexEndnessLE);
1084 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1085 switch (sz) {
1086 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1087 break;
1088 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1089 break;
1090 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1091 break;
1092 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1093 break;
1094 default: vpanic("putIRegRDX(amd64)");
1099 /* Simplistic functions to deal with the integer registers as a
1100 straightforward bank of 16 64-bit regs. */
1102 static IRExpr* getIReg64 ( UInt regno )
1104 return IRExpr_Get( integerGuestReg64Offset(regno),
1105 Ity_I64 );
1108 static void putIReg64 ( UInt regno, IRExpr* e )
1110 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1111 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1114 static const HChar* nameIReg64 ( UInt regno )
1116 return nameIReg( 8, regno, False );
1120 /* Simplistic functions to deal with the lower halves of integer
1121 registers as a straightforward bank of 16 32-bit regs. */
1123 static IRExpr* getIReg32 ( UInt regno )
1125 vassert(host_endness == VexEndnessLE);
1126 return unop(Iop_64to32,
1127 IRExpr_Get( integerGuestReg64Offset(regno),
1128 Ity_I64 ));
1131 static void putIReg32 ( UInt regno, IRExpr* e )
1133 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1134 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1135 unop(Iop_32Uto64,e) ) );
1138 static const HChar* nameIReg32 ( UInt regno )
1140 return nameIReg( 4, regno, False );
1144 /* Simplistic functions to deal with the lower quarters of integer
1145 registers as a straightforward bank of 16 16-bit regs. */
1147 static IRExpr* getIReg16 ( UInt regno )
1149 vassert(host_endness == VexEndnessLE);
1150 return unop(Iop_64to16,
1151 IRExpr_Get( integerGuestReg64Offset(regno),
1152 Ity_I64 ));
1155 static void putIReg16 ( UInt regno, IRExpr* e )
1157 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1158 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1159 unop(Iop_16Uto64,e) ) );
1162 static const HChar* nameIReg16 ( UInt regno )
1164 return nameIReg( 2, regno, False );
1168 /* Sometimes what we know is a 3-bit register number, a REX byte, and
1169 which field of the REX byte is to be used to extend to a 4-bit
1170 number. These functions cater for that situation.
1172 static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1174 vassert(lo3bits < 8);
1175 vassert(IS_VALID_PFX(pfx));
1176 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1179 static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
1181 vassert(lo3bits < 8);
1182 vassert(IS_VALID_PFX(pfx));
1183 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1186 static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1188 vassert(lo3bits < 8);
1189 vassert(IS_VALID_PFX(pfx));
1190 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1191 UInt regNo = lo3bits | (getRexB(pfx) << 3);
1192 return nameIReg( sz, regNo,
1193 toBool(sz==1 && !haveREX(pfx) && regNo >= 4 && regNo < 8));
1196 static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1198 vassert(lo3bits < 8);
1199 vassert(IS_VALID_PFX(pfx));
1200 UInt regNo = (getRexB(pfx) << 3) | lo3bits;
1201 switch (sz) {
1202 case 8: {
1203 return IRExpr_Get(
1204 offsetIReg( 8, regNo, False/*!irregular*/ ),
1205 Ity_I64
1208 case 4: {
1209 return unop(Iop_64to32,
1210 IRExpr_Get(
1211 offsetIReg( 8, regNo, False/*!irregular*/ ),
1212 Ity_I64
1215 case 2: {
1216 return unop(Iop_64to16,
1217 IRExpr_Get(
1218 offsetIReg( 8, regNo, False/*!irregular*/ ),
1219 Ity_I64
1222 case 1: {
1223 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1224 if (irregular) {
1225 return IRExpr_Get(
1226 offsetIReg( 1, regNo, True/*irregular*/ ),
1227 Ity_I8
1229 } else {
1230 return unop(Iop_64to8,
1231 IRExpr_Get(
1232 offsetIReg( 8, regNo, False/*!irregular*/ ),
1233 Ity_I64
1237 default: {
1238 vpanic("getIRegRexB");
1243 static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1245 vassert(lo3bits < 8);
1246 vassert(IS_VALID_PFX(pfx));
1247 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1248 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
1249 Bool irregular = sz == 1 && !haveREX(pfx) && lo3bits >= 4 && lo3bits < 8;
1250 stmt( IRStmt_Put(
1251 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3), irregular ),
1252 sz==4 ? unop(Iop_32Uto64,e) : e
1257 /* Functions for getting register numbers from modrm bytes and REX
1258 when we don't have to consider the complexities of integer subreg
1259 accesses.
1261 /* Extract the g reg field from a modRM byte, and augment it using the
1262 REX.R bit from the supplied REX byte. The R bit usually is
1263 associated with the g register field.
1265 static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1267 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1268 reg += (pfx & PFX_REXR) ? 8 : 0;
1269 return reg;
1272 /* Extract the e reg field from a modRM byte, and augment it using the
1273 REX.B bit from the supplied REX byte. The B bit usually is
1274 associated with the e register field (when modrm indicates e is a
1275 register, that is).
1277 static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1279 Int rm;
1280 vassert(epartIsReg(mod_reg_rm));
1281 rm = (Int)(mod_reg_rm & 0x7);
1282 rm += (pfx & PFX_REXB) ? 8 : 0;
1283 return rm;
1287 /* General functions for dealing with integer register access. */
1289 /* Produce the guest state offset for a reference to the 'g' register
1290 field in a modrm byte, taking into account REX (or its absence),
1291 and the size of the access.
1293 static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1295 UInt reg;
1296 vassert(host_endness == VexEndnessLE);
1297 vassert(IS_VALID_PFX(pfx));
1298 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1299 reg = gregOfRexRM( pfx, mod_reg_rm );
1300 Bool irregular = sz == 1 && !haveREX(pfx) && reg >= 4 && reg < 8;
1301 return offsetIReg( sz, reg, irregular );
1304 static
1305 IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1307 switch (sz) {
1308 case 8: {
1309 return IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 );
1311 case 4: {
1312 return unop(Iop_64to32,
1313 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
1315 case 2: {
1316 return unop(Iop_64to16,
1317 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ), Ity_I64 ));
1319 case 1: {
1320 UInt regNo = gregOfRexRM( pfx, mod_reg_rm );
1321 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1322 if (irregular) {
1323 return IRExpr_Get( offsetIRegG( 1, pfx, mod_reg_rm ), Ity_I8 );
1324 } else {
1325 return unop(Iop_64to8,
1326 IRExpr_Get( offsetIRegG( 8, pfx, mod_reg_rm ),
1327 Ity_I64 ));
1330 default: {
1331 vpanic("getIRegG");
1336 static
1337 void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1339 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1340 if (sz == 4) {
1341 e = unop(Iop_32Uto64,e);
1343 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1346 static
1347 const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1349 UInt regNo = gregOfRexRM( pfx, mod_reg_rm );
1350 Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
1351 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm), irregular );
1355 static
1356 IRExpr* getIRegV ( Int sz, Prefix pfx )
1358 vassert(sz == 8 || sz == 4);
1359 if (sz == 4) {
1360 return unop(Iop_64to32,
1361 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
1362 Ity_I64 ));
1363 } else if (sz == 2) {
1364 return unop(Iop_64to16,
1365 IRExpr_Get( offsetIReg( 8, getVexNvvvv(pfx), False ),
1366 Ity_I64 ));
1367 } else {
1368 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1369 szToITy(sz) );
1373 static
1374 void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1376 vassert(sz == 8 || sz == 4);
1377 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1378 if (sz == 4) {
1379 e = unop(Iop_32Uto64,e);
1381 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1384 static
1385 const HChar* nameIRegV ( Int sz, Prefix pfx )
1387 vassert(sz == 8 || sz == 4);
1388 return nameIReg( sz, getVexNvvvv(pfx), False );
1393 /* Produce the guest state offset for a reference to the 'e' register
1394 field in a modrm byte, taking into account REX (or its absence),
1395 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1396 denotes a memory access rather than a register access.
1398 static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1400 UInt reg;
1401 vassert(host_endness == VexEndnessLE);
1402 vassert(IS_VALID_PFX(pfx));
1403 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1404 reg = eregOfRexRM( pfx, mod_reg_rm );
1405 Bool irregular = sz == 1 && !haveREX(pfx) && (reg >= 4 && reg < 8);
1406 return offsetIReg( sz, reg, irregular );
1409 static
1410 IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1412 switch (sz) {
1413 case 8: {
1414 return IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 );
1416 case 4: {
1417 return unop(Iop_64to32,
1418 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
1420 case 2: {
1421 return unop(Iop_64to16,
1422 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ), Ity_I64 ));
1424 case 1: {
1425 UInt regNo = eregOfRexRM( pfx, mod_reg_rm );
1426 Bool irregular = !haveREX(pfx) && regNo >= 4 && regNo < 8;
1427 if (irregular) {
1428 return IRExpr_Get( offsetIRegE( 1, pfx, mod_reg_rm ), Ity_I8 );
1429 } else {
1430 return unop(Iop_64to8,
1431 IRExpr_Get( offsetIRegE( 8, pfx, mod_reg_rm ),
1432 Ity_I64 ));
1435 default: {
1436 vpanic("getIRegE");
1441 static
1442 void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1444 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1445 if (sz == 4) {
1446 e = unop(Iop_32Uto64,e);
1448 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1451 static
1452 const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1454 UInt regNo = eregOfRexRM( pfx, mod_reg_rm );
1455 Bool irregular = sz == 1 && !haveREX(pfx) && regNo >= 4 && regNo < 8;
1456 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm), irregular );
1460 /*------------------------------------------------------------*/
1461 /*--- For dealing with XMM registers ---*/
1462 /*------------------------------------------------------------*/
1464 static Int ymmGuestRegOffset ( UInt ymmreg )
1466 switch (ymmreg) {
1467 case 0: return OFFB_YMM0;
1468 case 1: return OFFB_YMM1;
1469 case 2: return OFFB_YMM2;
1470 case 3: return OFFB_YMM3;
1471 case 4: return OFFB_YMM4;
1472 case 5: return OFFB_YMM5;
1473 case 6: return OFFB_YMM6;
1474 case 7: return OFFB_YMM7;
1475 case 8: return OFFB_YMM8;
1476 case 9: return OFFB_YMM9;
1477 case 10: return OFFB_YMM10;
1478 case 11: return OFFB_YMM11;
1479 case 12: return OFFB_YMM12;
1480 case 13: return OFFB_YMM13;
1481 case 14: return OFFB_YMM14;
1482 case 15: return OFFB_YMM15;
1483 default: vpanic("ymmGuestRegOffset(amd64)");
1487 static Int xmmGuestRegOffset ( UInt xmmreg )
1489 /* Correct for little-endian host only. */
1490 vassert(host_endness == VexEndnessLE);
1491 return ymmGuestRegOffset( xmmreg );
1494 /* Lanes of vector registers are always numbered from zero being the
1495 least significant lane (rightmost in the register). */
1497 static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1499 /* Correct for little-endian host only. */
1500 vassert(host_endness == VexEndnessLE);
1501 vassert(laneno >= 0 && laneno < 8);
1502 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1505 static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1507 /* Correct for little-endian host only. */
1508 vassert(host_endness == VexEndnessLE);
1509 vassert(laneno >= 0 && laneno < 4);
1510 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1513 static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1515 /* Correct for little-endian host only. */
1516 vassert(host_endness == VexEndnessLE);
1517 vassert(laneno >= 0 && laneno < 2);
1518 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1521 static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1523 /* Correct for little-endian host only. */
1524 vassert(host_endness == VexEndnessLE);
1525 vassert(laneno >= 0 && laneno < 2);
1526 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1529 static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1531 /* Correct for little-endian host only. */
1532 vassert(host_endness == VexEndnessLE);
1533 vassert(laneno >= 0 && laneno < 4);
1534 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1537 static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1539 /* Correct for little-endian host only. */
1540 vassert(host_endness == VexEndnessLE);
1541 vassert(laneno >= 0 && laneno < 8);
1542 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
1545 static IRExpr* getXMMReg ( UInt xmmreg )
1547 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1550 static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1552 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1555 static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1557 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1560 static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1562 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1565 static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1567 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1570 static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1572 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1575 static void putXMMReg ( UInt xmmreg, IRExpr* e )
1577 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1578 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1581 static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1583 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1584 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1587 static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1589 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1590 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1593 static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1595 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1596 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1599 static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1601 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1602 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1605 static IRExpr* getYMMReg ( UInt xmmreg )
1607 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1610 static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1612 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1615 static IRExpr* getYMMRegLane64F ( UInt ymmreg, Int laneno )
1617 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_F64 );
1620 static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1622 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1625 static IRExpr* getYMMRegLane32F ( UInt ymmreg, Int laneno )
1627 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_F32 );
1630 static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1632 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1635 static void putYMMReg ( UInt ymmreg, IRExpr* e )
1637 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1638 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1641 static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1643 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1644 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1647 static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1649 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1650 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1653 static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1655 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1656 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1659 static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1661 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1662 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1665 static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1667 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1668 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1671 static IRExpr* mkV128 ( UShort mask )
1673 return IRExpr_Const(IRConst_V128(mask));
1676 /* Write the low half of a YMM reg and zero out the upper half. */
1677 static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1679 putYMMRegLane128( ymmreg, 0, e );
1680 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1683 static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1685 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1686 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
1687 return unop(Iop_64to1,
1688 binop(Iop_And64,
1689 unop(Iop_1Uto64,x),
1690 unop(Iop_1Uto64,y)));
1693 /* Generate a compare-and-swap operation, operating on memory at
1694 'addr'. The expected value is 'expVal' and the new value is
1695 'newVal'. If the operation fails, then transfer control (with a
1696 no-redir jump (XXX no -- see comment at top of this file)) to
1697 'restart_point', which is presumably the address of the guest
1698 instruction again -- retrying, essentially. */
1699 static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1700 Addr64 restart_point )
1702 IRCAS* cas;
1703 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1704 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1705 IRTemp oldTmp = newTemp(tyE);
1706 IRTemp expTmp = newTemp(tyE);
1707 vassert(tyE == tyN);
1708 vassert(tyE == Ity_I64 || tyE == Ity_I32
1709 || tyE == Ity_I16 || tyE == Ity_I8);
1710 assign(expTmp, expVal);
1711 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1712 NULL, mkexpr(expTmp), NULL, newVal );
1713 stmt( IRStmt_CAS(cas) );
1714 stmt( IRStmt_Exit(
1715 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1716 mkexpr(oldTmp), mkexpr(expTmp) ),
1717 Ijk_Boring, /*Ijk_NoRedir*/
1718 IRConst_U64( restart_point ),
1719 OFFB_RIP
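/* Sketch (made-up helper, not the real dis_* code) of how a LOCK-prefixed
   "add reg, mem64" is lowered using casLE: read the old value, compute the
   new value, and let the CAS both re-check and store it.  If the CAS fails,
   the side exit inside casLE restarts the instruction.  'addr' is assumed
   to be an atom so it can be used twice; the flags thunk would be set from
   the operands afterwards. */
__attribute__ ((unused))
static void example_lock_add64 ( IRExpr* addr, IRExpr* src )
{
   IRTemp oldv = newTemp(Ity_I64);
   IRTemp newv = newTemp(Ity_I64);
   assign( oldv, loadLE(Ity_I64, addr) );
   assign( newv, binop(Iop_Add64, mkexpr(oldv), src) );
   casLE( addr, mkexpr(oldv), mkexpr(newv), guest_RIP_curr_instr );
}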
1724 /*------------------------------------------------------------*/
1725 /*--- Helpers for %rflags. ---*/
1726 /*------------------------------------------------------------*/
1728 /* -------------- Evaluating the flags-thunk. -------------- */
1730 /* Build IR to calculate all the eflags from stored
1731 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1732 Ity_I64. */
1733 static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1735 IRExpr** args
1736 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1737 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1738 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1739 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1740 IRExpr* call
1741 = mkIRExprCCall(
1742 Ity_I64,
1743 0/*regparm*/,
1744 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1745 args
1747 /* Exclude OP and NDEP from definedness checking. We're only
1748 interested in DEP1 and DEP2. */
1749 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1750 return call;
1753 /* Build IR to calculate some particular condition from stored
1754 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1755 Ity_I1. */
1756 static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1758 IRExpr** args
1759 = mkIRExprVec_5( mkU64(cond),
1760 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1761 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1762 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1763 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1764 IRExpr* call
1765 = mkIRExprCCall(
1766 Ity_I64,
1767 0/*regparm*/,
1768 "amd64g_calculate_condition", &amd64g_calculate_condition,
1769 args
1771 /* Exclude the requested condition, OP and NDEP from definedness
1772 checking. We're only interested in DEP1 and DEP2. */
1773 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
1774 return unop(Iop_64to1, call);
1777 /* Build IR to calculate just the carry flag from stored
1778 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1779 static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1781 IRExpr** args
1782 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1783 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1784 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1785 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1786 IRExpr* call
1787 = mkIRExprCCall(
1788 Ity_I64,
1789 0/*regparm*/,
1790 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1791 args
1793 /* Exclude OP and NDEP from definedness checking. We're only
1794 interested in DEP1 and DEP2. */
1795 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1796 return call;
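/* Sketch (illustration only, made-up helper name) of how a decoder for a
   conditional branch might use mk_amd64g_calculate_condition: emit a side
   exit to 'target' that is taken when the condition (here AMD64CondZ,
   i.e. ZF==1) holds. */
__attribute__ ((unused))
static void example_conditional_exit ( Addr64 target )
{
   stmt( IRStmt_Exit( mk_amd64g_calculate_condition(AMD64CondZ),
                      Ijk_Boring,
                      IRConst_U64(target),
                      OFFB_RIP ) );
}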
1800 /* -------------- Building the flags-thunk. -------------- */
1802 /* The machinery in this section builds the flag-thunk following a
1803 flag-setting operation. Hence the various setFlags_* functions.
1806 static Bool isAddSub ( IROp op8 )
1808 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
1811 static Bool isLogic ( IROp op8 )
1813 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
1816 /* U-widen 1/8/16/32/64 bit int expr to 64. */
1817 static IRExpr* widenUto64 ( IRExpr* e )
1819 switch (typeOfIRExpr(irsb->tyenv,e)) {
1820 case Ity_I64: return e;
1821 case Ity_I32: return unop(Iop_32Uto64, e);
1822 case Ity_I16: return unop(Iop_16Uto64, e);
1823 case Ity_I8: return unop(Iop_8Uto64, e);
1824 case Ity_I1: return unop(Iop_1Uto64, e);
1825 default: vpanic("widenUto64");
1829 /* S-widen 8/16/32/64 bit int expr to 64. */
1830 static IRExpr* widenSto64 ( IRExpr* e )
1832 switch (typeOfIRExpr(irsb->tyenv,e)) {
1833 case Ity_I64: return e;
1834 case Ity_I32: return unop(Iop_32Sto64, e);
1835 case Ity_I16: return unop(Iop_16Sto64, e);
1836 case Ity_I8: return unop(Iop_8Sto64, e);
1837 default: vpanic("widenSto64");
1841 /* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1842 of these combinations make sense. */
1843 static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1845 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
1846 if (src_ty == dst_ty)
1847 return e;
1848 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1849 return unop(Iop_32to16, e);
1850 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1851 return unop(Iop_32to8, e);
1852 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1853 return unop(Iop_64to32, e);
1854 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
1855 return unop(Iop_64to16, e);
1856 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
1857 return unop(Iop_64to8, e);
1859 vex_printf("\nsrc, dst tys are: ");
1860 ppIRType(src_ty);
1861 vex_printf(", ");
1862 ppIRType(dst_ty);
1863 vex_printf("\n");
1864 vpanic("narrowTo(amd64)");
1868 /* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1869 auto-sized up to the real op. */
1871 static
1872 void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1874 Int ccOp = 0;
1875 switch (ty) {
1876 case Ity_I8: ccOp = 0; break;
1877 case Ity_I16: ccOp = 1; break;
1878 case Ity_I32: ccOp = 2; break;
1879 case Ity_I64: ccOp = 3; break;
1880 default: vassert(0);
1882 switch (op8) {
1883 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1884 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1885 default: ppIROp(op8);
1886 vpanic("setFlags_DEP1_DEP2(amd64)");
1888 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1889 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1890 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1891 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
1895 /* Set the OP and DEP1 fields only, and write zero to DEP2. */
1897 static
1898 void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1900 Int ccOp = 0;
1901 switch (ty) {
1902 case Ity_I8: ccOp = 0; break;
1903 case Ity_I16: ccOp = 1; break;
1904 case Ity_I32: ccOp = 2; break;
1905 case Ity_I64: ccOp = 3; break;
1906 default: vassert(0);
1908 switch (op8) {
1909 case Iop_Or8:
1910 case Iop_And8:
1911 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1912 default: ppIROp(op8);
1913 vpanic("setFlags_DEP1(amd64)");
1915 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1916 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1917 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1918 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
1922 /* For shift operations, we put in the result and the undershifted
1923 result. If, however, the shift amount is zero, the thunk is left
1924 unchanged. */
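/* Illustrative example (a sketch, not used by the decoder): for
   "shlb $1, %al" with %al == 0x80, 'res' is 0x00 and 'resUS' -- the
   value shifted by one place less, i.e. not shifted at all -- is
   0x80.  The flags helper can then recover CF from the top bit of
   resUS, and OF from the top bits of res and resUS taken together,
   which is why both values are parked in the thunk. */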
1926 static void setFlags_DEP1_DEP2_shift ( IROp op64,
1927 IRTemp res,
1928 IRTemp resUS,
1929 IRType ty,
1930 IRTemp guard )
1932 Int ccOp = 0;
1933 switch (ty) {
1934 case Ity_I8: ccOp = 0; break;
1935 case Ity_I16: ccOp = 1; break;
1936 case Ity_I32: ccOp = 2; break;
1937 case Ity_I64: ccOp = 3; break;
1938 default: vassert(0);
1941 vassert(guard);
1943 /* Both kinds of right shifts are handled by the same thunk
1944 operation. */
1945 switch (op64) {
1946 case Iop_Shr64:
1947 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1948 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1949 default: ppIROp(op64);
1950 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1953 /* guard :: Ity_I8. We need to convert it to I1. */
1954 IRTemp guardB = newTemp(Ity_I1);
1955 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1957 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1958 stmt( IRStmt_Put( OFFB_CC_OP,
1959 IRExpr_ITE( mkexpr(guardB),
1960 mkU64(ccOp),
1961 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
1962 stmt( IRStmt_Put( OFFB_CC_DEP1,
1963 IRExpr_ITE( mkexpr(guardB),
1964 widenUto64(mkexpr(res)),
1965 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
1966 stmt( IRStmt_Put( OFFB_CC_DEP2,
1967 IRExpr_ITE( mkexpr(guardB),
1968 widenUto64(mkexpr(resUS)),
1969 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
1970 stmt( IRStmt_Put( OFFB_CC_NDEP,
1971 mkU64(0) ));
1975 /* For the inc/dec case, we store in DEP1 the result value and in NDEP
1976 the former value of the carry flag, which unfortunately we have to
1977 compute. */
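/* Worked example (illustrative only): "incb %al" with %al == 0xFF
   produces 0x00 and sets ZF, but must leave CF exactly as it was
   before the instruction.  That is why the old CF is computed from
   the existing thunk and parked in NDEP before OP/DEP1/DEP2 are
   overwritten below. */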
1979 static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1981 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1983 switch (ty) {
1984 case Ity_I8: ccOp += 0; break;
1985 case Ity_I16: ccOp += 1; break;
1986 case Ity_I32: ccOp += 2; break;
1987 case Ity_I64: ccOp += 3; break;
1988 default: vassert(0);
1991 /* This has to come first, because calculating the C flag
1992 may require reading all four thunk fields. */
1993 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1994 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1995 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
1996 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
2000 /* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
2001 two arguments. */
2003 static
2004 void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
2006 switch (ty) {
2007 case Ity_I8:
2008 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
2009 break;
2010 case Ity_I16:
2011 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
2012 break;
2013 case Ity_I32:
2014 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
2015 break;
2016 case Ity_I64:
2017 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
2018 break;
2019 default:
2020 vpanic("setFlags_MUL(amd64)");
2022 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
2023 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
2024 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
2028 /* -------------- Condition codes. -------------- */
2030 /* Condition codes, using the AMD encoding. */
2032 static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
2034 switch (cond) {
2035 case AMD64CondO: return "o";
2036 case AMD64CondNO: return "no";
2037 case AMD64CondB: return "b";
2038 case AMD64CondNB: return "ae"; /*"nb";*/
2039 case AMD64CondZ: return "e"; /*"z";*/
2040 case AMD64CondNZ: return "ne"; /*"nz";*/
2041 case AMD64CondBE: return "be";
2042 case AMD64CondNBE: return "a"; /*"nbe";*/
2043 case AMD64CondS: return "s";
2044 case AMD64CondNS: return "ns";
2045 case AMD64CondP: return "p";
2046 case AMD64CondNP: return "np";
2047 case AMD64CondL: return "l";
2048 case AMD64CondNL: return "ge"; /*"nl";*/
2049 case AMD64CondLE: return "le";
2050 case AMD64CondNLE: return "g"; /*"nle";*/
2051 case AMD64CondAlways: return "ALWAYS";
2052 default: vpanic("name_AMD64Condcode");
2056 static
2057 AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
2058 /*OUT*/Bool* needInvert )
2060 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
2061 if (cond & 1) {
2062 *needInvert = True;
2063 return cond-1;
2064 } else {
2065 *needInvert = False;
2066 return cond;
2071 /* -------------- Helpers for ADD/SUB with carry. -------------- */
2073 /* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
2074 appropriately.
2076 Optionally, generate a store for the 'tres' value. This can either
2077 be a normal store, or it can be a cas-with-possible-failure style
2078 store:
2080 if taddr is IRTemp_INVALID, then no store is generated.
2082 if taddr is not IRTemp_INVALID, then a store (using taddr as
2083 the address) is generated:
2085 if texpVal is IRTemp_INVALID then a normal store is
2086 generated, and restart_point must be zero (it is irrelevant).
2088 if texpVal is not IRTemp_INVALID then a cas-style store is
2089 generated. texpVal is the expected value, restart_point
2090 is the restart point if the store fails, and texpVal must
2091 have the same type as tres.
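/* Illustrative call patterns, mirroring the uses in the dis_op2_*
   and dis_Grp1 routines further below (a sketch only, not extra
   functionality):

     helper_ADC( size, dst1, dst0, src,
                 IRTemp_INVALID, IRTemp_INVALID, 0 );   -- no store

     helper_ADC( size, dst1, dst0, src,
                 addr, IRTemp_INVALID, 0 );             -- plain store

     helper_ADC( size, dst1, dst0, src,
                 addr, dst0, guest_RIP_curr_instr );    -- cas-style
                                                           store
*/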
2094 static void helper_ADC ( Int sz,
2095 IRTemp tres, IRTemp ta1, IRTemp ta2,
2096 /* info about optional store: */
2097 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2099 UInt thunkOp;
2100 IRType ty = szToITy(sz);
2101 IRTemp oldc = newTemp(Ity_I64);
2102 IRTemp oldcn = newTemp(ty);
2103 IROp plus = mkSizedOp(ty, Iop_Add8);
2104 IROp xor = mkSizedOp(ty, Iop_Xor8);
2106 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2108 switch (sz) {
2109 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2110 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2111 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2112 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2113 default: vassert(0);
2116 /* oldc = old carry flag, 0 or 1 */
2117 assign( oldc, binop(Iop_And64,
2118 mk_amd64g_calculate_rflags_c(),
2119 mkU64(1)) );
2121 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2123 assign( tres, binop(plus,
2124 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2125 mkexpr(oldcn)) );
2127 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2128 start of this function. */
2129 if (taddr != IRTemp_INVALID) {
2130 if (texpVal == IRTemp_INVALID) {
2131 vassert(restart_point == 0);
2132 storeLE( mkexpr(taddr), mkexpr(tres) );
2133 } else {
2134 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2135 /* .. and hence 'texpVal' has the same type as 'tres'. */
2136 casLE( mkexpr(taddr),
2137 mkexpr(texpVal), mkexpr(tres), restart_point );
2141 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2142 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2143 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2144 mkexpr(oldcn)) )) );
2145 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2149 /* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
2150 appropriately. As with helper_ADC, possibly generate a store of
2151 the result -- see comments on helper_ADC for details.
2153 static void helper_SBB ( Int sz,
2154 IRTemp tres, IRTemp ta1, IRTemp ta2,
2155 /* info about optional store: */
2156 IRTemp taddr, IRTemp texpVal, Addr64 restart_point )
2158 UInt thunkOp;
2159 IRType ty = szToITy(sz);
2160 IRTemp oldc = newTemp(Ity_I64);
2161 IRTemp oldcn = newTemp(ty);
2162 IROp minus = mkSizedOp(ty, Iop_Sub8);
2163 IROp xor = mkSizedOp(ty, Iop_Xor8);
2165 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2167 switch (sz) {
2168 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2169 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2170 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2171 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2172 default: vassert(0);
2175 /* oldc = old carry flag, 0 or 1 */
2176 assign( oldc, binop(Iop_And64,
2177 mk_amd64g_calculate_rflags_c(),
2178 mkU64(1)) );
2180 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2182 assign( tres, binop(minus,
2183 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2184 mkexpr(oldcn)) );
2186 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2187 start of this function. */
2188 if (taddr != IRTemp_INVALID) {
2189 if (texpVal == IRTemp_INVALID) {
2190 vassert(restart_point == 0);
2191 storeLE( mkexpr(taddr), mkexpr(tres) );
2192 } else {
2193 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2194 /* .. and hence 'texpVal' has the same type as 'tres'. */
2195 casLE( mkexpr(taddr),
2196 mkexpr(texpVal), mkexpr(tres), restart_point );
2200 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2201 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2202 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2203 mkexpr(oldcn)) )) );
2204 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2208 /* Given ta1, ta2 and tres, compute tres = ADCX(ta1,ta2) or tres = ADOX(ta1,ta2)
2209 and set flags appropriately.
2211 static void helper_ADCX_ADOX ( Bool isADCX, Int sz,
2212 IRTemp tres, IRTemp ta1, IRTemp ta2 )
2214 UInt thunkOp;
2215 IRType ty = szToITy(sz);
2216 IRTemp oldflags = newTemp(Ity_I64);
2217 IRTemp oldOC = newTemp(Ity_I64); // old O or C flag
2218 IRTemp oldOCn = newTemp(ty); // old O or C flag, narrowed
2219 IROp plus = mkSizedOp(ty, Iop_Add8);
2220 IROp xor = mkSizedOp(ty, Iop_Xor8);
2222 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2224 switch (sz) {
2225 case 8: thunkOp = isADCX ? AMD64G_CC_OP_ADCX64
2226 : AMD64G_CC_OP_ADOX64; break;
2227 case 4: thunkOp = isADCX ? AMD64G_CC_OP_ADCX32
2228 : AMD64G_CC_OP_ADOX32; break;
2229 default: vassert(0);
2232 assign( oldflags, mk_amd64g_calculate_rflags_all() );
2234 /* oldOC = old overflow/carry flag, 0 or 1 */
2235 assign( oldOC, binop(Iop_And64,
2236 binop(Iop_Shr64,
2237 mkexpr(oldflags),
2238 mkU8(isADCX ? AMD64G_CC_SHIFT_C
2239 : AMD64G_CC_SHIFT_O)),
2240 mkU64(1)) );
2242 assign( oldOCn, narrowTo(ty, mkexpr(oldOC)) );
2244 assign( tres, binop(plus,
2245 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2246 mkexpr(oldOCn)) );
2248 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
2249 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2250 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2251 mkexpr(oldOCn)) )) );
2252 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldflags) ) );
2256 /* -------------- Helpers for disassembly printing. -------------- */
2258 static const HChar* nameGrp1 ( Int opc_aux )
2260 static const HChar* grp1_names[8]
2261 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2262 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2263 return grp1_names[opc_aux];
2266 static const HChar* nameGrp2 ( Int opc_aux )
2268 static const HChar* grp2_names[8]
2269 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
2270 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
2271 return grp2_names[opc_aux];
2274 static const HChar* nameGrp4 ( Int opc_aux )
2276 static const HChar* grp4_names[8]
2277 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2278 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2279 return grp4_names[opc_aux];
2282 static const HChar* nameGrp5 ( Int opc_aux )
2284 static const HChar* grp5_names[8]
2285 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2286 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2287 return grp5_names[opc_aux];
2290 static const HChar* nameGrp8 ( Int opc_aux )
2292 static const HChar* grp8_names[8]
2293 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2294 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2295 return grp8_names[opc_aux];
2298 static const HChar* nameSReg ( UInt sreg )
2300 switch (sreg) {
2301 case R_ES: return "%es";
2302 case R_CS: return "%cs";
2303 case R_SS: return "%ss";
2304 case R_DS: return "%ds";
2305 case R_FS: return "%fs";
2306 case R_GS: return "%gs";
2307 default: vpanic("nameSReg(amd64)");
2311 static const HChar* nameMMXReg ( Int mmxreg )
2313 static const HChar* mmx_names[8]
2314 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2315 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2316 return mmx_names[mmxreg];
2319 static const HChar* nameXMMReg ( Int xmmreg )
2321 static const HChar* xmm_names[16]
2322 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2323 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2324 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2325 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2326 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2327 return xmm_names[xmmreg];
2330 static const HChar* nameMMXGran ( Int gran )
2332 switch (gran) {
2333 case 0: return "b";
2334 case 1: return "w";
2335 case 2: return "d";
2336 case 3: return "q";
2337 default: vpanic("nameMMXGran(amd64,guest)");
2341 static HChar nameISize ( Int size )
2343 switch (size) {
2344 case 8: return 'q';
2345 case 4: return 'l';
2346 case 2: return 'w';
2347 case 1: return 'b';
2348 default: vpanic("nameISize(amd64)");
2352 static const HChar* nameYMMReg ( Int ymmreg )
2354 static const HChar* ymm_names[16]
2355 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2356 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2357 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2358 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2359 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2360 return ymm_names[ymmreg];
2364 /*------------------------------------------------------------*/
2365 /*--- JMP helpers ---*/
2366 /*------------------------------------------------------------*/
2368 static void jmp_lit( /*MOD*/DisResult* dres,
2369 IRJumpKind kind, Addr64 d64 )
2371 vassert(dres->whatNext == Dis_Continue);
2372 vassert(dres->len == 0);
2373 vassert(dres->jk_StopHere == Ijk_INVALID);
2374 dres->whatNext = Dis_StopHere;
2375 dres->jk_StopHere = kind;
2376 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
2379 static void jmp_treg( /*MOD*/DisResult* dres,
2380 IRJumpKind kind, IRTemp t )
2382 vassert(dres->whatNext == Dis_Continue);
2383 vassert(dres->len == 0);
2384 vassert(dres->jk_StopHere == Ijk_INVALID);
2385 dres->whatNext = Dis_StopHere;
2386 dres->jk_StopHere = kind;
2387 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
2390 static
2391 void jcc_01 ( /*MOD*/DisResult* dres,
2392 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
2394 Bool invert;
2395 AMD64Condcode condPos;
2396 vassert(dres->whatNext == Dis_Continue);
2397 vassert(dres->len == 0);
2398 vassert(dres->jk_StopHere == Ijk_INVALID);
2399 dres->whatNext = Dis_StopHere;
2400 dres->jk_StopHere = Ijk_Boring;
2401 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2402 if (invert) {
2403 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2404 Ijk_Boring,
2405 IRConst_U64(d64_false),
2406 OFFB_RIP ) );
2407 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
2408 } else {
2409 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2410 Ijk_Boring,
2411 IRConst_U64(d64_true),
2412 OFFB_RIP ) );
2413 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
2417 /* Let new_rsp be the %rsp value after a call/return. Let nia be the
2418 guest address of the next instruction to be executed.
2420 This function generates an AbiHint to say that -128(%rsp)
2421 .. -1(%rsp) should now be regarded as uninitialised.
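/* Concretely (ELF, so szB is 128): if a "ret" leaves %rsp equal to
   R, the AbiHint generated below tells tools that the 128 bytes at
   addresses R-128 .. R-1 may now be treated as uninitialised. */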
2423 static
2424 void make_redzone_AbiHint ( const VexAbiInfo* vbi,
2425 IRTemp new_rsp, IRTemp nia, const HChar* who )
2427 Int szB = vbi->guest_stack_redzone_size;
2428 vassert(szB >= 0);
2430 /* A bit of a kludge. Currently the only ABI we've guested AMD64
2431 for is ELF. So just check it's the expected 128 value
2432 (paranoia). */
2433 vassert(szB == 128);
2435 if (0) vex_printf("AbiHint: %s\n", who);
2436 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
2437 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
2438 if (szB > 0)
2439 stmt( IRStmt_AbiHint(
2440 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
2441 szB,
2442 mkexpr(nia)
2447 /*------------------------------------------------------------*/
2448 /*--- Disassembling addressing modes ---*/
2449 /*------------------------------------------------------------*/
2451 static
2452 const HChar* segRegTxt ( Prefix pfx )
2454 if (pfx & PFX_CS) return "%cs:";
2455 if (pfx & PFX_DS) return "%ds:";
2456 if (pfx & PFX_ES) return "%es:";
2457 if (pfx & PFX_FS) return "%fs:";
2458 if (pfx & PFX_GS) return "%gs:";
2459 if (pfx & PFX_SS) return "%ss:";
2460 return ""; /* no override */
2464 /* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2465 linear address by adding any required segment override as indicated
2466 by pfx, and also dealing with any address size override
2467 present. */
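/* Illustrative example only: with a %fs override and
   guest_amd64_assume_fs_is_const set, an access to %fs:0x28 turns
   into a plain access at guest_FS_CONST + 0x28 -- see the PFX_FS
   case below. */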
2468 static
2469 IRExpr* handleAddrOverrides ( const VexAbiInfo* vbi,
2470 Prefix pfx, IRExpr* virtual )
2472 /* --- address size override --- */
2473 if (haveASO(pfx))
2474 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2476 /* Note that the below are hacks that rely on the assumption
2477 that %fs or %gs are constant.
2478 Typically, %fs is always 0x63 on linux (in the main thread, it
2479 stays at value 0), %gs always 0x60 on Darwin, ... */
2480 /* --- segment overrides --- */
2481 if (pfx & PFX_FS) {
2482 if (vbi->guest_amd64_assume_fs_is_const) {
2483 /* return virtual + guest_FS_CONST. */
2484 virtual = binop(Iop_Add64, virtual,
2485 IRExpr_Get(OFFB_FS_CONST, Ity_I64));
2486 } else {
2487 unimplemented("amd64 %fs segment override");
2491 if (pfx & PFX_GS) {
2492 if (vbi->guest_amd64_assume_gs_is_const) {
2493 /* return virtual + guest_GS_CONST. */
2494 virtual = binop(Iop_Add64, virtual,
2495 IRExpr_Get(OFFB_GS_CONST, Ity_I64));
2496 } else {
2497 unimplemented("amd64 %gs segment override");
2501 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
2503 return virtual;
2506 //.. {
2507 //.. Int sreg;
2508 //.. IRType hWordTy;
2509 //.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2510 //..
2511 //.. if (sorb == 0)
2512 //.. /* the common case - no override */
2513 //.. return virtual;
2514 //..
2515 //.. switch (sorb) {
2516 //.. case 0x3E: sreg = R_DS; break;
2517 //.. case 0x26: sreg = R_ES; break;
2518 //.. case 0x64: sreg = R_FS; break;
2519 //.. case 0x65: sreg = R_GS; break;
2520 //.. default: vpanic("handleAddrOverrides(x86,guest)");
2521 //.. }
2522 //..
2523 //.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2524 //..
2525 //.. seg_selector = newTemp(Ity_I32);
2526 //.. ldt_ptr = newTemp(hWordTy);
2527 //.. gdt_ptr = newTemp(hWordTy);
2528 //.. r64 = newTemp(Ity_I64);
2529 //..
2530 //.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2531 //.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2532 //.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2533 //..
2534 //.. /*
2535 //.. Call this to do the translation and limit checks:
2536 //.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2537 //.. UInt seg_selector, UInt virtual_addr )
2538 //.. */
2539 //.. assign(
2540 //.. r64,
2541 //.. mkIRExprCCall(
2542 //.. Ity_I64,
2543 //.. 0/*regparms*/,
2544 //.. "x86g_use_seg_selector",
2545 //.. &x86g_use_seg_selector,
2546 //.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2547 //.. mkexpr(seg_selector), virtual)
2548 //.. )
2549 //.. );
2550 //..
2551 //.. /* If the high 32 of the result are non-zero, there was a
2552 //.. failure in address translation. In which case, make a
2553 //.. quick exit.
2554 //.. */
2555 //.. stmt(
2556 //.. IRStmt_Exit(
2557 //.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2558 //.. Ijk_MapFail,
2559 //.. IRConst_U32( guest_eip_curr_instr )
2560 //.. )
2561 //.. );
2562 //..
2563 //.. /* otherwise, here's the translated result. */
2564 //.. return unop(Iop_64to32, mkexpr(r64));
2565 //.. }
2568 /* Generate IR to calculate an address indicated by a ModRM and
2569 following SIB bytes. The expression, and the number of bytes in
2570 the address mode, are returned (the latter in *len). Note that
2571 this fn should not be called if the R/M part of the address denotes
2572 a register instead of memory. If front-end tracing is enabled, text of
2573 the addressing mode is placed in buf.
2575 The computed address is stored in a new tempreg, and the
2576 identity of the tempreg is returned.
2578 extra_bytes holds the number of bytes after the amode, as supplied
2579 by the caller. This is needed to make sense of %rip-relative
2580 addresses. Note that the value that *len is set to is only the
2581 length of the amode itself and does not include the value supplied
2582 in extra_bytes.
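/* Worked example (illustrative only): for the insn bytes 8B 46 08
   (movl 8(%rsi),%eax), the caller points delta at the modRM byte
   0x46 (mod=01, reg=000, rm=110).  disAMode returns a tempreg
   holding %rsi + 8, sets *len to 2 (modRM byte plus the 8-bit
   displacement) and, if tracing, writes "8(%rsi)" into buf. */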
2585 static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2587 IRTemp tmp = newTemp(Ity_I64);
2588 assign( tmp, addr64 );
2589 return tmp;
2592 static
2593 IRTemp disAMode ( /*OUT*/Int* len,
2594 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2595 /*OUT*/HChar* buf, Int extra_bytes )
2597 UChar mod_reg_rm = getUChar(delta);
2598 delta++;
2600 buf[0] = (UChar)0;
2601 vassert(extra_bytes >= 0 && extra_bytes < 10);
2603 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2604 jump table seems a bit excessive.
2606 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2607 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2608 /* is now XX0XXYYY */
2609 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2610 switch (mod_reg_rm) {
2612 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2613 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2615 case 0x00: case 0x01: case 0x02: case 0x03:
2616 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2617 { UChar rm = toUChar(mod_reg_rm & 7);
2618 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2619 *len = 1;
2620 return disAMode_copy2tmp(
2621 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
2624 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2625 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2627 case 0x08: case 0x09: case 0x0A: case 0x0B:
2628 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2629 { UChar rm = toUChar(mod_reg_rm & 7);
2630 Long d = getSDisp8(delta);
2631 if (d == 0) {
2632 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
2633 } else {
2634 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2636 *len = 2;
2637 return disAMode_copy2tmp(
2638 handleAddrOverrides(vbi, pfx,
2639 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2642 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2643 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2645 case 0x10: case 0x11: case 0x12: case 0x13:
2646 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2647 { UChar rm = toUChar(mod_reg_rm & 7);
2648 Long d = getSDisp32(delta);
2649 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
2650 *len = 5;
2651 return disAMode_copy2tmp(
2652 handleAddrOverrides(vbi, pfx,
2653 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
2656 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2657 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2658 case 0x18: case 0x19: case 0x1A: case 0x1B:
2659 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2660 vpanic("disAMode(amd64): not an addr!");
2662 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
2663 correctly at the start of handling each instruction. */
2664 case 0x05:
2665 { Long d = getSDisp32(delta);
2666 *len = 5;
2667 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
2668 /* We need to know the next instruction's start address.
2669 Try and figure out what it is, record the guess, and ask
2670 the top-level driver logic (bbToIR_AMD64) to check we
2671 guessed right, after the instruction is completely
2672 decoded. */
2673 guest_RIP_next_mustcheck = True;
2674 guest_RIP_next_assumed = guest_RIP_bbstart
2675 + delta+4 + extra_bytes;
2676 return disAMode_copy2tmp(
2677 handleAddrOverrides(vbi, pfx,
2678 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
2679 mkU64(d))));
2682 case 0x04: {
2683 /* SIB, with no displacement. Special cases:
2684 -- %rsp cannot act as an index value.
2685 If index_r indicates %rsp, zero is used for the index.
2686 -- when mod is zero and base indicates RBP or R13, base is
2687 instead a 32-bit sign-extended literal.
2688 It's all madness, I tell you. Extract %index, %base and
2689 scale from the SIB byte. The value denoted is then:
2690 | %index == %RSP && (%base == %RBP || %base == %R13)
2691 = d32 following SIB byte
2692 | %index == %RSP && !(%base == %RBP || %base == %R13)
2693 = %base
2694 | %index != %RSP && (%base == %RBP || %base == %R13)
2695 = d32 following SIB byte + (%index << scale)
2696 | %index != %RSP && !(%base == %RBP || %base == %R13)
2697 = %base + (%index << scale)
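/* Worked example (illustrative only): a SIB byte of 0x58 with no
   REX bits gives scale=1, index=%rbx, base=%rax.  Neither special
   case applies, so the denoted address is %rax + (%rbx << 1) and
   *len is 2. */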
2699 UChar sib = getUChar(delta);
2700 UChar scale = toUChar((sib >> 6) & 3);
2701 UChar index_r = toUChar((sib >> 3) & 7);
2702 UChar base_r = toUChar(sib & 7);
2703 /* correct since #(R13) == 8 + #(RBP) */
2704 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2705 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
2706 delta++;
2708 if ((!index_is_SP) && (!base_is_BPor13)) {
2709 if (scale == 0) {
2710 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2711 nameIRegRexB(8,pfx,base_r),
2712 nameIReg64rexX(pfx,index_r));
2713 } else {
2714 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2715 nameIRegRexB(8,pfx,base_r),
2716 nameIReg64rexX(pfx,index_r), 1<<scale);
2718 *len = 2;
2719 return
2720 disAMode_copy2tmp(
2721 handleAddrOverrides(vbi, pfx,
2722 binop(Iop_Add64,
2723 getIRegRexB(8,pfx,base_r),
2724 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2725 mkU8(scale)))));
2728 if ((!index_is_SP) && base_is_BPor13) {
2729 Long d = getSDisp32(delta);
2730 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
2731 nameIReg64rexX(pfx,index_r), 1<<scale);
2732 *len = 6;
2733 return
2734 disAMode_copy2tmp(
2735 handleAddrOverrides(vbi, pfx,
2736 binop(Iop_Add64,
2737 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
2738 mkU8(scale)),
2739 mkU64(d))));
2742 if (index_is_SP && (!base_is_BPor13)) {
2743 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
2744 *len = 2;
2745 return disAMode_copy2tmp(
2746 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
2749 if (index_is_SP && base_is_BPor13) {
2750 Long d = getSDisp32(delta);
2751 DIS(buf, "%s%lld", segRegTxt(pfx), d);
2752 *len = 6;
2753 return disAMode_copy2tmp(
2754 handleAddrOverrides(vbi, pfx, mkU64(d)));
2757 vassert(0);
2760 /* SIB, with 8-bit displacement. Special cases:
2761 -- %rsp cannot act as an index value.
2762 If index_r indicates %rsp, zero is used for the index.
2763 Denoted value is:
2764 | %index == %RSP
2765 = d8 + %base
2766 | %index != %RSP
2767 = d8 + %base + (%index << scale)
2769 case 0x0C: {
2770 UChar sib = getUChar(delta);
2771 UChar scale = toUChar((sib >> 6) & 3);
2772 UChar index_r = toUChar((sib >> 3) & 7);
2773 UChar base_r = toUChar(sib & 7);
2774 Long d = getSDisp8(delta+1);
2776 if (index_r == R_RSP && 0==getRexX(pfx)) {
2777 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2778 d, nameIRegRexB(8,pfx,base_r));
2779 *len = 3;
2780 return disAMode_copy2tmp(
2781 handleAddrOverrides(vbi, pfx,
2782 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2783 } else {
2784 if (scale == 0) {
2785 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2786 nameIRegRexB(8,pfx,base_r),
2787 nameIReg64rexX(pfx,index_r));
2788 } else {
2789 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2790 nameIRegRexB(8,pfx,base_r),
2791 nameIReg64rexX(pfx,index_r), 1<<scale);
2793 *len = 3;
2794 return
2795 disAMode_copy2tmp(
2796 handleAddrOverrides(vbi, pfx,
2797 binop(Iop_Add64,
2798 binop(Iop_Add64,
2799 getIRegRexB(8,pfx,base_r),
2800 binop(Iop_Shl64,
2801 getIReg64rexX(pfx,index_r), mkU8(scale))),
2802 mkU64(d))));
2804 vassert(0); /*NOTREACHED*/
2807 /* SIB, with 32-bit displacement. Special cases:
2808 -- %rsp cannot act as an index value.
2809 If index_r indicates %rsp, zero is used for the index.
2810 Denoted value is:
2811 | %index == %RSP
2812 = d32 + %base
2813 | %index != %RSP
2814 = d32 + %base + (%index << scale)
2816 case 0x14: {
2817 UChar sib = getUChar(delta);
2818 UChar scale = toUChar((sib >> 6) & 3);
2819 UChar index_r = toUChar((sib >> 3) & 7);
2820 UChar base_r = toUChar(sib & 7);
2821 Long d = getSDisp32(delta+1);
2823 if (index_r == R_RSP && 0==getRexX(pfx)) {
2824 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
2825 d, nameIRegRexB(8,pfx,base_r));
2826 *len = 6;
2827 return disAMode_copy2tmp(
2828 handleAddrOverrides(vbi, pfx,
2829 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
2830 } else {
2831 if (scale == 0) {
2832 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2833 nameIRegRexB(8,pfx,base_r),
2834 nameIReg64rexX(pfx,index_r));
2835 } else {
2836 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2837 nameIRegRexB(8,pfx,base_r),
2838 nameIReg64rexX(pfx,index_r), 1<<scale);
2840 *len = 6;
2841 return
2842 disAMode_copy2tmp(
2843 handleAddrOverrides(vbi, pfx,
2844 binop(Iop_Add64,
2845 binop(Iop_Add64,
2846 getIRegRexB(8,pfx,base_r),
2847 binop(Iop_Shl64,
2848 getIReg64rexX(pfx,index_r), mkU8(scale))),
2849 mkU64(d))));
2851 vassert(0); /*NOTREACHED*/
2854 default:
2855 vpanic("disAMode(amd64)");
2856 return 0; /*notreached*/
2861 /* Similarly for VSIB addressing. This returns just the addend,
2862 and fills in *rI and *vscale with the register number of the vector
2863 index and its multiplicand. */
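/* Illustrative example only, assuming no REX bits: for a gather
   memory operand of the form (%rax,%ymm3,4), this returns a temp
   holding just %rax, sets *rI to 3 and *vscale to 4; the caller
   combines the vector index with the returned addend itself. */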
2864 static
2865 IRTemp disAVSIBMode ( /*OUT*/Int* len,
2866 const VexAbiInfo* vbi, Prefix pfx, Long delta,
2867 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2868 IRType ty, /*OUT*/Int* vscale )
2870 UChar mod_reg_rm = getUChar(delta);
2871 const HChar *vindex;
2873 *len = 0;
2874 *rI = 0;
2875 *vscale = 0;
2876 buf[0] = (UChar)0;
2877 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2878 return IRTemp_INVALID;
2880 UChar sib = getUChar(delta+1);
2881 UChar scale = toUChar((sib >> 6) & 3);
2882 UChar index_r = toUChar((sib >> 3) & 7);
2883 UChar base_r = toUChar(sib & 7);
2884 Long d = 0;
2885 /* correct since #(R13) == 8 + #(RBP) */
2886 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2887 delta += 2;
2888 *len = 2;
2890 *rI = index_r | (getRexX(pfx) << 3);
2891 if (ty == Ity_V128)
2892 vindex = nameXMMReg(*rI);
2893 else
2894 vindex = nameYMMReg(*rI);
2895 *vscale = 1<<scale;
2897 switch (mod_reg_rm >> 6) {
2898 case 0:
2899 if (base_is_BPor13) {
2900 d = getSDisp32(delta);
2901 *len += 4;
2902 if (scale == 0) {
2903 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2904 } else {
2905 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2907 return disAMode_copy2tmp( mkU64(d) );
2908 } else {
2909 if (scale == 0) {
2910 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2911 nameIRegRexB(8,pfx,base_r), vindex);
2912 } else {
2913 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2914 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2917 break;
2918 case 1:
2919 d = getSDisp8(delta);
2920 *len += 1;
2921 goto have_disp;
2922 case 2:
2923 d = getSDisp32(delta);
2924 *len += 4;
2925 have_disp:
2926 if (scale == 0) {
2927 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2928 nameIRegRexB(8,pfx,base_r), vindex);
2929 } else {
2930 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2931 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2933 break;
2936 if (!d)
2937 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2938 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2939 mkU64(d)) );
2943 /* Figure out the number of (insn-stream) bytes constituting the amode
2944 beginning at delta. Is useful for getting hold of literals beyond
2945 the end of the amode before it has been disassembled. */
2947 static UInt lengthAMode ( Prefix pfx, Long delta )
2949 UChar mod_reg_rm = getUChar(delta);
2950 delta++;
2952 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2953 jump table seems a bit excessive.
2955 mod_reg_rm &= 0xC7; /* is now XX000YYY */
2956 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2957 /* is now XX0XXYYY */
2958 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
2959 switch (mod_reg_rm) {
2961 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2962 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2964 case 0x00: case 0x01: case 0x02: case 0x03:
2965 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
2966 return 1;
2968 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2969 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2971 case 0x08: case 0x09: case 0x0A: case 0x0B:
2972 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
2973 return 2;
2975 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2976 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2978 case 0x10: case 0x11: case 0x12: case 0x13:
2979 /* ! 14 */ case 0x15: case 0x16: case 0x17:
2980 return 5;
2982 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2983 /* REX.B==1: a register, %r8 .. %r15. This shouldn't happen. */
2984 /* Not an address, but still handled. */
2985 case 0x18: case 0x19: case 0x1A: case 0x1B:
2986 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2987 return 1;
2989 /* RIP + disp32. */
2990 case 0x05:
2991 return 5;
2993 case 0x04: {
2994 /* SIB, with no displacement. */
2995 UChar sib = getUChar(delta);
2996 UChar base_r = toUChar(sib & 7);
2997 /* correct since #(R13) == 8 + #(RBP) */
2998 Bool base_is_BPor13 = toBool(base_r == R_RBP);
3000 if (base_is_BPor13) {
3001 return 6;
3002 } else {
3003 return 2;
3007 /* SIB, with 8-bit displacement. */
3008 case 0x0C:
3009 return 3;
3011 /* SIB, with 32-bit displacement. */
3012 case 0x14:
3013 return 6;
3015 default:
3016 vpanic("lengthAMode(amd64)");
3017 return 0; /*notreached*/
3022 /*------------------------------------------------------------*/
3023 /*--- Disassembling common idioms ---*/
3024 /*------------------------------------------------------------*/
3026 typedef
3027 enum { WithFlagNone=2, WithFlagCarry, WithFlagCarryX, WithFlagOverX }
3028 WithFlag;
3030 /* Handle binary integer instructions of the form
3031 op E, G meaning
3032 op reg-or-mem, reg
3033 Is passed a pointer to the modRM byte, the actual operation, and the
3034 data size. Returns the address advanced completely over this
3035 instruction.
3037 E(src) is reg-or-mem
3038 G(dst) is reg.
3040 If E is reg, --> GET %G, tmp
3041 OP %E, tmp
3042 PUT tmp, %G
3044 If E is mem and OP is not reversible,
3045 --> (getAddr E) -> tmpa
3046 LD (tmpa), tmpa
3047 GET %G, tmp2
3048 OP tmpa, tmp2
3049 PUT tmp2, %G
3051 If E is mem and OP is reversible
3052 --> (getAddr E) -> tmpa
3053 LD (tmpa), tmpa
3054 OP %G, tmpa
3055 PUT tmpa, %G
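/* Worked example (illustrative only): "add (%rcx),%rbx" arrives
   here with op8 == Iop_Add8, size == 8 and an E part denoting
   memory.  The memory path below loads the qword at %rcx into
   'src', takes the old %rbx as 'dst0', computes dst1 = dst0 + src,
   builds the ADD flags thunk via setFlags_DEP1_DEP2, and finally
   writes dst1 back to %rbx (the G register). */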
3057 static
3058 ULong dis_op2_E_G ( const VexAbiInfo* vbi,
3059 Prefix pfx,
3060 IROp op8,
3061 WithFlag flag,
3062 Bool keep,
3063 Int size,
3064 Long delta0,
3065 const HChar* t_amd64opc )
3067 HChar dis_buf[50];
3068 Int len;
3069 IRType ty = szToITy(size);
3070 IRTemp dst1 = newTemp(ty);
3071 IRTemp src = newTemp(ty);
3072 IRTemp dst0 = newTemp(ty);
3073 UChar rm = getUChar(delta0);
3074 IRTemp addr = IRTemp_INVALID;
3076 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3077 switch (op8) {
3078 case Iop_Add8:
3079 switch (flag) {
3080 case WithFlagNone: case WithFlagCarry:
3081 case WithFlagCarryX: case WithFlagOverX:
3082 vassert(keep);
3083 break;
3084 default:
3085 vassert(0);
3087 break;
3088 case Iop_Sub8:
3089 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3090 if (flag == WithFlagCarry) vassert(keep);
3091 break;
3092 case Iop_And8:
3093 vassert(flag == WithFlagNone);
3094 break;
3095 case Iop_Or8: case Iop_Xor8:
3096 vassert(flag == WithFlagNone);
3097 vassert(keep);
3098 break;
3099 default:
3100 vassert(0);
3103 if (epartIsReg(rm)) {
3104 /* Specially handle XOR reg,reg, because that doesn't really
3105 depend on reg, and doing the obvious thing potentially
3106 generates a spurious value check failure due to the bogus
3107 dependency. Ditto SUB/SBB reg,reg. */
3108 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3109 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3110 putIRegG(size,pfx,rm, mkU(ty,0));
3113 assign( dst0, getIRegG(size,pfx,rm) );
3114 assign( src, getIRegE(size,pfx,rm) );
3116 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3117 helper_ADC( size, dst1, dst0, src,
3118 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3119 putIRegG(size, pfx, rm, mkexpr(dst1));
3120 } else
3121 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3122 helper_SBB( size, dst1, dst0, src,
3123 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3124 putIRegG(size, pfx, rm, mkexpr(dst1));
3125 } else
3126 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3127 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3128 putIRegG(size, pfx, rm, mkexpr(dst1));
3129 } else
3130 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3131 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3132 putIRegG(size, pfx, rm, mkexpr(dst1));
3133 } else {
3134 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3135 if (isAddSub(op8))
3136 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3137 else
3138 setFlags_DEP1(op8, dst1, ty);
3139 if (keep)
3140 putIRegG(size, pfx, rm, mkexpr(dst1));
3143 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3144 nameIRegE(size,pfx,rm),
3145 nameIRegG(size,pfx,rm));
3146 return 1+delta0;
3147 } else {
3148 /* E refers to memory */
3149 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3150 assign( dst0, getIRegG(size,pfx,rm) );
3151 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
3153 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3154 helper_ADC( size, dst1, dst0, src,
3155 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3156 putIRegG(size, pfx, rm, mkexpr(dst1));
3157 } else
3158 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3159 helper_SBB( size, dst1, dst0, src,
3160 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3161 putIRegG(size, pfx, rm, mkexpr(dst1));
3162 } else
3163 if (op8 == Iop_Add8 && flag == WithFlagCarryX) {
3164 helper_ADCX_ADOX( True/*isADCX*/, size, dst1, dst0, src );
3165 putIRegG(size, pfx, rm, mkexpr(dst1));
3166 } else
3167 if (op8 == Iop_Add8 && flag == WithFlagOverX) {
3168 helper_ADCX_ADOX( False/*!isADCX*/, size, dst1, dst0, src );
3169 putIRegG(size, pfx, rm, mkexpr(dst1));
3170 } else {
3171 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3172 if (isAddSub(op8))
3173 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3174 else
3175 setFlags_DEP1(op8, dst1, ty);
3176 if (keep)
3177 putIRegG(size, pfx, rm, mkexpr(dst1));
3180 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3181 dis_buf, nameIRegG(size, pfx, rm));
3182 return len+delta0;
3188 /* Handle binary integer instructions of the form
3189 op G, E meaning
3190 op reg, reg-or-mem
3191 Is passed a pointer to the modRM byte, the actual operation, and the
3192 data size. Returns the address advanced completely over this
3193 instruction.
3195 G(src) is reg.
3196 E(dst) is reg-or-mem
3198 If E is reg, --> GET %E, tmp
3199 OP %G, tmp
3200 PUT tmp, %E
3202 If E is mem, --> (getAddr E) -> tmpa
3203 LD (tmpa), tmpv
3204 OP %G, tmpv
3205 ST tmpv, (tmpa)
3207 static
3208 ULong dis_op2_G_E ( const VexAbiInfo* vbi,
3209 Prefix pfx,
3210 IROp op8,
3211 WithFlag flag,
3212 Bool keep,
3213 Int size,
3214 Long delta0,
3215 const HChar* t_amd64opc )
3217 HChar dis_buf[50];
3218 Int len;
3219 IRType ty = szToITy(size);
3220 IRTemp dst1 = newTemp(ty);
3221 IRTemp src = newTemp(ty);
3222 IRTemp dst0 = newTemp(ty);
3223 UChar rm = getUChar(delta0);
3224 IRTemp addr = IRTemp_INVALID;
3226 /* Stay sane -- check for valid (op8, flag, keep) combinations. */
3227 switch (op8) {
3228 case Iop_Add8:
3229 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3230 vassert(keep);
3231 break;
3232 case Iop_Sub8:
3233 vassert(flag == WithFlagNone || flag == WithFlagCarry);
3234 if (flag == WithFlagCarry) vassert(keep);
3235 break;
3236 case Iop_And8: case Iop_Or8: case Iop_Xor8:
3237 vassert(flag == WithFlagNone);
3238 vassert(keep);
3239 break;
3240 default:
3241 vassert(0);
3244 /* flag != WithFlagNone is only allowed for Add and Sub and indicates the
3245 intended operation is add-with-carry or subtract-with-borrow. */
3247 if (epartIsReg(rm)) {
3248 /* Specially handle XOR reg,reg, because that doesn't really
3249 depend on reg, and doing the obvious thing potentially
3250 generates a spurious value check failure due to the bogus
3251 dependency. Ditto SUB/SBB reg,reg. */
3252 if ((op8 == Iop_Xor8 || ((op8 == Iop_Sub8) && keep))
3253 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3254 putIRegE(size,pfx,rm, mkU(ty,0));
3257 assign(dst0, getIRegE(size,pfx,rm));
3258 assign(src, getIRegG(size,pfx,rm));
3260 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3261 helper_ADC( size, dst1, dst0, src,
3262 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3263 putIRegE(size, pfx, rm, mkexpr(dst1));
3264 } else
3265 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3266 helper_SBB( size, dst1, dst0, src,
3267 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3268 putIRegE(size, pfx, rm, mkexpr(dst1));
3269 } else {
3270 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3271 if (isAddSub(op8))
3272 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3273 else
3274 setFlags_DEP1(op8, dst1, ty);
3275 if (keep)
3276 putIRegE(size, pfx, rm, mkexpr(dst1));
3279 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3280 nameIRegG(size,pfx,rm),
3281 nameIRegE(size,pfx,rm));
3282 return 1+delta0;
3285 /* E refers to memory */
3287 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3288 assign(dst0, loadLE(ty,mkexpr(addr)));
3289 assign(src, getIRegG(size,pfx,rm));
3291 if (op8 == Iop_Add8 && flag == WithFlagCarry) {
3292 if (haveLOCK(pfx)) {
3293 /* cas-style store */
3294 helper_ADC( size, dst1, dst0, src,
3295 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3296 } else {
3297 /* normal store */
3298 helper_ADC( size, dst1, dst0, src,
3299 /*store*/addr, IRTemp_INVALID, 0 );
3301 } else
3302 if (op8 == Iop_Sub8 && flag == WithFlagCarry) {
3303 if (haveLOCK(pfx)) {
3304 /* cas-style store */
3305 helper_SBB( size, dst1, dst0, src,
3306 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3307 } else {
3308 /* normal store */
3309 helper_SBB( size, dst1, dst0, src,
3310 /*store*/addr, IRTemp_INVALID, 0 );
3312 } else {
3313 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3314 if (keep) {
3315 if (haveLOCK(pfx)) {
3316 if (0) vex_printf("locked case\n" );
3317 casLE( mkexpr(addr),
3318 mkexpr(dst0)/*expval*/,
3319 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3320 } else {
3321 if (0) vex_printf("nonlocked case\n");
3322 storeLE(mkexpr(addr), mkexpr(dst1));
3325 if (isAddSub(op8))
3326 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3327 else
3328 setFlags_DEP1(op8, dst1, ty);
3331 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
3332 nameIRegG(size,pfx,rm), dis_buf);
3333 return len+delta0;
3338 /* Handle move instructions of the form
3339 mov E, G meaning
3340 mov reg-or-mem, reg
3341 Is passed a pointer to the modRM byte, and the data size. Returns
3342 the address advanced completely over this instruction.
3344 E(src) is reg-or-mem
3345 G(dst) is reg.
3347 If E is reg, --> GET %E, tmpv
3348 PUT tmpv, %G
3350 If E is mem --> (getAddr E) -> tmpa
3351 LD (tmpa), tmpb
3352 PUT tmpb, %G
3354 static
3355 ULong dis_mov_E_G ( const VexAbiInfo* vbi,
3356 Prefix pfx,
3357 Int size,
3358 Long delta0 )
3360 Int len;
3361 UChar rm = getUChar(delta0);
3362 HChar dis_buf[50];
3364 if (epartIsReg(rm)) {
3365 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
3366 DIP("mov%c %s,%s\n", nameISize(size),
3367 nameIRegE(size,pfx,rm),
3368 nameIRegG(size,pfx,rm));
3369 return 1+delta0;
3372 /* E refers to memory */
3374 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3375 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
3376 DIP("mov%c %s,%s\n", nameISize(size),
3377 dis_buf,
3378 nameIRegG(size,pfx,rm));
3379 return delta0+len;
3384 /* Handle move instructions of the form
3385 mov G, E meaning
3386 mov reg, reg-or-mem
3387 Is passed a pointer to the modRM byte, and the data size. Returns
3388 the address advanced completely over this instruction.
3389 We have to decide here whether F2 or F3 are acceptable. F2 never is.
3391 G(src) is reg.
3392 E(dst) is reg-or-mem
3394 If E is reg, --> GET %G, tmp
3395 PUT tmp, %E
3397 If E is mem, --> (getAddr E) -> tmpa
3398 GET %G, tmpv
3399 ST tmpv, (tmpa)
3401 static
3402 ULong dis_mov_G_E ( const VexAbiInfo* vbi,
3403 Prefix pfx,
3404 Int size,
3405 Long delta0,
3406 /*OUT*/Bool* ok )
3408 Int len;
3409 UChar rm = getUChar(delta0);
3410 HChar dis_buf[50];
3412 *ok = True;
3414 if (epartIsReg(rm)) {
3415 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
3416 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
3417 DIP("mov%c %s,%s\n", nameISize(size),
3418 nameIRegG(size,pfx,rm),
3419 nameIRegE(size,pfx,rm));
3420 return 1+delta0;
3423 /* E refers to memory */
3425 if (haveF2(pfx)) { *ok = False; return delta0; }
3426 /* F3(XRELEASE) is acceptable, though. */
3427 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
3428 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
3429 DIP("mov%c %s,%s\n", nameISize(size),
3430 nameIRegG(size,pfx,rm),
3431 dis_buf);
3432 return len+delta0;
3437 /* op $immediate, AL/AX/EAX/RAX. */
3438 static
3439 ULong dis_op_imm_A ( Int size,
3440 Bool carrying,
3441 IROp op8,
3442 Bool keep,
3443 Long delta,
3444 const HChar* t_amd64opc )
3446 Int size4 = imin(size,4);
3447 IRType ty = szToITy(size);
3448 IRTemp dst0 = newTemp(ty);
3449 IRTemp src = newTemp(ty);
3450 IRTemp dst1 = newTemp(ty);
3451 Long lit = getSDisp(size4,delta);
3452 assign(dst0, getIRegRAX(size));
3453 assign(src, mkU(ty,lit & mkSizeMask(size)));
3455 if (isAddSub(op8) && !carrying) {
3456 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3457 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3459 else
3460 if (isLogic(op8)) {
3461 vassert(!carrying);
3462 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
3463 setFlags_DEP1(op8, dst1, ty);
3465 else
3466 if (op8 == Iop_Add8 && carrying) {
3467 helper_ADC( size, dst1, dst0, src,
3468 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3470 else
3471 if (op8 == Iop_Sub8 && carrying) {
3472 helper_SBB( size, dst1, dst0, src,
3473 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3475 else
3476 vpanic("dis_op_imm_A(amd64,guest)");
3478 if (keep)
3479 putIRegRAX(size, mkexpr(dst1));
3481 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
3482 lit, nameIRegRAX(size));
3483 return delta+size4;
3487 /* Sign- and Zero-extending moves. */
3488 static
3489 ULong dis_movx_E_G ( const VexAbiInfo* vbi,
3490 Prefix pfx,
3491 Long delta, Int szs, Int szd, Bool sign_extend )
3493 UChar rm = getUChar(delta);
3494 if (epartIsReg(rm)) {
3495 putIRegG(szd, pfx, rm,
3496 doScalarWidening(
3497 szs,szd,sign_extend,
3498 getIRegE(szs,pfx,rm)));
3499 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3500 nameISize(szs),
3501 nameISize(szd),
3502 nameIRegE(szs,pfx,rm),
3503 nameIRegG(szd,pfx,rm));
3504 return 1+delta;
3507 /* E refers to memory */
3509 Int len;
3510 HChar dis_buf[50];
3511 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
3512 putIRegG(szd, pfx, rm,
3513 doScalarWidening(
3514 szs,szd,sign_extend,
3515 loadLE(szToITy(szs),mkexpr(addr))));
3516 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3517 nameISize(szs),
3518 nameISize(szd),
3519 dis_buf,
3520 nameIRegG(szd,pfx,rm));
3521 return len+delta;
3526 /* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3527 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
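/* For instance (illustrative only): a 64-bit "div %rcx" divides
   the 128-bit value RDX:RAX by %rcx, after which RAX holds the
   quotient and RDX the remainder -- exactly the Iop_DivModU128to64
   case in the sz == 8 branch below. */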
3528 static
3529 void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3531 /* special-case the 64-bit case */
3532 if (sz == 8) {
3533 IROp op = signed_divide ? Iop_DivModS128to64
3534 : Iop_DivModU128to64;
3535 IRTemp src128 = newTemp(Ity_I128);
3536 IRTemp dst128 = newTemp(Ity_I128);
3537 assign( src128, binop(Iop_64HLto128,
3538 getIReg64(R_RDX),
3539 getIReg64(R_RAX)) );
3540 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
3541 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3542 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
3543 } else {
3544 IROp op = signed_divide ? Iop_DivModS64to32
3545 : Iop_DivModU64to32;
3546 IRTemp src64 = newTemp(Ity_I64);
3547 IRTemp dst64 = newTemp(Ity_I64);
3548 switch (sz) {
3549 case 4:
3550 assign( src64,
3551 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3552 assign( dst64,
3553 binop(op, mkexpr(src64), mkexpr(t)) );
3554 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3555 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
3556 break;
3557 case 2: {
3558 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3559 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3560 assign( src64, unop(widen3264,
3561 binop(Iop_16HLto32,
3562 getIRegRDX(2),
3563 getIRegRAX(2))) );
3564 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
3565 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3566 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
3567 break;
3569 case 1: {
3570 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3571 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3572 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3573 assign( src64, unop(widen3264,
3574 unop(widen1632, getIRegRAX(2))) );
3575 assign( dst64,
3576 binop(op, mkexpr(src64),
3577 unop(widen1632, unop(widen816, mkexpr(t)))) );
3578 putIRegRAX( 1, unop(Iop_16to8,
3579 unop(Iop_32to16,
3580 unop(Iop_64to32,mkexpr(dst64)))) );
3581 putIRegAH( unop(Iop_16to8,
3582 unop(Iop_32to16,
3583 unop(Iop_64HIto32,mkexpr(dst64)))) );
3584 break;
3586 default:
3587 vpanic("codegen_div(amd64)");
3592 static
3593 ULong dis_Grp1 ( const VexAbiInfo* vbi,
3594 Prefix pfx,
3595 Long delta, UChar modrm,
3596 Int am_sz, Int d_sz, Int sz, Long d64 )
3598 Int len;
3599 HChar dis_buf[50];
3600 IRType ty = szToITy(sz);
3601 IRTemp dst1 = newTemp(ty);
3602 IRTemp src = newTemp(ty);
3603 IRTemp dst0 = newTemp(ty);
3604 IRTemp addr = IRTemp_INVALID;
3605 IROp op8 = Iop_INVALID;
3606 ULong mask = mkSizeMask(sz);
3608 switch (gregLO3ofRM(modrm)) {
3609 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3610 case 2: break; // ADC
3611 case 3: break; // SBB
3612 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3613 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
3614 /*NOTREACHED*/
3615 default: vpanic("dis_Grp1(amd64): unhandled case");
3618 if (epartIsReg(modrm)) {
3619 vassert(am_sz == 1);
3621 assign(dst0, getIRegE(sz,pfx,modrm));
3622 assign(src, mkU(ty,d64 & mask));
3624 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3625 helper_ADC( sz, dst1, dst0, src,
3626 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3627 } else
3628 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3629 helper_SBB( sz, dst1, dst0, src,
3630 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
3631 } else {
3632 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3633 if (isAddSub(op8))
3634 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3635 else
3636 setFlags_DEP1(op8, dst1, ty);
3639 if (gregLO3ofRM(modrm) < 7)
3640 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3642 delta += (am_sz + d_sz);
3643 DIP("%s%c $%lld, %s\n",
3644 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
3645 nameIRegE(sz,pfx,modrm));
3646 } else {
3647 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3649 assign(dst0, loadLE(ty,mkexpr(addr)));
3650 assign(src, mkU(ty,d64 & mask));
3652 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
3653 if (haveLOCK(pfx)) {
3654 /* cas-style store */
3655 helper_ADC( sz, dst1, dst0, src,
3656 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3657 } else {
3658 /* normal store */
3659 helper_ADC( sz, dst1, dst0, src,
3660 /*store*/addr, IRTemp_INVALID, 0 );
3662 } else
3663 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
3664 if (haveLOCK(pfx)) {
3665 /* cas-style store */
3666 helper_SBB( sz, dst1, dst0, src,
3667 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3668 } else {
3669 /* normal store */
3670 helper_SBB( sz, dst1, dst0, src,
3671 /*store*/addr, IRTemp_INVALID, 0 );
3673 } else {
3674 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3675 if (gregLO3ofRM(modrm) < 7) {
3676 if (haveLOCK(pfx)) {
3677 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3678 mkexpr(dst1)/*newVal*/,
3679 guest_RIP_curr_instr );
3680 } else {
3681 storeLE(mkexpr(addr), mkexpr(dst1));
3684 if (isAddSub(op8))
3685 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3686 else
3687 setFlags_DEP1(op8, dst1, ty);
3690 delta += (len+d_sz);
3691 DIP("%s%c $%lld, %s\n",
3692 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
3693 d64, dis_buf);
3695 return delta;
3699 /* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3700 expression. */
3702 static
3703 ULong dis_Grp2 ( const VexAbiInfo* vbi,
3704 Prefix pfx,
3705 Long delta, UChar modrm,
3706 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
3707 const HChar* shift_expr_txt, Bool* decode_OK )
3709 /* delta on entry points at the modrm byte. */
3710 HChar dis_buf[50];
3711 Int len;
3712 Bool isShift, isRotate, isRotateC;
3713 IRType ty = szToITy(sz);
3714 IRTemp dst0 = newTemp(ty);
3715 IRTemp dst1 = newTemp(ty);
3716 IRTemp addr = IRTemp_INVALID;
3718 *decode_OK = True;
3720 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3722 /* Put value to shift/rotate in dst0. */
3723 if (epartIsReg(modrm)) {
3724 assign(dst0, getIRegE(sz, pfx, modrm));
3725 delta += (am_sz + d_sz);
3726 } else {
3727 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
3728 assign(dst0, loadLE(ty,mkexpr(addr)));
3729 delta += len + d_sz;
3732 isShift = False;
3733 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
3735 isRotate = False;
3736 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
3738 isRotateC = False;
3739 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
3741 if (!isShift && !isRotate && !isRotateC) {
3742 /*NOTREACHED*/
3743 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3746 if (isRotateC) {
3747 /* Call a helper; this insn is so ridiculous it does not deserve
3748 better. One problem is, the helper has to calculate both the
3749 new value and the new flags. This is more than 64 bits, and
3750 there is no way to return more than 64 bits from the helper.
3751 Hence the crude and obvious solution is to call it twice,
3752 using the sign of the sz field to indicate whether it is the
3753 value or rflags result we want.
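/* Concretely (illustrative only): for a 4-byte RCL the helper is
   called twice with identical rotate arguments, once with a final
   argument of 4 to obtain the rotated value and once with -4 to
   obtain the new rflags; the sign of that argument is the
   value-vs-flags selector described above. */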
3755 Bool left = toBool(gregLO3ofRM(modrm) == 2);
3756 IRExpr** argsVALUE;
3757 IRExpr** argsRFLAGS;
3759 IRTemp new_value = newTemp(Ity_I64);
3760 IRTemp new_rflags = newTemp(Ity_I64);
3761 IRTemp old_rflags = newTemp(Ity_I64);
3763 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3765 argsVALUE
3766 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3767 widenUto64(shift_expr), /* rotate amount */
3768 mkexpr(old_rflags),
3769 mkU64(sz) );
3770 assign( new_value,
3771 mkIRExprCCall(
3772 Ity_I64,
3773 0/*regparm*/,
3774 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3775 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3776 argsVALUE
3780 argsRFLAGS
3781 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3782 widenUto64(shift_expr), /* rotate amount */
3783 mkexpr(old_rflags),
3784 mkU64(-sz) );
3785 assign( new_rflags,
3786 mkIRExprCCall(
3787 Ity_I64,
3788 0/*regparm*/,
3789 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3790 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
3791 argsRFLAGS
3795 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3796 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3797 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3798 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3799 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3802 else
3803 if (isShift) {
3805 IRTemp pre64 = newTemp(Ity_I64);
3806 IRTemp res64 = newTemp(Ity_I64);
3807 IRTemp res64ss = newTemp(Ity_I64);
3808 IRTemp shift_amt = newTemp(Ity_I8);
3809 UChar mask = toUChar(sz==8 ? 63 : 31);
3810 IROp op64;
3812 switch (gregLO3ofRM(modrm)) {
3813 case 4: op64 = Iop_Shl64; break;
3814 case 5: op64 = Iop_Shr64; break;
3815 case 6: op64 = Iop_Shl64; break;
3816 case 7: op64 = Iop_Sar64; break;
3817 /*NOTREACHED*/
3818 default: vpanic("dis_Grp2:shift"); break;
3821 /* Widen the value to be shifted to 64 bits, do the shift, and
3822 narrow back down. This seems surprisingly long-winded, but
3823 unfortunately the AMD semantics requires that 8/16/32-bit
3824 shifts give defined results for shift values all the way up
3825 to 32, and this seems the simplest way to do it. It has the
3826 advantage that the only IR level shifts generated are of 64
3827 bit values, and the shift amount is guaranteed to be in the
3828 range 0 .. 63, thereby observing the IR semantics requiring
3829 all shift values to be in the range 0 .. word_size-1.
3831 Therefore the shift amount is masked with 63 for 64-bit shifts
3832 and 31 for all others.
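         For example, an 8-, 16- or 32-bit shift by 35 is performed as a shift
         by 35 & 31 == 3, and a 64-bit shift by 67 as a shift by 67 & 63 == 3.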
3834 /* shift_amt = shift_expr & MASK, regardless of operation size */
3835 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
3837 /* suitably widen the value to be shifted to 64 bits. */
3838 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3839 : widenUto64(mkexpr(dst0)) );
3841 /* res64 = pre64 `shift` shift_amt */
3842 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3844 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
3845 assign( res64ss,
3846 binop(op64,
3847 mkexpr(pre64),
3848 binop(Iop_And8,
3849 binop(Iop_Sub8,
3850 mkexpr(shift_amt), mkU8(1)),
3851 mkU8(mask))) );
3853 /* Build the flags thunk. */
3854 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3856 /* Narrow the result back down. */
3857 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3859 } /* if (isShift) */
3861 else
3862 if (isRotate) {
3863 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3864 : (ty==Ity_I32 ? 2 : 3));
3865 Bool left = toBool(gregLO3ofRM(modrm) == 0);
3866 IRTemp rot_amt = newTemp(Ity_I8);
3867 IRTemp rot_amt64 = newTemp(Ity_I8);
3868 IRTemp oldFlags = newTemp(Ity_I64);
3869 UChar mask = toUChar(sz==8 ? 63 : 31);
3871 /* rot_amt = shift_expr & mask */
3872 /* By masking the rotate amount in this way, the IR-level Shl/Shr
3873 expressions never shift beyond the word size and thus remain
3874 well defined. */
3875 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
3877 if (ty == Ity_I64)
3878 assign(rot_amt, mkexpr(rot_amt64));
3879 else
3880 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
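      /* So for narrow rotates two amounts are kept: rot_amt, additionally
         masked to the operand width and used for the rotate itself, and
         rot_amt64, masked only with 31, which decides below whether the
         flags get updated at all. */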
3882 if (left) {
3884 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3885 assign(dst1,
3886 binop( mkSizedOp(ty,Iop_Or8),
3887 binop( mkSizedOp(ty,Iop_Shl8),
3888 mkexpr(dst0),
3889 mkexpr(rot_amt)
3891 binop( mkSizedOp(ty,Iop_Shr8),
3892 mkexpr(dst0),
3893 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3897 ccOp += AMD64G_CC_OP_ROLB;
3899 } else { /* right */
3901 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3902 assign(dst1,
3903 binop( mkSizedOp(ty,Iop_Or8),
3904 binop( mkSizedOp(ty,Iop_Shr8),
3905 mkexpr(dst0),
3906 mkexpr(rot_amt)
3908 binop( mkSizedOp(ty,Iop_Shl8),
3909 mkexpr(dst0),
3910 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3914 ccOp += AMD64G_CC_OP_RORB;
3918 /* dst1 now holds the rotated value. Build flag thunk. We
3919 need the resulting value for this, and the previous flags.
3920 Except don't set it if the rotate count is zero. */
3922 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3924 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3925 IRTemp rot_amt64b = newTemp(Ity_I1);
3926 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3928 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3929 stmt( IRStmt_Put( OFFB_CC_OP,
3930 IRExpr_ITE( mkexpr(rot_amt64b),
3931 mkU64(ccOp),
3932 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
3933 stmt( IRStmt_Put( OFFB_CC_DEP1,
3934 IRExpr_ITE( mkexpr(rot_amt64b),
3935 widenUto64(mkexpr(dst1)),
3936 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
3937 stmt( IRStmt_Put( OFFB_CC_DEP2,
3938 IRExpr_ITE( mkexpr(rot_amt64b),
3939 mkU64(0),
3940 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
3941 stmt( IRStmt_Put( OFFB_CC_NDEP,
3942 IRExpr_ITE( mkexpr(rot_amt64b),
3943 mkexpr(oldFlags),
3944 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
3945 } /* if (isRotate) */
3947 /* Save result, and finish up. */
3948 if (epartIsReg(modrm)) {
3949 putIRegE(sz, pfx, modrm, mkexpr(dst1));
3950 if (vex_traceflags & VEX_TRACE_FE) {
3951 vex_printf("%s%c ",
3952 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3953 if (shift_expr_txt)
3954 vex_printf("%s", shift_expr_txt);
3955 else
3956 ppIRExpr(shift_expr);
3957 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
3959 } else {
3960 storeLE(mkexpr(addr), mkexpr(dst1));
3961 if (vex_traceflags & VEX_TRACE_FE) {
3962 vex_printf("%s%c ",
3963 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
3964 if (shift_expr_txt)
3965 vex_printf("%s", shift_expr_txt);
3966 else
3967 ppIRExpr(shift_expr);
3968 vex_printf(", %s\n", dis_buf);
3971 return delta;
3975 /* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3976 static
3977 ULong dis_Grp8_Imm ( const VexAbiInfo* vbi,
3978 Prefix pfx,
3979 Long delta, UChar modrm,
3980 Int am_sz, Int sz, ULong src_val,
3981 Bool* decode_OK )
3983 /* src_val denotes a d8.
3984 And delta on entry points at the modrm byte. */
3986 IRType ty = szToITy(sz);
3987 IRTemp t2 = newTemp(Ity_I64);
3988 IRTemp t2m = newTemp(Ity_I64);
3989 IRTemp t_addr = IRTemp_INVALID;
3990 HChar dis_buf[50];
3991 ULong mask;
3993 /* we're optimists :-) */
3994 *decode_OK = True;
3996 /* Check whether F2 or F3 are acceptable. */
3997 if (epartIsReg(modrm)) {
3998 /* F2 or F3 are not allowed in the register case. */
3999 if (haveF2orF3(pfx)) {
4000 *decode_OK = False;
4001 return delta;
4003 } else {
4004 /* F2 or F3 (but not both) are allowable provided LOCK is also
4005 present. */
4006 if (haveF2orF3(pfx)) {
4007 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
4008 *decode_OK = False;
4009 return delta;
4014 /* Limit src_val -- the bit offset -- to something within a word.
4015 The Intel docs say that literal offsets larger than a word are
4016 masked in this way. */
4017 switch (sz) {
4018 case 2: src_val &= 15; break;
4019 case 4: src_val &= 31; break;
4020 case 8: src_val &= 63; break;
4021 default: *decode_OK = False; return delta;
4024 /* Invent a mask suitable for the operation. */
4025 switch (gregLO3ofRM(modrm)) {
4026 case 4: /* BT */ mask = 0; break;
4027 case 5: /* BTS */ mask = 1ULL << src_val; break;
4028 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
4029 case 7: /* BTC */ mask = 1ULL << src_val; break;
4030 /* If this needs to be extended, probably simplest to make a
4031 new function to handle the other cases (0 .. 3). The
4032 Intel docs do not, however, indicate any use for 0 .. 3, so
4033 we don't expect this to happen. */
4034 default: *decode_OK = False; return delta;
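   /* Example: with a (masked) bit offset of 3, BTS and BTC get mask == 0x8
      and BTR gets mask == ~0x8; the Or64/And64/Xor64 below then set, clear
      or flip exactly that bit of the fetched value. */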
4037 /* Fetch the value to be tested and modified into t2, which is
4038 64-bits wide regardless of sz. */
4039 if (epartIsReg(modrm)) {
4040 vassert(am_sz == 1);
4041 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
4042 delta += (am_sz + 1);
4043 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
4044 nameISize(sz),
4045 src_val, nameIRegE(sz,pfx,modrm));
4046 } else {
4047 Int len;
4048 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
4049 delta += (len+1);
4050 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
4051 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
4052 nameISize(sz),
4053 src_val, dis_buf);
4056 /* Compute the new value into t2m, if non-BT. */
4057 switch (gregLO3ofRM(modrm)) {
4058 case 4: /* BT */
4059 break;
4060 case 5: /* BTS */
4061 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
4062 break;
4063 case 6: /* BTR */
4064 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
4065 break;
4066 case 7: /* BTC */
4067 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
4068 break;
4069 default:
4070 /*NOTREACHED*/ /*the previous switch guards this*/
4071 vassert(0);
4074 /* Write the result back, if non-BT. */
4075 if (gregLO3ofRM(modrm) != 4 /* BT */) {
4076 if (epartIsReg(modrm)) {
4077 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
4078 } else {
4079 if (haveLOCK(pfx)) {
4080 casLE( mkexpr(t_addr),
4081 narrowTo(ty, mkexpr(t2))/*expd*/,
4082 narrowTo(ty, mkexpr(t2m))/*new*/,
4083 guest_RIP_curr_instr );
4084 } else {
4085 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
4090 /* Copy relevant bit from t2 into the carry flag. */
4091 /* Flags: C=selected bit, O,S,A,P undefined, Z unchanged */
4092 /* so let's also keep O,S,A,P unchanged */
4093 const ULong maskC = AMD64G_CC_MASK_C;
4094 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
4095 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
4096 | AMD64G_CC_MASK_P;
4098 IRTemp old_rflags = newTemp(Ity_I64);
4099 assign(old_rflags, mk_amd64g_calculate_rflags_all());
4101 IRTemp new_rflags = newTemp(Ity_I64);
4102 assign(new_rflags,
4103 binop(Iop_Or64,
4104 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
4105 binop(Iop_And64,
4106 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
4107 mkU64(maskC)) ));
4109 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
4110 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
4111 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
4112 /* Set NDEP even though it isn't used. This makes redundant-PUT
4113 elimination of previous stores to this field work better. */
4114 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
4116 return delta;
4120 /* Signed/unsigned widening multiply. Generate IR to multiply the
4121 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
4122 RDX:RAX/EDX:EAX/DX:AX/AX.
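      For example, a 64-bit 'mul %rbx' forms the 128-bit product RAX * RBX
      and writes the high 64 bits to RDX and the low 64 bits to RAX.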
4124 static void codegen_mulL_A_D ( Int sz, Bool syned,
4125 IRTemp tmp, const HChar* tmp_txt )
4127 IRType ty = szToITy(sz);
4128 IRTemp t1 = newTemp(ty);
4130 assign( t1, getIRegRAX(sz) );
4132 switch (ty) {
4133 case Ity_I64: {
4134 IRTemp res128 = newTemp(Ity_I128);
4135 IRTemp resHi = newTemp(Ity_I64);
4136 IRTemp resLo = newTemp(Ity_I64);
4137 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
4138 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4139 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
4140 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4141 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
4142 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
4143 putIReg64(R_RDX, mkexpr(resHi));
4144 putIReg64(R_RAX, mkexpr(resLo));
4145 break;
4147 case Ity_I32: {
4148 IRTemp res64 = newTemp(Ity_I64);
4149 IRTemp resHi = newTemp(Ity_I32);
4150 IRTemp resLo = newTemp(Ity_I32);
4151 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
4152 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4153 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
4154 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4155 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
4156 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
4157 putIRegRDX(4, mkexpr(resHi));
4158 putIRegRAX(4, mkexpr(resLo));
4159 break;
4161 case Ity_I16: {
4162 IRTemp res32 = newTemp(Ity_I32);
4163 IRTemp resHi = newTemp(Ity_I16);
4164 IRTemp resLo = newTemp(Ity_I16);
4165 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
4166 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4167 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
4168 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4169 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
4170 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
4171 putIRegRDX(2, mkexpr(resHi));
4172 putIRegRAX(2, mkexpr(resLo));
4173 break;
4175 case Ity_I8: {
4176 IRTemp res16 = newTemp(Ity_I16);
4177 IRTemp resHi = newTemp(Ity_I8);
4178 IRTemp resLo = newTemp(Ity_I8);
4179 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
4180 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
4181 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
4182 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
4183 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
4184 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
4185 putIRegRAX(2, mkexpr(res16));
4186 break;
4188 default:
4189 ppIRType(ty);
4190 vpanic("codegen_mulL_A_D(amd64)");
4192 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
4196 /* Group 3 extended opcodes. We have to decide here whether F2 and F3
4197 might be valid. */
4198 static
4199 ULong dis_Grp3 ( const VexAbiInfo* vbi,
4200 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
4202 Long d64;
4203 UChar modrm;
4204 HChar dis_buf[50];
4205 Int len;
4206 IRTemp addr;
4207 IRType ty = szToITy(sz);
4208 IRTemp t1 = newTemp(ty);
4209 IRTemp dst1, src, dst0;
4210 *decode_OK = True;
4211 modrm = getUChar(delta);
4212 if (epartIsReg(modrm)) {
4213 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4214 if (haveF2orF3(pfx)) goto unhandled;
4215 switch (gregLO3ofRM(modrm)) {
4216 case 0: { /* TEST */
4217 delta++;
4218 d64 = getSDisp(imin(4,sz), delta);
4219 delta += imin(4,sz);
4220 dst1 = newTemp(ty);
4221 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4222 getIRegE(sz,pfx,modrm),
4223 mkU(ty, d64 & mkSizeMask(sz))));
4224 setFlags_DEP1( Iop_And8, dst1, ty );
4225 DIP("test%c $%lld, %s\n",
4226 nameISize(sz), d64,
4227 nameIRegE(sz, pfx, modrm));
4228 break;
4230 case 1:
4231 *decode_OK = False;
4232 return delta;
4233 case 2: /* NOT */
4234 delta++;
4235 putIRegE(sz, pfx, modrm,
4236 unop(mkSizedOp(ty,Iop_Not8),
4237 getIRegE(sz, pfx, modrm)));
4238 DIP("not%c %s\n", nameISize(sz),
4239 nameIRegE(sz, pfx, modrm));
4240 break;
4241 case 3: /* NEG */
4242 delta++;
4243 dst0 = newTemp(ty);
4244 src = newTemp(ty);
4245 dst1 = newTemp(ty);
4246 assign(dst0, mkU(ty,0));
4247 assign(src, getIRegE(sz, pfx, modrm));
4248 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4249 mkexpr(src)));
4250 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4251 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4252 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
4253 break;
4254 case 4: /* MUL (unsigned widening) */
4255 delta++;
4256 src = newTemp(ty);
4257 assign(src, getIRegE(sz,pfx,modrm));
4258 codegen_mulL_A_D ( sz, False, src,
4259 nameIRegE(sz,pfx,modrm) );
4260 break;
4261 case 5: /* IMUL (signed widening) */
4262 delta++;
4263 src = newTemp(ty);
4264 assign(src, getIRegE(sz,pfx,modrm));
4265 codegen_mulL_A_D ( sz, True, src,
4266 nameIRegE(sz,pfx,modrm) );
4267 break;
4268 case 6: /* DIV */
4269 delta++;
4270 assign( t1, getIRegE(sz, pfx, modrm) );
4271 codegen_div ( sz, t1, False );
4272 DIP("div%c %s\n", nameISize(sz),
4273 nameIRegE(sz, pfx, modrm));
4274 break;
4275 case 7: /* IDIV */
4276 delta++;
4277 assign( t1, getIRegE(sz, pfx, modrm) );
4278 codegen_div ( sz, t1, True );
4279 DIP("idiv%c %s\n", nameISize(sz),
4280 nameIRegE(sz, pfx, modrm));
4281 break;
4282 default:
4283 /*NOTREACHED*/
4284 vpanic("Grp3(amd64,R)");
4286 } else {
4287 /* Decide if F2/XACQ or F3/XREL might be valid. */
4288 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4289 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4290 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4291 validF2orF3 = True;
4293 if (!validF2orF3) goto unhandled;
4294 /* */
4295 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
4296 /* we have to inform disAMode of any immediate
4297 bytes used */
4298 gregLO3ofRM(modrm)==0/*TEST*/
4299 ? imin(4,sz)
4302 t1 = newTemp(ty);
4303 delta += len;
4304 assign(t1, loadLE(ty,mkexpr(addr)));
4305 switch (gregLO3ofRM(modrm)) {
4306 case 0: { /* TEST */
4307 d64 = getSDisp(imin(4,sz), delta);
4308 delta += imin(4,sz);
4309 dst1 = newTemp(ty);
4310 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4311 mkexpr(t1),
4312 mkU(ty, d64 & mkSizeMask(sz))));
4313 setFlags_DEP1( Iop_And8, dst1, ty );
4314 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4315 break;
4317 case 1:
4318 *decode_OK = False;
4319 return delta;
4320 case 2: /* NOT */
4321 dst1 = newTemp(ty);
4322 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
4323 if (haveLOCK(pfx)) {
4324 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4325 guest_RIP_curr_instr );
4326 } else {
4327 storeLE( mkexpr(addr), mkexpr(dst1) );
4329 DIP("not%c %s\n", nameISize(sz), dis_buf);
4330 break;
4331 case 3: /* NEG */
4332 dst0 = newTemp(ty);
4333 src = newTemp(ty);
4334 dst1 = newTemp(ty);
4335 assign(dst0, mkU(ty,0));
4336 assign(src, mkexpr(t1));
4337 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4338 mkexpr(src)));
4339 if (haveLOCK(pfx)) {
4340 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4341 guest_RIP_curr_instr );
4342 } else {
4343 storeLE( mkexpr(addr), mkexpr(dst1) );
4345 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
4346 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4347 break;
4348 case 4: /* MUL (unsigned widening) */
4349 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4350 break;
4351 case 5: /* IMUL */
4352 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4353 break;
4354 case 6: /* DIV */
4355 codegen_div ( sz, t1, False );
4356 DIP("div%c %s\n", nameISize(sz), dis_buf);
4357 break;
4358 case 7: /* IDIV */
4359 codegen_div ( sz, t1, True );
4360 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4361 break;
4362 default:
4363 /*NOTREACHED*/
4364 vpanic("Grp3(amd64,M)");
4367 return delta;
4368 unhandled:
4369 *decode_OK = False;
4370 return delta;
4374 /* Group 4 extended opcodes. We have to decide here whether F2 and F3
4375 might be valid. */
4376 static
4377 ULong dis_Grp4 ( const VexAbiInfo* vbi,
4378 Prefix pfx, Long delta, Bool* decode_OK )
4380 Int alen;
4381 UChar modrm;
4382 HChar dis_buf[50];
4383 IRType ty = Ity_I8;
4384 IRTemp t1 = newTemp(ty);
4385 IRTemp t2 = newTemp(ty);
4387 *decode_OK = True;
4389 modrm = getUChar(delta);
4390 if (epartIsReg(modrm)) {
4391 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4392 if (haveF2orF3(pfx)) goto unhandled;
4393 assign(t1, getIRegE(1, pfx, modrm));
4394 switch (gregLO3ofRM(modrm)) {
4395 case 0: /* INC */
4396 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4397 putIRegE(1, pfx, modrm, mkexpr(t2));
4398 setFlags_INC_DEC( True, t2, ty );
4399 break;
4400 case 1: /* DEC */
4401 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4402 putIRegE(1, pfx, modrm, mkexpr(t2));
4403 setFlags_INC_DEC( False, t2, ty );
4404 break;
4405 default:
4406 *decode_OK = False;
4407 return delta;
4409 delta++;
4410 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
4411 nameIRegE(1, pfx, modrm));
4412 } else {
4413 /* Decide if F2/XACQ or F3/XREL might be valid. */
4414 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4415 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4416 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4417 validF2orF3 = True;
4419 if (!validF2orF3) goto unhandled;
4420 /* */
4421 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
4422 assign( t1, loadLE(ty, mkexpr(addr)) );
4423 switch (gregLO3ofRM(modrm)) {
4424 case 0: /* INC */
4425 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
4426 if (haveLOCK(pfx)) {
4427 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4428 guest_RIP_curr_instr );
4429 } else {
4430 storeLE( mkexpr(addr), mkexpr(t2) );
4432 setFlags_INC_DEC( True, t2, ty );
4433 break;
4434 case 1: /* DEC */
4435 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
4436 if (haveLOCK(pfx)) {
4437 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4438 guest_RIP_curr_instr );
4439 } else {
4440 storeLE( mkexpr(addr), mkexpr(t2) );
4442 setFlags_INC_DEC( False, t2, ty );
4443 break;
4444 default:
4445 *decode_OK = False;
4446 return delta;
4448 delta += alen;
4449 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
4451 return delta;
4452 unhandled:
4453 *decode_OK = False;
4454 return delta;
4458 /* Group 5 extended opcodes. We have to decide here whether F2 and F3
4459 might be valid. */
4460 static
4461 ULong dis_Grp5 ( const VexAbiInfo* vbi,
4462 Prefix pfx, Int sz, Long delta,
4463 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
4465 Int len;
4466 UChar modrm;
4467 HChar dis_buf[50];
4468 IRTemp addr = IRTemp_INVALID;
4469 IRType ty = szToITy(sz);
4470 IRTemp t1 = newTemp(ty);
4471 IRTemp t2 = IRTemp_INVALID;
4472 IRTemp t3 = IRTemp_INVALID;
4473 Bool showSz = True;
4475 *decode_OK = True;
4477 modrm = getUChar(delta);
4478 if (epartIsReg(modrm)) {
4479 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4480 F2/CALL and F2/JMP may have bnd prefix. */
4481 if (haveF2orF3(pfx)
4482 && ! (haveF2(pfx)
4483 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4484 goto unhandledR;
4485 assign(t1, getIRegE(sz,pfx,modrm));
4486 switch (gregLO3ofRM(modrm)) {
4487 case 0: /* INC */
4488 t2 = newTemp(ty);
4489 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4490 mkexpr(t1), mkU(ty,1)));
4491 setFlags_INC_DEC( True, t2, ty );
4492 putIRegE(sz,pfx,modrm, mkexpr(t2));
4493 break;
4494 case 1: /* DEC */
4495 t2 = newTemp(ty);
4496 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4497 mkexpr(t1), mkU(ty,1)));
4498 setFlags_INC_DEC( False, t2, ty );
4499 putIRegE(sz,pfx,modrm, mkexpr(t2));
4500 break;
4501 case 2: /* call Ev */
4502 /* Ignore any sz value and operate as if sz==8. */
4503 if (!(sz == 4 || sz == 8)) goto unhandledR;
4504 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4505 sz = 8;
4506 t3 = newTemp(Ity_I64);
4507 assign(t3, getIRegE(sz,pfx,modrm));
4508 t2 = newTemp(Ity_I64);
4509 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4510 putIReg64(R_RSP, mkexpr(t2));
4511 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
4512 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
4513 jmp_treg(dres, Ijk_Call, t3);
4514 vassert(dres->whatNext == Dis_StopHere);
4515 showSz = False;
4516 break;
4517 case 4: /* jmp Ev */
4518 /* Ignore any sz value and operate as if sz==8. */
4519 if (!(sz == 4 || sz == 8)) goto unhandledR;
4520 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4521 sz = 8;
4522 t3 = newTemp(Ity_I64);
4523 assign(t3, getIRegE(sz,pfx,modrm));
4524 jmp_treg(dres, Ijk_Boring, t3);
4525 vassert(dres->whatNext == Dis_StopHere);
4526 showSz = False;
4527 break;
4528 case 6: /* PUSH Ev */
4529 /* There is no encoding for 32-bit operand size; hence ... */
4530 if (sz == 4) sz = 8;
4531 if (sz == 8 || sz == 2) {
4532 ty = szToITy(sz); /* redo it, since sz might have changed */
4533 t3 = newTemp(ty);
4534 assign(t3, getIRegE(sz,pfx,modrm));
4535 t2 = newTemp(Ity_I64);
4536 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4537 putIReg64(R_RSP, mkexpr(t2) );
4538 storeLE( mkexpr(t2), mkexpr(t3) );
4539 break;
4540 } else {
4541 goto unhandledR; /* awaiting test case */
4543 default:
4544 unhandledR:
4545 *decode_OK = False;
4546 return delta;
4548 delta++;
4549 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4550 showSz ? nameISize(sz) : ' ',
4551 nameIRegE(sz, pfx, modrm));
4552 } else {
4553 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
4554 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4555 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4556 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4557 validF2orF3 = True;
4558 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4559 && (haveF2(pfx) && !haveF3(pfx))) {
4560 validF2orF3 = True;
4562 if (!validF2orF3) goto unhandledM;
4563 /* */
4564 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
4565 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4566 && gregLO3ofRM(modrm) != 6) {
4567 assign(t1, loadLE(ty,mkexpr(addr)));
4569 switch (gregLO3ofRM(modrm)) {
4570 case 0: /* INC */
4571 t2 = newTemp(ty);
4572 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4573 mkexpr(t1), mkU(ty,1)));
4574 if (haveLOCK(pfx)) {
4575 casLE( mkexpr(addr),
4576 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4577 } else {
4578 storeLE(mkexpr(addr),mkexpr(t2));
4580 setFlags_INC_DEC( True, t2, ty );
4581 break;
4582 case 1: /* DEC */
4583 t2 = newTemp(ty);
4584 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4585 mkexpr(t1), mkU(ty,1)));
4586 if (haveLOCK(pfx)) {
4587 casLE( mkexpr(addr),
4588 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4589 } else {
4590 storeLE(mkexpr(addr),mkexpr(t2));
4592 setFlags_INC_DEC( False, t2, ty );
4593 break;
4594 case 2: /* call Ev */
4595 /* Ignore any sz value and operate as if sz==8. */
4596 if (!(sz == 4 || sz == 8)) goto unhandledM;
4597 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4598 sz = 8;
4599 t3 = newTemp(Ity_I64);
4600 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4601 t2 = newTemp(Ity_I64);
4602 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4603 putIReg64(R_RSP, mkexpr(t2));
4604 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
4605 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
4606 jmp_treg(dres, Ijk_Call, t3);
4607 vassert(dres->whatNext == Dis_StopHere);
4608 showSz = False;
4609 break;
4610 case 4: /* JMP Ev */
4611 /* Ignore any sz value and operate as if sz==8. */
4612 if (!(sz == 4 || sz == 8)) goto unhandledM;
4613 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
4614 sz = 8;
4615 t3 = newTemp(Ity_I64);
4616 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4617 jmp_treg(dres, Ijk_Boring, t3);
4618 vassert(dres->whatNext == Dis_StopHere);
4619 showSz = False;
4620 break;
4621 case 6: /* PUSH Ev */
4622 /* There is no encoding for 32-bit operand size; hence ... */
4623 if (sz == 4) sz = 8;
4624 if (sz == 8 || sz == 2) {
4625 ty = szToITy(sz); /* redo it, since sz might have changed */
4626 t3 = newTemp(ty);
4627 assign(t3, loadLE(ty,mkexpr(addr)));
4628 t2 = newTemp(Ity_I64);
4629 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4630 putIReg64(R_RSP, mkexpr(t2) );
4631 storeLE( mkexpr(t2), mkexpr(t3) );
4632 break;
4633 } else {
4634 goto unhandledM; /* awaiting test case */
4636 default:
4637 unhandledM:
4638 *decode_OK = False;
4639 return delta;
4641 delta += len;
4642 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
4643 showSz ? nameISize(sz) : ' ',
4644 dis_buf);
4646 return delta;
4650 /*------------------------------------------------------------*/
4651 /*--- Disassembling string ops (including REP prefixes) ---*/
4652 /*------------------------------------------------------------*/
4654 /* Code shared by all the string ops */
4655 static
4656 void dis_string_op_increment ( Int sz, IRTemp t_inc )
4658 UChar logSz;
4659 if (sz == 8 || sz == 4 || sz == 2) {
4660 logSz = 1;
4661 if (sz == 4) logSz = 2;
4662 if (sz == 8) logSz = 3;
4663 assign( t_inc,
4664 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4665 mkU8(logSz) ) );
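      /* e.g. with sz == 4 and DF set, guest DFLAG holds -1, so t_inc
         becomes -1 << 2 == -4 and the string ops below step RDI/RSI
         downwards by 4 bytes. */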
4666 } else {
4667 assign( t_inc,
4668 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
4672 static
4673 void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
4674 Int sz, const HChar* name, Prefix pfx )
4676 IRTemp t_inc = newTemp(Ity_I64);
4677 /* Really we ought to inspect the override prefixes, but we don't.
4678 The following assertion catches any resulting silliness. */
4679 vassert(pfx == clearSegBits(pfx));
4680 dis_string_op_increment(sz, t_inc);
4681 dis_OP( sz, t_inc, pfx );
4682 DIP("%s%c\n", name, nameISize(sz));
4685 static
4686 void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
4688 IRType ty = szToITy(sz);
4689 IRTemp td = newTemp(Ity_I64); /* RDI */
4690 IRTemp ts = newTemp(Ity_I64); /* RSI */
4691 IRExpr *incd, *incs;
4693 if (haveASO(pfx)) {
4694 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4695 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4696 } else {
4697 assign( td, getIReg64(R_RDI) );
4698 assign( ts, getIReg64(R_RSI) );
4701 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4703 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4704 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4705 if (haveASO(pfx)) {
4706 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4707 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4709 putIReg64( R_RDI, incd );
4710 putIReg64( R_RSI, incs );
4713 static
4714 void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
4716 IRType ty = szToITy(sz);
4717 IRTemp ts = newTemp(Ity_I64); /* RSI */
4718 IRExpr *incs;
4720 if (haveASO(pfx))
4721 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4722 else
4723 assign( ts, getIReg64(R_RSI) );
4725 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4727 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4728 if (haveASO(pfx))
4729 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4730 putIReg64( R_RSI, incs );
4733 static
4734 void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
4736 IRType ty = szToITy(sz);
4737 IRTemp ta = newTemp(ty); /* rAX */
4738 IRTemp td = newTemp(Ity_I64); /* RDI */
4739 IRExpr *incd;
4741 assign( ta, getIRegRAX(sz) );
4743 if (haveASO(pfx))
4744 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4745 else
4746 assign( td, getIReg64(R_RDI) );
4748 storeLE( mkexpr(td), mkexpr(ta) );
4750 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4751 if (haveASO(pfx))
4752 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4753 putIReg64( R_RDI, incd );
4756 static
4757 void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
4759 IRType ty = szToITy(sz);
4760 IRTemp tdv = newTemp(ty); /* (RDI) */
4761 IRTemp tsv = newTemp(ty); /* (RSI) */
4762 IRTemp td = newTemp(Ity_I64); /* RDI */
4763 IRTemp ts = newTemp(Ity_I64); /* RSI */
4764 IRExpr *incd, *incs;
4766 if (haveASO(pfx)) {
4767 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4768 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4769 } else {
4770 assign( td, getIReg64(R_RDI) );
4771 assign( ts, getIReg64(R_RSI) );
4774 assign( tdv, loadLE(ty,mkexpr(td)) );
4776 assign( tsv, loadLE(ty,mkexpr(ts)) );
4778 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4780 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4781 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4782 if (haveASO(pfx)) {
4783 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4784 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4786 putIReg64( R_RDI, incd );
4787 putIReg64( R_RSI, incs );
4790 static
4791 void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
4793 IRType ty = szToITy(sz);
4794 IRTemp ta = newTemp(ty); /* rAX */
4795 IRTemp td = newTemp(Ity_I64); /* RDI */
4796 IRTemp tdv = newTemp(ty); /* (RDI) */
4797 IRExpr *incd;
4799 assign( ta, getIRegRAX(sz) );
4801 if (haveASO(pfx))
4802 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4803 else
4804 assign( td, getIReg64(R_RDI) );
4806 assign( tdv, loadLE(ty,mkexpr(td)) );
4808 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4810 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4811 if (haveASO(pfx))
4812 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4813 putIReg64( R_RDI, incd );
4817 /* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4818 the insn is the last one in the basic block, and so emit a jump to
4819 the next insn, rather than just falling through. */
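/* So, for example, 'rep movsd' becomes: exit to the next insn if the count
   register (RCX, or ECX with an address-size override) is zero; decrement
   it; do one MOVSD step; then jump back to the start of the insn --
   unconditionally for plain REP, or only while the REPE/REPNE condition
   holds. */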
4820 static
4821 void dis_REP_op ( /*MOD*/DisResult* dres,
4822 AMD64Condcode cond,
4823 void (*dis_OP)(Int, IRTemp, Prefix),
4824 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
4825 Prefix pfx )
4827 IRTemp t_inc = newTemp(Ity_I64);
4828 IRTemp tc;
4829 IRExpr* cmp;
4831 /* Really we ought to inspect the override prefixes, but we don't.
4832 The following assertion catches any resulting silliness. */
4833 vassert(pfx == clearSegBits(pfx));
4835 if (haveASO(pfx)) {
4836 tc = newTemp(Ity_I32); /* ECX */
4837 assign( tc, getIReg32(R_RCX) );
4838 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4839 } else {
4840 tc = newTemp(Ity_I64); /* RCX */
4841 assign( tc, getIReg64(R_RCX) );
4842 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4845 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4846 IRConst_U64(rip_next), OFFB_RIP ) );
4848 if (haveASO(pfx))
4849 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4850 else
4851 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
4853 dis_string_op_increment(sz, t_inc);
4854 dis_OP (sz, t_inc, pfx);
4856 if (cond == AMD64CondAlways) {
4857 jmp_lit(dres, Ijk_Boring, rip);
4858 vassert(dres->whatNext == Dis_StopHere);
4859 } else {
4860 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4861 Ijk_Boring,
4862 IRConst_U64(rip),
4863 OFFB_RIP ) );
4864 jmp_lit(dres, Ijk_Boring, rip_next);
4865 vassert(dres->whatNext == Dis_StopHere);
4867 DIP("%s%c\n", name, nameISize(sz));
4871 /*------------------------------------------------------------*/
4872 /*--- Arithmetic, etc. ---*/
4873 /*------------------------------------------------------------*/
4875 /* IMUL E, G. Supplied eip points to the modR/M byte. */
4876 static
4877 ULong dis_mul_E_G ( const VexAbiInfo* vbi,
4878 Prefix pfx,
4879 Int size,
4880 Long delta0 )
4882 Int alen;
4883 HChar dis_buf[50];
4884 UChar rm = getUChar(delta0);
4885 IRType ty = szToITy(size);
4886 IRTemp te = newTemp(ty);
4887 IRTemp tg = newTemp(ty);
4888 IRTemp resLo = newTemp(ty);
4890 assign( tg, getIRegG(size, pfx, rm) );
4891 if (epartIsReg(rm)) {
4892 assign( te, getIRegE(size, pfx, rm) );
4893 } else {
4894 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
4895 assign( te, loadLE(ty,mkexpr(addr)) );
4898 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4900 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4902 putIRegG(size, pfx, rm, mkexpr(resLo) );
4904 if (epartIsReg(rm)) {
4905 DIP("imul%c %s, %s\n", nameISize(size),
4906 nameIRegE(size,pfx,rm),
4907 nameIRegG(size,pfx,rm));
4908 return 1+delta0;
4909 } else {
4910 DIP("imul%c %s, %s\n", nameISize(size),
4911 dis_buf,
4912 nameIRegG(size,pfx,rm));
4913 return alen+delta0;
4918 /* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4919 static
4920 ULong dis_imul_I_E_G ( const VexAbiInfo* vbi,
4921 Prefix pfx,
4922 Int size,
4923 Long delta,
4924 Int litsize )
4926 Long d64;
4927 Int alen;
4928 HChar dis_buf[50];
4929 UChar rm = getUChar(delta);
4930 IRType ty = szToITy(size);
4931 IRTemp te = newTemp(ty);
4932 IRTemp tl = newTemp(ty);
4933 IRTemp resLo = newTemp(ty);
4935 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
4937 if (epartIsReg(rm)) {
4938 assign(te, getIRegE(size, pfx, rm));
4939 delta++;
4940 } else {
4941 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
4942 imin(4,litsize) );
4943 assign(te, loadLE(ty, mkexpr(addr)));
4944 delta += alen;
4946 d64 = getSDisp(imin(4,litsize),delta);
4947 delta += imin(4,litsize);
4949 d64 &= mkSizeMask(size);
4950 assign(tl, mkU(ty,d64));
4952 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4954 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4956 putIRegG(size, pfx, rm, mkexpr(resLo));
4958 DIP("imul%c $%lld, %s, %s\n",
4959 nameISize(size), d64,
4960 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4961 nameIRegG(size,pfx,rm) );
4962 return delta;
4966 /* Generate an IR sequence to do a popcount operation on the supplied
4967 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4968 Ity_I16, Ity_I32 or Ity_I64 only. */
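/* The count uses the usual divide-and-conquer scheme: at step i, adjacent
   2^i-bit fields are replaced by the sum of their two halves, using the
   masks 0x5555..., 0x3333..., 0x0F0F..., etc.  After the last step the
   whole word holds the population count.  E.g. for a 16-bit input of 0xFFFF
   the intermediate values are 0xAAAA, 0x4444, 0x0808 and finally
   0x0010 (== 16). */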
4969 static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4971 Int i;
4972 if (ty == Ity_I16) {
4973 IRTemp old = IRTemp_INVALID;
4974 IRTemp nyu = IRTemp_INVALID;
4975 IRTemp mask[4], shift[4];
4976 for (i = 0; i < 4; i++) {
4977 mask[i] = newTemp(ty);
4978 shift[i] = 1 << i;
4980 assign(mask[0], mkU16(0x5555));
4981 assign(mask[1], mkU16(0x3333));
4982 assign(mask[2], mkU16(0x0F0F));
4983 assign(mask[3], mkU16(0x00FF));
4984 old = src;
4985 for (i = 0; i < 4; i++) {
4986 nyu = newTemp(ty);
4987 assign(nyu,
4988 binop(Iop_Add16,
4989 binop(Iop_And16,
4990 mkexpr(old),
4991 mkexpr(mask[i])),
4992 binop(Iop_And16,
4993 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4994 mkexpr(mask[i]))));
4995 old = nyu;
4997 return nyu;
4999 if (ty == Ity_I32) {
5000 IRTemp old = IRTemp_INVALID;
5001 IRTemp nyu = IRTemp_INVALID;
5002 IRTemp mask[5], shift[5];
5003 for (i = 0; i < 5; i++) {
5004 mask[i] = newTemp(ty);
5005 shift[i] = 1 << i;
5007 assign(mask[0], mkU32(0x55555555));
5008 assign(mask[1], mkU32(0x33333333));
5009 assign(mask[2], mkU32(0x0F0F0F0F));
5010 assign(mask[3], mkU32(0x00FF00FF));
5011 assign(mask[4], mkU32(0x0000FFFF));
5012 old = src;
5013 for (i = 0; i < 5; i++) {
5014 nyu = newTemp(ty);
5015 assign(nyu,
5016 binop(Iop_Add32,
5017 binop(Iop_And32,
5018 mkexpr(old),
5019 mkexpr(mask[i])),
5020 binop(Iop_And32,
5021 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
5022 mkexpr(mask[i]))));
5023 old = nyu;
5025 return nyu;
5027 if (ty == Ity_I64) {
5028 IRTemp old = IRTemp_INVALID;
5029 IRTemp nyu = IRTemp_INVALID;
5030 IRTemp mask[6], shift[6];
5031 for (i = 0; i < 6; i++) {
5032 mask[i] = newTemp(ty);
5033 shift[i] = 1 << i;
5035 assign(mask[0], mkU64(0x5555555555555555ULL));
5036 assign(mask[1], mkU64(0x3333333333333333ULL));
5037 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
5038 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
5039 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
5040 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
5041 old = src;
5042 for (i = 0; i < 6; i++) {
5043 nyu = newTemp(ty);
5044 assign(nyu,
5045 binop(Iop_Add64,
5046 binop(Iop_And64,
5047 mkexpr(old),
5048 mkexpr(mask[i])),
5049 binop(Iop_And64,
5050 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
5051 mkexpr(mask[i]))));
5052 old = nyu;
5054 return nyu;
5056 /*NOTREACHED*/
5057 vassert(0);
5061 /* Generate an IR sequence to do a count-leading-zeroes operation on
5062 the supplied IRTemp, and return a new IRTemp holding the result.
5063 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5064 the argument is zero, return the number of bits in the word (the
5065 natural semantics). */
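/* The value is first zero-widened to 64 bits and then shifted left so that
   it is left-aligned in the 64-bit word; Clz64 then counts exactly the
   leading zeroes of the original width.  E.g. the 16-bit value 0x00F0 is
   shifted left by 48, and Clz64 of the result is 8, as expected. */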
5066 static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
5068 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5070 IRTemp src64 = newTemp(Ity_I64);
5071 assign(src64, widenUto64( mkexpr(src) ));
5073 IRTemp src64x = newTemp(Ity_I64);
5074 assign(src64x,
5075 binop(Iop_Shl64, mkexpr(src64),
5076 mkU8(64 - 8 * sizeofIRType(ty))));
5078 // Clz64 has undefined semantics when its input is zero, so
5079 // special-case around that.
5080 IRTemp res64 = newTemp(Ity_I64);
5081 assign(res64,
5082 IRExpr_ITE(
5083 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
5084 mkU64(8 * sizeofIRType(ty)),
5085 unop(Iop_Clz64, mkexpr(src64x))
5088 IRTemp res = newTemp(ty);
5089 assign(res, narrowTo(ty, mkexpr(res64)));
5090 return res;
5094 /* Generate an IR sequence to do a count-trailing-zeroes operation on
5095 the supplied IRTemp, and return a new IRTemp holding the result.
5096 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
5097 the argument is zero, return the number of bits in the word (the
5098 natural semantics). */
5099 static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
5101 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
5103 IRTemp src64 = newTemp(Ity_I64);
5104 assign(src64, widenUto64( mkexpr(src) ));
5106 // Ctz64 has undefined semantics when its input is zero, so
5107 // special-case around that.
5108 IRTemp res64 = newTemp(Ity_I64);
5109 assign(res64,
5110 IRExpr_ITE(
5111 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
5112 mkU64(8 * sizeofIRType(ty)),
5113 unop(Iop_Ctz64, mkexpr(src64))
5116 IRTemp res = newTemp(ty);
5117 assign(res, narrowTo(ty, mkexpr(res64)));
5118 return res;
5122 /*------------------------------------------------------------*/
5123 /*--- ---*/
5124 /*--- x87 FLOATING POINT INSTRUCTIONS ---*/
5125 /*--- ---*/
5126 /*------------------------------------------------------------*/
5128 /* --- Helper functions for dealing with the register stack. --- */
5130 /* --- Set the emulation-warning pseudo-register. --- */
5132 static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
5134 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5135 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
5138 /* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
5140 static IRExpr* mkQNaN64 ( void )
5142 /* QNaN is 0 2047 1 0(51times)
5143 == 0b 11111111111b 1 0(51times)
5144 == 0x7FF8 0000 0000 0000
5146 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
5149 /* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
5151 static IRExpr* get_ftop ( void )
5153 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
5156 static void put_ftop ( IRExpr* e )
5158 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5159 stmt( IRStmt_Put( OFFB_FTOP, e ) );
5162 /* --------- Get/put the C3210 bits. --------- */
5164 static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
5166 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
5169 static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
5171 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
5172 stmt( IRStmt_Put( OFFB_FC3210, e ) );
5175 /* --------- Get/put the FPU rounding mode. --------- */
5176 static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
5178 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
5181 static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
5183 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
5184 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
5188 /* --------- Synthesise a 2-bit FPU rounding mode. --------- */
5189 /* Produces a value in 0 .. 3, which is encoded as per the type
5190 IRRoundingMode. Since the guest_FPROUND value is also encoded as
5191 per IRRoundingMode, we merely need to get it and mask it for
5192 safety.
5194 static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
5196 return binop( Iop_And32, get_fpround(), mkU32(3) );
5199 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
5201 return mkU32(Irrm_NEAREST);
5205 /* --------- Get/set FP register tag bytes. --------- */
5207 /* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
5209 static void put_ST_TAG ( Int i, IRExpr* value )
5211 IRRegArray* descr;
5212 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5213 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5214 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5217 /* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5218 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5220 static IRExpr* get_ST_TAG ( Int i )
5222 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
5223 return IRExpr_GetI( descr, get_ftop(), i );
5227 /* --------- Get/set FP registers. --------- */
5229 /* Given i, and some expression e, emit 'ST(i) = e' and set the
5230 register's tag to indicate the register is full. The previous
5231 state of the register is not checked. */
5233 static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5235 IRRegArray* descr;
5236 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5237 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5238 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
5239 /* Mark the register as in-use. */
5240 put_ST_TAG(i, mkU8(1));
5243 /* Given i, and some expression e, emit
5244 ST(i) = is_full(i) ? NaN : e
5245 and set the tag accordingly.
5248 static void put_ST ( Int i, IRExpr* value )
5250 put_ST_UNCHECKED(
5252 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5253 /* non-0 means full */
5254 mkQNaN64(),
5255 /* 0 means empty */
5256 value
5262 /* Given i, generate an expression yielding 'ST(i)'. */
5264 static IRExpr* get_ST_UNCHECKED ( Int i )
5266 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
5267 return IRExpr_GetI( descr, get_ftop(), i );
5271 /* Given i, generate an expression yielding
5272 is_full(i) ? ST(i) : NaN
5275 static IRExpr* get_ST ( Int i )
5277 return
5278 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5279 /* non-0 means full */
5280 get_ST_UNCHECKED(i),
5281 /* 0 means empty */
5282 mkQNaN64());
5286 /* Given i, and some expression e, and a condition cond, generate IR
5287 which has the same effect as put_ST(i,e) when cond is true and has
5288 no effect when cond is false. Given the lack of proper
5289 if-then-else in the IR, this is pretty tricky.
5292 static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5294 // new_tag = if cond then FULL else old_tag
5295 // new_val = if cond then (if old_tag==FULL then NaN else val)
5296 // else old_val
5298 IRTemp old_tag = newTemp(Ity_I8);
5299 assign(old_tag, get_ST_TAG(i));
5300 IRTemp new_tag = newTemp(Ity_I8);
5301 assign(new_tag,
5302 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5304 IRTemp old_val = newTemp(Ity_F64);
5305 assign(old_val, get_ST_UNCHECKED(i));
5306 IRTemp new_val = newTemp(Ity_F64);
5307 assign(new_val,
5308 IRExpr_ITE(mkexpr(cond),
5309 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5310 /* non-0 means full */
5311 mkQNaN64(),
5312 /* 0 means empty */
5313 value),
5314 mkexpr(old_val)));
5316 put_ST_UNCHECKED(i, mkexpr(new_val));
5317 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5318 // now set it to new_tag instead.
5319 put_ST_TAG(i, mkexpr(new_tag));
5322 /* Adjust FTOP downwards by one register. */
5324 static void fp_push ( void )
5326 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5329 /* Adjust FTOP downwards by one register when COND is 1:I1. Else
5330 don't change it. */
5332 static void maybe_fp_push ( IRTemp cond )
5334 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5337 /* Adjust FTOP upwards by one register, and mark the vacated register
5338 as empty. */
5340 static void fp_pop ( void )
5342 put_ST_TAG(0, mkU8(0));
5343 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5346 /* Set the C2 bit of the FPU status register to e[0]. Assumes that
5347 e[31:1] == 0.
5349 static void set_C2 ( IRExpr* e )
5351 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5352 put_C3210( binop(Iop_Or64,
5353 cleared,
5354 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5357 /* Generate code to check that abs(d64) < 2^63 and is finite. This is
5358 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5359 test is simple, but the derivation of it is not so simple.
5361 The exponent field for an IEEE754 double is 11 bits. That means it
5362 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5363 the number is either a NaN or an Infinity and so is not finite.
5364 Furthermore, a finite value of exactly 2^63 is the smallest value
5365 that has exponent value 0x43E. Hence, what we need to do is
5366 extract the exponent, ignoring the sign bit and mantissa, and check
5367 it is < 0x43E, or <= 0x43D.
5369 To make this easily applicable to 32- and 64-bit targets, a
5370 roundabout approach is used. First the number is converted to I64,
5371 then the top 32 bits are taken. Shifting them right by 20 bits
5372 places the sign bit and exponent in the bottom 12 bits. Anding
5373 with 0x7FF gets rid of the sign bit, leaving just the exponent
5374 available for comparison.
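      For example, 2^63 itself has biased exponent 1023 + 63 == 0x43E, so it
      fails the '<= 0x43D' test, as do all larger values and, with exponent
      0x7FF, NaNs and infinities.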
5376 static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5378 IRTemp i64 = newTemp(Ity_I64);
5379 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5380 IRTemp exponent = newTemp(Ity_I32);
5381 assign(exponent,
5382 binop(Iop_And32,
5383 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5384 mkU32(0x7FF)));
5385 IRTemp in_range_and_finite = newTemp(Ity_I1);
5386 assign(in_range_and_finite,
5387 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5388 return in_range_and_finite;
5391 /* Invent a plausible-looking FPU status word value:
5392 ((ftop & 7) << 11) | (c3210 & 0x4700)
5394 static IRExpr* get_FPU_sw ( void )
5396 return
5397 unop(Iop_32to16,
5398 binop(Iop_Or32,
5399 binop(Iop_Shl32,
5400 binop(Iop_And32, get_ftop(), mkU32(7)),
5401 mkU8(11)),
5402 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5403 mkU32(0x4700))
5408 /* Generate a dirty helper call that initialises the x87 state a la
5409 FINIT. If |guard| is NULL, it is done unconditionally. Otherwise
5410 |guard| is used as a guarding condition.
5412 static void gen_FINIT_SEQUENCE ( IRExpr* guard )
5414 /* Uses dirty helper:
5415 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ) */
5416 IRDirty* d = unsafeIRDirty_0_N (
5417 0/*regparms*/,
5418 "amd64g_dirtyhelper_FINIT",
5419 &amd64g_dirtyhelper_FINIT,
5420 mkIRExprVec_1( IRExpr_GSPTR() )
5423 /* declare we're writing guest state */
5424 d->nFxState = 5;
5425 vex_bzero(&d->fxState, sizeof(d->fxState));
5427 d->fxState[0].fx = Ifx_Write;
5428 d->fxState[0].offset = OFFB_FTOP;
5429 d->fxState[0].size = sizeof(UInt);
5431 d->fxState[1].fx = Ifx_Write;
5432 d->fxState[1].offset = OFFB_FPREGS;
5433 d->fxState[1].size = 8 * sizeof(ULong);
5435 d->fxState[2].fx = Ifx_Write;
5436 d->fxState[2].offset = OFFB_FPTAGS;
5437 d->fxState[2].size = 8 * sizeof(UChar);
5439 d->fxState[3].fx = Ifx_Write;
5440 d->fxState[3].offset = OFFB_FPROUND;
5441 d->fxState[3].size = sizeof(ULong);
5443 d->fxState[4].fx = Ifx_Write;
5444 d->fxState[4].offset = OFFB_FC3210;
5445 d->fxState[4].size = sizeof(ULong);
5447 if (guard)
5448 d->guard = guard;
5450 stmt( IRStmt_Dirty(d) );
5454 /* ------------------------------------------------------- */
5455 /* Given all that stack-mangling junk, we can now go ahead
5456 and describe FP instructions.
5459 /* ST(0) = ST(0) `op` mem64/32(addr)
5460 Need to check ST(0)'s tag on read, but not on write.
5462 static
5463 void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5464 IROp op, Bool dbl )
5466 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5467 if (dbl) {
5468 put_ST_UNCHECKED(0,
5469 triop( op,
5470 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5471 get_ST(0),
5472 loadLE(Ity_F64,mkexpr(addr))
5474 } else {
5475 put_ST_UNCHECKED(0,
5476 triop( op,
5477 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5478 get_ST(0),
5479 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5485 /* ST(0) = mem64/32(addr) `op` ST(0)
5486 Need to check ST(0)'s tag on read, but not on write.
5488 static
5489 void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
5490 IROp op, Bool dbl )
5492 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5493 if (dbl) {
5494 put_ST_UNCHECKED(0,
5495 triop( op,
5496 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5497 loadLE(Ity_F64,mkexpr(addr)),
5498 get_ST(0)
5500 } else {
5501 put_ST_UNCHECKED(0,
5502 triop( op,
5503 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5504 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5505 get_ST(0)
5511 /* ST(dst) = ST(dst) `op` ST(src).
5512 Check dst and src tags when reading but not on write.
5514 static
5515 void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5516 Bool pop_after )
5518 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5519 put_ST_UNCHECKED(
5520 st_dst,
5521 triop( op,
5522 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5523 get_ST(st_dst),
5524 get_ST(st_src) )
5526 if (pop_after)
5527 fp_pop();
5530 /* ST(dst) = ST(src) `op` ST(dst).
5531 Check dst and src tags when reading but not on write.
5533 static
5534 void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
5535 Bool pop_after )
5537 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5538 put_ST_UNCHECKED(
5539 st_dst,
5540 triop( op,
5541 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5542 get_ST(st_src),
5543 get_ST(st_dst) )
5545 if (pop_after)
5546 fp_pop();
5549 /* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5550 static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5552 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
5553 /* This is a bit of a hack (and isn't really right). It sets
5554 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5555 documentation implies A and S are unchanged.
5557 /* It's also fishy in that it is used both for COMIP and
5558 UCOMIP, and they aren't the same (although similar). */
5559 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5560 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5561 stmt( IRStmt_Put(
5562 OFFB_CC_DEP1,
5563 binop( Iop_And64,
5564 unop( Iop_32Uto64,
5565 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5566 mkU64(0x45)
5567 )));
5568 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
5569 if (pop_after)
5570 fp_pop();
5574 /* returns
5575 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
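      The signed range check is done with one unsigned comparison: adding
      32768 maps the in-range values [-32768 .. 32767] onto [0 .. 65535], so
      'e32 + 32768 <u 65536' holds exactly when e32 fits in 16 signed bits.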
5577 static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5579 IRTemp t32 = newTemp(Ity_I32);
5580 assign( t32, e32 );
5581 return
5582 IRExpr_ITE(
5583 binop(Iop_CmpLT64U,
5584 unop(Iop_32Uto64,
5585 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5586 mkU64(65536)),
5587 unop(Iop_32to16, mkexpr(t32)),
5588 mkU16( 0x8000 ) );
5592 static
5593 ULong dis_FPU ( /*OUT*/Bool* decode_ok,
5594 const VexAbiInfo* vbi, Prefix pfx, Long delta )
5596 Int len;
5597 UInt r_src, r_dst;
5598 HChar dis_buf[50];
5599 IRTemp t1, t2;
5601 /* On entry, delta points at the second byte of the insn (the modrm
5602 byte).*/
5603 UChar first_opcode = getUChar(delta-1);
5604 UChar modrm = getUChar(delta+0);
5606 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5608 if (first_opcode == 0xD8) {
5609 if (modrm < 0xC0) {
5611 /* bits 5,4,3 are an opcode extension, and the modRM also
5612 specifies an address. */
5613 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5614 delta += len;
5616 switch (gregLO3ofRM(modrm)) {
5618 case 0: /* FADD single-real */
5619 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5620 break;
5622 case 1: /* FMUL single-real */
5623 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5624 break;
5626 case 2: /* FCOM single-real */
5627 DIP("fcoms %s\n", dis_buf);
5628 /* This forces C1 to zero, which isn't right. */
5629 /* The AMD documentation suggests that forcing C1 to
5630 zero is correct (Eliot Moss) */
5631 put_C3210(
5632 unop( Iop_32Uto64,
5633 binop( Iop_And32,
5634 binop(Iop_Shl32,
5635 binop(Iop_CmpF64,
5636 get_ST(0),
5637 unop(Iop_F32toF64,
5638 loadLE(Ity_F32,mkexpr(addr)))),
5639 mkU8(8)),
5640 mkU32(0x4500)
5641 )));
5642 break;
5644 case 3: /* FCOMP single-real */
5645 /* The AMD documentation suggests that forcing C1 to
5646 zero is correct (Eliot Moss) */
5647 DIP("fcomps %s\n", dis_buf);
5648 /* This forces C1 to zero, which isn't right. */
5649 put_C3210(
5650 unop( Iop_32Uto64,
5651 binop( Iop_And32,
5652 binop(Iop_Shl32,
5653 binop(Iop_CmpF64,
5654 get_ST(0),
5655 unop(Iop_F32toF64,
5656 loadLE(Ity_F32,mkexpr(addr)))),
5657 mkU8(8)),
5658 mkU32(0x4500)
5659 )));
5660 fp_pop();
5661 break;
5663 case 4: /* FSUB single-real */
5664 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5665 break;
5667 case 5: /* FSUBR single-real */
5668 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5669 break;
5671 case 6: /* FDIV single-real */
5672 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5673 break;
5675 case 7: /* FDIVR single-real */
5676 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5677 break;
5679 default:
5680 vex_printf("unhandled opc_aux = 0x%2x\n",
5681 (UInt)gregLO3ofRM(modrm));
5682 vex_printf("first_opcode == 0xD8\n");
5683 goto decode_fail;
5685 } else {
5686 delta++;
5687 switch (modrm) {
5689 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5690 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5691 break;
5693 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5694 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5695 break;
5697 /* Dunno if this is right */
5698 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5699 r_dst = (UInt)modrm - 0xD0;
5700 DIP("fcom %%st(0),%%st(%u)\n", r_dst);
5701 /* This forces C1 to zero, which isn't right. */
5702 put_C3210(
5703 unop(Iop_32Uto64,
5704 binop( Iop_And32,
5705 binop(Iop_Shl32,
5706 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5707 mkU8(8)),
5708 mkU32(0x4500)
5709 )));
5710 break;
5712 /* Dunno if this is right */
5713 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5714 r_dst = (UInt)modrm - 0xD8;
5715 DIP("fcomp %%st(0),%%st(%u)\n", r_dst);
5716 /* This forces C1 to zero, which isn't right. */
5717 put_C3210(
5718 unop(Iop_32Uto64,
5719 binop( Iop_And32,
5720 binop(Iop_Shl32,
5721 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5722 mkU8(8)),
5723 mkU32(0x4500)
5724 )));
5725 fp_pop();
5726 break;
5728 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5729 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5730 break;
5732 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5733 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5734 break;
5736 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5737 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5738 break;
5740 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5741 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5742 break;
5744 default:
5745 goto decode_fail;
5750 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
5751 else
5752 if (first_opcode == 0xD9) {
5753 if (modrm < 0xC0) {
5755 /* bits 5,4,3 are an opcode extension, and the modRM also
5756 specifies an address. */
5757 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
5758 delta += len;
5760 switch (gregLO3ofRM(modrm)) {
5762 case 0: /* FLD single-real */
5763 DIP("flds %s\n", dis_buf);
5764 fp_push();
5765 put_ST(0, unop(Iop_F32toF64,
5766 loadLE(Ity_F32, mkexpr(addr))));
5767 break;
5769 case 2: /* FST single-real */
5770 DIP("fsts %s\n", dis_buf);
5771 storeLE(mkexpr(addr),
5772 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5773 break;
5775 case 3: /* FSTP single-real */
5776 DIP("fstps %s\n", dis_buf);
5777 storeLE(mkexpr(addr),
5778 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5779 fp_pop();
5780 break;
5782 case 4: { /* FLDENV m28 */
5783 /* Uses dirty helper:
5784                VexEmNote amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ) */
5785 IRTemp ew = newTemp(Ity_I32);
5786 IRTemp w64 = newTemp(Ity_I64);
5787 IRDirty* d = unsafeIRDirty_0_N (
5788 0/*regparms*/,
5789 "amd64g_dirtyhelper_FLDENV",
5790 &amd64g_dirtyhelper_FLDENV,
5791 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5793 d->tmp = w64;
5794 /* declare we're reading memory */
5795 d->mFx = Ifx_Read;
5796 d->mAddr = mkexpr(addr);
5797 d->mSize = 28;
5799 /* declare we're writing guest state */
5800 d->nFxState = 4;
5801 vex_bzero(&d->fxState, sizeof(d->fxState));
5803 d->fxState[0].fx = Ifx_Write;
5804 d->fxState[0].offset = OFFB_FTOP;
5805 d->fxState[0].size = sizeof(UInt);
5807 d->fxState[1].fx = Ifx_Write;
5808 d->fxState[1].offset = OFFB_FPTAGS;
5809 d->fxState[1].size = 8 * sizeof(UChar);
5811 d->fxState[2].fx = Ifx_Write;
5812 d->fxState[2].offset = OFFB_FPROUND;
5813 d->fxState[2].size = sizeof(ULong);
5815 d->fxState[3].fx = Ifx_Write;
5816 d->fxState[3].offset = OFFB_FC3210;
5817 d->fxState[3].size = sizeof(ULong);
5819 stmt( IRStmt_Dirty(d) );
5821 /* ew contains any emulation warning we may need to
5822 issue. If needed, side-exit to the next insn,
5823 reporting the warning, so that Valgrind's dispatcher
5824 sees the warning. */
5825 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
5826 put_emwarn( mkexpr(ew) );
5827 stmt(
5828 IRStmt_Exit(
5829 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5830 Ijk_EmWarn,
5831 IRConst_U64( guest_RIP_bbstart+delta ),
5832 OFFB_RIP
5836 DIP("fldenv %s\n", dis_buf);
5837 break;
5840 case 5: {/* FLDCW */
5841 /* The only thing we observe in the control word is the
5842 rounding mode. Therefore, pass the 16-bit value
5843 (x87 native-format control word) to a clean helper,
5844 getting back a 64-bit value, the lower half of which
5845 is the FPROUND value to store, and the upper half of
5846 which is the emulation-warning token which may be
5847 generated.
5848             */
5849             /* ULong amd64g_check_fldcw ( ULong ); */
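            /* (Illustrative sketch, not part of the original: broadly
               speaking the helper maps control-word bits 11:10 -- the
               x87 RC field: 00 nearest, 01 down, 10 up, 11 toward zero
               -- straight onto the IRRoundingMode encoding, roughly

                  fpround = (cw >> 10) & 3;   // 3 == Irrm_ZERO, etc.

               and uses the upper half of the returned ULong to flag
               settings the emulation cannot honour, such as unmasked
               FP exceptions.) */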
5850 IRTemp t64 = newTemp(Ity_I64);
5851 IRTemp ew = newTemp(Ity_I32);
5852 DIP("fldcw %s\n", dis_buf);
5853 assign( t64, mkIRExprCCall(
5854 Ity_I64, 0/*regparms*/,
5855 "amd64g_check_fldcw",
5856 &amd64g_check_fldcw,
5857 mkIRExprVec_1(
5858 unop( Iop_16Uto64,
5859 loadLE(Ity_I16, mkexpr(addr)))
5864 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5865 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5866 put_emwarn( mkexpr(ew) );
5867 /* Finally, if an emulation warning was reported,
5868 side-exit to the next insn, reporting the warning,
5869 so that Valgrind's dispatcher sees the warning. */
5870 stmt(
5871 IRStmt_Exit(
5872 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5873 Ijk_EmWarn,
5874 IRConst_U64( guest_RIP_bbstart+delta ),
5875 OFFB_RIP
5878 break;
5881 case 6: { /* FNSTENV m28 */
5882 /* Uses dirty helper:
5883                void amd64g_dirtyhelper_FSTENV ( VexGuestAMD64State*, HWord ) */
5884 IRDirty* d = unsafeIRDirty_0_N (
5885 0/*regparms*/,
5886 "amd64g_dirtyhelper_FSTENV",
5887 &amd64g_dirtyhelper_FSTENV,
5888 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
5890 /* declare we're writing memory */
5891 d->mFx = Ifx_Write;
5892 d->mAddr = mkexpr(addr);
5893 d->mSize = 28;
5895 /* declare we're reading guest state */
5896 d->nFxState = 4;
5897 vex_bzero(&d->fxState, sizeof(d->fxState));
5899 d->fxState[0].fx = Ifx_Read;
5900 d->fxState[0].offset = OFFB_FTOP;
5901 d->fxState[0].size = sizeof(UInt);
5903 d->fxState[1].fx = Ifx_Read;
5904 d->fxState[1].offset = OFFB_FPTAGS;
5905 d->fxState[1].size = 8 * sizeof(UChar);
5907 d->fxState[2].fx = Ifx_Read;
5908 d->fxState[2].offset = OFFB_FPROUND;
5909 d->fxState[2].size = sizeof(ULong);
5911 d->fxState[3].fx = Ifx_Read;
5912 d->fxState[3].offset = OFFB_FC3210;
5913 d->fxState[3].size = sizeof(ULong);
5915 stmt( IRStmt_Dirty(d) );
5917 DIP("fnstenv %s\n", dis_buf);
5918 break;
5921 case 7: /* FNSTCW */
5922 /* Fake up a native x87 FPU control word. The only
5923 thing it depends on is FPROUND[1:0], so call a clean
5924 helper to cook it up. */
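            /* (Illustrative note, an assumption about the helper rather
               than a quote of it: the "plausible" control word is
               essentially the power-on default 0x037F -- all exceptions
               masked, 64-bit precision -- with the RC field in bits
               11:10 taken from FPROUND, roughly

                  cw = 0x037F | ((fpround & 3) << 10);

               so only the rounding mode round-trips through
               FLDCW/FNSTCW.) */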
5925 /* ULong amd64g_create_fpucw ( ULong fpround ) */
5926 DIP("fnstcw %s\n", dis_buf);
5927 storeLE(
5928 mkexpr(addr),
5929 unop( Iop_64to16,
5930 mkIRExprCCall(
5931 Ity_I64, 0/*regp*/,
5932 "amd64g_create_fpucw", &amd64g_create_fpucw,
5933 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5937 break;
5939 default:
5940 vex_printf("unhandled opc_aux = 0x%2x\n",
5941 (UInt)gregLO3ofRM(modrm));
5942 vex_printf("first_opcode == 0xD9\n");
5943 goto decode_fail;
5946 } else {
5947 delta++;
5948 switch (modrm) {
5950 case 0xC0 ... 0xC7: /* FLD %st(?) */
5951 r_src = (UInt)modrm - 0xC0;
5952 DIP("fld %%st(%u)\n", r_src);
5953 t1 = newTemp(Ity_F64);
5954 assign(t1, get_ST(r_src));
5955 fp_push();
5956 put_ST(0, mkexpr(t1));
5957 break;
5959 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5960 r_src = (UInt)modrm - 0xC8;
5961 DIP("fxch %%st(%u)\n", r_src);
5962 t1 = newTemp(Ity_F64);
5963 t2 = newTemp(Ity_F64);
5964 assign(t1, get_ST(0));
5965 assign(t2, get_ST(r_src));
5966 put_ST_UNCHECKED(0, mkexpr(t2));
5967 put_ST_UNCHECKED(r_src, mkexpr(t1));
5968 break;
5970 case 0xE0: /* FCHS */
5971 DIP("fchs\n");
5972 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5973 break;
5975 case 0xE1: /* FABS */
5976 DIP("fabs\n");
5977 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5978 break;
5980 case 0xE5: { /* FXAM */
5981 /* This is an interesting one. It examines %st(0),
5982 regardless of whether the tag says it's empty or not.
5983 Here, just pass both the tag (in our format) and the
5984 value (as a double, actually a ULong) to a helper
5985 function. */
5986 IRExpr** args
5987 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5988 unop(Iop_ReinterpF64asI64,
5989 get_ST_UNCHECKED(0)) );
5990 put_C3210(mkIRExprCCall(
5991 Ity_I64,
5992 0/*regparm*/,
5993 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5994 args
5996 DIP("fxam\n");
5997 break;
6000 case 0xE8: /* FLD1 */
6001 DIP("fld1\n");
6002 fp_push();
6003 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
6004 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
6005 break;
6007 case 0xE9: /* FLDL2T */
6008 DIP("fldl2t\n");
6009 fp_push();
6010 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
6011 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
6012 break;
6014 case 0xEA: /* FLDL2E */
6015 DIP("fldl2e\n");
6016 fp_push();
6017 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
6018 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
6019 break;
6021 case 0xEB: /* FLDPI */
6022 DIP("fldpi\n");
6023 fp_push();
6024 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
6025 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
6026 break;
6028 case 0xEC: /* FLDLG2 */
6029 DIP("fldlg2\n");
6030 fp_push();
6031 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
6032 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
6033 break;
6035 case 0xED: /* FLDLN2 */
6036 DIP("fldln2\n");
6037 fp_push();
6038 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
6039 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
6040 break;
6042 case 0xEE: /* FLDZ */
6043 DIP("fldz\n");
6044 fp_push();
6045 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
6046 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
6047 break;
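            /* (Descriptive note: the constants above are spelled as raw
               IEEE754 bit patterns via IRConst_F64i, presumably so the
               translation does not depend on the host compiler's
               decimal-to-double conversion.  As a worked example,
               0x400921FB54442D18 decodes as sign 0, biased exponent
               0x400 (unbiased +1), fraction 0x921FB54442D18, i.e.
               1.5707963267948966 * 2 = pi to double precision.) */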
6049 case 0xF0: /* F2XM1 */
6050 DIP("f2xm1\n");
6051 put_ST_UNCHECKED(0,
6052 binop(Iop_2xm1F64,
6053 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6054 get_ST(0)));
6055 break;
6057 case 0xF1: /* FYL2X */
6058 DIP("fyl2x\n");
6059 put_ST_UNCHECKED(1,
6060 triop(Iop_Yl2xF64,
6061 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6062 get_ST(1),
6063 get_ST(0)));
6064 fp_pop();
6065 break;
6067 case 0xF2: { /* FPTAN */
6068 DIP("fptan\n");
6069 IRTemp argD = newTemp(Ity_F64);
6070 assign(argD, get_ST(0));
6071 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6072 IRTemp resD = newTemp(Ity_F64);
6073 assign(resD,
6074 IRExpr_ITE(
6075 mkexpr(argOK),
6076 binop(Iop_TanF64,
6077 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6078 mkexpr(argD)),
6079 mkexpr(argD))
6081 put_ST_UNCHECKED(0, mkexpr(resD));
6082 /* Conditionally push 1.0 on the stack, if the arg is
6083 in range */
6084 maybe_fp_push(argOK);
6085 maybe_put_ST(argOK, 0,
6086 IRExpr_Const(IRConst_F64(1.0)));
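            /* (Note: the set_C2 below computes argOK ^ 1, so C2 is set
               precisely when the operand was NOT finite-and-in-range,
               which is the x87 convention for signalling an
               out-of-range trig argument.) */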
6087 set_C2( binop(Iop_Xor64,
6088 unop(Iop_1Uto64, mkexpr(argOK)),
6089 mkU64(1)) );
6090 break;
6093 case 0xF3: /* FPATAN */
6094 DIP("fpatan\n");
6095 put_ST_UNCHECKED(1,
6096 triop(Iop_AtanF64,
6097 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6098 get_ST(1),
6099 get_ST(0)));
6100 fp_pop();
6101 break;
6103 case 0xF4: { /* FXTRACT */
6104 IRTemp argF = newTemp(Ity_F64);
6105 IRTemp sigF = newTemp(Ity_F64);
6106 IRTemp expF = newTemp(Ity_F64);
6107 IRTemp argI = newTemp(Ity_I64);
6108 IRTemp sigI = newTemp(Ity_I64);
6109 IRTemp expI = newTemp(Ity_I64);
6110 DIP("fxtract\n");
6111 assign( argF, get_ST(0) );
6112 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
6113 assign( sigI,
6114 mkIRExprCCall(
6115 Ity_I64, 0/*regparms*/,
6116 "x86amd64g_calculate_FXTRACT",
6117 &x86amd64g_calculate_FXTRACT,
6118 mkIRExprVec_2( mkexpr(argI),
6119 mkIRExpr_HWord(0)/*sig*/ ))
6121 assign( expI,
6122 mkIRExprCCall(
6123 Ity_I64, 0/*regparms*/,
6124 "x86amd64g_calculate_FXTRACT",
6125 &x86amd64g_calculate_FXTRACT,
6126 mkIRExprVec_2( mkexpr(argI),
6127 mkIRExpr_HWord(1)/*exp*/ ))
6129 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
6130 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
6131 /* exponent */
6132 put_ST_UNCHECKED(0, mkexpr(expF) );
6133 fp_push();
6134 /* significand */
6135 put_ST(0, mkexpr(sigF) );
6136 break;
6139 case 0xF5: { /* FPREM1 -- IEEE compliant */
6140 IRTemp a1 = newTemp(Ity_F64);
6141 IRTemp a2 = newTemp(Ity_F64);
6142 DIP("fprem1\n");
6143 /* Do FPREM1 twice, once to get the remainder, and once
6144 to get the C3210 flag values. */
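            /* (Descriptive note, hedged: Iop_PRem1C3210F64 is understood
               to yield the x87 flag bits for the partial remainder --
               C2 set when the reduction is incomplete, otherwise C2
               clear and the low three bits of the integer quotient
               landing in C0/C3/C1 -- so running the operation twice,
               once for the value and once for the flags, preserves both
               halves of the architectural FPREM1 result.) */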
6145 assign( a1, get_ST(0) );
6146 assign( a2, get_ST(1) );
6147 put_ST_UNCHECKED(0,
6148 triop(Iop_PRem1F64,
6149 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6150 mkexpr(a1),
6151 mkexpr(a2)));
6152 put_C3210(
6153 unop(Iop_32Uto64,
6154 triop(Iop_PRem1C3210F64,
6155 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6156 mkexpr(a1),
6157 mkexpr(a2)) ));
6158 break;
6161 case 0xF7: /* FINCSTP */
6162 DIP("fincstp\n");
6163 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
6164 break;
6166 case 0xF8: { /* FPREM -- not IEEE compliant */
6167 IRTemp a1 = newTemp(Ity_F64);
6168 IRTemp a2 = newTemp(Ity_F64);
6169 DIP("fprem\n");
6170 /* Do FPREM twice, once to get the remainder, and once
6171 to get the C3210 flag values. */
6172 assign( a1, get_ST(0) );
6173 assign( a2, get_ST(1) );
6174 put_ST_UNCHECKED(0,
6175 triop(Iop_PRemF64,
6176 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6177 mkexpr(a1),
6178 mkexpr(a2)));
6179 put_C3210(
6180 unop(Iop_32Uto64,
6181 triop(Iop_PRemC3210F64,
6182 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6183 mkexpr(a1),
6184 mkexpr(a2)) ));
6185 break;
6188 case 0xF9: /* FYL2XP1 */
6189 DIP("fyl2xp1\n");
6190 put_ST_UNCHECKED(1,
6191 triop(Iop_Yl2xp1F64,
6192 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6193 get_ST(1),
6194 get_ST(0)));
6195 fp_pop();
6196 break;
6198 case 0xFA: /* FSQRT */
6199 DIP("fsqrt\n");
6200 put_ST_UNCHECKED(0,
6201 binop(Iop_SqrtF64,
6202 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6203 get_ST(0)));
6204 break;
6206 case 0xFB: { /* FSINCOS */
6207 DIP("fsincos\n");
6208 IRTemp argD = newTemp(Ity_F64);
6209 assign(argD, get_ST(0));
6210 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6211 IRTemp resD = newTemp(Ity_F64);
6212 assign(resD,
6213 IRExpr_ITE(
6214 mkexpr(argOK),
6215 binop(Iop_SinF64,
6216 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6217 mkexpr(argD)),
6218 mkexpr(argD))
6220 put_ST_UNCHECKED(0, mkexpr(resD));
6221 /* Conditionally push the cos value on the stack, if
6222 the arg is in range */
6223 maybe_fp_push(argOK);
6224 maybe_put_ST(argOK, 0,
6225 binop(Iop_CosF64,
6226 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6227 mkexpr(argD)));
6228 set_C2( binop(Iop_Xor64,
6229 unop(Iop_1Uto64, mkexpr(argOK)),
6230 mkU64(1)) );
6231 break;
6234 case 0xFC: /* FRNDINT */
6235 DIP("frndint\n");
6236 put_ST_UNCHECKED(0,
6237 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
6238 break;
6240 case 0xFD: /* FSCALE */
6241 DIP("fscale\n");
6242 put_ST_UNCHECKED(0,
6243 triop(Iop_ScaleF64,
6244 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6245 get_ST(0),
6246 get_ST(1)));
6247 break;
6249 case 0xFE: /* FSIN */
6250 case 0xFF: { /* FCOS */
6251 Bool isSIN = modrm == 0xFE;
6252 DIP("%s\n", isSIN ? "fsin" : "fcos");
6253 IRTemp argD = newTemp(Ity_F64);
6254 assign(argD, get_ST(0));
6255 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
6256 IRTemp resD = newTemp(Ity_F64);
6257 assign(resD,
6258 IRExpr_ITE(
6259 mkexpr(argOK),
6260 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6261 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6262 mkexpr(argD)),
6263 mkexpr(argD))
6265 put_ST_UNCHECKED(0, mkexpr(resD));
6266 set_C2( binop(Iop_Xor64,
6267 unop(Iop_1Uto64, mkexpr(argOK)),
6268 mkU64(1)) );
6269 break;
6272 default:
6273 goto decode_fail;
6278 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6279 else
6280 if (first_opcode == 0xDA) {
6282 if (modrm < 0xC0) {
6284 /* bits 5,4,3 are an opcode extension, and the modRM also
6285 specifies an address. */
6286 IROp fop;
6287 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6288 delta += len;
6289 switch (gregLO3ofRM(modrm)) {
6291 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6292 DIP("fiaddl %s\n", dis_buf);
6293 fop = Iop_AddF64;
6294 goto do_fop_m32;
6296 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6297 DIP("fimull %s\n", dis_buf);
6298 fop = Iop_MulF64;
6299 goto do_fop_m32;
6301 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6302 DIP("fisubl %s\n", dis_buf);
6303 fop = Iop_SubF64;
6304 goto do_fop_m32;
6306 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6307 DIP("fisubrl %s\n", dis_buf);
6308 fop = Iop_SubF64;
6309 goto do_foprev_m32;
6311 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6312            DIP("fidivl %s\n", dis_buf);
6313 fop = Iop_DivF64;
6314 goto do_fop_m32;
6316 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6317 DIP("fidivrl %s\n", dis_buf);
6318 fop = Iop_DivF64;
6319 goto do_foprev_m32;
6321 do_fop_m32:
6322 put_ST_UNCHECKED(0,
6323 triop(fop,
6324 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6325 get_ST(0),
6326 unop(Iop_I32StoF64,
6327 loadLE(Ity_I32, mkexpr(addr)))));
6328 break;
6330 do_foprev_m32:
6331 put_ST_UNCHECKED(0,
6332 triop(fop,
6333 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6334 unop(Iop_I32StoF64,
6335 loadLE(Ity_I32, mkexpr(addr))),
6336 get_ST(0)));
6337 break;
6339 default:
6340 vex_printf("unhandled opc_aux = 0x%2x\n",
6341 (UInt)gregLO3ofRM(modrm));
6342 vex_printf("first_opcode == 0xDA\n");
6343 goto decode_fail;
6346 } else {
6348 delta++;
6349 switch (modrm) {
6351 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6352 r_src = (UInt)modrm - 0xC0;
6353 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
6354 put_ST_UNCHECKED(0,
6355 IRExpr_ITE(
6356 mk_amd64g_calculate_condition(AMD64CondB),
6357 get_ST(r_src), get_ST(0)) );
6358 break;
6360 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6361 r_src = (UInt)modrm - 0xC8;
6362 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
6363 put_ST_UNCHECKED(0,
6364 IRExpr_ITE(
6365 mk_amd64g_calculate_condition(AMD64CondZ),
6366 get_ST(r_src), get_ST(0)) );
6367 break;
6369 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6370 r_src = (UInt)modrm - 0xD0;
6371 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
6372 put_ST_UNCHECKED(0,
6373 IRExpr_ITE(
6374 mk_amd64g_calculate_condition(AMD64CondBE),
6375 get_ST(r_src), get_ST(0)) );
6376 break;
6378 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6379 r_src = (UInt)modrm - 0xD8;
6380 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6381 put_ST_UNCHECKED(0,
6382 IRExpr_ITE(
6383 mk_amd64g_calculate_condition(AMD64CondP),
6384 get_ST(r_src), get_ST(0)) );
6385 break;
6387 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6388 DIP("fucompp %%st(0),%%st(1)\n");
6389 /* This forces C1 to zero, which isn't right. */
6390 put_C3210(
6391 unop(Iop_32Uto64,
6392 binop( Iop_And32,
6393 binop(Iop_Shl32,
6394 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6395 mkU8(8)),
6396 mkU32(0x4500)
6397 )));
6398 fp_pop();
6399 fp_pop();
6400 break;
6402 default:
6403 goto decode_fail;
6409 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6410 else
6411 if (first_opcode == 0xDB) {
6412 if (modrm < 0xC0) {
6414 /* bits 5,4,3 are an opcode extension, and the modRM also
6415 specifies an address. */
6416 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6417 delta += len;
6419 switch (gregLO3ofRM(modrm)) {
6421 case 0: /* FILD m32int */
6422 DIP("fildl %s\n", dis_buf);
6423 fp_push();
6424 put_ST(0, unop(Iop_I32StoF64,
6425 loadLE(Ity_I32, mkexpr(addr))));
6426 break;
6428 case 1: /* FISTTPL m32 (SSE3) */
6429 DIP("fisttpl %s\n", dis_buf);
6430 storeLE( mkexpr(addr),
6431 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
6432 fp_pop();
6433 break;
6435 case 2: /* FIST m32 */
6436 DIP("fistl %s\n", dis_buf);
6437 storeLE( mkexpr(addr),
6438 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6439 break;
6441 case 3: /* FISTP m32 */
6442 DIP("fistpl %s\n", dis_buf);
6443 storeLE( mkexpr(addr),
6444 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
6445 fp_pop();
6446 break;
6448 case 5: { /* FLD extended-real */
6449 /* Uses dirty helper:
6450 ULong amd64g_loadF80le ( ULong )
6451 addr holds the address. First, do a dirty call to
6452 get hold of the data. */
6453 IRTemp val = newTemp(Ity_I64);
6454 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6456 IRDirty* d = unsafeIRDirty_1_N (
6457 val,
6458 0/*regparms*/,
6459 "amd64g_dirtyhelper_loadF80le",
6460 &amd64g_dirtyhelper_loadF80le,
6461 args
6463 /* declare that we're reading memory */
6464 d->mFx = Ifx_Read;
6465 d->mAddr = mkexpr(addr);
6466 d->mSize = 10;
6468 /* execute the dirty call, dumping the result in val. */
6469 stmt( IRStmt_Dirty(d) );
6470 fp_push();
6471 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6473 DIP("fldt %s\n", dis_buf);
6474 break;
6477 case 7: { /* FSTP extended-real */
6478 /* Uses dirty helper:
6479                void amd64g_storeF80le ( ULong addr, ULong data )
6480             */
6481 IRExpr** args
6482 = mkIRExprVec_2( mkexpr(addr),
6483 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6485 IRDirty* d = unsafeIRDirty_0_N (
6486 0/*regparms*/,
6487 "amd64g_dirtyhelper_storeF80le",
6488 &amd64g_dirtyhelper_storeF80le,
6489 args
6491 /* declare we're writing memory */
6492 d->mFx = Ifx_Write;
6493 d->mAddr = mkexpr(addr);
6494 d->mSize = 10;
6496 /* execute the dirty call. */
6497 stmt( IRStmt_Dirty(d) );
6498 fp_pop();
6500             DIP("fstpt %s\n", dis_buf);
6501 break;
6504 default:
6505 vex_printf("unhandled opc_aux = 0x%2x\n",
6506 (UInt)gregLO3ofRM(modrm));
6507 vex_printf("first_opcode == 0xDB\n");
6508 goto decode_fail;
6511 } else {
6513 delta++;
6514 switch (modrm) {
6516 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6517 r_src = (UInt)modrm - 0xC0;
6518 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
6519 put_ST_UNCHECKED(0,
6520 IRExpr_ITE(
6521 mk_amd64g_calculate_condition(AMD64CondNB),
6522 get_ST(r_src), get_ST(0)) );
6523 break;
6525 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6526 r_src = (UInt)modrm - 0xC8;
6527 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
6528 put_ST_UNCHECKED(
6530 IRExpr_ITE(
6531 mk_amd64g_calculate_condition(AMD64CondNZ),
6532 get_ST(r_src),
6533 get_ST(0)
6536 break;
6538 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6539 r_src = (UInt)modrm - 0xD0;
6540 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
6541 put_ST_UNCHECKED(
6543 IRExpr_ITE(
6544 mk_amd64g_calculate_condition(AMD64CondNBE),
6545 get_ST(r_src),
6546 get_ST(0)
6549 break;
6551 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6552 r_src = (UInt)modrm - 0xD8;
6553 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6554 put_ST_UNCHECKED(
6556 IRExpr_ITE(
6557 mk_amd64g_calculate_condition(AMD64CondNP),
6558 get_ST(r_src),
6559 get_ST(0)
6562 break;
6564 case 0xE2:
6565 DIP("fnclex\n");
6566 break;
6568 case 0xE3: {
6569 gen_FINIT_SEQUENCE(NULL/*no guarding condition*/);
6570 DIP("fninit\n");
6571 break;
6574 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6575 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6576 break;
6578 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6579 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6580 break;
6582 default:
6583 goto decode_fail;
6588 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6589 else
6590 if (first_opcode == 0xDC) {
6591 if (modrm < 0xC0) {
6593 /* bits 5,4,3 are an opcode extension, and the modRM also
6594 specifies an address. */
6595 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6596 delta += len;
6598 switch (gregLO3ofRM(modrm)) {
6600 case 0: /* FADD double-real */
6601 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6602 break;
6604 case 1: /* FMUL double-real */
6605 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6606 break;
6608 case 2: /* FCOM double-real */
6609 DIP("fcoml %s\n", dis_buf);
6610 /* This forces C1 to zero, which isn't right. */
6611 put_C3210(
6612 unop(Iop_32Uto64,
6613 binop( Iop_And32,
6614 binop(Iop_Shl32,
6615 binop(Iop_CmpF64,
6616 get_ST(0),
6617 loadLE(Ity_F64,mkexpr(addr))),
6618 mkU8(8)),
6619 mkU32(0x4500)
6620 )));
6621 break;
6623 case 3: /* FCOMP double-real */
6624 DIP("fcompl %s\n", dis_buf);
6625 /* This forces C1 to zero, which isn't right. */
6626 put_C3210(
6627 unop(Iop_32Uto64,
6628 binop( Iop_And32,
6629 binop(Iop_Shl32,
6630 binop(Iop_CmpF64,
6631 get_ST(0),
6632 loadLE(Ity_F64,mkexpr(addr))),
6633 mkU8(8)),
6634 mkU32(0x4500)
6635 )));
6636 fp_pop();
6637 break;
6639 case 4: /* FSUB double-real */
6640 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6641 break;
6643 case 5: /* FSUBR double-real */
6644 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6645 break;
6647 case 6: /* FDIV double-real */
6648 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6649 break;
6651 case 7: /* FDIVR double-real */
6652 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6653 break;
6655 default:
6656 vex_printf("unhandled opc_aux = 0x%2x\n",
6657 (UInt)gregLO3ofRM(modrm));
6658 vex_printf("first_opcode == 0xDC\n");
6659 goto decode_fail;
6662 } else {
6664 delta++;
6665 switch (modrm) {
6667 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6668 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6669 break;
6671 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6672 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6673 break;
6675 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6676 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6677 break;
6679 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6680 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6681 break;
6683 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6684 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6685 break;
6687 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6688 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6689 break;
6691 default:
6692 goto decode_fail;
6698 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6699 else
6700 if (first_opcode == 0xDD) {
6702 if (modrm < 0xC0) {
6704 /* bits 5,4,3 are an opcode extension, and the modRM also
6705 specifies an address. */
6706 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6707 delta += len;
6709 switch (gregLO3ofRM(modrm)) {
6711 case 0: /* FLD double-real */
6712 DIP("fldl %s\n", dis_buf);
6713 fp_push();
6714 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
6715 break;
6717 case 1: /* FISTTPQ m64 (SSE3) */
6718            DIP("fisttpll %s\n", dis_buf);
6719 storeLE( mkexpr(addr),
6720 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
6721 fp_pop();
6722 break;
6724 case 2: /* FST double-real */
6725 DIP("fstl %s\n", dis_buf);
6726 storeLE(mkexpr(addr), get_ST(0));
6727 break;
6729 case 3: /* FSTP double-real */
6730 DIP("fstpl %s\n", dis_buf);
6731 storeLE(mkexpr(addr), get_ST(0));
6732 fp_pop();
6733 break;
6735 case 4: { /* FRSTOR m94/m108 */
6736 IRTemp ew = newTemp(Ity_I32);
6737 IRTemp w64 = newTemp(Ity_I64);
6738 IRDirty* d;
6739 if ( have66(pfx) ) {
6740 /* Uses dirty helper:
6741 VexEmNote amd64g_dirtyhelper_FRSTORS
6742 ( VexGuestAMD64State*, HWord ) */
6743 d = unsafeIRDirty_0_N (
6744 0/*regparms*/,
6745 "amd64g_dirtyhelper_FRSTORS",
6746 &amd64g_dirtyhelper_FRSTORS,
6747 mkIRExprVec_1( mkexpr(addr) )
6749 d->mSize = 94;
6750 } else {
6751 /* Uses dirty helper:
6752 VexEmNote amd64g_dirtyhelper_FRSTOR
6753 ( VexGuestAMD64State*, HWord ) */
6754 d = unsafeIRDirty_0_N (
6755 0/*regparms*/,
6756 "amd64g_dirtyhelper_FRSTOR",
6757 &amd64g_dirtyhelper_FRSTOR,
6758 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6760 d->mSize = 108;
6763 d->tmp = w64;
6764 /* declare we're reading memory */
6765 d->mFx = Ifx_Read;
6766 d->mAddr = mkexpr(addr);
6767 /* d->mSize set above */
6769 /* declare we're writing guest state */
6770 d->nFxState = 5;
6771 vex_bzero(&d->fxState, sizeof(d->fxState));
6773 d->fxState[0].fx = Ifx_Write;
6774 d->fxState[0].offset = OFFB_FTOP;
6775 d->fxState[0].size = sizeof(UInt);
6777 d->fxState[1].fx = Ifx_Write;
6778 d->fxState[1].offset = OFFB_FPREGS;
6779 d->fxState[1].size = 8 * sizeof(ULong);
6781 d->fxState[2].fx = Ifx_Write;
6782 d->fxState[2].offset = OFFB_FPTAGS;
6783 d->fxState[2].size = 8 * sizeof(UChar);
6785 d->fxState[3].fx = Ifx_Write;
6786 d->fxState[3].offset = OFFB_FPROUND;
6787 d->fxState[3].size = sizeof(ULong);
6789 d->fxState[4].fx = Ifx_Write;
6790 d->fxState[4].offset = OFFB_FC3210;
6791 d->fxState[4].size = sizeof(ULong);
6793 stmt( IRStmt_Dirty(d) );
6795 /* ew contains any emulation warning we may need to
6796 issue. If needed, side-exit to the next insn,
6797 reporting the warning, so that Valgrind's dispatcher
6798 sees the warning. */
6799 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6800 put_emwarn( mkexpr(ew) );
6801 stmt(
6802 IRStmt_Exit(
6803 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6804 Ijk_EmWarn,
6805 IRConst_U64( guest_RIP_bbstart+delta ),
6806 OFFB_RIP
6810 if ( have66(pfx) ) {
6811 DIP("frstors %s\n", dis_buf);
6812 } else {
6813 DIP("frstor %s\n", dis_buf);
6815 break;
6818 case 6: { /* FNSAVE m94/m108 */
6819 IRDirty *d;
6820 if ( have66(pfx) ) {
6821 /* Uses dirty helper:
6822 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6823 HWord ) */
6824 d = unsafeIRDirty_0_N (
6825 0/*regparms*/,
6826 "amd64g_dirtyhelper_FNSAVES",
6827 &amd64g_dirtyhelper_FNSAVES,
6828 mkIRExprVec_1( mkexpr(addr) )
6830 d->mSize = 94;
6831 } else {
6832 /* Uses dirty helper:
6833 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6834 HWord ) */
6835 d = unsafeIRDirty_0_N (
6836 0/*regparms*/,
6837 "amd64g_dirtyhelper_FNSAVE",
6838 &amd64g_dirtyhelper_FNSAVE,
6839 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
6841 d->mSize = 108;
6844 /* declare we're writing memory */
6845 d->mFx = Ifx_Write;
6846 d->mAddr = mkexpr(addr);
6847 /* d->mSize set above */
6849 /* declare we're reading guest state */
6850 d->nFxState = 5;
6851 vex_bzero(&d->fxState, sizeof(d->fxState));
6853 d->fxState[0].fx = Ifx_Read;
6854 d->fxState[0].offset = OFFB_FTOP;
6855 d->fxState[0].size = sizeof(UInt);
6857 d->fxState[1].fx = Ifx_Read;
6858 d->fxState[1].offset = OFFB_FPREGS;
6859 d->fxState[1].size = 8 * sizeof(ULong);
6861 d->fxState[2].fx = Ifx_Read;
6862 d->fxState[2].offset = OFFB_FPTAGS;
6863 d->fxState[2].size = 8 * sizeof(UChar);
6865 d->fxState[3].fx = Ifx_Read;
6866 d->fxState[3].offset = OFFB_FPROUND;
6867 d->fxState[3].size = sizeof(ULong);
6869 d->fxState[4].fx = Ifx_Read;
6870 d->fxState[4].offset = OFFB_FC3210;
6871 d->fxState[4].size = sizeof(ULong);
6873 stmt( IRStmt_Dirty(d) );
6875 if ( have66(pfx) ) {
6876 DIP("fnsaves %s\n", dis_buf);
6877 } else {
6878 DIP("fnsave %s\n", dis_buf);
6880 break;
6883 case 7: { /* FNSTSW m16 */
6884 IRExpr* sw = get_FPU_sw();
6885 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
6886 storeLE( mkexpr(addr), sw );
6887 DIP("fnstsw %s\n", dis_buf);
6888 break;
6891 default:
6892 vex_printf("unhandled opc_aux = 0x%2x\n",
6893 (UInt)gregLO3ofRM(modrm));
6894 vex_printf("first_opcode == 0xDD\n");
6895 goto decode_fail;
6897 } else {
6898 delta++;
6899 switch (modrm) {
6901 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6902 r_dst = (UInt)modrm - 0xC0;
6903 DIP("ffree %%st(%u)\n", r_dst);
6904 put_ST_TAG ( r_dst, mkU8(0) );
6905 break;
6907 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6908 r_dst = (UInt)modrm - 0xD0;
6909 DIP("fst %%st(0),%%st(%u)\n", r_dst);
6910 /* P4 manual says: "If the destination operand is a
6911 non-empty register, the invalid-operation exception
6912               is not generated."  Hence put_ST_UNCHECKED. */
6913 put_ST_UNCHECKED(r_dst, get_ST(0));
6914 break;
6916 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6917 r_dst = (UInt)modrm - 0xD8;
6918 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
6919 /* P4 manual says: "If the destination operand is a
6920 non-empty register, the invalid-operation exception
6921               is not generated."  Hence put_ST_UNCHECKED. */
6922 put_ST_UNCHECKED(r_dst, get_ST(0));
6923 fp_pop();
6924 break;
6926 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6927 r_dst = (UInt)modrm - 0xE0;
6928 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
6929 /* This forces C1 to zero, which isn't right. */
6930 put_C3210(
6931 unop(Iop_32Uto64,
6932 binop( Iop_And32,
6933 binop(Iop_Shl32,
6934 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6935 mkU8(8)),
6936 mkU32(0x4500)
6937 )));
6938 break;
6940 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6941 r_dst = (UInt)modrm - 0xE8;
6942 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
6943 /* This forces C1 to zero, which isn't right. */
6944 put_C3210(
6945 unop(Iop_32Uto64,
6946 binop( Iop_And32,
6947 binop(Iop_Shl32,
6948 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6949 mkU8(8)),
6950 mkU32(0x4500)
6951 )));
6952 fp_pop();
6953 break;
6955 default:
6956 goto decode_fail;
6961 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6962 else
6963 if (first_opcode == 0xDE) {
6965 if (modrm < 0xC0) {
6967 /* bits 5,4,3 are an opcode extension, and the modRM also
6968 specifies an address. */
6969 IROp fop;
6970 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
6971 delta += len;
6973 switch (gregLO3ofRM(modrm)) {
6975 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6976 DIP("fiaddw %s\n", dis_buf);
6977 fop = Iop_AddF64;
6978 goto do_fop_m16;
6980 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6981 DIP("fimulw %s\n", dis_buf);
6982 fop = Iop_MulF64;
6983 goto do_fop_m16;
6985 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6986 DIP("fisubw %s\n", dis_buf);
6987 fop = Iop_SubF64;
6988 goto do_fop_m16;
6990 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6991 DIP("fisubrw %s\n", dis_buf);
6992 fop = Iop_SubF64;
6993 goto do_foprev_m16;
6995 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6996            DIP("fidivw %s\n", dis_buf);
6997 fop = Iop_DivF64;
6998 goto do_fop_m16;
7000 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
7001 DIP("fidivrw %s\n", dis_buf);
7002 fop = Iop_DivF64;
7003 goto do_foprev_m16;
7005 do_fop_m16:
7006 put_ST_UNCHECKED(0,
7007 triop(fop,
7008 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7009 get_ST(0),
7010 unop(Iop_I32StoF64,
7011 unop(Iop_16Sto32,
7012 loadLE(Ity_I16, mkexpr(addr))))));
7013 break;
7015 do_foprev_m16:
7016 put_ST_UNCHECKED(0,
7017 triop(fop,
7018 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
7019 unop(Iop_I32StoF64,
7020 unop(Iop_16Sto32,
7021 loadLE(Ity_I16, mkexpr(addr)))),
7022 get_ST(0)));
7023 break;
7025 default:
7026 vex_printf("unhandled opc_aux = 0x%2x\n",
7027 (UInt)gregLO3ofRM(modrm));
7028 vex_printf("first_opcode == 0xDE\n");
7029 goto decode_fail;
7032 } else {
7034 delta++;
7035 switch (modrm) {
7037 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
7038 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
7039 break;
7041 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
7042 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
7043 break;
7045 case 0xD9: /* FCOMPP %st(0),%st(1) */
7046 DIP("fcompp %%st(0),%%st(1)\n");
7047 /* This forces C1 to zero, which isn't right. */
7048 put_C3210(
7049 unop(Iop_32Uto64,
7050 binop( Iop_And32,
7051 binop(Iop_Shl32,
7052 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
7053 mkU8(8)),
7054 mkU32(0x4500)
7055 )));
7056 fp_pop();
7057 fp_pop();
7058 break;
7060 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
7061 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
7062 break;
7064 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
7065 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
7066 break;
7068 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
7069 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
7070 break;
7072 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
7073 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
7074 break;
7076 default:
7077 goto decode_fail;
7083 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
7084 else
7085 if (first_opcode == 0xDF) {
7087 if (modrm < 0xC0) {
7089 /* bits 5,4,3 are an opcode extension, and the modRM also
7090 specifies an address. */
7091 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7092 delta += len;
7094 switch (gregLO3ofRM(modrm)) {
7096 case 0: /* FILD m16int */
7097 DIP("fildw %s\n", dis_buf);
7098 fp_push();
7099 put_ST(0, unop(Iop_I32StoF64,
7100 unop(Iop_16Sto32,
7101 loadLE(Ity_I16, mkexpr(addr)))));
7102 break;
7104 case 1: /* FISTTPS m16 (SSE3) */
7105 DIP("fisttps %s\n", dis_buf);
7106 storeLE( mkexpr(addr),
7107 x87ishly_qnarrow_32_to_16(
7108 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
7109 fp_pop();
7110 break;
7112 case 2: /* FIST m16 */
7113 DIP("fists %s\n", dis_buf);
7114 storeLE( mkexpr(addr),
7115 x87ishly_qnarrow_32_to_16(
7116 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7117 break;
7119 case 3: /* FISTP m16 */
7120 DIP("fistps %s\n", dis_buf);
7121 storeLE( mkexpr(addr),
7122 x87ishly_qnarrow_32_to_16(
7123 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
7124 fp_pop();
7125 break;
7127 case 5: /* FILD m64 */
7128 DIP("fildll %s\n", dis_buf);
7129 fp_push();
7130 put_ST(0, binop(Iop_I64StoF64,
7131 get_roundingmode(),
7132 loadLE(Ity_I64, mkexpr(addr))));
7133 break;
7135 case 7: /* FISTP m64 */
7136 DIP("fistpll %s\n", dis_buf);
7137 storeLE( mkexpr(addr),
7138 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
7139 fp_pop();
7140 break;
7142 default:
7143 vex_printf("unhandled opc_aux = 0x%2x\n",
7144 (UInt)gregLO3ofRM(modrm));
7145 vex_printf("first_opcode == 0xDF\n");
7146 goto decode_fail;
7149 } else {
7151 delta++;
7152 switch (modrm) {
7154 case 0xC0: /* FFREEP %st(0) */
7155 DIP("ffreep %%st(%d)\n", 0);
7156 put_ST_TAG ( 0, mkU8(0) );
7157 fp_pop();
7158 break;
7160 case 0xE0: /* FNSTSW %ax */
7161 DIP("fnstsw %%ax\n");
7162 /* Invent a plausible-looking FPU status word value and
7163 dump it in %AX:
7164 ((ftop & 7) << 11) | (c3210 & 0x4700)
7165            */
7166            putIRegRAX(
7167               2,
7168 unop(Iop_32to16,
7169 binop(Iop_Or32,
7170 binop(Iop_Shl32,
7171 binop(Iop_And32, get_ftop(), mkU32(7)),
7172 mkU8(11)),
7173 binop(Iop_And32,
7174 unop(Iop_64to32, get_C3210()),
7175 mkU32(0x4700))
7176 )));
7177 break;
7179 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
7180 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
7181 break;
7183 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
7184 /* not really right since COMIP != UCOMIP */
7185 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
7186 break;
7188 default:
7189 goto decode_fail;
7195 else
7196 goto decode_fail;
7198 *decode_ok = True;
7199 return delta;
7201 decode_fail:
7202 *decode_ok = False;
7203 return delta;
7207 /*------------------------------------------------------------*/
7208 /*--- ---*/
7209 /*--- MMX INSTRUCTIONS ---*/
7210 /*--- ---*/
7211 /*------------------------------------------------------------*/
7213 /* Effect of MMX insns on x87 FPU state (table 11-2 of
7214 IA32 arch manual, volume 3):
7216 Read from, or write to MMX register (viz, any insn except EMMS):
7217 * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
7218 * FP stack pointer set to zero
7220 EMMS:
7221 * All tags set to Invalid (empty) -- FPTAGS[i] := zero
7222       * FP stack pointer set to zero
7223 */
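/* (So, concretely: after do_MMX_preamble() below, FTOP == 0 and every
   FPTAGS[i] == 1 (valid); after do_EMMS_preamble(), FTOP == 0 and every
   FPTAGS[i] == 0 (empty).  This mirrors the table quoted above.) */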
7225 static void do_MMX_preamble ( void )
7227 Int i;
7228 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7229 IRExpr* zero = mkU32(0);
7230 IRExpr* tag1 = mkU8(1);
7231 put_ftop(zero);
7232 for (i = 0; i < 8; i++)
7233 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
7236 static void do_EMMS_preamble ( void )
7238 Int i;
7239 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7240 IRExpr* zero = mkU32(0);
7241 IRExpr* tag0 = mkU8(0);
7242 put_ftop(zero);
7243 for (i = 0; i < 8; i++)
7244 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
7248 static IRExpr* getMMXReg ( UInt archreg )
7250 vassert(archreg < 8);
7251 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7255 static void putMMXReg ( UInt archreg, IRExpr* e )
7257 vassert(archreg < 8);
7258 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
7259 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7263 /* Helper for non-shift MMX insns. Note this is incomplete in the
7264 sense that it does not first call do_MMX_preamble() -- that is the
7265 responsibility of its caller. */
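/* (Usage sketch, taken from the decoder cases further down: the PADD
   family, for instance, is dispatched as

      delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );

   where show_granularity=True makes the printed mnemonic pick up its
   size suffix from the low bits of the opcode via nameMMXGran(opc & 3),
   presumably b/w/d for the three PADD opcodes.) */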
7267 static
7268 ULong dis_MMXop_regmem_to_reg ( const VexAbiInfo* vbi,
7269 Prefix pfx,
7270 Long delta,
7271 UChar opc,
7272 const HChar* name,
7273 Bool show_granularity )
7275 HChar dis_buf[50];
7276 UChar modrm = getUChar(delta);
7277 Bool isReg = epartIsReg(modrm);
7278 IRExpr* argL = NULL;
7279 IRExpr* argR = NULL;
7280 IRExpr* argG = NULL;
7281 IRExpr* argE = NULL;
7282 IRTemp res = newTemp(Ity_I64);
7284 Bool invG = False;
7285 IROp op = Iop_INVALID;
7286 void* hAddr = NULL;
7287 const HChar* hName = NULL;
7288 Bool eLeft = False;
7290 # define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7292 switch (opc) {
7293 /* Original MMX ones */
7294 case 0xFC: op = Iop_Add8x8; break;
7295 case 0xFD: op = Iop_Add16x4; break;
7296 case 0xFE: op = Iop_Add32x2; break;
7298 case 0xEC: op = Iop_QAdd8Sx8; break;
7299 case 0xED: op = Iop_QAdd16Sx4; break;
7301 case 0xDC: op = Iop_QAdd8Ux8; break;
7302 case 0xDD: op = Iop_QAdd16Ux4; break;
7304 case 0xF8: op = Iop_Sub8x8; break;
7305 case 0xF9: op = Iop_Sub16x4; break;
7306 case 0xFA: op = Iop_Sub32x2; break;
7308 case 0xE8: op = Iop_QSub8Sx8; break;
7309 case 0xE9: op = Iop_QSub16Sx4; break;
7311 case 0xD8: op = Iop_QSub8Ux8; break;
7312 case 0xD9: op = Iop_QSub16Ux4; break;
7314 case 0xE5: op = Iop_MulHi16Sx4; break;
7315 case 0xD5: op = Iop_Mul16x4; break;
7316 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7318 case 0x74: op = Iop_CmpEQ8x8; break;
7319 case 0x75: op = Iop_CmpEQ16x4; break;
7320 case 0x76: op = Iop_CmpEQ32x2; break;
7322 case 0x64: op = Iop_CmpGT8Sx8; break;
7323 case 0x65: op = Iop_CmpGT16Sx4; break;
7324 case 0x66: op = Iop_CmpGT32Sx2; break;
7326 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7327 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7328 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
7330 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7331 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7332 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7334 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7335 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7336 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7338 case 0xDB: op = Iop_And64; break;
7339 case 0xDF: op = Iop_And64; invG = True; break;
7340 case 0xEB: op = Iop_Or64; break;
7341 case 0xEF: /* Possibly do better here if argL and argR are the
7342 same reg */
7343 op = Iop_Xor64; break;
7345 /* Introduced in SSE1 */
7346 case 0xE0: op = Iop_Avg8Ux8; break;
7347 case 0xE3: op = Iop_Avg16Ux4; break;
7348 case 0xEE: op = Iop_Max16Sx4; break;
7349 case 0xDE: op = Iop_Max8Ux8; break;
7350 case 0xEA: op = Iop_Min16Sx4; break;
7351 case 0xDA: op = Iop_Min8Ux8; break;
7352 case 0xE4: op = Iop_MulHi16Ux4; break;
7353 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
7355 /* Introduced in SSE2 */
7356 case 0xD4: op = Iop_Add64; break;
7357 case 0xFB: op = Iop_Sub64; break;
7359 default:
7360 vex_printf("\n0x%x\n", (UInt)opc);
7361 vpanic("dis_MMXop_regmem_to_reg");
7364 # undef XXX
7366 argG = getMMXReg(gregLO3ofRM(modrm));
7367 if (invG)
7368 argG = unop(Iop_Not64, argG);
7370 if (isReg) {
7371 delta++;
7372 argE = getMMXReg(eregLO3ofRM(modrm));
7373 } else {
7374 Int len;
7375 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7376 delta += len;
7377 argE = loadLE(Ity_I64, mkexpr(addr));
7380 if (eLeft) {
7381 argL = argE;
7382 argR = argG;
7383 } else {
7384 argL = argG;
7385 argR = argE;
7388 if (op != Iop_INVALID) {
7389 vassert(hName == NULL);
7390 vassert(hAddr == NULL);
7391 assign(res, binop(op, argL, argR));
7392 } else {
7393 vassert(hName != NULL);
7394 vassert(hAddr != NULL);
7395 assign( res,
7396 mkIRExprCCall(
7397 Ity_I64,
7398 0/*regparms*/, hName, hAddr,
7399 mkIRExprVec_2( argL, argR )
7404 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7406 DIP("%s%s %s, %s\n",
7407 name, show_granularity ? nameMMXGran(opc & 3) : "",
7408 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7409 nameMMXReg(gregLO3ofRM(modrm)) );
7411 return delta;
7415 /* Vector by scalar shift of G by the amount specified at the bottom
7416 of E. This is a straight copy of dis_SSE_shiftG_byE. */
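/* (Descriptive note: as the ITE below shows, a shift amount >= the
   lane/operand size produces 0 for the logical shifts, and for the
   arithmetic shifts is clamped to size-1 so the result is just the
   replicated sign bit -- matching the hardware behaviour of
   PSLLW/PSRLW/PSRAW and friends for large counts.) */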
7418 static ULong dis_MMX_shiftG_byE ( const VexAbiInfo* vbi,
7419 Prefix pfx, Long delta,
7420 const HChar* opname, IROp op )
7422 HChar dis_buf[50];
7423 Int alen, size;
7424 IRTemp addr;
7425 Bool shl, shr, sar;
7426 UChar rm = getUChar(delta);
7427 IRTemp g0 = newTemp(Ity_I64);
7428 IRTemp g1 = newTemp(Ity_I64);
7429 IRTemp amt = newTemp(Ity_I64);
7430 IRTemp amt8 = newTemp(Ity_I8);
7432 if (epartIsReg(rm)) {
7433 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7434 DIP("%s %s,%s\n", opname,
7435 nameMMXReg(eregLO3ofRM(rm)),
7436 nameMMXReg(gregLO3ofRM(rm)) );
7437 delta++;
7438 } else {
7439 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
7440 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7441 DIP("%s %s,%s\n", opname,
7442 dis_buf,
7443 nameMMXReg(gregLO3ofRM(rm)) );
7444 delta += alen;
7446 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7447 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7449 shl = shr = sar = False;
7450 size = 0;
7451 switch (op) {
7452 case Iop_ShlN16x4: shl = True; size = 32; break;
7453 case Iop_ShlN32x2: shl = True; size = 32; break;
7454 case Iop_Shl64: shl = True; size = 64; break;
7455 case Iop_ShrN16x4: shr = True; size = 16; break;
7456 case Iop_ShrN32x2: shr = True; size = 32; break;
7457 case Iop_Shr64: shr = True; size = 64; break;
7458 case Iop_SarN16x4: sar = True; size = 16; break;
7459 case Iop_SarN32x2: sar = True; size = 32; break;
7460 default: vassert(0);
7463 if (shl || shr) {
7464 assign(
7466 IRExpr_ITE(
7467 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7468 binop(op, mkexpr(g0), mkexpr(amt8)),
7469 mkU64(0)
7472 } else
7473 if (sar) {
7474 assign(
7476 IRExpr_ITE(
7477 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
7478 binop(op, mkexpr(g0), mkexpr(amt8)),
7479 binop(op, mkexpr(g0), mkU8(size-1))
7482 } else {
7483 vassert(0);
7486 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7487 return delta;
7491 /* Vector by scalar shift of E by an immediate byte. This is a
7492 straight copy of dis_SSE_shiftE_imm. */
7494 static
7495 ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
7497 Bool shl, shr, sar;
7498 UChar rm = getUChar(delta);
7499 IRTemp e0 = newTemp(Ity_I64);
7500 IRTemp e1 = newTemp(Ity_I64);
7501 UChar amt, size;
7502 vassert(epartIsReg(rm));
7503 vassert(gregLO3ofRM(rm) == 2
7504 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
7505 amt = getUChar(delta+1);
7506 delta += 2;
7507 DIP("%s $%d,%s\n", opname,
7508 (Int)amt,
7509 nameMMXReg(eregLO3ofRM(rm)) );
7511 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7513 shl = shr = sar = False;
7514 size = 0;
7515 switch (op) {
7516 case Iop_ShlN16x4: shl = True; size = 16; break;
7517 case Iop_ShlN32x2: shl = True; size = 32; break;
7518 case Iop_Shl64: shl = True; size = 64; break;
7519 case Iop_SarN16x4: sar = True; size = 16; break;
7520 case Iop_SarN32x2: sar = True; size = 32; break;
7521 case Iop_ShrN16x4: shr = True; size = 16; break;
7522 case Iop_ShrN32x2: shr = True; size = 32; break;
7523 case Iop_Shr64: shr = True; size = 64; break;
7524 default: vassert(0);
7527 if (shl || shr) {
7528 assign( e1, amt >= size
7529 ? mkU64(0)
7530 : binop(op, mkexpr(e0), mkU8(amt))
7532 } else
7533 if (sar) {
7534 assign( e1, amt >= size
7535 ? binop(op, mkexpr(e0), mkU8(size-1))
7536 : binop(op, mkexpr(e0), mkU8(amt))
7538 } else {
7539 vassert(0);
7542 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7543 return delta;
7547 /* Completely handle all MMX instructions except emms. */
7549 static
7550 ULong dis_MMX ( Bool* decode_ok,
7551 const VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
7553 Int len;
7554 UChar modrm;
7555 HChar dis_buf[50];
7556 UChar opc = getUChar(delta);
7557 delta++;
7559 /* dis_MMX handles all insns except emms. */
7560 do_MMX_preamble();
7562 switch (opc) {
7564 case 0x6E:
7565 if (sz == 4) {
7566 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7567 modrm = getUChar(delta);
7568 if (epartIsReg(modrm)) {
7569 delta++;
7570 putMMXReg(
7571 gregLO3ofRM(modrm),
7572 binop( Iop_32HLto64,
7573 mkU32(0),
7574 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7575 DIP("movd %s, %s\n",
7576 nameIReg32(eregOfRexRM(pfx,modrm)),
7577 nameMMXReg(gregLO3ofRM(modrm)));
7578 } else {
7579 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7580 delta += len;
7581 putMMXReg(
7582 gregLO3ofRM(modrm),
7583 binop( Iop_32HLto64,
7584 mkU32(0),
7585 loadLE(Ity_I32, mkexpr(addr)) ) );
7586 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7589 else
7590 if (sz == 8) {
7591 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7592 modrm = getUChar(delta);
7593 if (epartIsReg(modrm)) {
7594 delta++;
7595 putMMXReg( gregLO3ofRM(modrm),
7596 getIReg64(eregOfRexRM(pfx,modrm)) );
7597 DIP("movd %s, %s\n",
7598 nameIReg64(eregOfRexRM(pfx,modrm)),
7599 nameMMXReg(gregLO3ofRM(modrm)));
7600 } else {
7601 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7602 delta += len;
7603 putMMXReg( gregLO3ofRM(modrm),
7604 loadLE(Ity_I64, mkexpr(addr)) );
7605 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7608 else {
7609 goto mmx_decode_failure;
7611 break;
7613 case 0x7E:
7614 if (sz == 4) {
7615 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7616 modrm = getUChar(delta);
7617 if (epartIsReg(modrm)) {
7618 delta++;
7619 putIReg32( eregOfRexRM(pfx,modrm),
7620 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7621 DIP("movd %s, %s\n",
7622 nameMMXReg(gregLO3ofRM(modrm)),
7623 nameIReg32(eregOfRexRM(pfx,modrm)));
7624 } else {
7625 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7626 delta += len;
7627 storeLE( mkexpr(addr),
7628 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7629 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7632 else
7633 if (sz == 8) {
7634 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7635 modrm = getUChar(delta);
7636 if (epartIsReg(modrm)) {
7637 delta++;
7638 putIReg64( eregOfRexRM(pfx,modrm),
7639 getMMXReg(gregLO3ofRM(modrm)) );
7640 DIP("movd %s, %s\n",
7641 nameMMXReg(gregLO3ofRM(modrm)),
7642 nameIReg64(eregOfRexRM(pfx,modrm)));
7643 } else {
7644 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7645 delta += len;
7646 storeLE( mkexpr(addr),
7647 getMMXReg(gregLO3ofRM(modrm)) );
7648 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7650 } else {
7651 goto mmx_decode_failure;
7653 break;
7655 case 0x6F:
7656 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
7657 if (sz != 4
7658 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7659 goto mmx_decode_failure;
7660 modrm = getUChar(delta);
7661 if (epartIsReg(modrm)) {
7662 delta++;
7663 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7664 DIP("movq %s, %s\n",
7665 nameMMXReg(eregLO3ofRM(modrm)),
7666 nameMMXReg(gregLO3ofRM(modrm)));
7667 } else {
7668 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7669 delta += len;
7670 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7671 DIP("movq %s, %s\n",
7672 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7674 break;
7676 case 0x7F:
7677 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
7678 if (sz != 4
7679 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7680 goto mmx_decode_failure;
7681 modrm = getUChar(delta);
7682 if (epartIsReg(modrm)) {
7683 delta++;
7684 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7685 DIP("movq %s, %s\n",
7686 nameMMXReg(gregLO3ofRM(modrm)),
7687 nameMMXReg(eregLO3ofRM(modrm)));
7688 } else {
7689 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
7690 delta += len;
7691 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7692 DIP("mov(nt)q %s, %s\n",
7693 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7695 break;
7697 case 0xFC:
7698 case 0xFD:
7699 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7700 if (sz != 4)
7701 goto mmx_decode_failure;
7702 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
7703 break;
7705 case 0xEC:
7706 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
7707 if (sz != 4
7708 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7709 goto mmx_decode_failure;
7710 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
7711 break;
7713 case 0xDC:
7714 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7715 if (sz != 4)
7716 goto mmx_decode_failure;
7717 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
7718 break;
7720 case 0xF8:
7721 case 0xF9:
7722 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7723 if (sz != 4)
7724 goto mmx_decode_failure;
7725 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
7726 break;
7728 case 0xE8:
7729 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7730 if (sz != 4)
7731 goto mmx_decode_failure;
7732 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
7733 break;
7735 case 0xD8:
7736 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7737 if (sz != 4)
7738 goto mmx_decode_failure;
7739 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
7740 break;
7742 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7743 if (sz != 4)
7744 goto mmx_decode_failure;
7745 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
7746 break;
7748 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7749 if (sz != 4)
7750 goto mmx_decode_failure;
7751 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
7752 break;
7754 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7755 vassert(sz == 4);
7756 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
7757 break;
7759 case 0x74:
7760 case 0x75:
7761 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7762 if (sz != 4)
7763 goto mmx_decode_failure;
7764 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
7765 break;
7767 case 0x64:
7768 case 0x65:
7769 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7770 if (sz != 4)
7771 goto mmx_decode_failure;
7772 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
7773 break;
7775 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7776 if (sz != 4)
7777 goto mmx_decode_failure;
7778 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
7779 break;
7781 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7782 if (sz != 4)
7783 goto mmx_decode_failure;
7784 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
7785 break;
7787 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7788 if (sz != 4)
7789 goto mmx_decode_failure;
7790 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
7791 break;
7793 case 0x68:
7794 case 0x69:
7795 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
7796 if (sz != 4
7797 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7798 goto mmx_decode_failure;
7799 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
7800 break;
7802 case 0x60:
7803 case 0x61:
7804 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
7805 if (sz != 4
7806 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
7807 goto mmx_decode_failure;
7808 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
7809 break;
7811 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7812 if (sz != 4)
7813 goto mmx_decode_failure;
7814 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
7815 break;
7817 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7818 if (sz != 4)
7819 goto mmx_decode_failure;
7820 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
7821 break;
7823 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7824 if (sz != 4)
7825 goto mmx_decode_failure;
7826 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
7827 break;
7829 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7830 if (sz != 4)
7831 goto mmx_decode_failure;
7832 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
7833 break;
7835 # define SHIFT_BY_REG(_name,_op) \
7836 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
7837 break;
7839 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7840 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7841 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7842 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7844 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7845 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7846 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7847 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7849 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7850 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7851 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7853 # undef SHIFT_BY_REG
7855 case 0x71:
7856 case 0x72:
7857 case 0x73: {
7858 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
7859 UChar byte2, subopc;
7860 if (sz != 4)
7861 goto mmx_decode_failure;
7862 byte2 = getUChar(delta); /* amode / sub-opcode */
7863 subopc = toUChar( (byte2 >> 3) & 7 );
7865 # define SHIFT_BY_IMM(_name,_op) \
7866 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7867 } while (0)
7869 if (subopc == 2 /*SRL*/ && opc == 0x71)
7870 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7871 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7872 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7873 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7874 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7876 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7877 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7878 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7879 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7881 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7882 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7883 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7884 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7885 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7886 SHIFT_BY_IMM("psllq", Iop_Shl64);
7888 else goto mmx_decode_failure;
7890 # undef SHIFT_BY_IMM
7891 break;
7894 case 0xF7: {
7895 IRTemp addr = newTemp(Ity_I64);
7896 IRTemp regD = newTemp(Ity_I64);
7897 IRTemp regM = newTemp(Ity_I64);
7898 IRTemp mask = newTemp(Ity_I64);
7899 IRTemp olddata = newTemp(Ity_I64);
7900 IRTemp newdata = newTemp(Ity_I64);
7902 modrm = getUChar(delta);
7903 if (sz != 4 || (!epartIsReg(modrm)))
7904 goto mmx_decode_failure;
7905 delta++;
7907 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
7908 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7909 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7910 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7911 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7912 assign( newdata,
7913 binop(Iop_Or64,
7914 binop(Iop_And64,
7915 mkexpr(regD),
7916 mkexpr(mask) ),
7917 binop(Iop_And64,
7918 mkexpr(olddata),
7919 unop(Iop_Not64, mkexpr(mask)))) );
7920 storeLE( mkexpr(addr), mkexpr(newdata) );
7921 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7922 nameMMXReg( gregLO3ofRM(modrm) ) );
7923 break;
7926 /* --- MMX decode failure --- */
7927 default:
7928 mmx_decode_failure:
7929 *decode_ok = False;
7930 return delta; /* ignored */
7934 *decode_ok = True;
7935 return delta;
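/* A minimal editorial sketch (not part of the decoder, kept out of the
   build): byte-level reference for the MASKMOVQ merge built for case 0xF7
   above -- only bytes whose mask byte has its top bit set are written to
   memory. The ref_maskmovq name is purely illustrative. */
#if 0
static void ref_maskmovq ( UChar* mem, const UChar* data, const UChar* mask )
{
   Int i;
   for (i = 0; i < 8; i++)
      if (mask[i] & 0x80)       /* top bit of the mask byte selects the lane */
         mem[i] = data[i];
}
#endif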
7939 /*------------------------------------------------------------*/
7940 /*--- More misc arithmetic and other obscure insns. ---*/
7941 /*------------------------------------------------------------*/
7943 /* Generate base << amt with vacated places filled with stuff
7944 from xtra. amt guaranteed in 0 .. 63. */
7945 static
7946 IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7948 /* if amt == 0
7949 then base
7950 else (base << amt) | (xtra >>u (64-amt)) */
7952 return
7953 IRExpr_ITE(
7954 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7955 binop(Iop_Or64,
7956 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7957 binop(Iop_Shr64, mkexpr(xtra),
7958 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7960 mkexpr(base)
7964 /* Generate base >>u amt with vacated places filled with stuff
7965 from xtra. amt guaranteed in 0 .. 63. */
7966 static
7967 IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7969 /* if amt == 0
7970 then base
7971 else (base >>u amt) | (xtra << (64-amt)) */
7973 return
7974 IRExpr_ITE(
7975 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
7976 binop(Iop_Or64,
7977 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7978 binop(Iop_Shl64, mkexpr(xtra),
7979 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
7981 mkexpr(base)
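/* A minimal editorial sketch (illustrative only, not used by the decoder):
   plain-C equivalents of the two helpers above, assuming amt is already in
   the range 0 .. 63 as their comments require. */
#if 0
static ULong ref_shiftL64_with_extras ( ULong base, ULong xtra, UInt amt )
{
   /* vacated low bits are filled from the top of xtra */
   return amt == 0 ? base : (base << amt) | (xtra >> (64 - amt));
}
static ULong ref_shiftR64_with_extras ( ULong xtra, ULong base, UInt amt )
{
   /* vacated high bits are filled from the bottom of xtra */
   return amt == 0 ? base : (base >> amt) | (xtra << (64 - amt));
}
#endif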
7985 /* Double length left and right shifts. Apparently only required in
7986 v-size (no b- variant). */
7987 static
7988 ULong dis_SHLRD_Gv_Ev ( const VexAbiInfo* vbi,
7989 Prefix pfx,
7990 Long delta, UChar modrm,
7991 Int sz,
7992 IRExpr* shift_amt,
7993 Bool amt_is_literal,
7994 const HChar* shift_amt_txt,
7995 Bool left_shift )
7997 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7998 for printing it. And eip on entry points at the modrm byte. */
7999 Int len;
8000 HChar dis_buf[50];
8002 IRType ty = szToITy(sz);
8003 IRTemp gsrc = newTemp(ty);
8004 IRTemp esrc = newTemp(ty);
8005 IRTemp addr = IRTemp_INVALID;
8006 IRTemp tmpSH = newTemp(Ity_I8);
8007 IRTemp tmpSS = newTemp(Ity_I8);
8008 IRTemp tmp64 = IRTemp_INVALID;
8009 IRTemp res64 = IRTemp_INVALID;
8010 IRTemp rss64 = IRTemp_INVALID;
8011 IRTemp resTy = IRTemp_INVALID;
8012 IRTemp rssTy = IRTemp_INVALID;
8013 Int mask = sz==8 ? 63 : 31;
8015 vassert(sz == 2 || sz == 4 || sz == 8);
8017 /* The E-part is the destination; this is shifted. The G-part
8018 supplies bits to be shifted into the E-part, but is not
8019 changed.
8021 If shifting left, form a double-length word with E at the top
8022 and G at the bottom, and shift this left. The result is then in
8023 the high part.
8025 If shifting right, form a double-length word with G at the top
8026 and E at the bottom, and shift this right. The result is then
8027 at the bottom. */
8029 /* Fetch the operands. */
8031 assign( gsrc, getIRegG(sz, pfx, modrm) );
8033 if (epartIsReg(modrm)) {
8034 delta++;
8035 assign( esrc, getIRegE(sz, pfx, modrm) );
8036 DIP("sh%cd%c %s, %s, %s\n",
8037 ( left_shift ? 'l' : 'r' ), nameISize(sz),
8038 shift_amt_txt,
8039 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
8040 } else {
8041 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
8042 /* # bytes following amode */
8043 amt_is_literal ? 1 : 0 );
8044 delta += len;
8045 assign( esrc, loadLE(ty, mkexpr(addr)) );
8046 DIP("sh%cd%c %s, %s, %s\n",
8047 ( left_shift ? 'l' : 'r' ), nameISize(sz),
8048 shift_amt_txt,
8049 nameIRegG(sz, pfx, modrm), dis_buf);
8052 /* Calculate the masked shift amount (tmpSH), the masked subshift
8053 amount (tmpSS), the shifted value (res64) and the subshifted
8054 value (rss64). */
8056 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
8057 assign( tmpSS, binop(Iop_And8,
8058 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
8059 mkU8(mask)));
8061 tmp64 = newTemp(Ity_I64);
8062 res64 = newTemp(Ity_I64);
8063 rss64 = newTemp(Ity_I64);
8065 if (sz == 2 || sz == 4) {
8067 /* G is xtra; E is data */
8068 /* what a freaking nightmare: */
8069 if (sz == 4 && left_shift) {
8070 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
8071 assign( res64,
8072 binop(Iop_Shr64,
8073 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8074 mkU8(32)) );
8075 assign( rss64,
8076 binop(Iop_Shr64,
8077 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
8078 mkU8(32)) );
8080 else
8081 if (sz == 4 && !left_shift) {
8082 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
8083 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8084 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
8086 else
8087 if (sz == 2 && left_shift) {
8088 assign( tmp64,
8089 binop(Iop_32HLto64,
8090 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
8091 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
8093 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
8094 assign( res64,
8095 binop(Iop_Shr64,
8096 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
8097 mkU8(48)) );
8098 /* subshift formed by shifting [esrc'0000'0000'0000] */
8099 assign( rss64,
8100 binop(Iop_Shr64,
8101 binop(Iop_Shl64,
8102 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
8103 mkU8(48)),
8104 mkexpr(tmpSS)),
8105 mkU8(48)) );
8107 else
8108 if (sz == 2 && !left_shift) {
8109 assign( tmp64,
8110 binop(Iop_32HLto64,
8111 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
8112 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
8114 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
8115 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
8116 /* subshift formed by shifting [0000'0000'0000'esrc] */
8117 assign( rss64, binop(Iop_Shr64,
8118 unop(Iop_16Uto64, mkexpr(esrc)),
8119 mkexpr(tmpSS)) );
8122 } else {
8124 vassert(sz == 8);
8125 if (left_shift) {
8126 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
8127 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
8128 } else {
8129 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
8130 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
8135 resTy = newTemp(ty);
8136 rssTy = newTemp(ty);
8137 assign( resTy, narrowTo(ty, mkexpr(res64)) );
8138 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
8140 /* Put result back and write the flags thunk. */
8141 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
8142 resTy, rssTy, ty, tmpSH );
8144 if (epartIsReg(modrm)) {
8145 putIRegE(sz, pfx, modrm, mkexpr(resTy));
8146 } else {
8147 storeLE( mkexpr(addr), mkexpr(resTy) );
8150 if (amt_is_literal) delta++;
8151 return delta;
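/* A minimal editorial sketch (illustrative only): scalar reference for the
   32-bit SHLD/SHRD double-length scheme described above. 'e' is the E part
   (the destination), 'g' supplies the shifted-in bits, and amt is assumed to
   be already masked to 0 .. 31. The ref_* names are hypothetical. */
#if 0
static UInt ref_shld32 ( UInt e, UInt g, UInt amt )
{
   ULong pair = ((ULong)e << 32) | g;          /* E at the top, G below */
   return amt == 0 ? e : (UInt)((pair << amt) >> 32);
}
static UInt ref_shrd32 ( UInt e, UInt g, UInt amt )
{
   ULong pair = ((ULong)g << 32) | e;          /* G at the top, E below */
   return amt == 0 ? e : (UInt)(pair >> amt);
}
#endif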
8155 /* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not
8156 required. */
8158 typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
8160 static const HChar* nameBtOp ( BtOp op )
8162 switch (op) {
8163 case BtOpNone: return "";
8164 case BtOpSet: return "s";
8165 case BtOpReset: return "r";
8166 case BtOpComp: return "c";
8167 default: vpanic("nameBtOp(amd64)");
8172 static
8173 ULong dis_bt_G_E ( const VexAbiInfo* vbi,
8174 Prefix pfx, Int sz, Long delta, BtOp op,
8175 /*OUT*/Bool* decode_OK )
8177 HChar dis_buf[50];
8178 UChar modrm;
8179 Int len;
8180 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
8181 t_addr1, t_rsp, t_mask, t_new;
8183 vassert(sz == 2 || sz == 4 || sz == 8);
8185 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
8186 = t_addr0 = t_addr1 = t_rsp
8187 = t_mask = t_new = IRTemp_INVALID;
8189 t_fetched = newTemp(Ity_I8);
8190 t_new = newTemp(Ity_I8);
8191 t_bitno0 = newTemp(Ity_I64);
8192 t_bitno1 = newTemp(Ity_I64);
8193 t_bitno2 = newTemp(Ity_I8);
8194 t_addr1 = newTemp(Ity_I64);
8195 modrm = getUChar(delta);
8197 *decode_OK = True;
8198 if (epartIsReg(modrm)) {
8199 /* F2 and F3 are never acceptable. */
8200 if (haveF2orF3(pfx)) {
8201 *decode_OK = False;
8202 return delta;
8204 } else {
8205 /* F2 or F3 (but not both) are allowed, provided LOCK is also
8206 present, and only for the BTC/BTS/BTR cases (not BT). */
8207 if (haveF2orF3(pfx)) {
8208 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
8209 *decode_OK = False;
8210 return delta;
8215 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
8217 if (epartIsReg(modrm)) {
8218 delta++;
8219 /* Get it onto the client's stack. Oh, this is a horrible
8220 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
8221 Because of the ELF ABI stack redzone, there may be live data
8222 up to 128 bytes below %RSP. So we can't just push it on the
8223 stack, else we may wind up trashing live data, and causing
8224 impossible-to-find simulation errors. (Yes, this did
8225 happen.) So we need to drop RSP by at least 128 before
8226 pushing it. That unfortunately means hitting Memcheck's
8227 fast-case painting code. Ideally we should drop more than
8228 128, to reduce the chances of breaking buggy programs that
8229 have live data below -128(%RSP). Memcheck fast-cases moves
8230 of 288 bytes due to the need to handle ppc64-linux quickly,
8231 so let's use 288. Of course the real fix is to get rid of
8232 this kludge entirely. */
8233 t_rsp = newTemp(Ity_I64);
8234 t_addr0 = newTemp(Ity_I64);
8236 vassert(vbi->guest_stack_redzone_size == 128);
8237 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
8238 putIReg64(R_RSP, mkexpr(t_rsp));
8240 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8242 /* Make t_addr0 point at it. */
8243 assign( t_addr0, mkexpr(t_rsp) );
8245 /* Mask out upper bits of the shift amount, since we're doing a
8246 reg. */
8247 assign( t_bitno1, binop(Iop_And64,
8248 mkexpr(t_bitno0),
8249 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8251 } else {
8252 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
8253 delta += len;
8254 assign( t_bitno1, mkexpr(t_bitno0) );
8257 /* At this point: t_addr0 is the address being operated on. If it
8258 was a reg, we will have pushed it onto the client's stack.
8259 t_bitno1 is the bit number, suitably masked in the case of a
8260 reg. */
8262 /* Now the main sequence. */
8263 assign( t_addr1,
8264 binop(Iop_Add64,
8265 mkexpr(t_addr0),
8266 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8268 /* t_addr1 now holds effective address */
8270 assign( t_bitno2,
8271 unop(Iop_64to8,
8272 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8274 /* t_bitno2 contains offset of bit within byte */
8276 if (op != BtOpNone) {
8277 t_mask = newTemp(Ity_I8);
8278 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8281 /* t_mask is now a suitable byte mask */
8283 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8285 if (op != BtOpNone) {
8286 switch (op) {
8287 case BtOpSet:
8288 assign( t_new,
8289 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
8290 break;
8291 case BtOpComp:
8292 assign( t_new,
8293 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
8294 break;
8295 case BtOpReset:
8296 assign( t_new,
8297 binop(Iop_And8, mkexpr(t_fetched),
8298 unop(Iop_Not8, mkexpr(t_mask))) );
8299 break;
8300 default:
8301 vpanic("dis_bt_G_E(amd64)");
8303 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
8304 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8305 mkexpr(t_new)/*new*/,
8306 guest_RIP_curr_instr );
8307 } else {
8308 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8312 /* Side effect done; now get selected bit into Carry flag. The Intel docs
8313 (as of 2015, at least) say that C holds the result, Z is unchanged, and
8314 O,S,A and P are undefined. However, on Skylake it appears that O,S,A,P
8315 are also unchanged, so let's do that. */
8316 const ULong maskC = AMD64G_CC_MASK_C;
8317 const ULong maskOSZAP = AMD64G_CC_MASK_O | AMD64G_CC_MASK_S
8318 | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
8319 | AMD64G_CC_MASK_P;
8321 IRTemp old_rflags = newTemp(Ity_I64);
8322 assign(old_rflags, mk_amd64g_calculate_rflags_all());
8324 IRTemp new_rflags = newTemp(Ity_I64);
8325 assign(new_rflags,
8326 binop(Iop_Or64,
8327 binop(Iop_And64, mkexpr(old_rflags), mkU64(maskOSZAP)),
8328 binop(Iop_And64,
8329 binop(Iop_Shr64,
8330 unop(Iop_8Uto64, mkexpr(t_fetched)),
8331 mkexpr(t_bitno2)),
8332 mkU64(maskC))));
8334 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8335 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8336 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
8337 /* Set NDEP even though it isn't used. This makes redundant-PUT
8338 elimination of previous stores to this field work better. */
8339 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8341 /* Move reg operand from stack back to reg */
8342 if (epartIsReg(modrm)) {
8343 /* t_rsp still points at it. */
8344 /* only write the reg if actually modifying it; doing otherwise
8345 zeroes the top half erroneously when doing btl due to
8346 standard zero-extend rule */
8347 if (op != BtOpNone)
8348 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
8349 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
8352 DIP("bt%s%c %s, %s\n",
8353 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8354 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8356 return delta;
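/* A minimal editorial sketch (illustrative only, names hypothetical): how the
   t_addr1/t_bitno2 computation above locates the bit -- an arithmetic shift
   of the bit number gives the byte offset (which may be negative for the
   memory forms), and the low three bits select the bit within that byte. */
#if 0
static UChar ref_bt_fetch_bit ( const UChar* base, Long bitno )
{
   const UChar* p = base + (bitno >> 3);       /* byte containing the bit */
   UInt bit_in_byte = (UInt)(bitno & 7);
   return (UChar)((*p >> bit_in_byte) & 1);    /* this value lands in CF */
}
#endif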
8361 /* Handle BSF/BSR. Only v-size seems necessary. */
8362 static
8363 ULong dis_bs_E_G ( const VexAbiInfo* vbi,
8364 Prefix pfx, Int sz, Long delta, Bool fwds )
8366 Bool isReg;
8367 UChar modrm;
8368 HChar dis_buf[50];
8370 IRType ty = szToITy(sz);
8371 IRTemp src = newTemp(ty);
8372 IRTemp dst = newTemp(ty);
8373 IRTemp src64 = newTemp(Ity_I64);
8374 IRTemp dst64 = newTemp(Ity_I64);
8375 IRTemp srcB = newTemp(Ity_I1);
8377 vassert(sz == 8 || sz == 4 || sz == 2);
8379 modrm = getUChar(delta);
8380 isReg = epartIsReg(modrm);
8381 if (isReg) {
8382 delta++;
8383 assign( src, getIRegE(sz, pfx, modrm) );
8384 } else {
8385 Int len;
8386 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
8387 delta += len;
8388 assign( src, loadLE(ty, mkexpr(addr)) );
8391 DIP("bs%c%c %s, %s\n",
8392 fwds ? 'f' : 'r', nameISize(sz),
8393 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8394 nameIRegG(sz, pfx, modrm));
8396 /* First, widen src to 64 bits if it is not already. */
8397 assign( src64, widenUto64(mkexpr(src)) );
8399 /* Generate a bool expression which is zero iff the original is
8400 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8401 instrumented by Memcheck, is instrumented expensively, since
8402 this may be used on the output of a preceding movmskb insn,
8403 which has been known to be partially defined, and in need of
8404 careful handling. */
8405 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
8407 /* Flags: Z is 1 iff source value is zero. All others
8408 are undefined -- we force them to zero. */
8409 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8410 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8411 stmt( IRStmt_Put(
8412 OFFB_CC_DEP1,
8413 IRExpr_ITE( mkexpr(srcB),
8414 /* src!=0 */
8415 mkU64(0),
8416 /* src==0 */
8417 mkU64(AMD64G_CC_MASK_Z)
8420 /* Set NDEP even though it isn't used. This makes redundant-PUT
8421 elimination of previous stores to this field work better. */
8422 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8424 /* Result: iff source value is zero, we can't use
8425 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8426 But anyway, amd64 semantics say the result is undefined in
8427 such situations. Hence handle the zero case specially. */
8429 /* Bleh. What we compute:
8431 bsf64: if src == 0 then {dst is unchanged}
8432 else Ctz64(src)
8434 bsr64: if src == 0 then {dst is unchanged}
8435 else 63 - Clz64(src)
8437 bsf32: if src == 0 then {dst is unchanged}
8438 else Ctz64(32Uto64(src))
8440 bsr32: if src == 0 then {dst is unchanged}
8441 else 63 - Clz64(32Uto64(src))
8443 bsf16: if src == 0 then {dst is unchanged}
8444 else Ctz64(32Uto64(16Uto32(src)))
8446 bsr16: if src == 0 then {dst is unchanged}
8447 else 63 - Clz64(32Uto64(16Uto32(src))) */
8450 /* The main computation, guarding against zero. */
8451 assign( dst64,
8452 IRExpr_ITE(
8453 mkexpr(srcB),
8454 /* src != 0 */
8455 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8456 : binop(Iop_Sub64,
8457 mkU64(63),
8458 unop(Iop_Clz64, mkexpr(src64))),
8459 /* src == 0 -- leave dst unchanged */
8460 widenUto64( getIRegG( sz, pfx, modrm ) )
8464 if (sz == 2)
8465 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
8466 else
8467 if (sz == 4)
8468 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8469 else
8470 assign( dst, mkexpr(dst64) );
8472 /* dump result back */
8473 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8475 return delta;
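/* A minimal editorial sketch (illustrative only): the non-zero-source cases
   above, expressed with GCC/Clang builtins rather than IR. The builtins are
   used purely for illustration; they are not part of this module. */
#if 0
static ULong ref_bsf64 ( ULong src )   /* src != 0 */
{ return (ULong)__builtin_ctzll(src); }
static ULong ref_bsr64 ( ULong src )   /* src != 0 */
{ return 63 - (ULong)__builtin_clzll(src); }
#endif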
8479 /* swap rAX with the reg specified by reg and REX.B */
8480 static
8481 void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
8483 IRType ty = szToITy(sz);
8484 IRTemp t1 = newTemp(ty);
8485 IRTemp t2 = newTemp(ty);
8486 vassert(sz == 2 || sz == 4 || sz == 8);
8487 vassert(regLo3 < 8);
8488 if (sz == 8) {
8489 assign( t1, getIReg64(R_RAX) );
8490 assign( t2, getIRegRexB(8, pfx, regLo3) );
8491 putIReg64( R_RAX, mkexpr(t2) );
8492 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
8493 } else if (sz == 4) {
8494 assign( t1, getIReg32(R_RAX) );
8495 assign( t2, getIRegRexB(4, pfx, regLo3) );
8496 putIReg32( R_RAX, mkexpr(t2) );
8497 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
8498 } else {
8499 assign( t1, getIReg16(R_RAX) );
8500 assign( t2, getIRegRexB(2, pfx, regLo3) );
8501 putIReg16( R_RAX, mkexpr(t2) );
8502 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
8504 DIP("xchg%c %s, %s\n",
8505 nameISize(sz), nameIRegRAX(sz),
8506 nameIRegRexB(sz,pfx, regLo3));
8510 static
8511 void codegen_SAHF ( void )
8513 /* Set the flags to:
8514 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8515 -- retain the old O flag
8516 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8517 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C) */
8519 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8520 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8521 IRTemp oldflags = newTemp(Ity_I64);
8522 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8523 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8524 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8525 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8526 stmt( IRStmt_Put( OFFB_CC_DEP1,
8527 binop(Iop_Or64,
8528 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8529 binop(Iop_And64,
8530 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8531 mkU64(mask_SZACP))
8537 static
8538 void codegen_LAHF ( void )
8540 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8541 IRExpr* rax_with_hole;
8542 IRExpr* new_byte;
8543 IRExpr* new_rax;
8544 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8545 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8547 IRTemp flags = newTemp(Ity_I64);
8548 assign( flags, mk_amd64g_calculate_rflags_all() );
8550 rax_with_hole
8551 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8552 new_byte
8553 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8554 mkU64(1<<1));
8555 new_rax
8556 = binop(Iop_Or64, rax_with_hole,
8557 binop(Iop_Shl64, new_byte, mkU8(8)));
8558 putIReg64(R_RAX, new_rax);
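/* A minimal editorial sketch (illustrative only): the byte LAHF deposits into
   AH, per the SF:ZF:0:AF:0:PF:1:CF layout noted above. It relies on the
   AMD64G_CC_MASK_* constants occupying their architectural rflags positions. */
#if 0
static UChar ref_lahf_ah ( ULong rflags )
{
   ULong szacp = AMD64G_CC_MASK_S | AMD64G_CC_MASK_Z | AMD64G_CC_MASK_A
                 | AMD64G_CC_MASK_P | AMD64G_CC_MASK_C;
   return (UChar)((rflags & szacp) | (1 << 1));   /* bit 1 always reads as 1 */
}
#endif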
8562 static
8563 ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
8564 const VexAbiInfo* vbi,
8565 Prefix pfx,
8566 Int size,
8567 Long delta0 )
8569 HChar dis_buf[50];
8570 Int len;
8572 IRType ty = szToITy(size);
8573 IRTemp acc = newTemp(ty);
8574 IRTemp src = newTemp(ty);
8575 IRTemp dest = newTemp(ty);
8576 IRTemp dest2 = newTemp(ty);
8577 IRTemp acc2 = newTemp(ty);
8578 IRTemp cond = newTemp(Ity_I1);
8579 IRTemp addr = IRTemp_INVALID;
8580 UChar rm = getUChar(delta0);
8582 /* There are 3 cases to consider:
8584 reg-reg: ignore any lock prefix, generate sequence based
8585 on ITE
8587 reg-mem, not locked: ignore any lock prefix, generate sequence
8588 based on ITE
8590 reg-mem, locked: use IRCAS */
8593 /* Decide whether F2 or F3 are acceptable. Never for register
8594 case, but for the memory case, one or the other is OK provided
8595 LOCK is also present. */
8596 if (epartIsReg(rm)) {
8597 if (haveF2orF3(pfx)) {
8598 *ok = False;
8599 return delta0;
8601 } else {
8602 if (haveF2orF3(pfx)) {
8603 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8604 *ok = False;
8605 return delta0;
8610 if (epartIsReg(rm)) {
8611 /* case 1 */
8612 assign( dest, getIRegE(size, pfx, rm) );
8613 delta0++;
8614 assign( src, getIRegG(size, pfx, rm) );
8615 assign( acc, getIRegRAX(size) );
8616 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8617 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8618 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8619 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8620 putIRegRAX(size, mkexpr(acc2));
8621 putIRegE(size, pfx, rm, mkexpr(dest2));
8622 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8623 nameIRegG(size,pfx,rm),
8624 nameIRegE(size,pfx,rm) );
8626 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8627 /* case 2 */
8628 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8629 assign( dest, loadLE(ty, mkexpr(addr)) );
8630 delta0 += len;
8631 assign( src, getIRegG(size, pfx, rm) );
8632 assign( acc, getIRegRAX(size) );
8633 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8634 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8635 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8636 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8637 putIRegRAX(size, mkexpr(acc2));
8638 storeLE( mkexpr(addr), mkexpr(dest2) );
8639 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8640 nameIRegG(size,pfx,rm), dis_buf);
8642 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8643 /* case 3 */
8644 /* src is new value. acc is expected value. dest is old value.
8645 Compute success from the output of the IRCAS, and steer the
8646 new value for RAX accordingly: in case of success, RAX is
8647 unchanged. */
8648 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8649 delta0 += len;
8650 assign( src, getIRegG(size, pfx, rm) );
8651 assign( acc, getIRegRAX(size) );
8652 stmt( IRStmt_CAS(
8653 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8654 NULL, mkexpr(acc), NULL, mkexpr(src) )
8656 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
8657 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
8658 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
8659 putIRegRAX(size, mkexpr(acc2));
8660 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8661 nameIRegG(size,pfx,rm), dis_buf);
8663 else vassert(0);
8665 *ok = True;
8666 return delta0;
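/* A minimal editorial sketch (illustrative only, non-atomic): the cmpxchg
   data flow shared by all three cases above. Returns the ZF value; *mem and
   *rax are updated as the instruction would update them. */
#if 0
static Bool ref_cmpxchg64 ( ULong* mem, ULong* rax, ULong src )
{
   ULong dest  = *mem;             /* old value at the destination */
   Bool  equal = (*rax == dest);   /* ZF comes from comparing RAX with it */
   *mem = equal ? src  : dest;     /* new value is stored only on success */
   *rax = equal ? *rax : dest;     /* on failure, RAX receives the old value */
   return equal;
}
#endif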
8670 /* Handle conditional move instructions of the form
8671 cmovcc E(reg-or-mem), G(reg)
8673 E(src) is reg-or-mem
8674 G(dst) is reg.
8676 If E is reg, --> GET %E, tmps
8677 GET %G, tmpd
8678 CMOVcc tmps, tmpd
8679 PUT tmpd, %G
8681 If E is mem --> (getAddr E) -> tmpa
8682 LD (tmpa), tmps
8683 GET %G, tmpd
8684 CMOVcc tmps, tmpd
8685 PUT tmpd, %G */
8687 static
8688 ULong dis_cmov_E_G ( const VexAbiInfo* vbi,
8689 Prefix pfx,
8690 Int sz,
8691 AMD64Condcode cond,
8692 Long delta0 )
8694 UChar rm = getUChar(delta0);
8695 HChar dis_buf[50];
8696 Int len;
8698 IRType ty = szToITy(sz);
8699 IRTemp tmps = newTemp(ty);
8700 IRTemp tmpd = newTemp(ty);
8702 if (epartIsReg(rm)) {
8703 assign( tmps, getIRegE(sz, pfx, rm) );
8704 assign( tmpd, getIRegG(sz, pfx, rm) );
8706 putIRegG( sz, pfx, rm,
8707 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8708 mkexpr(tmps),
8709 mkexpr(tmpd) )
8711 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8712 nameIRegE(sz,pfx,rm),
8713 nameIRegG(sz,pfx,rm));
8714 return 1+delta0;
8717 /* E refers to memory */
8719 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8720 assign( tmps, loadLE(ty, mkexpr(addr)) );
8721 assign( tmpd, getIRegG(sz, pfx, rm) );
8723 putIRegG( sz, pfx, rm,
8724 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8725 mkexpr(tmps),
8726 mkexpr(tmpd) )
8729 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8730 dis_buf,
8731 nameIRegG(sz,pfx,rm));
8732 return len+delta0;
8737 static
8738 ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
8739 const VexAbiInfo* vbi,
8740 Prefix pfx, Int sz, Long delta0 )
8742 Int len;
8743 UChar rm = getUChar(delta0);
8744 HChar dis_buf[50];
8746 IRType ty = szToITy(sz);
8747 IRTemp tmpd = newTemp(ty);
8748 IRTemp tmpt0 = newTemp(ty);
8749 IRTemp tmpt1 = newTemp(ty);
8751 /* There are 3 cases to consider:
8753 reg-reg: ignore any lock prefix,
8754 generate 'naive' (non-atomic) sequence
8756 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8757 (non-atomic) sequence
8759 reg-mem, locked: use IRCAS */
8762 if (epartIsReg(rm)) {
8763 /* case 1 */
8764 assign( tmpd, getIRegE(sz, pfx, rm) );
8765 assign( tmpt0, getIRegG(sz, pfx, rm) );
8766 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8767 mkexpr(tmpd), mkexpr(tmpt0)) );
8768 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8769 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8770 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8771 DIP("xadd%c %s, %s\n",
8772 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
8773 *decode_ok = True;
8774 return 1+delta0;
8776 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
8777 /* case 2 */
8778 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8779 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8780 assign( tmpt0, getIRegG(sz, pfx, rm) );
8781 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8782 mkexpr(tmpd), mkexpr(tmpt0)) );
8783 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8784 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8785 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8786 DIP("xadd%c %s, %s\n",
8787 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8788 *decode_ok = True;
8789 return len+delta0;
8791 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
8792 /* case 3 */
8793 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8794 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8795 assign( tmpt0, getIRegG(sz, pfx, rm) );
8796 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8797 mkexpr(tmpd), mkexpr(tmpt0)) );
8798 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8799 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8800 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8801 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8802 DIP("xadd%c %s, %s\n",
8803 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8804 *decode_ok = True;
8805 return len+delta0;
8807 /*UNREACHED*/
8808 vassert(0);
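/* A minimal editorial sketch (illustrative only, non-atomic): the xadd data
   movement common to the three cases above -- the old destination value goes
   back to G, the sum goes to the destination, and the flags thunk describes
   the addition. */
#if 0
static void ref_xadd64 ( ULong* dst, ULong* g )
{
   ULong old = *dst;
   *dst = old + *g;   /* destination receives the sum */
   *g   = old;        /* G receives the previous destination value */
}
#endif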
8811 //.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8812 //..
8813 //.. static
8814 //.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
8815 //.. {
8816 //.. Int len;
8817 //.. IRTemp addr;
8818 //.. UChar rm = getUChar(delta0);
8819 //.. HChar dis_buf[50];
8820 //..
8821 //.. if (epartIsReg(rm)) {
8822 //.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8823 //.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8824 //.. return 1+delta0;
8825 //.. } else {
8826 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8827 //.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8828 //.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8829 //.. return len+delta0;
8830 //.. }
8831 //.. }
8832 //..
8833 //.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8834 //.. dst is ireg and sz==4, zero out top half of it. */
8835 //..
8836 //.. static
8837 //.. UInt dis_mov_Sw_Ew ( UChar sorb,
8838 //.. Int sz,
8839 //.. UInt delta0 )
8840 //.. {
8841 //.. Int len;
8842 //.. IRTemp addr;
8843 //.. UChar rm = getUChar(delta0);
8844 //.. HChar dis_buf[50];
8845 //..
8846 //.. vassert(sz == 2 || sz == 4);
8847 //..
8848 //.. if (epartIsReg(rm)) {
8849 //.. if (sz == 4)
8850 //.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8851 //.. else
8852 //.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8853 //..
8854 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8855 //.. return 1+delta0;
8856 //.. } else {
8857 //.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8858 //.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8859 //.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8860 //.. return len+delta0;
8861 //.. }
8862 //.. }
8864 /* Handle move instructions of the form
8865 mov S, E meaning
8866 mov sreg, reg-or-mem
8867 Is passed a pointer to the modRM byte, and the data size. Returns
8868 the address advanced completely over this instruction.
8870 VEX does not currently simulate segment registers on AMD64 which means that
8871 instead of moving a value of a segment register, zero is moved to the
8872 destination. The zero value represents a null (unused) selector. This is
8873 not correct (especially for the %cs, %fs and %gs registers) but it seems to
8874 provide a sufficient simulation for currently seen programs that use this
8875 instruction. If some program actually decides to use the obtained segment
8876 selector for something meaningful then the zero value should be a clear
8877 indicator that there is some problem.
8879 S(src) is sreg.
8880 E(dst) is reg-or-mem
8882 If E is reg, --> PUT $0, %E
8884 If E is mem, --> (getAddr E) -> tmpa
8885 ST $0, (tmpa) */
8887 static
8888 ULong dis_mov_S_E ( const VexAbiInfo* vbi,
8889 Prefix pfx,
8890 Int size,
8891 Long delta0 )
8893 Int len;
8894 UChar rm = getUChar(delta0);
8895 HChar dis_buf[50];
8897 if (epartIsReg(rm)) {
8898 putIRegE(size, pfx, rm, mkU(szToITy(size), 0));
8899 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8900 nameIRegE(size, pfx, rm));
8901 return 1+delta0;
8904 /* E refers to memory */
8906 IRTemp addr = disAMode(&len, vbi, pfx, delta0, dis_buf, 0);
8907 storeLE(mkexpr(addr), mkU16(0));
8908 DIP("mov %s,%s\n", nameSReg(gregOfRexRM(pfx, rm)),
8909 dis_buf);
8910 return len+delta0;
8914 //.. static
8915 //.. void dis_push_segreg ( UInt sreg, Int sz )
8916 //.. {
8917 //.. IRTemp t1 = newTemp(Ity_I16);
8918 //.. IRTemp ta = newTemp(Ity_I32);
8919 //.. vassert(sz == 2 || sz == 4);
8920 //..
8921 //.. assign( t1, getSReg(sreg) );
8922 //.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8923 //.. putIReg(4, R_ESP, mkexpr(ta));
8924 //.. storeLE( mkexpr(ta), mkexpr(t1) );
8925 //..
8926 //.. DIP("pushw %s\n", nameSReg(sreg));
8927 //.. }
8928 //..
8929 //.. static
8930 //.. void dis_pop_segreg ( UInt sreg, Int sz )
8931 //.. {
8932 //.. IRTemp t1 = newTemp(Ity_I16);
8933 //.. IRTemp ta = newTemp(Ity_I32);
8934 //.. vassert(sz == 2 || sz == 4);
8935 //..
8936 //.. assign( ta, getIReg(4, R_ESP) );
8937 //.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8938 //..
8939 //.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8940 //.. putSReg( sreg, mkexpr(t1) );
8941 //.. DIP("pop %s\n", nameSReg(sreg));
8942 //.. }
8944 static
8945 void dis_ret ( /*MOD*/DisResult* dres, const VexAbiInfo* vbi, ULong d64 )
8947 IRTemp t1 = newTemp(Ity_I64);
8948 IRTemp t2 = newTemp(Ity_I64);
8949 IRTemp t3 = newTemp(Ity_I64);
8950 assign(t1, getIReg64(R_RSP));
8951 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
8952 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8953 putIReg64(R_RSP, mkexpr(t3));
8954 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
8955 jmp_treg(dres, Ijk_Ret, t2);
8956 vassert(dres->whatNext == Dis_StopHere);
8960 /*------------------------------------------------------------*/
8961 /*--- SSE/SSE2/SSE3 helpers ---*/
8962 /*------------------------------------------------------------*/
8964 /* Indicates whether the op requires a rounding-mode argument. Note
8965 that this covers only vector floating point arithmetic ops, and
8966 omits the scalar ones that need rounding modes. Note also that
8967 inconsistencies here will get picked up later by the IR sanity
8968 checker, so this isn't correctness-critical. */
8969 static Bool requiresRMode ( IROp op )
8971 switch (op) {
8972 /* 128 bit ops */
8973 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8974 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8975 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8976 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8977 /* 256 bit ops */
8978 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8979 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8980 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8981 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8982 return True;
8983 default:
8984 break;
8986 return False;
8990 /* Worker function; do not call directly.
8991 Handles full width G = G `op` E and G = (not G) `op` E. */
8994 static ULong dis_SSE_E_to_G_all_wrk (
8995 const VexAbiInfo* vbi,
8996 Prefix pfx, Long delta,
8997 const HChar* opname, IROp op,
8998 Bool invertG
9001 HChar dis_buf[50];
9002 Int alen;
9003 IRTemp addr;
9004 UChar rm = getUChar(delta);
9005 Bool needsRMode = requiresRMode(op);
9006 IRExpr* gpart
9007 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
9008 : getXMMReg(gregOfRexRM(pfx,rm));
9009 if (epartIsReg(rm)) {
9010 putXMMReg(
9011 gregOfRexRM(pfx,rm),
9012 needsRMode
9013 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9014 gpart,
9015 getXMMReg(eregOfRexRM(pfx,rm)))
9016 : binop(op, gpart,
9017 getXMMReg(eregOfRexRM(pfx,rm)))
9019 DIP("%s %s,%s\n", opname,
9020 nameXMMReg(eregOfRexRM(pfx,rm)),
9021 nameXMMReg(gregOfRexRM(pfx,rm)) );
9022 return delta+1;
9023 } else {
9024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9025 putXMMReg(
9026 gregOfRexRM(pfx,rm),
9027 needsRMode
9028 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
9029 gpart,
9030 loadLE(Ity_V128, mkexpr(addr)))
9031 : binop(op, gpart,
9032 loadLE(Ity_V128, mkexpr(addr)))
9034 DIP("%s %s,%s\n", opname,
9035 dis_buf,
9036 nameXMMReg(gregOfRexRM(pfx,rm)) );
9037 return delta+alen;
9042 /* All lanes SSE binary operation, G = G `op` E. */
9044 static
9045 ULong dis_SSE_E_to_G_all ( const VexAbiInfo* vbi,
9046 Prefix pfx, Long delta,
9047 const HChar* opname, IROp op )
9049 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
9052 /* All lanes SSE binary operation, G = (not G) `op` E. */
9054 static
9055 ULong dis_SSE_E_to_G_all_invG ( const VexAbiInfo* vbi,
9056 Prefix pfx, Long delta,
9057 const HChar* opname, IROp op )
9059 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
9063 /* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
9065 static ULong dis_SSE_E_to_G_lo32 ( const VexAbiInfo* vbi,
9066 Prefix pfx, Long delta,
9067 const HChar* opname, IROp op )
9069 HChar dis_buf[50];
9070 Int alen;
9071 IRTemp addr;
9072 UChar rm = getUChar(delta);
9073 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9074 if (epartIsReg(rm)) {
9075 putXMMReg( gregOfRexRM(pfx,rm),
9076 binop(op, gpart,
9077 getXMMReg(eregOfRexRM(pfx,rm))) );
9078 DIP("%s %s,%s\n", opname,
9079 nameXMMReg(eregOfRexRM(pfx,rm)),
9080 nameXMMReg(gregOfRexRM(pfx,rm)) );
9081 return delta+1;
9082 } else {
9083 /* We can only do a 32-bit memory read, so the upper 3/4 of the
9084 E operand needs to be made simply of zeroes. */
9085 IRTemp epart = newTemp(Ity_V128);
9086 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9087 assign( epart, unop( Iop_32UtoV128,
9088 loadLE(Ity_I32, mkexpr(addr))) );
9089 putXMMReg( gregOfRexRM(pfx,rm),
9090 binop(op, gpart, mkexpr(epart)) );
9091 DIP("%s %s,%s\n", opname,
9092 dis_buf,
9093 nameXMMReg(gregOfRexRM(pfx,rm)) );
9094 return delta+alen;
9099 /* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
9101 static ULong dis_SSE_E_to_G_lo64 ( const VexAbiInfo* vbi,
9102 Prefix pfx, Long delta,
9103 const HChar* opname, IROp op )
9105 HChar dis_buf[50];
9106 Int alen;
9107 IRTemp addr;
9108 UChar rm = getUChar(delta);
9109 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9110 if (epartIsReg(rm)) {
9111 putXMMReg( gregOfRexRM(pfx,rm),
9112 binop(op, gpart,
9113 getXMMReg(eregOfRexRM(pfx,rm))) );
9114 DIP("%s %s,%s\n", opname,
9115 nameXMMReg(eregOfRexRM(pfx,rm)),
9116 nameXMMReg(gregOfRexRM(pfx,rm)) );
9117 return delta+1;
9118 } else {
9119 /* We can only do a 64-bit memory read, so the upper half of the
9120 E operand needs to be made simply of zeroes. */
9121 IRTemp epart = newTemp(Ity_V128);
9122 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9123 assign( epart, unop( Iop_64UtoV128,
9124 loadLE(Ity_I64, mkexpr(addr))) );
9125 putXMMReg( gregOfRexRM(pfx,rm),
9126 binop(op, gpart, mkexpr(epart)) );
9127 DIP("%s %s,%s\n", opname,
9128 dis_buf,
9129 nameXMMReg(gregOfRexRM(pfx,rm)) );
9130 return delta+alen;
9135 /* All lanes unary SSE operation, G = op(E). */
9137 static ULong dis_SSE_E_to_G_unary_all (
9138 const VexAbiInfo* vbi,
9139 Prefix pfx, Long delta,
9140 const HChar* opname, IROp op
9143 HChar dis_buf[50];
9144 Int alen;
9145 IRTemp addr;
9146 UChar rm = getUChar(delta);
9147 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
9148 // up in the usual way.
9149 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
9150 if (epartIsReg(rm)) {
9151 IRExpr* src = getXMMReg(eregOfRexRM(pfx,rm));
9152 /* XXXROUNDINGFIXME */
9153 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9154 : unop(op, src);
9155 putXMMReg( gregOfRexRM(pfx,rm), res );
9156 DIP("%s %s,%s\n", opname,
9157 nameXMMReg(eregOfRexRM(pfx,rm)),
9158 nameXMMReg(gregOfRexRM(pfx,rm)) );
9159 return delta+1;
9160 } else {
9161 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9162 IRExpr* src = loadLE(Ity_V128, mkexpr(addr));
9163 /* XXXROUNDINGFIXME */
9164 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), src)
9165 : unop(op, src);
9166 putXMMReg( gregOfRexRM(pfx,rm), res );
9167 DIP("%s %s,%s\n", opname,
9168 dis_buf,
9169 nameXMMReg(gregOfRexRM(pfx,rm)) );
9170 return delta+alen;
9175 /* Lowest 32-bit lane only unary SSE operation, G = op(E). */
9177 static ULong dis_SSE_E_to_G_unary_lo32 (
9178 const VexAbiInfo* vbi,
9179 Prefix pfx, Long delta,
9180 const HChar* opname, IROp op
9183 /* First we need to get the old G value and patch the low 32 bits
9184 of the E operand into it. Then apply op and write back to G. */
9185 HChar dis_buf[50];
9186 Int alen;
9187 IRTemp addr;
9188 UChar rm = getUChar(delta);
9189 IRTemp oldG0 = newTemp(Ity_V128);
9190 IRTemp oldG1 = newTemp(Ity_V128);
9192 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9194 if (epartIsReg(rm)) {
9195 assign( oldG1,
9196 binop( Iop_SetV128lo32,
9197 mkexpr(oldG0),
9198 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
9199 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9200 DIP("%s %s,%s\n", opname,
9201 nameXMMReg(eregOfRexRM(pfx,rm)),
9202 nameXMMReg(gregOfRexRM(pfx,rm)) );
9203 return delta+1;
9204 } else {
9205 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9206 assign( oldG1,
9207 binop( Iop_SetV128lo32,
9208 mkexpr(oldG0),
9209 loadLE(Ity_I32, mkexpr(addr)) ));
9210 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9211 DIP("%s %s,%s\n", opname,
9212 dis_buf,
9213 nameXMMReg(gregOfRexRM(pfx,rm)) );
9214 return delta+alen;
9219 /* Lowest 64-bit lane only unary SSE operation, G = op(E). */
9221 static ULong dis_SSE_E_to_G_unary_lo64 (
9222 const VexAbiInfo* vbi,
9223 Prefix pfx, Long delta,
9224 const HChar* opname, IROp op
9227 /* First we need to get the old G value and patch the low 64 bits
9228 of the E operand into it. Then apply op and write back to G. */
9229 HChar dis_buf[50];
9230 Int alen;
9231 IRTemp addr;
9232 UChar rm = getUChar(delta);
9233 IRTemp oldG0 = newTemp(Ity_V128);
9234 IRTemp oldG1 = newTemp(Ity_V128);
9236 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
9238 if (epartIsReg(rm)) {
9239 assign( oldG1,
9240 binop( Iop_SetV128lo64,
9241 mkexpr(oldG0),
9242 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
9243 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9244 DIP("%s %s,%s\n", opname,
9245 nameXMMReg(eregOfRexRM(pfx,rm)),
9246 nameXMMReg(gregOfRexRM(pfx,rm)) );
9247 return delta+1;
9248 } else {
9249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9250 assign( oldG1,
9251 binop( Iop_SetV128lo64,
9252 mkexpr(oldG0),
9253 loadLE(Ity_I64, mkexpr(addr)) ));
9254 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
9255 DIP("%s %s,%s\n", opname,
9256 dis_buf,
9257 nameXMMReg(gregOfRexRM(pfx,rm)) );
9258 return delta+alen;
9263 /* SSE integer binary operation:
9264 G = G `op` E (eLeft == False)
9265 G = E `op` G (eLeft == True) */
9267 static ULong dis_SSEint_E_to_G(
9268 const VexAbiInfo* vbi,
9269 Prefix pfx, Long delta,
9270 const HChar* opname, IROp op,
9271 Bool eLeft
9274 HChar dis_buf[50];
9275 Int alen;
9276 IRTemp addr;
9277 UChar rm = getUChar(delta);
9278 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
9279 IRExpr* epart = NULL;
9280 if (epartIsReg(rm)) {
9281 epart = getXMMReg(eregOfRexRM(pfx,rm));
9282 DIP("%s %s,%s\n", opname,
9283 nameXMMReg(eregOfRexRM(pfx,rm)),
9284 nameXMMReg(gregOfRexRM(pfx,rm)) );
9285 delta += 1;
9286 } else {
9287 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9288 epart = loadLE(Ity_V128, mkexpr(addr));
9289 DIP("%s %s,%s\n", opname,
9290 dis_buf,
9291 nameXMMReg(gregOfRexRM(pfx,rm)) );
9292 delta += alen;
9294 putXMMReg( gregOfRexRM(pfx,rm),
9295 eLeft ? binop(op, epart, gpart)
9296 : binop(op, gpart, epart) );
9297 return delta;
9301 /* Helper for doing SSE FP comparisons. False return ==> unhandled.
9302 This is all a bit of a kludge in that it ignores the subtleties of
9303 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9304 spec. The meaning of the outputs is as follows:
9306 preZeroP: the active lanes of both incoming arguments should be set to zero
9307 before performing the operation. IOW the actual args are to be ignored
9308 and instead zero bits are to be used. This is a bit strange but is needed
9309 to make the constant-false/true variants (FALSE_OQ, TRUE_UQ, FALSE_OS,
9310 TRUE_US) work.
9312 preSwapP: the args should be swapped before performing the operation. Note
9313 that zeroing arg input sections (per preZeroP) and swapping them (per
9314 preSwapP) are allowed to happen in either order; the result is the same.
9316 opP: this returns the actual comparison op to perform.
9318 postNotP: if true, the result(ing vector) of the comparison operation should
9319 be bitwise-not-ed. Note that only the lanes of the output actually
9320 computed by opP should be not-ed. */
9322 static Bool findSSECmpOp ( /*OUT*/Bool* preZeroP,
9323 /*OUT*/Bool* preSwapP,
9324 /*OUT*/IROp* opP,
9325 /*OUT*/Bool* postNotP,
9326 UInt imm8, Bool all_lanes, Int sz )
9328 vassert(*preZeroP == False);
9329 vassert(*preSwapP == False);
9330 vassert(*opP == Iop_INVALID);
9331 vassert(*postNotP == False);
9333 if (imm8 >= 32) return False;
9335 /* First, compute a (preZero, preSwap, op, postNot) quad from
9336 the supplied imm8. */
9337 Bool preZero = False;
9338 Bool preSwap = False;
9339 IROp op = Iop_INVALID;
9340 Bool postNot = False;
9342 # define XXX(_preZero, _preSwap, _op, _postNot) \
9343 { preZero = _preZero; preSwap = _preSwap; op = _op; postNot = _postNot; }
9344 // If you add a case here, add a corresponding test for both VCMPSD_128
9345 // and VCMPSS_128 in avx-1.c.
9346 // Cases 0xA and above are
9347 // "Enhanced Comparison Predicate[s] for VEX-Encoded [insns]"
9348 switch (imm8) {
9349 // "O" = ordered, "U" = unordered
9350 // "Q" = non-signalling (quiet), "S" = signalling
9352 // replace active arg lanes in operands with zero
9353 // |
9354 // | swap operands before applying the cmp op?
9355 // | |
9356 // | | cmp op invert active lanes after?
9357 // | | | |
9358 // v v v v
9359 case 0x0: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9360 case 0x8: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
9361 case 0x10: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_OS
9362 case 0x18: XXX(False, False, Iop_CmpEQ32Fx4, False); break; // EQ_US
9364 case 0x1: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OS
9365 case 0x11: XXX(False, False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9367 case 0x2: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OS
9368 case 0x12: XXX(False, False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9370 case 0x3: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9371 case 0x13: XXX(False, False, Iop_CmpUN32Fx4, False); break; // UNORD_S
9373 // 0xC: this isn't really right because it returns all-1s when
9374 // either operand is a NaN, and it should return all-0s.
9375 case 0x4: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9376 case 0xC: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9377 case 0x14: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_US
9378 case 0x1C: XXX(False, False, Iop_CmpEQ32Fx4, True); break; // NEQ_OS
9380 case 0x5: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_US
9381 case 0x15: XXX(False, False, Iop_CmpLT32Fx4, True); break; // NLT_UQ
9383 case 0x6: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_US
9384 case 0x16: XXX(False, False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9386 case 0x7: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_Q
9387 case 0x17: XXX(False, False, Iop_CmpUN32Fx4, True); break; // ORD_S
9389 case 0x9: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_US
9390 case 0x19: XXX(False, True, Iop_CmpLE32Fx4, True); break; // NGE_UQ
9392 case 0xA: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_US
9393 case 0x1A: XXX(False, True, Iop_CmpLT32Fx4, True); break; // NGT_UQ
9395 case 0xD: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OS
9396 case 0x1D: XXX(False, True, Iop_CmpLE32Fx4, False); break; // GE_OQ
9398 case 0xE: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OS
9399 case 0x1E: XXX(False, True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9400 // Constant-value-result ops
9401 case 0xB: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OQ
9402 case 0xF: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_UQ
9403 case 0x1B: XXX(True, False, Iop_CmpEQ32Fx4, True); break; // FALSE_OS
9404 case 0x1F: XXX(True, False, Iop_CmpEQ32Fx4, False); break; // TRUE_US
9405 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9406 avx-1.c if new cases turn up. */
9407 default: break;
9409 # undef XXX
9410 if (op == Iop_INVALID) return False;
9412 /* Now convert the op into one with the same arithmetic but that is
9413 correct for the width and laneage requirements. */
9415 /**/ if (sz == 4 && all_lanes) {
9416 switch (op) {
9417 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9418 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9419 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9420 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9421 default: vassert(0);
9424 else if (sz == 4 && !all_lanes) {
9425 switch (op) {
9426 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9427 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9428 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9429 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9430 default: vassert(0);
9433 else if (sz == 8 && all_lanes) {
9434 switch (op) {
9435 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9436 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9437 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9438 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9439 default: vassert(0);
9442 else if (sz == 8 && !all_lanes) {
9443 switch (op) {
9444 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9445 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9446 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9447 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9448 default: vassert(0);
9451 else {
9452 vpanic("findSSECmpOp(amd64,guest)");
9455 if (preZero) {
9456 // In this case, preSwap is irrelevant, but assert anyway.
9457 vassert(preSwap == False);
9459 *preZeroP = preZero; *preSwapP = preSwap; *opP = op; *postNotP = postNot;
9460 return True;
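/* A minimal editorial sketch (illustrative only, names hypothetical): how a
   caller is expected to consume the (preZero, preSwap, op, postNot) quad for
   a two-operand compare, ignoring the active-lane subtleties noted above. */
#if 0
static IRExpr* ref_apply_cmp_quad ( Bool preZero, Bool preSwap, IROp op,
                                    Bool postNot, IRTemp argL, IRTemp argR )
{
   IRExpr* l   = preZero ? mkV128(0) : mkexpr(argL);
   IRExpr* r   = preZero ? mkV128(0) : mkexpr(argR);
   IRExpr* res = preSwap ? binop(op, r, l) : binop(op, l, r);
   return postNot ? unop(Iop_NotV128, res) : res;
}
#endif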
9464 /* Handles SSE 32F/64F comparisons. It can fail, in which case it
9465 returns the original delta to indicate failure. */
9467 static Long dis_SSE_cmp_E_to_G ( const VexAbiInfo* vbi,
9468 Prefix pfx, Long delta,
9469 const HChar* opname, Bool all_lanes, Int sz )
9471 Long delta0 = delta;
9472 HChar dis_buf[50];
9473 Int alen;
9474 UInt imm8;
9475 IRTemp addr;
9476 Bool preZero = False;
9477 Bool preSwap = False;
9478 IROp op = Iop_INVALID;
9479 Bool postNot = False;
9480 IRTemp plain = newTemp(Ity_V128);
9481 UChar rm = getUChar(delta);
9482 UShort mask = 0;
9483 vassert(sz == 4 || sz == 8);
9484 if (epartIsReg(rm)) {
9485 imm8 = getUChar(delta+1);
9486 if (imm8 >= 8) return delta0; /* FAIL */
9487 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9488 imm8, all_lanes, sz);
9489 if (!ok) return delta0; /* FAIL */
9490 vassert(!preZero); /* never needed for imm8 < 8 */
9491 vassert(!preSwap); /* never needed for imm8 < 8 */
9492 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9493 getXMMReg(eregOfRexRM(pfx,rm))) );
9494 delta += 2;
9495 DIP("%s $%u,%s,%s\n", opname,
9496 imm8,
9497 nameXMMReg(eregOfRexRM(pfx,rm)),
9498 nameXMMReg(gregOfRexRM(pfx,rm)) );
9499 } else {
9500 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
9501 imm8 = getUChar(delta+alen);
9502 if (imm8 >= 8) return delta0; /* FAIL */
9503 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
9504 imm8, all_lanes, sz);
9505 if (!ok) return delta0; /* FAIL */
9506 vassert(!preZero); /* never needed for imm8 < 8 */
9507 vassert(!preSwap); /* never needed for imm8 < 8 */
9508 assign( plain,
9509 binop(
9511 getXMMReg(gregOfRexRM(pfx,rm)),
9512 all_lanes
9513 ? loadLE(Ity_V128, mkexpr(addr))
9514 : sz == 8
9515 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9516 : /*sz==4*/
9517 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
9520 delta += alen+1;
9521 DIP("%s $%u,%s,%s\n", opname,
9522 imm8,
9523 dis_buf,
9524 nameXMMReg(gregOfRexRM(pfx,rm)) );
9527 if (postNot && all_lanes) {
9528 putXMMReg( gregOfRexRM(pfx,rm),
9529 unop(Iop_NotV128, mkexpr(plain)) );
9531 else
9532 if (postNot && !all_lanes) {
9533 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
9534 putXMMReg( gregOfRexRM(pfx,rm),
9535 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9537 else {
9538 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9541 return delta;
9545 /* Vector by scalar shift of G by the amount specified at the bottom
9546 of E. */
9548 static ULong dis_SSE_shiftG_byE ( const VexAbiInfo* vbi,
9549 Prefix pfx, Long delta,
9550 const HChar* opname, IROp op )
9552 HChar dis_buf[50];
9553 Int alen, size;
9554 IRTemp addr;
9555 Bool shl, shr, sar;
9556 UChar rm = getUChar(delta);
9557 IRTemp g0 = newTemp(Ity_V128);
9558 IRTemp g1 = newTemp(Ity_V128);
9559 IRTemp amt = newTemp(Ity_I64);
9560 IRTemp amt8 = newTemp(Ity_I8);
9561 if (epartIsReg(rm)) {
9562 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
9563 DIP("%s %s,%s\n", opname,
9564 nameXMMReg(eregOfRexRM(pfx,rm)),
9565 nameXMMReg(gregOfRexRM(pfx,rm)) );
9566 delta++;
9567 } else {
9568 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9569 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
9570 DIP("%s %s,%s\n", opname,
9571 dis_buf,
9572 nameXMMReg(gregOfRexRM(pfx,rm)) );
9573 delta += alen;
9575 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
9576 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
9578 shl = shr = sar = False;
9579 size = 0;
9580 switch (op) {
9581 case Iop_ShlN16x8: shl = True; size = 16; break;
9582 case Iop_ShlN32x4: shl = True; size = 32; break;
9583 case Iop_ShlN64x2: shl = True; size = 64; break;
9584 case Iop_SarN16x8: sar = True; size = 16; break;
9585 case Iop_SarN32x4: sar = True; size = 32; break;
9586 case Iop_ShrN16x8: shr = True; size = 16; break;
9587 case Iop_ShrN32x4: shr = True; size = 32; break;
9588 case Iop_ShrN64x2: shr = True; size = 64; break;
9589 default: vassert(0);
9592 if (shl || shr) {
9593 assign(
9595 IRExpr_ITE(
9596 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9597 binop(op, mkexpr(g0), mkexpr(amt8)),
9598 mkV128(0x0000)
9601 } else
9602 if (sar) {
9603 assign(
9605 IRExpr_ITE(
9606 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
9607 binop(op, mkexpr(g0), mkexpr(amt8)),
9608 binop(op, mkexpr(g0), mkU8(size-1))
9611 } else {
9612 vassert(0);
9615 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9616 return delta;
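/* A minimal editorial sketch (illustrative only): the out-of-range behaviour
   implemented above, shown for a single 16-bit lane. Logical shifts by an
   amount >= the lane width give zero; arithmetic shifts behave as a shift by
   (width - 1). Assumes '>>' on a signed value is an arithmetic shift. */
#if 0
static UShort ref_psllw_lane ( UShort lane, ULong amt )
{ return amt < 16 ? (UShort)(lane << amt) : 0; }
static UShort ref_psraw_lane ( Short lane, ULong amt )
{ return (UShort)(lane >> (amt < 16 ? amt : 15)); }
#endif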
9620 /* Vector by scalar shift of E by an immediate byte. */
9622 static
9623 ULong dis_SSE_shiftE_imm ( Prefix pfx,
9624 Long delta, const HChar* opname, IROp op )
9626 Bool shl, shr, sar;
9627 UChar rm = getUChar(delta);
9628 IRTemp e0 = newTemp(Ity_V128);
9629 IRTemp e1 = newTemp(Ity_V128);
9630 UChar amt, size;
9631 vassert(epartIsReg(rm));
9632 vassert(gregLO3ofRM(rm) == 2
9633 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
9634 amt = getUChar(delta+1);
9635 delta += 2;
9636 DIP("%s $%d,%s\n", opname,
9637 (Int)amt,
9638 nameXMMReg(eregOfRexRM(pfx,rm)) );
9639 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9641 shl = shr = sar = False;
9642 size = 0;
9643 switch (op) {
9644 case Iop_ShlN16x8: shl = True; size = 16; break;
9645 case Iop_ShlN32x4: shl = True; size = 32; break;
9646 case Iop_ShlN64x2: shl = True; size = 64; break;
9647 case Iop_SarN16x8: sar = True; size = 16; break;
9648 case Iop_SarN32x4: sar = True; size = 32; break;
9649 case Iop_ShrN16x8: shr = True; size = 16; break;
9650 case Iop_ShrN32x4: shr = True; size = 32; break;
9651 case Iop_ShrN64x2: shr = True; size = 64; break;
9652 default: vassert(0);
9655 if (shl || shr) {
9656 assign( e1, amt >= size
9657 ? mkV128(0x0000)
9658 : binop(op, mkexpr(e0), mkU8(amt))
9660 } else
9661 if (sar) {
9662 assign( e1, amt >= size
9663 ? binop(op, mkexpr(e0), mkU8(size-1))
9664 : binop(op, mkexpr(e0), mkU8(amt))
9666 } else {
9667 vassert(0);
9670 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9671 return delta;
9675 /* Get the current SSE rounding mode. */
9677 static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9679 return
9680 unop( Iop_64to32,
9681 binop( Iop_And64,
9682 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9683 mkU64(3) ));
9686 static void put_sse_roundingmode ( IRExpr* sseround )
9688 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
9689 stmt( IRStmt_Put( OFFB_SSEROUND,
9690 unop(Iop_32Uto64,sseround) ) );
9693 /* Break a V128-bit value up into four 32-bit ints. */
9695 static void breakupV128to32s ( IRTemp t128,
9696 /*OUTs*/
9697 IRTemp* t3, IRTemp* t2,
9698 IRTemp* t1, IRTemp* t0 )
9700 IRTemp hi64 = newTemp(Ity_I64);
9701 IRTemp lo64 = newTemp(Ity_I64);
9702 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9703 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9705 vassert(t0 && *t0 == IRTemp_INVALID);
9706 vassert(t1 && *t1 == IRTemp_INVALID);
9707 vassert(t2 && *t2 == IRTemp_INVALID);
9708 vassert(t3 && *t3 == IRTemp_INVALID);
9710 *t0 = newTemp(Ity_I32);
9711 *t1 = newTemp(Ity_I32);
9712 *t2 = newTemp(Ity_I32);
9713 *t3 = newTemp(Ity_I32);
9714 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9715 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9716 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9717 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9720 /* Construct a V128-bit value from four 32-bit ints. */
9722 static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9723 IRTemp t1, IRTemp t0 )
9725 return
9726 binop( Iop_64HLtoV128,
9727 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9728 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9732 /* Break a 64-bit value up into four 16-bit ints. */
9734 static void breakup64to16s ( IRTemp t64,
9735 /*OUTs*/
9736 IRTemp* t3, IRTemp* t2,
9737 IRTemp* t1, IRTemp* t0 )
9739 IRTemp hi32 = newTemp(Ity_I32);
9740 IRTemp lo32 = newTemp(Ity_I32);
9741 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9742 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9744 vassert(t0 && *t0 == IRTemp_INVALID);
9745 vassert(t1 && *t1 == IRTemp_INVALID);
9746 vassert(t2 && *t2 == IRTemp_INVALID);
9747 vassert(t3 && *t3 == IRTemp_INVALID);
9749 *t0 = newTemp(Ity_I16);
9750 *t1 = newTemp(Ity_I16);
9751 *t2 = newTemp(Ity_I16);
9752 *t3 = newTemp(Ity_I16);
9753 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9754 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9755 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9756 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9759 /* Construct a 64-bit value from four 16-bit ints. */
9761 static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9762 IRTemp t1, IRTemp t0 )
9764 return
9765 binop( Iop_32HLto64,
9766 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9767 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9771 /* Break a V256-bit value up into four 64-bit ints. */
9773 static void breakupV256to64s ( IRTemp t256,
9774 /*OUTs*/
9775 IRTemp* t3, IRTemp* t2,
9776 IRTemp* t1, IRTemp* t0 )
9778 vassert(t0 && *t0 == IRTemp_INVALID);
9779 vassert(t1 && *t1 == IRTemp_INVALID);
9780 vassert(t2 && *t2 == IRTemp_INVALID);
9781 vassert(t3 && *t3 == IRTemp_INVALID);
9782 *t0 = newTemp(Ity_I64);
9783 *t1 = newTemp(Ity_I64);
9784 *t2 = newTemp(Ity_I64);
9785 *t3 = newTemp(Ity_I64);
9786 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9787 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9788 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9789 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9792 /* Break a V256-bit value up into two V128s. */
9794 static void breakupV256toV128s ( IRTemp t256,
9795 /*OUTs*/
9796 IRTemp* t1, IRTemp* t0 )
9798 vassert(t0 && *t0 == IRTemp_INVALID);
9799 vassert(t1 && *t1 == IRTemp_INVALID);
9800 *t0 = newTemp(Ity_V128);
9801 *t1 = newTemp(Ity_V128);
9802 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9803 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9806 /* Break a V256-bit value up into eight 32-bit ints. */
9808 static void breakupV256to32s ( IRTemp t256,
9809 /*OUTs*/
9810 IRTemp* t7, IRTemp* t6,
9811 IRTemp* t5, IRTemp* t4,
9812 IRTemp* t3, IRTemp* t2,
9813 IRTemp* t1, IRTemp* t0 )
9815 IRTemp t128_1 = IRTemp_INVALID;
9816 IRTemp t128_0 = IRTemp_INVALID;
9817 breakupV256toV128s( t256, &t128_1, &t128_0 );
9818 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9819 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9822 /* Break a V128-bit value up into two 64-bit ints. */
9824 static void breakupV128to64s ( IRTemp t128,
9825 /*OUTs*/
9826 IRTemp* t1, IRTemp* t0 )
9828 vassert(t0 && *t0 == IRTemp_INVALID);
9829 vassert(t1 && *t1 == IRTemp_INVALID);
9830 *t0 = newTemp(Ity_I64);
9831 *t1 = newTemp(Ity_I64);
9832 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9833 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9836 /* Construct a V256-bit value from eight 32-bit ints. */
9838 static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9839 IRTemp t5, IRTemp t4,
9840 IRTemp t3, IRTemp t2,
9841 IRTemp t1, IRTemp t0 )
9843 return
9844 binop( Iop_V128HLtoV256,
9845 binop( Iop_64HLtoV128,
9846 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9847 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9848 binop( Iop_64HLtoV128,
9849 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9850 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9854 /* Construct a V256-bit value from four 64-bit ints. */
9856 static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9857 IRTemp t1, IRTemp t0 )
9859 return
9860 binop( Iop_V128HLtoV256,
9861 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9862 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9866 /* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9867 values (aa,bb), computes, for each of the 4 16-bit lanes:
9869 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9871 static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9873 IRTemp aa = newTemp(Ity_I64);
9874 IRTemp bb = newTemp(Ity_I64);
9875 IRTemp aahi32s = newTemp(Ity_I64);
9876 IRTemp aalo32s = newTemp(Ity_I64);
9877 IRTemp bbhi32s = newTemp(Ity_I64);
9878 IRTemp bblo32s = newTemp(Ity_I64);
9879 IRTemp rHi = newTemp(Ity_I64);
9880 IRTemp rLo = newTemp(Ity_I64);
9881 IRTemp one32x2 = newTemp(Ity_I64);
9882 assign(aa, aax);
9883 assign(bb, bbx);
9884 assign( aahi32s,
9885 binop(Iop_SarN32x2,
9886 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9887 mkU8(16) ));
9888 assign( aalo32s,
9889 binop(Iop_SarN32x2,
9890 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9891 mkU8(16) ));
9892 assign( bbhi32s,
9893 binop(Iop_SarN32x2,
9894 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9895 mkU8(16) ));
9896 assign( bblo32s,
9897 binop(Iop_SarN32x2,
9898 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9899 mkU8(16) ));
9900 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9901 assign(
9902 rHi,
9903 binop(
9904 Iop_ShrN32x2,
9905 binop(
9906 Iop_Add32x2,
9907 binop(
9908 Iop_ShrN32x2,
9909 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9910 mkU8(14)
9912 mkexpr(one32x2)
9914 mkU8(1)
9917 assign(
9918 rLo,
9919 binop(
9920 Iop_ShrN32x2,
9921 binop(
9922 Iop_Add32x2,
9923 binop(
9924 Iop_ShrN32x2,
9925 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9926 mkU8(14)
9928 mkexpr(one32x2)
9930 mkU8(1)
9933 return
9934 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
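/* Worked example (editorial): with aa_lane = 0x4000 and bb_lane = 0x2000
   (0.5 and 0.25 in Q15), the signed 32-bit product is 0x08000000;
   >>u 14 gives 0x2000, +1 gives 0x2001, and the final >>u 1 gives
   0x1000, i.e. 0.125 in Q15 after the rounding step. */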
9937 /* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9938 values (aa,bb), computes, for each lane:
9940 if aa_lane < 0 then - bb_lane
9941 else if aa_lane > 0 then bb_lane
9942 else 0
9944 static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9946 IRTemp aa = newTemp(Ity_I64);
9947 IRTemp bb = newTemp(Ity_I64);
9948 IRTemp zero = newTemp(Ity_I64);
9949 IRTemp bbNeg = newTemp(Ity_I64);
9950 IRTemp negMask = newTemp(Ity_I64);
9951 IRTemp posMask = newTemp(Ity_I64);
9952 IROp opSub = Iop_INVALID;
9953 IROp opCmpGTS = Iop_INVALID;
9955 switch (laneszB) {
9956 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9957 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9958 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9959 default: vassert(0);
9962 assign( aa, aax );
9963 assign( bb, bbx );
9964 assign( zero, mkU64(0) );
9965 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9966 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9967 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9969 return
9970 binop(Iop_Or64,
9971 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9972 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
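/* Worked example (editorial): for PSIGNB (laneszB == 1), an aa lane of
   0xFF (-1) makes negMask all-ones and posMask zero, so the result lane
   is the negated bb lane; an aa lane of 0x05 selects bb unchanged; and
   an aa lane of 0x00 leaves both masks zero, giving a zero result lane,
   exactly as described in the comment above. */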
9977 /* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9978 value aa, computes, for each lane
9980 if aa < 0 then -aa else aa
9982 Note that the result is interpreted as unsigned, so that the
9983 absolute value of the most negative signed input can be
9984 represented.
9986 static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
9988 IRTemp res = newTemp(Ity_I64);
9989 IRTemp zero = newTemp(Ity_I64);
9990 IRTemp aaNeg = newTemp(Ity_I64);
9991 IRTemp negMask = newTemp(Ity_I64);
9992 IRTemp posMask = newTemp(Ity_I64);
9993 IROp opSub = Iop_INVALID;
9994 IROp opSarN = Iop_INVALID;
9996 switch (laneszB) {
9997 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9998 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9999 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
10000 default: vassert(0);
10003 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
10004 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
10005 assign( zero, mkU64(0) );
10006 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
10007 assign( res,
10008 binop(Iop_Or64,
10009 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
10010 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
10011 return res;
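/* Worked example (editorial): for laneszB == 1 and an input lane of 0x80
   (-128), negMask is all-ones, aaNeg is 0 - 0x80 = 0x80, and the result
   lane is 0x80 -- 128 when read as unsigned -- which is why the comment
   above stresses the unsigned interpretation of the result. */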
10014 /* XMM version of math_PABS_MMX. */
10015 static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
10017 IRTemp res = newTemp(Ity_V128);
10018 IRTemp aaHi = newTemp(Ity_I64);
10019 IRTemp aaLo = newTemp(Ity_I64);
10020 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
10021 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
10022 assign(res, binop(Iop_64HLtoV128,
10023 mkexpr(math_PABS_MMX(aaHi, laneszB)),
10024 mkexpr(math_PABS_MMX(aaLo, laneszB))));
10025 return res;
10028 /* Specialisations of math_PABS_XMM, since there's no easy way to do
10029 partial applications in C :-( */
10030 static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
10031 return math_PABS_XMM(aa, 4);
10034 static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
10035 return math_PABS_XMM(aa, 2);
10038 static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
10039 return math_PABS_XMM(aa, 1);
10042 /* YMM version of math_PABS_XMM. */
10043 static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
10045 IRTemp res = newTemp(Ity_V256);
10046 IRTemp aaHi = IRTemp_INVALID;
10047 IRTemp aaLo = IRTemp_INVALID;
10048 breakupV256toV128s(aa, &aaHi, &aaLo);
10049 assign(res, binop(Iop_V128HLtoV256,
10050 mkexpr(math_PABS_XMM(aaHi, laneszB)),
10051 mkexpr(math_PABS_XMM(aaLo, laneszB))));
10052 return res;
10055 static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
10056 return math_PABS_YMM(aa, 4);
10059 static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
10060 return math_PABS_YMM(aa, 2);
10063 static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
10064 return math_PABS_YMM(aa, 1);
10067 static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
10068 IRTemp lo64, Long byteShift )
10070 vassert(byteShift >= 1 && byteShift <= 7);
10071 return
10072 binop(Iop_Or64,
10073 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
10074 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
10078 static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
10080 IRTemp res = newTemp(Ity_V128);
10081 IRTemp sHi = newTemp(Ity_I64);
10082 IRTemp sLo = newTemp(Ity_I64);
10083 IRTemp dHi = newTemp(Ity_I64);
10084 IRTemp dLo = newTemp(Ity_I64);
10085 IRTemp rHi = newTemp(Ity_I64);
10086 IRTemp rLo = newTemp(Ity_I64);
10088 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
10089 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
10090 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
10091 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
10093 if (imm8 == 0) {
10094 assign( rHi, mkexpr(sHi) );
10095 assign( rLo, mkexpr(sLo) );
10097 else if (imm8 >= 1 && imm8 <= 7) {
10098 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
10099 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
10101 else if (imm8 == 8) {
10102 assign( rHi, mkexpr(dLo) );
10103 assign( rLo, mkexpr(sHi) );
10105 else if (imm8 >= 9 && imm8 <= 15) {
10106 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
10107 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
10109 else if (imm8 == 16) {
10110 assign( rHi, mkexpr(dHi) );
10111 assign( rLo, mkexpr(dLo) );
10113 else if (imm8 >= 17 && imm8 <= 23) {
10114 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
10115 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
10117 else if (imm8 == 24) {
10118 assign( rHi, mkU64(0) );
10119 assign( rLo, mkexpr(dHi) );
10121 else if (imm8 >= 25 && imm8 <= 31) {
10122 assign( rHi, mkU64(0) );
10123 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
10125 else if (imm8 >= 32 && imm8 <= 255) {
10126 assign( rHi, mkU64(0) );
10127 assign( rLo, mkU64(0) );
10129 else
10130 vassert(0);
10132 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
10133 return res;
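/* Worked example (editorial): the value built here is the low 16 bytes of
   the 32-byte concatenation dV:sV shifted right by imm8 bytes.  E.g.
   imm8 == 8 selects sHi as the new low half and dLo as the new high half
   (the middle 16 bytes of dV:sV), and any imm8 >= 32 shifts everything
   out, yielding the zero vector. */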
10136 /* Generate a SIGSEGV (SIGBUS on FreeBSD) followed by a restart of the
10137 current instruction if effective_addr is not aligned to the boundary
10138 selected by |mask| (16, 32 or 64 bytes). This is required behaviour
10139 for some SSE3 instructions and all 128-bit SSSE3 instructions.
10140 This assumes that guest_RIP_curr_instr is set correctly! */
10141 static
10142 void gen_SIGNAL_if_not_XX_aligned ( const VexAbiInfo* vbi,
10143 IRTemp effective_addr, ULong mask )
10145 stmt(
10146 IRStmt_Exit(
10147 binop(Iop_CmpNE64,
10148 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
10149 mkU64(0)),
10150 vbi->guest_amd64_sigbus_on_misalign ? Ijk_SigBUS : Ijk_SigSEGV,
10151 IRConst_U64(guest_RIP_curr_instr),
10152 OFFB_RIP
10157 static void gen_SIGNAL_if_not_16_aligned ( const VexAbiInfo* vbi,
10158 IRTemp effective_addr ) {
10159 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 16-1);
10162 static void gen_SIGNAL_if_not_32_aligned ( const VexAbiInfo* vbi,
10163 IRTemp effective_addr ) {
10164 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 32-1);
10167 static void gen_SIGNAL_if_not_64_aligned ( const VexAbiInfo* vbi,
10168 IRTemp effective_addr ) {
10169 gen_SIGNAL_if_not_XX_aligned(vbi, effective_addr, 64-1);
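/* Example (editorial): for the 16-byte case the mask is 16-1 = 0xF, so an
   effective address such as 0x1008 gives (addr & 0xF) == 8 != 0, the Exit
   above is taken, SIGSEGV (or SIGBUS, depending on the vbi flag) is
   delivered, and execution restarts at guest_RIP_curr_instr. */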
10173 /* Helper for deciding whether a given insn (starting at the opcode
10174 byte) may validly be used with a LOCK prefix. The following insns
10175 may be used with LOCK when their destination operand is in memory.
10176 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
10178 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
10179 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
10180 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
10181 SBB 80 /3, 81 /3, 82 /x, 83 /3, 18, 19
10182 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
10183 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
10184 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
10186 DEC FE /1, FF /1
10187 INC FE /0, FF /0
10189 NEG F6 /3, F7 /3
10190 NOT F6 /2, F7 /2
10192 XCHG 86, 87
10194 BTC 0F BB, 0F BA /7
10195 BTR 0F B3, 0F BA /6
10196 BTS 0F AB, 0F BA /5
10198 CMPXCHG 0F B0, 0F B1
10199 CMPXCHG8B 0F C7 /1
10201 XADD 0F C0, 0F C1
10203 ------------------------------
10205 80 /0 = addb $imm8, rm8
10206 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
10207 82 /0 = addb $imm8, rm8
10208 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
10210 00 = addb r8, rm8
10211 01 = addl r32, rm32 and addw r16, rm16
10213 Likewise for OR ADC SBB AND SUB XOR
10215 FE /1 = dec rm8
10216 FF /1 = dec rm32 and dec rm16
10218 FE /0 = inc rm8
10219 FF /0 = inc rm32 and inc rm16
10221 F6 /3 = neg rm8
10222 F7 /3 = neg rm32 and neg rm16
10224 F6 /2 = not rm8
10225 F7 /2 = not rm32 and not rm16
10227 0F BB = btcw r16, rm16 and btcl r32, rm32
10228 0F BA /7 = btcw $imm8, rm16 and btcl $imm8, rm32
10230 Same for BTS, BTR
10232 static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
10234 switch (opc[0]) {
10235 case 0x00: case 0x01: case 0x08: case 0x09:
10236 case 0x10: case 0x11: case 0x18: case 0x19:
10237 case 0x20: case 0x21: case 0x28: case 0x29:
10238 case 0x30: case 0x31:
10239 if (!epartIsReg(opc[1]))
10240 return True;
10241 break;
10243 case 0x80: case 0x81: case 0x82: case 0x83:
10244 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
10245 && !epartIsReg(opc[1]))
10246 return True;
10247 break;
10249 case 0xFE: case 0xFF:
10250 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
10251 && !epartIsReg(opc[1]))
10252 return True;
10253 break;
10255 case 0xF6: case 0xF7:
10256 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
10257 && !epartIsReg(opc[1]))
10258 return True;
10259 break;
10261 case 0x86: case 0x87:
10262 if (!epartIsReg(opc[1]))
10263 return True;
10264 break;
10266 case 0x0F: {
10267 switch (opc[1]) {
10268 case 0xBB: case 0xB3: case 0xAB:
10269 if (!epartIsReg(opc[2]))
10270 return True;
10271 break;
10272 case 0xBA:
10273 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
10274 && !epartIsReg(opc[2]))
10275 return True;
10276 break;
10277 case 0xB0: case 0xB1:
10278 if (!epartIsReg(opc[2]))
10279 return True;
10280 break;
10281 case 0xC7:
10282 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
10283 return True;
10284 break;
10285 case 0xC0: case 0xC1:
10286 if (!epartIsReg(opc[2]))
10287 return True;
10288 break;
10289 default:
10290 break;
10291 } /* switch (opc[1]) */
10292 break;
10295 default:
10296 break;
10297 } /* switch (opc[0]) */
10299 return False;
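/* Example (editorial): `lock addl $1, (%rax)` encodes as F0 83 00 01.
   With opc pointing at the 0x83 byte, gregLO3ofRM(opc[1]) is 0 (the ADD
   /0 form) and epartIsReg(0x00) is False since the mod field is 00, so
   the 0x80..0x83 case above returns True and the LOCK prefix is
   accepted. */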
10303 /*------------------------------------------------------------*/
10304 /*--- ---*/
10305 /*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
10306 /*--- ---*/
10307 /*------------------------------------------------------------*/
10309 static Long dis_COMISD ( const VexAbiInfo* vbi, Prefix pfx,
10310 Long delta, Bool isAvx, UChar opc )
10312 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
10313 Int alen = 0;
10314 HChar dis_buf[50];
10315 IRTemp argL = newTemp(Ity_F64);
10316 IRTemp argR = newTemp(Ity_F64);
10317 UChar modrm = getUChar(delta);
10318 IRTemp addr = IRTemp_INVALID;
10319 if (epartIsReg(modrm)) {
10320 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
10321 0/*lowest lane*/ ) );
10322 delta += 1;
10323 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10324 opc==0x2E ? "u" : "",
10325 nameXMMReg(eregOfRexRM(pfx,modrm)),
10326 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10327 } else {
10328 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10329 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
10330 delta += alen;
10331 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
10332 opc==0x2E ? "u" : "",
10333 dis_buf,
10334 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10336 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
10337 0/*lowest lane*/ ) );
10339 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10340 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10341 stmt( IRStmt_Put(
10342 OFFB_CC_DEP1,
10343 binop( Iop_And64,
10344 unop( Iop_32Uto64,
10345 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
10346 mkU64(0x45)
10347 )));
10348 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
10349 return delta;
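/* Editorial note: Iop_CmpF64 evaluates to the IRCmpF64Result encoding
   (0x00 greater, 0x01 less, 0x40 equal, 0x45 unordered), so the And64
   with 0x45 keeps exactly the CF (bit 0), PF (bit 2) and ZF (bit 6)
   positions that (U)COMISD defines, while the COPY thunk leaves OF, SF
   and AF at zero. */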
10353 static Long dis_COMISS ( const VexAbiInfo* vbi, Prefix pfx,
10354 Long delta, Bool isAvx, UChar opc )
10356 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
10357 Int alen = 0;
10358 HChar dis_buf[50];
10359 IRTemp argL = newTemp(Ity_F32);
10360 IRTemp argR = newTemp(Ity_F32);
10361 UChar modrm = getUChar(delta);
10362 IRTemp addr = IRTemp_INVALID;
10363 if (epartIsReg(modrm)) {
10364 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10365 0/*lowest lane*/ ) );
10366 delta += 1;
10367 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10368 opc==0x2E ? "u" : "",
10369 nameXMMReg(eregOfRexRM(pfx,modrm)),
10370 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10371 } else {
10372 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10373 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10374 delta += alen;
10375 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10376 opc==0x2E ? "u" : "",
10377 dis_buf,
10378 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10380 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10381 0/*lowest lane*/ ) );
10383 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10384 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10385 stmt( IRStmt_Put(
10386 OFFB_CC_DEP1,
10387 binop( Iop_And64,
10388 unop( Iop_32Uto64,
10389 binop(Iop_CmpF64,
10390 unop(Iop_F32toF64,mkexpr(argL)),
10391 unop(Iop_F32toF64,mkexpr(argR)))),
10392 mkU64(0x45)
10393 )));
10394 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
10395 return delta;
10399 static Long dis_PSHUFD_32x4 ( const VexAbiInfo* vbi, Prefix pfx,
10400 Long delta, Bool writesYmm )
10402 Int order;
10403 Int alen = 0;
10404 HChar dis_buf[50];
10405 IRTemp sV = newTemp(Ity_V128);
10406 UChar modrm = getUChar(delta);
10407 const HChar* strV = writesYmm ? "v" : "";
10408 IRTemp addr = IRTemp_INVALID;
10409 if (epartIsReg(modrm)) {
10410 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10411 order = (Int)getUChar(delta+1);
10412 delta += 1+1;
10413 DIP("%spshufd $%d,%s,%s\n", strV, order,
10414 nameXMMReg(eregOfRexRM(pfx,modrm)),
10415 nameXMMReg(gregOfRexRM(pfx,modrm)));
10416 } else {
10417 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10418 1/*byte after the amode*/ );
10419 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10420 order = (Int)getUChar(delta+alen);
10421 delta += alen+1;
10422 DIP("%spshufd $%d,%s,%s\n", strV, order,
10423 dis_buf,
10424 nameXMMReg(gregOfRexRM(pfx,modrm)));
10427 IRTemp s3, s2, s1, s0;
10428 s3 = s2 = s1 = s0 = IRTemp_INVALID;
10429 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
10431 # define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10432 IRTemp dV = newTemp(Ity_V128);
10433 assign(dV,
10434 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10435 SEL((order>>2)&3), SEL((order>>0)&3) )
10437 # undef SEL
10439 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10440 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10441 return delta;
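/* Worked example (editorial): order == 0x1B is 0b00011011, so the four
   selectors are 0,1,2,3 from the top down and the SEL expansion above
   yields mkV128from32s(s0, s1, s2, s3); in other words `pshufd $0x1b`
   reverses the four 32-bit lanes. */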
10445 static Long dis_PSHUFD_32x8 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
10447 Int order;
10448 Int alen = 0;
10449 HChar dis_buf[50];
10450 IRTemp sV = newTemp(Ity_V256);
10451 UChar modrm = getUChar(delta);
10452 IRTemp addr = IRTemp_INVALID;
10453 UInt rG = gregOfRexRM(pfx,modrm);
10454 if (epartIsReg(modrm)) {
10455 UInt rE = eregOfRexRM(pfx,modrm);
10456 assign( sV, getYMMReg(rE) );
10457 order = (Int)getUChar(delta+1);
10458 delta += 1+1;
10459 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10460 } else {
10461 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10462 1/*byte after the amode*/ );
10463 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10464 order = (Int)getUChar(delta+alen);
10465 delta += alen+1;
10466 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10469 IRTemp s[8];
10470 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10471 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10472 &s[3], &s[2], &s[1], &s[0] );
10474 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10475 s[4 + ((order>>4)&3)],
10476 s[4 + ((order>>2)&3)],
10477 s[4 + ((order>>0)&3)],
10478 s[0 + ((order>>6)&3)],
10479 s[0 + ((order>>4)&3)],
10480 s[0 + ((order>>2)&3)],
10481 s[0 + ((order>>0)&3)] ) );
10482 return delta;
10486 static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10488 IRTemp dV = newTemp(Ity_V128);
10489 IRTemp hi64 = newTemp(Ity_I64);
10490 IRTemp lo64 = newTemp(Ity_I64);
10491 IRTemp hi64r = newTemp(Ity_I64);
10492 IRTemp lo64r = newTemp(Ity_I64);
10494 vassert(imm >= 0 && imm <= 255);
10495 if (imm >= 16) {
10496 assign(dV, mkV128(0x0000));
10497 return dV;
10500 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10501 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10503 if (imm == 0) {
10504 assign( lo64r, mkexpr(lo64) );
10505 assign( hi64r, mkexpr(hi64) );
10507 else
10508 if (imm == 8) {
10509 assign( hi64r, mkU64(0) );
10510 assign( lo64r, mkexpr(hi64) );
10512 else
10513 if (imm > 8) {
10514 assign( hi64r, mkU64(0) );
10515 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
10516 } else {
10517 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
10518 assign( lo64r,
10519 binop( Iop_Or64,
10520 binop(Iop_Shr64, mkexpr(lo64),
10521 mkU8(8 * imm)),
10522 binop(Iop_Shl64, mkexpr(hi64),
10523 mkU8(8 * (8 - imm)) )
10528 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10529 return dV;
10533 static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10535 IRTemp dV = newTemp(Ity_V128);
10536 IRTemp hi64 = newTemp(Ity_I64);
10537 IRTemp lo64 = newTemp(Ity_I64);
10538 IRTemp hi64r = newTemp(Ity_I64);
10539 IRTemp lo64r = newTemp(Ity_I64);
10541 vassert(imm >= 0 && imm <= 255);
10542 if (imm >= 16) {
10543 assign(dV, mkV128(0x0000));
10544 return dV;
10547 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10548 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10550 if (imm == 0) {
10551 assign( lo64r, mkexpr(lo64) );
10552 assign( hi64r, mkexpr(hi64) );
10554 else
10555 if (imm == 8) {
10556 assign( lo64r, mkU64(0) );
10557 assign( hi64r, mkexpr(lo64) );
10559 else
10560 if (imm > 8) {
10561 assign( lo64r, mkU64(0) );
10562 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10563 } else {
10564 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10565 assign( hi64r,
10566 binop( Iop_Or64,
10567 binop(Iop_Shl64, mkexpr(hi64),
10568 mkU8(8 * imm)),
10569 binop(Iop_Shr64, mkexpr(lo64),
10570 mkU8(8 * (8 - imm)) )
10575 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10576 return dV;
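/* Worked example (editorial): for PSRLDQ with imm == 3, hi64r is
   hi64 >>u 24 and lo64r is (lo64 >>u 24) | (hi64 << 40), i.e. the whole
   16-byte value moves down by three bytes with zeroes entering at the
   top; math_PSLLDQ mirrors this in the other direction, and imm >= 16
   gives the zero vector in both routines. */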
10580 static Long dis_CVTxSD2SI ( const VexAbiInfo* vbi, Prefix pfx,
10581 Long delta, Bool isAvx, UChar opc, Int sz )
10583 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
10584 HChar dis_buf[50];
10585 Int alen = 0;
10586 UChar modrm = getUChar(delta);
10587 IRTemp addr = IRTemp_INVALID;
10588 IRTemp rmode = newTemp(Ity_I32);
10589 IRTemp f64lo = newTemp(Ity_F64);
10590 Bool r2zero = toBool(opc == 0x2C);
10592 if (epartIsReg(modrm)) {
10593 delta += 1;
10594 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10595 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10596 nameXMMReg(eregOfRexRM(pfx,modrm)),
10597 nameIReg(sz, gregOfRexRM(pfx,modrm),
10598 False));
10599 } else {
10600 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10601 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10602 delta += alen;
10603 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10604 dis_buf,
10605 nameIReg(sz, gregOfRexRM(pfx,modrm),
10606 False));
10609 if (r2zero) {
10610 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10611 } else {
10612 assign( rmode, get_sse_roundingmode() );
10615 if (sz == 4) {
10616 putIReg32( gregOfRexRM(pfx,modrm),
10617 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10618 } else {
10619 vassert(sz == 8);
10620 putIReg64( gregOfRexRM(pfx,modrm),
10621 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10624 return delta;
10628 static Long dis_CVTxSS2SI ( const VexAbiInfo* vbi, Prefix pfx,
10629 Long delta, Bool isAvx, UChar opc, Int sz )
10631 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10632 HChar dis_buf[50];
10633 Int alen = 0;
10634 UChar modrm = getUChar(delta);
10635 IRTemp addr = IRTemp_INVALID;
10636 IRTemp rmode = newTemp(Ity_I32);
10637 IRTemp f32lo = newTemp(Ity_F32);
10638 Bool r2zero = toBool(opc == 0x2C);
10640 if (epartIsReg(modrm)) {
10641 delta += 1;
10642 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10643 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10644 nameXMMReg(eregOfRexRM(pfx,modrm)),
10645 nameIReg(sz, gregOfRexRM(pfx,modrm),
10646 False));
10647 } else {
10648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10649 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10650 delta += alen;
10651 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10652 dis_buf,
10653 nameIReg(sz, gregOfRexRM(pfx,modrm),
10654 False));
10657 if (r2zero) {
10658 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10659 } else {
10660 assign( rmode, get_sse_roundingmode() );
10663 if (sz == 4) {
10664 putIReg32( gregOfRexRM(pfx,modrm),
10665 binop( Iop_F64toI32S,
10666 mkexpr(rmode),
10667 unop(Iop_F32toF64, mkexpr(f32lo))) );
10668 } else {
10669 vassert(sz == 8);
10670 putIReg64( gregOfRexRM(pfx,modrm),
10671 binop( Iop_F64toI64S,
10672 mkexpr(rmode),
10673 unop(Iop_F32toF64, mkexpr(f32lo))) );
10676 return delta;
10680 static Long dis_CVTPS2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
10681 Long delta, Bool isAvx )
10683 IRTemp addr = IRTemp_INVALID;
10684 Int alen = 0;
10685 HChar dis_buf[50];
10686 IRTemp f32lo = newTemp(Ity_F32);
10687 IRTemp f32hi = newTemp(Ity_F32);
10688 UChar modrm = getUChar(delta);
10689 UInt rG = gregOfRexRM(pfx,modrm);
10690 if (epartIsReg(modrm)) {
10691 UInt rE = eregOfRexRM(pfx,modrm);
10692 assign( f32lo, getXMMRegLane32F(rE, 0) );
10693 assign( f32hi, getXMMRegLane32F(rE, 1) );
10694 delta += 1;
10695 DIP("%scvtps2pd %s,%s\n",
10696 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10697 } else {
10698 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10699 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10700 assign( f32hi, loadLE(Ity_F32,
10701 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10702 delta += alen;
10703 DIP("%scvtps2pd %s,%s\n",
10704 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10707 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10708 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10709 if (isAvx)
10710 putYMMRegLane128( rG, 1, mkV128(0));
10711 return delta;
10715 static Long dis_CVTPS2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
10716 Long delta )
10718 IRTemp addr = IRTemp_INVALID;
10719 Int alen = 0;
10720 HChar dis_buf[50];
10721 IRTemp f32_0 = newTemp(Ity_F32);
10722 IRTemp f32_1 = newTemp(Ity_F32);
10723 IRTemp f32_2 = newTemp(Ity_F32);
10724 IRTemp f32_3 = newTemp(Ity_F32);
10725 UChar modrm = getUChar(delta);
10726 UInt rG = gregOfRexRM(pfx,modrm);
10727 if (epartIsReg(modrm)) {
10728 UInt rE = eregOfRexRM(pfx,modrm);
10729 assign( f32_0, getXMMRegLane32F(rE, 0) );
10730 assign( f32_1, getXMMRegLane32F(rE, 1) );
10731 assign( f32_2, getXMMRegLane32F(rE, 2) );
10732 assign( f32_3, getXMMRegLane32F(rE, 3) );
10733 delta += 1;
10734 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10735 } else {
10736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10737 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10738 assign( f32_1, loadLE(Ity_F32,
10739 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10740 assign( f32_2, loadLE(Ity_F32,
10741 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10742 assign( f32_3, loadLE(Ity_F32,
10743 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10744 delta += alen;
10745 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10748 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10749 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10750 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10751 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10752 return delta;
10756 static Long dis_CVTPD2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10757 Long delta, Bool isAvx )
10759 IRTemp addr = IRTemp_INVALID;
10760 Int alen = 0;
10761 HChar dis_buf[50];
10762 UChar modrm = getUChar(delta);
10763 UInt rG = gregOfRexRM(pfx,modrm);
10764 IRTemp argV = newTemp(Ity_V128);
10765 IRTemp rmode = newTemp(Ity_I32);
10766 if (epartIsReg(modrm)) {
10767 UInt rE = eregOfRexRM(pfx,modrm);
10768 assign( argV, getXMMReg(rE) );
10769 delta += 1;
10770 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10771 nameXMMReg(rE), nameXMMReg(rG));
10772 } else {
10773 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10774 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10775 delta += alen;
10776 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10777 dis_buf, nameXMMReg(rG) );
10780 assign( rmode, get_sse_roundingmode() );
10781 IRTemp t0 = newTemp(Ity_F64);
10782 IRTemp t1 = newTemp(Ity_F64);
10783 assign( t0, unop(Iop_ReinterpI64asF64,
10784 unop(Iop_V128to64, mkexpr(argV))) );
10785 assign( t1, unop(Iop_ReinterpI64asF64,
10786 unop(Iop_V128HIto64, mkexpr(argV))) );
10788 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10789 putXMMRegLane32( rG, 3, mkU32(0) );
10790 putXMMRegLane32( rG, 2, mkU32(0) );
10791 putXMMRegLane32F( rG, 1, CVT(t1) );
10792 putXMMRegLane32F( rG, 0, CVT(t0) );
10793 # undef CVT
10794 if (isAvx)
10795 putYMMRegLane128( rG, 1, mkV128(0) );
10797 return delta;
10801 static Long dis_CVTxPS2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10802 Long delta, Bool isAvx, Bool r2zero )
10804 IRTemp addr = IRTemp_INVALID;
10805 Int alen = 0;
10806 HChar dis_buf[50];
10807 UChar modrm = getUChar(delta);
10808 IRTemp argV = newTemp(Ity_V128);
10809 IRTemp rmode = newTemp(Ity_I32);
10810 UInt rG = gregOfRexRM(pfx,modrm);
10812 if (epartIsReg(modrm)) {
10813 UInt rE = eregOfRexRM(pfx,modrm);
10814 assign( argV, getXMMReg(rE) );
10815 delta += 1;
10816 DIP("%scvt%sps2dq %s,%s\n",
10817 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10818 } else {
10819 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10820 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10821 delta += alen;
10822 DIP("%scvt%sps2dq %s,%s\n",
10823 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10826 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10827 : get_sse_roundingmode() );
10828 putXMMReg( rG, binop(Iop_F32toI32Sx4, mkexpr(rmode), mkexpr(argV)) );
10829 if (isAvx)
10830 putYMMRegLane128( rG, 1, mkV128(0) );
10832 return delta;
10836 static Long dis_CVTxPS2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10837 Long delta, Bool r2zero )
10839 IRTemp addr = IRTemp_INVALID;
10840 Int alen = 0;
10841 HChar dis_buf[50];
10842 UChar modrm = getUChar(delta);
10843 IRTemp argV = newTemp(Ity_V256);
10844 IRTemp rmode = newTemp(Ity_I32);
10845 UInt rG = gregOfRexRM(pfx,modrm);
10847 if (epartIsReg(modrm)) {
10848 UInt rE = eregOfRexRM(pfx,modrm);
10849 assign( argV, getYMMReg(rE) );
10850 delta += 1;
10851 DIP("vcvt%sps2dq %s,%s\n",
10852 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10853 } else {
10854 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10855 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10856 delta += alen;
10857 DIP("vcvt%sps2dq %s,%s\n",
10858 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10861 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10862 : get_sse_roundingmode() );
10863 putYMMReg( rG, binop(Iop_F32toI32Sx8, mkexpr(rmode), mkexpr(argV)) );
10864 return delta;
10868 static Long dis_CVTxPD2DQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
10869 Long delta, Bool isAvx, Bool r2zero )
10871 IRTemp addr = IRTemp_INVALID;
10872 Int alen = 0;
10873 HChar dis_buf[50];
10874 UChar modrm = getUChar(delta);
10875 IRTemp argV = newTemp(Ity_V128);
10876 IRTemp rmode = newTemp(Ity_I32);
10877 UInt rG = gregOfRexRM(pfx,modrm);
10878 IRTemp t0, t1;
10880 if (epartIsReg(modrm)) {
10881 UInt rE = eregOfRexRM(pfx,modrm);
10882 assign( argV, getXMMReg(rE) );
10883 delta += 1;
10884 DIP("%scvt%spd2dq %s,%s\n",
10885 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10886 } else {
10887 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10888 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10889 delta += alen;
10890 DIP("%scvt%spd2dqx %s,%s\n",
10891 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10894 if (r2zero) {
10895 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10896 } else {
10897 assign( rmode, get_sse_roundingmode() );
10900 t0 = newTemp(Ity_F64);
10901 t1 = newTemp(Ity_F64);
10902 assign( t0, unop(Iop_ReinterpI64asF64,
10903 unop(Iop_V128to64, mkexpr(argV))) );
10904 assign( t1, unop(Iop_ReinterpI64asF64,
10905 unop(Iop_V128HIto64, mkexpr(argV))) );
10907 # define CVT(_t) binop( Iop_F64toI32S, \
10908 mkexpr(rmode), \
10909 mkexpr(_t) )
10911 putXMMRegLane32( rG, 3, mkU32(0) );
10912 putXMMRegLane32( rG, 2, mkU32(0) );
10913 putXMMRegLane32( rG, 1, CVT(t1) );
10914 putXMMRegLane32( rG, 0, CVT(t0) );
10915 # undef CVT
10916 if (isAvx)
10917 putYMMRegLane128( rG, 1, mkV128(0) );
10919 return delta;
10923 static Long dis_CVTxPD2DQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
10924 Long delta, Bool r2zero )
10926 IRTemp addr = IRTemp_INVALID;
10927 Int alen = 0;
10928 HChar dis_buf[50];
10929 UChar modrm = getUChar(delta);
10930 IRTemp argV = newTemp(Ity_V256);
10931 IRTemp rmode = newTemp(Ity_I32);
10932 UInt rG = gregOfRexRM(pfx,modrm);
10933 IRTemp t0, t1, t2, t3;
10935 if (epartIsReg(modrm)) {
10936 UInt rE = eregOfRexRM(pfx,modrm);
10937 assign( argV, getYMMReg(rE) );
10938 delta += 1;
10939 DIP("vcvt%spd2dq %s,%s\n",
10940 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10941 } else {
10942 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10943 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10944 delta += alen;
10945 DIP("vcvt%spd2dqy %s,%s\n",
10946 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10949 if (r2zero) {
10950 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10951 } else {
10952 assign( rmode, get_sse_roundingmode() );
10955 t0 = IRTemp_INVALID;
10956 t1 = IRTemp_INVALID;
10957 t2 = IRTemp_INVALID;
10958 t3 = IRTemp_INVALID;
10959 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10961 # define CVT(_t) binop( Iop_F64toI32S, \
10962 mkexpr(rmode), \
10963 unop( Iop_ReinterpI64asF64, \
10964 mkexpr(_t) ) )
10966 putXMMRegLane32( rG, 3, CVT(t3) );
10967 putXMMRegLane32( rG, 2, CVT(t2) );
10968 putXMMRegLane32( rG, 1, CVT(t1) );
10969 putXMMRegLane32( rG, 0, CVT(t0) );
10970 # undef CVT
10971 putYMMRegLane128( rG, 1, mkV128(0) );
10973 return delta;
10977 static Long dis_CVTDQ2PS_128 ( const VexAbiInfo* vbi, Prefix pfx,
10978 Long delta, Bool isAvx )
10980 IRTemp addr = IRTemp_INVALID;
10981 Int alen = 0;
10982 HChar dis_buf[50];
10983 UChar modrm = getUChar(delta);
10984 IRTemp argV = newTemp(Ity_V128);
10985 IRTemp rmode = newTemp(Ity_I32);
10986 UInt rG = gregOfRexRM(pfx,modrm);
10988 if (epartIsReg(modrm)) {
10989 UInt rE = eregOfRexRM(pfx,modrm);
10990 assign( argV, getXMMReg(rE) );
10991 delta += 1;
10992 DIP("%scvtdq2ps %s,%s\n",
10993 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10994 } else {
10995 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10996 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10997 delta += alen;
10998 DIP("%scvtdq2ps %s,%s\n",
10999 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
11002 assign( rmode, get_sse_roundingmode() );
11003 putXMMReg(rG, binop(Iop_I32StoF32x4, mkexpr(rmode), mkexpr(argV)));
11005 if (isAvx)
11006 putYMMRegLane128( rG, 1, mkV128(0) );
11008 return delta;
11011 static Long dis_CVTDQ2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
11012 Long delta )
11014 IRTemp addr = IRTemp_INVALID;
11015 Int alen = 0;
11016 HChar dis_buf[50];
11017 UChar modrm = getUChar(delta);
11018 IRTemp argV = newTemp(Ity_V256);
11019 IRTemp rmode = newTemp(Ity_I32);
11020 UInt rG = gregOfRexRM(pfx,modrm);
11022 if (epartIsReg(modrm)) {
11023 UInt rE = eregOfRexRM(pfx,modrm);
11024 assign( argV, getYMMReg(rE) );
11025 delta += 1;
11026 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
11027 } else {
11028 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11029 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
11030 delta += alen;
11031 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
11034 assign( rmode, get_sse_roundingmode() );
11035 putYMMReg(rG, binop(Iop_I32StoF32x8, mkexpr(rmode), mkexpr(argV)));
11037 return delta;
11041 static Long dis_PMOVMSKB_128 ( const VexAbiInfo* vbi, Prefix pfx,
11042 Long delta, Bool isAvx )
11044 UChar modrm = getUChar(delta);
11045 vassert(epartIsReg(modrm)); /* ensured by caller */
11046 UInt rE = eregOfRexRM(pfx,modrm);
11047 UInt rG = gregOfRexRM(pfx,modrm);
11048 IRTemp t0 = newTemp(Ity_V128);
11049 IRTemp t1 = newTemp(Ity_I32);
11050 assign(t0, getXMMReg(rE));
11051 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
11052 putIReg32(rG, mkexpr(t1));
11053 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
11054 nameIReg32(rG));
11055 delta += 1;
11056 return delta;
11060 static Long dis_PMOVMSKB_256 ( const VexAbiInfo* vbi, Prefix pfx,
11061 Long delta )
11063 UChar modrm = getUChar(delta);
11064 vassert(epartIsReg(modrm)); /* ensured by caller */
11065 UInt rE = eregOfRexRM(pfx,modrm);
11066 UInt rG = gregOfRexRM(pfx,modrm);
11067 IRTemp t0 = newTemp(Ity_V128);
11068 IRTemp t1 = newTemp(Ity_V128);
11069 IRTemp t2 = newTemp(Ity_I16);
11070 IRTemp t3 = newTemp(Ity_I16);
11071 assign(t0, getYMMRegLane128(rE, 0));
11072 assign(t1, getYMMRegLane128(rE, 1));
11073 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
11074 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
11075 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
11076 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11077 delta += 1;
11078 return delta;
11082 /* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
11083 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
11084 /* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
11085 static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11087 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11088 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11089 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11090 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11091 IRTemp res = newTemp(Ity_V128);
11092 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
11093 : mkV128from32s( s1, d1, s0, d0 ));
11094 return res;
11098 /* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
11099 /* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
11100 static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
11102 IRTemp s1 = newTemp(Ity_I64);
11103 IRTemp s0 = newTemp(Ity_I64);
11104 IRTemp d1 = newTemp(Ity_I64);
11105 IRTemp d0 = newTemp(Ity_I64);
11106 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11107 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11108 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11109 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11110 IRTemp res = newTemp(Ity_V128);
11111 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
11112 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
11113 return res;
11117 /* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
11118 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
11119 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
11120 way. */
11121 static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11123 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11124 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11125 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
11126 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
11127 IRTemp res = newTemp(Ity_V256);
11128 assign(res, xIsH
11129 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
11130 mkexpr(s1), mkexpr(d1))
11131 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
11132 mkexpr(s0), mkexpr(d0)));
11133 return res;
11137 /* FIXME: this is really bad. Surely can do something better here?
11138 One observation is that the steering in the upper and lower 128 bit
11139 halves is the same as with math_UNPCKxPS_128, so we simply split
11140 into two halves, and use that. Consequently any improvement in
11141 math_UNPCKxPS_128 (probably, to use interleave-style primops)
11142 benefits this too. */
11143 static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
11145 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11146 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11147 breakupV256toV128s( sV, &sVhi, &sVlo );
11148 breakupV256toV128s( dV, &dVhi, &dVlo );
11149 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
11150 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
11151 IRTemp rV = newTemp(Ity_V256);
11152 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11153 return rV;
11157 static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11159 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11160 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11161 vassert(imm8 < 256);
11163 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11164 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11166 # define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
11167 # define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11168 IRTemp res = newTemp(Ity_V128);
11169 assign(res,
11170 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
11171 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
11172 # undef SELD
11173 # undef SELS
11174 return res;
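/* Worked example (editorial): SHUFPS takes its two low result lanes from
   dV and its two high result lanes from sV, so imm8 == 0x44 (selector
   fields 1,0,1,0) produces mkV128from32s(s1, s0, d1, d0). */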
11178 /* 256-bit SHUFPS appears to steer each of the 128-bit halves
11179 identically. Hence do the clueless thing and use math_SHUFPS_128
11180 twice. */
11181 static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11183 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11184 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11185 breakupV256toV128s( sV, &sVhi, &sVlo );
11186 breakupV256toV128s( dV, &dVhi, &dVlo );
11187 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
11188 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
11189 IRTemp rV = newTemp(Ity_V256);
11190 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11191 return rV;
11195 static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11197 IRTemp s1 = newTemp(Ity_I64);
11198 IRTemp s0 = newTemp(Ity_I64);
11199 IRTemp d1 = newTemp(Ity_I64);
11200 IRTemp d0 = newTemp(Ity_I64);
11202 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
11203 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
11204 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
11205 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
11207 # define SELD(n) mkexpr((n)==0 ? d0 : d1)
11208 # define SELS(n) mkexpr((n)==0 ? s0 : s1)
11210 IRTemp res = newTemp(Ity_V128);
11211 assign(res, binop( Iop_64HLtoV128,
11212 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
11214 # undef SELD
11215 # undef SELS
11216 return res;
11220 static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11222 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11223 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11224 breakupV256toV128s( sV, &sVhi, &sVlo );
11225 breakupV256toV128s( dV, &dVhi, &dVlo );
11226 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11227 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
11228 IRTemp rV = newTemp(Ity_V256);
11229 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11230 return rV;
11234 static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11236 UShort imm8_mask_16;
11237 IRTemp imm8_mask = newTemp(Ity_V128);
11239 switch( imm8 & 3 ) {
11240 case 0: imm8_mask_16 = 0x0000; break;
11241 case 1: imm8_mask_16 = 0x00FF; break;
11242 case 2: imm8_mask_16 = 0xFF00; break;
11243 case 3: imm8_mask_16 = 0xFFFF; break;
11244 default: vassert(0); break;
11246 assign( imm8_mask, mkV128( imm8_mask_16 ) );
11248 IRTemp res = newTemp(Ity_V128);
11249 assign ( res, binop( Iop_OrV128,
11250 binop( Iop_AndV128, mkexpr(sV),
11251 mkexpr(imm8_mask) ),
11252 binop( Iop_AndV128, mkexpr(dV),
11253 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11254 return res;
11258 static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11260 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11261 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11262 breakupV256toV128s( sV, &sVhi, &sVlo );
11263 breakupV256toV128s( dV, &dVhi, &dVlo );
11264 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
11265 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
11266 IRTemp rV = newTemp(Ity_V256);
11267 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11268 return rV;
11272 static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11274 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
11275 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
11276 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
11277 0xFFFF };
11278 IRTemp imm8_mask = newTemp(Ity_V128);
11279 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
11281 IRTemp res = newTemp(Ity_V128);
11282 assign ( res, binop( Iop_OrV128,
11283 binop( Iop_AndV128, mkexpr(sV),
11284 mkexpr(imm8_mask) ),
11285 binop( Iop_AndV128, mkexpr(dV),
11286 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
11287 return res;
11291 static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11293 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11294 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11295 breakupV256toV128s( sV, &sVhi, &sVlo );
11296 breakupV256toV128s( dV, &dVhi, &dVlo );
11297 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11298 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11299 IRTemp rV = newTemp(Ity_V256);
11300 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11301 return rV;
11305 static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11307 /* Make imm16 be a 16-bit version of imm8, formed by duplicating each
11308 bit of imm8. */
11309 Int i;
11310 UShort imm16 = 0;
11311 for (i = 0; i < 8; i++) {
11312 if (imm8 & (1 << i))
11313 imm16 |= (3 << (2*i));
11315 IRTemp imm16_mask = newTemp(Ity_V128);
11316 assign( imm16_mask, mkV128( imm16 ));
11318 IRTemp res = newTemp(Ity_V128);
11319 assign ( res, binop( Iop_OrV128,
11320 binop( Iop_AndV128, mkexpr(sV),
11321 mkexpr(imm16_mask) ),
11322 binop( Iop_AndV128, mkexpr(dV),
11323 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11324 return res;
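/* Worked example (editorial): imm8 == 0x0F expands to imm16 == 0x00FF,
   so the low four 16-bit lanes of the result come from sV and the high
   four from dV, matching PBLENDW's per-word selection. */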
11328 static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11330 /* This is a really poor translation -- could be improved if
11331 performance critical */
11332 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11333 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11334 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11335 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11336 IRTemp res = newTemp(Ity_V128);
11337 assign(res, binop(Iop_64HLtoV128,
11338 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11339 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11340 return res;
11344 static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11346 /* This is a really poor translation -- could be improved if
11347 performance critical */
11348 IRTemp sHi, sLo, dHi, dLo;
11349 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11350 breakupV256toV128s( dV, &dHi, &dLo);
11351 breakupV256toV128s( sV, &sHi, &sLo);
11352 IRTemp res = newTemp(Ity_V256);
11353 assign(res, binop(Iop_V128HLtoV256,
11354 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11355 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11356 return res;
11360 static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11362 /* This is a really poor translation -- could be improved if
11363 performance critical */
11364 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11365 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11366 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11367 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11368 IRTemp res = newTemp(Ity_V128);
11369 assign(res, binop(Iop_64HLtoV128,
11370 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11371 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11372 return res;
11376 static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11378 /* This is a really poor translation -- could be improved if
11379 performance critical */
11380 IRTemp sHi, sLo, dHi, dLo;
11381 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11382 breakupV256toV128s( dV, &dHi, &dLo);
11383 breakupV256toV128s( sV, &sHi, &sLo);
11384 IRTemp res = newTemp(Ity_V256);
11385 assign(res, binop(Iop_V128HLtoV256,
11386 mkexpr(math_PMULDQ_128(sHi, dHi)),
11387 mkexpr(math_PMULDQ_128(sLo, dLo))));
11388 return res;
11392 static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11394 IRTemp sVhi, sVlo, dVhi, dVlo;
11395 IRTemp resHi = newTemp(Ity_I64);
11396 IRTemp resLo = newTemp(Ity_I64);
11397 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11398 breakupV128to64s( sV, &sVhi, &sVlo );
11399 breakupV128to64s( dV, &dVhi, &dVlo );
11400 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11401 "amd64g_calculate_mmx_pmaddwd",
11402 &amd64g_calculate_mmx_pmaddwd,
11403 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11404 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11405 "amd64g_calculate_mmx_pmaddwd",
11406 &amd64g_calculate_mmx_pmaddwd,
11407 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11408 IRTemp res = newTemp(Ity_V128);
11409 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11410 return res;
11414 static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11416 IRTemp sHi, sLo, dHi, dLo;
11417 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11418 breakupV256toV128s( dV, &dHi, &dLo);
11419 breakupV256toV128s( sV, &sHi, &sLo);
11420 IRTemp res = newTemp(Ity_V256);
11421 assign(res, binop(Iop_V128HLtoV256,
11422 mkexpr(math_PMADDWD_128(dHi, sHi)),
11423 mkexpr(math_PMADDWD_128(dLo, sLo))));
11424 return res;
11428 static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11430 IRTemp addV = newTemp(Ity_V128);
11431 IRTemp subV = newTemp(Ity_V128);
11432 IRTemp a1 = newTemp(Ity_I64);
11433 IRTemp s0 = newTemp(Ity_I64);
11434 IRTemp rm = newTemp(Ity_I32);
11436 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11437 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11438 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11440 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11441 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11443 IRTemp res = newTemp(Ity_V128);
11444 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11445 return res;
11449 static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11451 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11452 IRTemp addV = newTemp(Ity_V256);
11453 IRTemp subV = newTemp(Ity_V256);
11454 IRTemp rm = newTemp(Ity_I32);
11455 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11457 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11458 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11459 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11461 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11462 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11464 IRTemp res = newTemp(Ity_V256);
11465 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11466 return res;
11470 static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11472 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11473 IRTemp addV = newTemp(Ity_V128);
11474 IRTemp subV = newTemp(Ity_V128);
11475 IRTemp rm = newTemp(Ity_I32);
11476 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11478 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11479 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11480 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11482 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11483 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11485 IRTemp res = newTemp(Ity_V128);
11486 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11487 return res;
11491 static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11493 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11494 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11495 IRTemp addV = newTemp(Ity_V256);
11496 IRTemp subV = newTemp(Ity_V256);
11497 IRTemp rm = newTemp(Ity_I32);
11498 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11499 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11501 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11502 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11503 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11505 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11506 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11508 IRTemp res = newTemp(Ity_V256);
11509 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11510 return res;
11514 /* Handle 128 bit PSHUFLW and PSHUFHW. */
11515 static Long dis_PSHUFxW_128 ( const VexAbiInfo* vbi, Prefix pfx,
11516 Long delta, Bool isAvx, Bool xIsH )
11518 IRTemp addr = IRTemp_INVALID;
11519 Int alen = 0;
11520 HChar dis_buf[50];
11521 UChar modrm = getUChar(delta);
11522 UInt rG = gregOfRexRM(pfx,modrm);
11523 UInt imm8;
11524 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11525 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11526 sV = newTemp(Ity_V128);
11527 dV = newTemp(Ity_V128);
11528 sVmut = newTemp(Ity_I64);
11529 dVmut = newTemp(Ity_I64);
11530 sVcon = newTemp(Ity_I64);
11531 if (epartIsReg(modrm)) {
11532 UInt rE = eregOfRexRM(pfx,modrm);
11533 assign( sV, getXMMReg(rE) );
11534 imm8 = (UInt)getUChar(delta+1);
11535 delta += 1+1;
11536 DIP("%spshuf%cw $%u,%s,%s\n",
11537 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11538 imm8, nameXMMReg(rE), nameXMMReg(rG));
11539 } else {
11540 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11541 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11542 imm8 = (UInt)getUChar(delta+alen);
11543 delta += alen+1;
11544 DIP("%spshuf%cw $%u,%s,%s\n",
11545 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11546 imm8, dis_buf, nameXMMReg(rG));
11549 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11550 source. */
11551 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11552 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11554 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11555 # define SEL(n) \
11556 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11557 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11558 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11559 # undef SEL
11561 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11562 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11564 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11565 return delta;
11569 /* Handle 256 bit PSHUFLW and PSHUFHW. */
11570 static Long dis_PSHUFxW_256 ( const VexAbiInfo* vbi, Prefix pfx,
11571 Long delta, Bool xIsH )
11573 IRTemp addr = IRTemp_INVALID;
11574 Int alen = 0;
11575 HChar dis_buf[50];
11576 UChar modrm = getUChar(delta);
11577 UInt rG = gregOfRexRM(pfx,modrm);
11578 UInt imm8;
11579 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11580 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11581 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11582 sV = newTemp(Ity_V256);
11583 dVhi = newTemp(Ity_I64);
11584 dVlo = newTemp(Ity_I64);
11585 if (epartIsReg(modrm)) {
11586 UInt rE = eregOfRexRM(pfx,modrm);
11587 assign( sV, getYMMReg(rE) );
11588 imm8 = (UInt)getUChar(delta+1);
11589 delta += 1+1;
11590 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11591 imm8, nameYMMReg(rE), nameYMMReg(rG));
11592 } else {
11593 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11594 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11595 imm8 = (UInt)getUChar(delta+alen);
11596 delta += alen+1;
11597 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11598 imm8, dis_buf, nameYMMReg(rG));
11601 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11602 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11603 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11605 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11606 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11607 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11608 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11609 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11610 xIsH ? sV64[2] : dVhi,
11611 xIsH ? dVlo : sV64[1],
11612 xIsH ? sV64[0] : dVlo ) );
11613 return delta;
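/* For the 256-bit form, the same imm8-selected permutation is applied
   independently within each 128-bit lane: the code above permutes
   sV64[3]/sV64[1] (PSHUFHW) or sV64[2]/sV64[0] (PSHUFLW) with identical
   selectors and leaves the other 64-bit half of each lane unchanged. */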
11617 static Long dis_PEXTRW_128_EregOnly_toG ( const VexAbiInfo* vbi, Prefix pfx,
11618 Long delta, Bool isAvx )
11620 Long deltaIN = delta;
11621 UChar modrm = getUChar(delta);
11622 UInt rG = gregOfRexRM(pfx,modrm);
11623 IRTemp sV = newTemp(Ity_V128);
11624 IRTemp d16 = newTemp(Ity_I16);
11625 UInt imm8;
11626 IRTemp s0, s1, s2, s3;
11627 if (epartIsReg(modrm)) {
11628 UInt rE = eregOfRexRM(pfx,modrm);
11629 assign(sV, getXMMReg(rE));
11630 imm8 = getUChar(delta+1) & 7;
11631 delta += 1+1;
11632 DIP("%spextrw $%u,%s,%s\n", isAvx ? "v" : "",
11633 imm8, nameXMMReg(rE), nameIReg32(rG));
11634 } else {
11635 /* The memory case is disallowed, apparently. */
11636 return deltaIN; /* FAIL */
11638 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11639 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11640 switch (imm8) {
11641 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11642 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11643 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11644 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11645 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11646 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11647 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11648 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11649 default: vassert(0);
11651 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11652 return delta;
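/* Reference sketch only (not used by the decoder): with the source
   viewed as eight little-endian 16-bit words w[0..7],
      PEXTRW r32, xmm, imm8   ==>   r32 = (UInt)w[imm8 & 7];
   i.e. the selected word zero-extended to 32 bits, matching the switch
   on imm8 above. */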
11656 static Long dis_CVTDQ2PD_128 ( const VexAbiInfo* vbi, Prefix pfx,
11657 Long delta, Bool isAvx )
11659 IRTemp addr = IRTemp_INVALID;
11660 Int alen = 0;
11661 HChar dis_buf[50];
11662 UChar modrm = getUChar(delta);
11663 IRTemp arg64 = newTemp(Ity_I64);
11664 UInt rG = gregOfRexRM(pfx,modrm);
11665 const HChar* mbV = isAvx ? "v" : "";
11666 if (epartIsReg(modrm)) {
11667 UInt rE = eregOfRexRM(pfx,modrm);
11668 assign( arg64, getXMMRegLane64(rE, 0) );
11669 delta += 1;
11670 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11671 } else {
11672 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11673 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11674 delta += alen;
11675 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11677 putXMMRegLane64F(
11678 rG, 0,
11679 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11681 putXMMRegLane64F(
11682 rG, 1,
11683 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11685 if (isAvx)
11686 putYMMRegLane128(rG, 1, mkV128(0));
11687 return delta;
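/* In other words: the two signed 32-bit integers in the low 64 bits of
   the source become two F64s in the destination, and the AVX form
   additionally zeroes the upper 128 bits of the destination register. */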
11691 static Long dis_STMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11692 Long delta, Bool isAvx )
11694 IRTemp addr = IRTemp_INVALID;
11695 Int alen = 0;
11696 HChar dis_buf[50];
11697 UChar modrm = getUChar(delta);
11698 vassert(!epartIsReg(modrm)); /* ensured by caller */
11699 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11701 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11702 delta += alen;
11704 /* Fake up a native SSE mxcsr word. The only thing it depends on
11705 is SSEROUND[1:0], so call a clean helper to cook it up.
11707    /* ULong amd64g_create_mxcsr ( ULong sseround ) */

11708 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11709 storeLE(
11710 mkexpr(addr),
11711 unop(Iop_64to32,
11712 mkIRExprCCall(
11713 Ity_I64, 0/*regp*/,
11714 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11715 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11719 return delta;
11723 static Long dis_LDMXCSR ( const VexAbiInfo* vbi, Prefix pfx,
11724 Long delta, Bool isAvx )
11726 IRTemp addr = IRTemp_INVALID;
11727 Int alen = 0;
11728 HChar dis_buf[50];
11729 UChar modrm = getUChar(delta);
11730 vassert(!epartIsReg(modrm)); /* ensured by caller */
11731 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11733 IRTemp t64 = newTemp(Ity_I64);
11734 IRTemp ew = newTemp(Ity_I32);
11736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11737 delta += alen;
11738 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11740 /* The only thing we observe in %mxcsr is the rounding mode.
11741 Therefore, pass the 32-bit value (SSE native-format control
11742 word) to a clean helper, getting back a 64-bit value, the
11743 lower half of which is the SSEROUND value to store, and the
11744 upper half of which is the emulation-warning token which may
11745 be generated.
11747    /* ULong amd64g_check_ldmxcsr ( ULong ); */
11748 assign( t64, mkIRExprCCall(
11749 Ity_I64, 0/*regparms*/,
11750 "amd64g_check_ldmxcsr",
11751 &amd64g_check_ldmxcsr,
11752 mkIRExprVec_1(
11753 unop(Iop_32Uto64,
11754 loadLE(Ity_I32, mkexpr(addr))
11760 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11761 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11762 put_emwarn( mkexpr(ew) );
11763 /* Finally, if an emulation warning was reported, side-exit to
11764 the next insn, reporting the warning, so that Valgrind's
11765 dispatcher sees the warning. */
11766 stmt(
11767 IRStmt_Exit(
11768 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11769 Ijk_EmWarn,
11770 IRConst_U64(guest_RIP_bbstart+delta),
11771 OFFB_RIP
11774 return delta;
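/* Layout of the (standard, non-compacted) XSAVE image written by the
   sequence below, as implied by the offsets it uses:
      bytes   0 .. 159   x87 state, including MXCSR/MXCSR_MASK at 24 .. 31
      bytes 160 .. 415   XMM0 .. XMM15, 16 bytes each (160 + 16*reg)
      bytes 512 ..       XSAVE header; XSTATE_BV (byte 512) is updated by
                         dis_XSAVE, not here
      bytes 576 .. 831   YMM0 .. YMM15 high halves (576 + 16*reg)        */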
11778 static void gen_XSAVE_SEQUENCE ( IRTemp addr, IRTemp rfbm )
11780 /* ------ rfbm[0] gates the x87 state ------ */
11782 /* Uses dirty helper:
11783       void amd64g_dirtyhelper_XSAVE_COMPONENT_0 ( VexGuestAMD64State*, ULong )
11785 IRDirty* d0 = unsafeIRDirty_0_N (
11786 0/*regparms*/,
11787 "amd64g_dirtyhelper_XSAVE_COMPONENT_0",
11788 &amd64g_dirtyhelper_XSAVE_COMPONENT_0,
11789 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11791 d0->guard = binop(Iop_CmpEQ64, binop(Iop_And64, mkexpr(rfbm), mkU64(1)),
11792 mkU64(1));
11794 /* Declare we're writing memory. Really, bytes 24 through 31
11795 (MXCSR and MXCSR_MASK) aren't written, but we can't express more
11796 than 1 memory area here, so just mark the whole thing as
11797 written. */
11798 d0->mFx = Ifx_Write;
11799 d0->mAddr = mkexpr(addr);
11800 d0->mSize = 160;
11802 /* declare we're reading guest state */
11803 d0->nFxState = 5;
11804 vex_bzero(&d0->fxState, sizeof(d0->fxState));
11806 d0->fxState[0].fx = Ifx_Read;
11807 d0->fxState[0].offset = OFFB_FTOP;
11808 d0->fxState[0].size = sizeof(UInt);
11810 d0->fxState[1].fx = Ifx_Read;
11811 d0->fxState[1].offset = OFFB_FPREGS;
11812 d0->fxState[1].size = 8 * sizeof(ULong);
11814 d0->fxState[2].fx = Ifx_Read;
11815 d0->fxState[2].offset = OFFB_FPTAGS;
11816 d0->fxState[2].size = 8 * sizeof(UChar);
11818 d0->fxState[3].fx = Ifx_Read;
11819 d0->fxState[3].offset = OFFB_FPROUND;
11820 d0->fxState[3].size = sizeof(ULong);
11822 d0->fxState[4].fx = Ifx_Read;
11823 d0->fxState[4].offset = OFFB_FC3210;
11824 d0->fxState[4].size = sizeof(ULong);
11826 stmt( IRStmt_Dirty(d0) );
11828 /* ------ rfbm[1] gates the SSE state ------ */
11830 IRTemp rfbm_1 = newTemp(Ity_I64);
11831 IRTemp rfbm_1or2 = newTemp(Ity_I64);
11832 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
11833 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
11835 IRExpr* guard_1 = binop(Iop_CmpEQ64, mkexpr(rfbm_1), mkU64(2));
11836 IRExpr* guard_1or2 = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
11838 /* Uses dirty helper:
11839       void amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS
11840 ( VexGuestAMD64State*, ULong )
11841 This creates only MXCSR and MXCSR_MASK. We need to do this if
11842 either components 1 (SSE) or 2 (AVX) are requested. Hence the
11843 guard condition is a bit more complex.
11845 IRDirty* d1 = unsafeIRDirty_0_N (
11846 0/*regparms*/,
11847 "amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS",
11848 &amd64g_dirtyhelper_XSAVE_COMPONENT_1_EXCLUDING_XMMREGS,
11849 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
11851 d1->guard = guard_1or2;
11853 /* Declare we're writing memory: MXCSR and MXCSR_MASK. Note that
11854       the code for rfbm[0] just above claims a write of 0 .. 159, so
11855 this duplicates it. But at least correctly connects 24 .. 31 to
11856 the MXCSR guest state representation (SSEROUND field). */
11857 d1->mFx = Ifx_Write;
11858 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
11859 d1->mSize = 8;
11861 /* declare we're reading guest state */
11862 d1->nFxState = 1;
11863 vex_bzero(&d1->fxState, sizeof(d1->fxState));
11865 d1->fxState[0].fx = Ifx_Read;
11866 d1->fxState[0].offset = OFFB_SSEROUND;
11867 d1->fxState[0].size = sizeof(ULong);
11869 /* Call the helper. This creates MXCSR and MXCSR_MASK but nothing
11870 else. We do the actual register array, XMM[0..15], separately,
11871 in order that any undefinedness in the XMM registers is tracked
11872 separately by Memcheck and does not "infect" the in-memory
11873 shadow for the other parts of the image. */
11874 stmt( IRStmt_Dirty(d1) );
11876 /* And now the XMMs themselves. */
11877 UInt reg;
11878 for (reg = 0; reg < 16; reg++) {
11879 stmt( IRStmt_StoreG(
11880 Iend_LE,
11881 binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16)),
11882 getXMMReg(reg),
11883 guard_1
11887 /* ------ rfbm[2] gates the AVX state ------ */
11888 /* Component 2 is just a bunch of register saves, so we'll do it
11889 inline, just to be simple and to be Memcheck friendly. */
11891 IRTemp rfbm_2 = newTemp(Ity_I64);
11892 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
11894 IRExpr* guard_2 = binop(Iop_CmpEQ64, mkexpr(rfbm_2), mkU64(4));
11896 for (reg = 0; reg < 16; reg++) {
11897 stmt( IRStmt_StoreG(
11898 Iend_LE,
11899 binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16)),
11900 getYMMRegLane128(reg,1),
11901 guard_2
11907 static Long dis_XSAVE ( const VexAbiInfo* vbi,
11908 Prefix pfx, Long delta, Int sz )
11910 /* Note that the presence or absence of REX.W (indicated here by
11911 |sz|) slightly affects the written format: whether the saved FPU
11912 IP and DP pointers are 64 or 32 bits. But the helper function
11913 we call simply writes zero bits in the relevant fields, which
11914 are 64 bits regardless of what REX.W is, and so it's good enough
11915 (iow, equally broken) in both cases. */
11916 IRTemp addr = IRTemp_INVALID;
11917 Int alen = 0;
11918 HChar dis_buf[50];
11919 UChar modrm = getUChar(delta);
11920 vassert(!epartIsReg(modrm)); /* ensured by caller */
11921 vassert(sz == 4 || sz == 8); /* ditto */
11923 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11924 delta += alen;
11925 gen_SIGNAL_if_not_64_aligned(vbi, addr);
11927 DIP("%sxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11929 /* VEX's caller is assumed to have checked this. */
11930 const ULong aSSUMED_XCR0_VALUE = 7;
11932 IRTemp rfbm = newTemp(Ity_I64);
11933 assign(rfbm,
11934 binop(Iop_And64,
11935 binop(Iop_Or64,
11936 binop(Iop_Shl64,
11937 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
11938 unop(Iop_32Uto64, getIRegRAX(4))),
11939 mkU64(aSSUMED_XCR0_VALUE)));
11941 gen_XSAVE_SEQUENCE(addr, rfbm);
11943 /* Finally, we need to update XSTATE_BV in the XSAVE header area, by
11944 OR-ing the RFBM value into it. */
11945 IRTemp addr_plus_512 = newTemp(Ity_I64);
11946 assign(addr_plus_512, binop(Iop_Add64, mkexpr(addr), mkU64(512)));
11947 storeLE( mkexpr(addr_plus_512),
11948 binop(Iop_Or8,
11949 unop(Iop_64to8, mkexpr(rfbm)),
11950 loadLE(Ity_I8, mkexpr(addr_plus_512))) );
11952 return delta;
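/* Sketch of the requested-feature-bitmap computation above, under the
   assumption (checked by VEX's caller) that XCR0 == 7:
      rfbm = (((ULong)EDX << 32) | (ULong)EAX) & 7;
   so only the x87 (bit 0), SSE (bit 1) and AVX (bit 2) components can
   ever be selected. */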
11956 static Long dis_FXSAVE ( const VexAbiInfo* vbi,
11957 Prefix pfx, Long delta, Int sz )
11959 /* See comment in dis_XSAVE about the significance of REX.W. */
11960 IRTemp addr = IRTemp_INVALID;
11961 Int alen = 0;
11962 HChar dis_buf[50];
11963 UChar modrm = getUChar(delta);
11964 vassert(!epartIsReg(modrm)); /* ensured by caller */
11965 vassert(sz == 4 || sz == 8); /* ditto */
11967 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11968 delta += alen;
11969 gen_SIGNAL_if_not_16_aligned(vbi, addr);
11971 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
11973 /* FXSAVE is just XSAVE with components 0 and 1 selected. Set rfbm
11974 to 0b011, generate the XSAVE sequence accordingly, and let iropt
11975       fold out the unused (AVX) parts. */
11976 IRTemp rfbm = newTemp(Ity_I64);
11977 assign(rfbm, mkU64(3));
11978 gen_XSAVE_SEQUENCE(addr, rfbm);
11980 return delta;
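/* Per-component behaviour of the restore sequence below, the same for
   the x87, SSE and AVX components:
      RFBM bit clear                     -> component left untouched
      RFBM bit set, XSTATE_BV bit clear  -> component set to initial values
      RFBM bit set, XSTATE_BV bit set    -> component restored from memory
   The "initialise first, then conditionally overwrite" scheme keeps the
   definedness flow simple for Memcheck. */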
11984 static void gen_XRSTOR_SEQUENCE ( IRTemp addr, IRTemp xstate_bv, IRTemp rfbm )
11986 /* ------ rfbm[0] gates the x87 state ------ */
11988 /* If rfbm[0] == 1, we have to write the x87 state. If
11989 xstate_bv[0] == 1, we will read it from the memory image, else
11990 we'll set it to initial values. Doing this with a helper
11991 function and getting the definedness flow annotations correct is
11992 too difficult, so generate stupid but simple code: first set the
11993 registers to initial values, regardless of xstate_bv[0]. Then,
11994 conditionally restore from the memory image. */
11996 IRTemp rfbm_0 = newTemp(Ity_I64);
11997 IRTemp xstate_bv_0 = newTemp(Ity_I64);
11998 IRTemp restore_0 = newTemp(Ity_I64);
11999 assign(rfbm_0, binop(Iop_And64, mkexpr(rfbm), mkU64(1)));
12000 assign(xstate_bv_0, binop(Iop_And64, mkexpr(xstate_bv), mkU64(1)));
12001 assign(restore_0, binop(Iop_And64, mkexpr(rfbm_0), mkexpr(xstate_bv_0)));
12003 gen_FINIT_SEQUENCE( binop(Iop_CmpNE64, mkexpr(rfbm_0), mkU64(0)) );
12005 /* Uses dirty helper:
12006       void amd64g_dirtyhelper_XRSTOR_COMPONENT_0 ( VexGuestAMD64State*, ULong )
12008 IRDirty* d0 = unsafeIRDirty_0_N (
12009 0/*regparms*/,
12010 "amd64g_dirtyhelper_XRSTOR_COMPONENT_0",
12011 &amd64g_dirtyhelper_XRSTOR_COMPONENT_0,
12012 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12014 d0->guard = binop(Iop_CmpNE64, mkexpr(restore_0), mkU64(0));
12016 /* Declare we're reading memory. Really, bytes 24 through 31
12017 (MXCSR and MXCSR_MASK) aren't read, but we can't express more
12018 than 1 memory area here, so just mark the whole thing as
12019 read. */
12020 d0->mFx = Ifx_Read;
12021 d0->mAddr = mkexpr(addr);
12022 d0->mSize = 160;
12024 /* declare we're writing guest state */
12025 d0->nFxState = 5;
12026 vex_bzero(&d0->fxState, sizeof(d0->fxState));
12028 d0->fxState[0].fx = Ifx_Write;
12029 d0->fxState[0].offset = OFFB_FTOP;
12030 d0->fxState[0].size = sizeof(UInt);
12032 d0->fxState[1].fx = Ifx_Write;
12033 d0->fxState[1].offset = OFFB_FPREGS;
12034 d0->fxState[1].size = 8 * sizeof(ULong);
12036 d0->fxState[2].fx = Ifx_Write;
12037 d0->fxState[2].offset = OFFB_FPTAGS;
12038 d0->fxState[2].size = 8 * sizeof(UChar);
12040 d0->fxState[3].fx = Ifx_Write;
12041 d0->fxState[3].offset = OFFB_FPROUND;
12042 d0->fxState[3].size = sizeof(ULong);
12044 d0->fxState[4].fx = Ifx_Write;
12045 d0->fxState[4].offset = OFFB_FC3210;
12046 d0->fxState[4].size = sizeof(ULong);
12048 stmt( IRStmt_Dirty(d0) );
12050 /* ------ rfbm[1] gates the SSE state ------ */
12052 /* Same scheme as component 0: first zero it out, and then possibly
12053 restore from the memory area. */
12054 IRTemp rfbm_1 = newTemp(Ity_I64);
12055 IRTemp xstate_bv_1 = newTemp(Ity_I64);
12056 IRTemp restore_1 = newTemp(Ity_I64);
12057 assign(rfbm_1, binop(Iop_And64, mkexpr(rfbm), mkU64(2)));
12058 assign(xstate_bv_1, binop(Iop_And64, mkexpr(xstate_bv), mkU64(2)));
12059 assign(restore_1, binop(Iop_And64, mkexpr(rfbm_1), mkexpr(xstate_bv_1)));
12060 IRExpr* rfbm_1e = binop(Iop_CmpNE64, mkexpr(rfbm_1), mkU64(0));
12061 IRExpr* restore_1e = binop(Iop_CmpNE64, mkexpr(restore_1), mkU64(0));
12063 IRTemp rfbm_1or2 = newTemp(Ity_I64);
12064 IRTemp xstate_bv_1or2 = newTemp(Ity_I64);
12065 IRTemp restore_1or2 = newTemp(Ity_I64);
12066 assign(rfbm_1or2, binop(Iop_And64, mkexpr(rfbm), mkU64(6)));
12067 assign(xstate_bv_1or2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(6)));
12068 assign(restore_1or2, binop(Iop_And64, mkexpr(rfbm_1or2),
12069 mkexpr(xstate_bv_1or2)));
12070 IRExpr* rfbm_1or2e = binop(Iop_CmpNE64, mkexpr(rfbm_1or2), mkU64(0));
12071 IRExpr* restore_1or2e = binop(Iop_CmpNE64, mkexpr(restore_1or2), mkU64(0));
12073 /* The areas in question are: SSEROUND, and the XMM register array. */
12074 putGuarded(OFFB_SSEROUND, rfbm_1or2e, mkU64(Irrm_NEAREST));
12076 UInt reg;
12077 for (reg = 0; reg < 16; reg++) {
12078 putGuarded(xmmGuestRegOffset(reg), rfbm_1e, mkV128(0));
12081 /* And now possibly restore from MXCSR/MXCSR_MASK */
12082 /* Uses dirty helper:
12083       void amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS
12084 ( VexGuestAMD64State*, ULong )
12085 This restores from only MXCSR and MXCSR_MASK. We need to do
12086 this if either components 1 (SSE) or 2 (AVX) are requested.
12087 Hence the guard condition is a bit more complex.
12089 IRDirty* d1 = unsafeIRDirty_0_N (
12090 0/*regparms*/,
12091 "amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS",
12092 &amd64g_dirtyhelper_XRSTOR_COMPONENT_1_EXCLUDING_XMMREGS,
12093 mkIRExprVec_2( IRExpr_GSPTR(), mkexpr(addr) )
12095 d1->guard = restore_1or2e;
12097 /* Declare we're reading memory: MXCSR and MXCSR_MASK. Note that
12098       the code for rfbm[0] just above claims a read of 0 .. 159, so
12099 this duplicates it. But at least correctly connects 24 .. 31 to
12100 the MXCSR guest state representation (SSEROUND field). */
12101 d1->mFx = Ifx_Read;
12102 d1->mAddr = binop(Iop_Add64, mkexpr(addr), mkU64(24));
12103 d1->mSize = 8;
12105 /* declare we're writing guest state */
12106 d1->nFxState = 1;
12107 vex_bzero(&d1->fxState, sizeof(d1->fxState));
12109 d1->fxState[0].fx = Ifx_Write;
12110 d1->fxState[0].offset = OFFB_SSEROUND;
12111 d1->fxState[0].size = sizeof(ULong);
12113 /* Call the helper. This creates SSEROUND but nothing
12114 else. We do the actual register array, XMM[0..15], separately,
12115 in order that any undefinedness in the XMM registers is tracked
12116 separately by Memcheck and is not "infected" by the in-memory
12117 shadow for the other parts of the image. */
12118 stmt( IRStmt_Dirty(d1) );
12120 /* And now the XMMs themselves. For each register, we PUT either
12121 its old value, or the value loaded from memory. One convenient
12122       way to do that is with a conditional load whose default value
12123       is the old value of the register. */
12124 for (reg = 0; reg < 16; reg++) {
12125 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(160 + reg * 16));
12126 IRExpr* alt = getXMMReg(reg);
12127 IRTemp loadedValue = newTemp(Ity_V128);
12128 stmt( IRStmt_LoadG(Iend_LE,
12129 ILGop_IdentV128,
12130 loadedValue, ea, alt, restore_1e) );
12131 putXMMReg(reg, mkexpr(loadedValue));
12134 /* ------ rfbm[2] gates the AVX state ------ */
12135 /* Component 2 is just a bunch of register loads, so we'll do it
12136 inline, just to be simple and to be Memcheck friendly. */
12138 /* Same scheme as component 0: first zero it out, and then possibly
12139 restore from the memory area. */
12140 IRTemp rfbm_2 = newTemp(Ity_I64);
12141 IRTemp xstate_bv_2 = newTemp(Ity_I64);
12142 IRTemp restore_2 = newTemp(Ity_I64);
12143 assign(rfbm_2, binop(Iop_And64, mkexpr(rfbm), mkU64(4)));
12144 assign(xstate_bv_2, binop(Iop_And64, mkexpr(xstate_bv), mkU64(4)));
12145 assign(restore_2, binop(Iop_And64, mkexpr(rfbm_2), mkexpr(xstate_bv_2)));
12147 IRExpr* rfbm_2e = binop(Iop_CmpNE64, mkexpr(rfbm_2), mkU64(0));
12148 IRExpr* restore_2e = binop(Iop_CmpNE64, mkexpr(restore_2), mkU64(0));
12150 for (reg = 0; reg < 16; reg++) {
12151 putGuarded(ymmGuestRegLane128offset(reg, 1), rfbm_2e, mkV128(0));
12154 for (reg = 0; reg < 16; reg++) {
12155 IRExpr* ea = binop(Iop_Add64, mkexpr(addr), mkU64(576 + reg * 16));
12156 IRExpr* alt = getYMMRegLane128(reg, 1);
12157 IRTemp loadedValue = newTemp(Ity_V128);
12158 stmt( IRStmt_LoadG(Iend_LE,
12159 ILGop_IdentV128,
12160 loadedValue, ea, alt, restore_2e) );
12161 putYMMRegLane128(reg, 1, mkexpr(loadedValue));
12166 static Long dis_XRSTOR ( const VexAbiInfo* vbi,
12167 Prefix pfx, Long delta, Int sz )
12169    /* As with XSAVE above we ignore the value of REX.W since we're
12170 not bothering with the FPU DP and IP fields. */
12171 IRTemp addr = IRTemp_INVALID;
12172 Int alen = 0;
12173 HChar dis_buf[50];
12174 UChar modrm = getUChar(delta);
12175 vassert(!epartIsReg(modrm)); /* ensured by caller */
12176 vassert(sz == 4 || sz == 8); /* ditto */
12178 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12179 delta += alen;
12180 gen_SIGNAL_if_not_64_aligned(vbi, addr);
12182 DIP("%sxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12184 /* VEX's caller is assumed to have checked this. */
12185 const ULong aSSUMED_XCR0_VALUE = 7;
12187 IRTemp rfbm = newTemp(Ity_I64);
12188 assign(rfbm,
12189 binop(Iop_And64,
12190 binop(Iop_Or64,
12191 binop(Iop_Shl64,
12192 unop(Iop_32Uto64, getIRegRDX(4)), mkU8(32)),
12193 unop(Iop_32Uto64, getIRegRAX(4))),
12194 mkU64(aSSUMED_XCR0_VALUE)));
12196 IRTemp xstate_bv = newTemp(Ity_I64);
12197 assign(xstate_bv, loadLE(Ity_I64,
12198 binop(Iop_Add64, mkexpr(addr), mkU64(512+0))));
12200 IRTemp xcomp_bv = newTemp(Ity_I64);
12201 assign(xcomp_bv, loadLE(Ity_I64,
12202 binop(Iop_Add64, mkexpr(addr), mkU64(512+8))));
12204 IRTemp xsavehdr_23_16 = newTemp(Ity_I64);
12205 assign( xsavehdr_23_16,
12206 loadLE(Ity_I64,
12207 binop(Iop_Add64, mkexpr(addr), mkU64(512+16))));
12209 /* We must fault if
12210 * xcomp_bv[63] == 1, since this simulated CPU does not support
12211 the compaction extension.
12212 * xstate_bv sets a bit outside of XCR0 (which we assume to be 7).
12213 * any of the xsave header bytes 23 .. 8 are nonzero. This seems to
12214 imply that xcomp_bv must be zero.
12215 xcomp_bv is header bytes 15 .. 8 and xstate_bv is header bytes 7 .. 0
12217 IRTemp fault_if_nonzero = newTemp(Ity_I64);
12218 assign(fault_if_nonzero,
12219 binop(Iop_Or64,
12220 binop(Iop_And64, mkexpr(xstate_bv), mkU64(~aSSUMED_XCR0_VALUE)),
12221 binop(Iop_Or64, mkexpr(xcomp_bv), mkexpr(xsavehdr_23_16))));
12222 stmt( IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(fault_if_nonzero), mkU64(0)),
12223 Ijk_SigSEGV,
12224 IRConst_U64(guest_RIP_curr_instr),
12225 OFFB_RIP
12228 /* We are guaranteed now that both xstate_bv and rfbm are in the
12229 range 0 .. 7. Generate the restore sequence proper. */
12230 gen_XRSTOR_SEQUENCE(addr, xstate_bv, rfbm);
12232 return delta;
12236 static Long dis_FXRSTOR ( const VexAbiInfo* vbi,
12237 Prefix pfx, Long delta, Int sz )
12239 /* As with FXSAVE above we ignore the value of REX.W since we're
12240 not bothering with the FPU DP and IP fields. */
12241 IRTemp addr = IRTemp_INVALID;
12242 Int alen = 0;
12243 HChar dis_buf[50];
12244 UChar modrm = getUChar(delta);
12245 vassert(!epartIsReg(modrm)); /* ensured by caller */
12246 vassert(sz == 4 || sz == 8); /* ditto */
12248 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12249 delta += alen;
12250 gen_SIGNAL_if_not_16_aligned(vbi, addr);
12252 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
12254 /* FXRSTOR is just XRSTOR with components 0 and 1 selected and also
12255 as if components 0 and 1 are set as present in XSTATE_BV in the
12256 XSAVE header. Set both rfbm and xstate_bv to 0b011 therefore,
12257 generate the XRSTOR sequence accordingly, and let iropt fold out
12258       the unused (AVX) parts. */
12259 IRTemp three = newTemp(Ity_I64);
12260 assign(three, mkU64(3));
12261 gen_XRSTOR_SEQUENCE(addr, three/*xstate_bv*/, three/*rfbm*/);
12263 return delta;
12267 static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
12269 vassert(imm8 <= 7);
12271 // Create a V128 value which has the selected word in the
12272 // specified lane, and zeroes everywhere else.
12273 IRTemp tmp128 = newTemp(Ity_V128);
12274 IRTemp halfshift = newTemp(Ity_I64);
12275 assign(halfshift, binop(Iop_Shl64,
12276 unop(Iop_16Uto64, mkexpr(u16)),
12277 mkU8(16 * (imm8 & 3))));
12278 if (imm8 < 4) {
12279 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
12280 } else {
12281 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
12284 UShort mask = ~(3 << (imm8 * 2));
12285 IRTemp res = newTemp(Ity_V128);
12286 assign( res, binop(Iop_OrV128,
12287 mkexpr(tmp128),
12288 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
12289 return res;
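/* Reference sketch only (not used by the decoder), with the vector
   viewed as eight little-endian 16-bit words:
      res = v128;  res.w[imm8] = u16;
   The mkV128 mask ~(3 << (imm8*2)) is a byte-select mask which clears
   the two bytes of the target word before the shifted value is ORed in. */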
12293 static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
12295 IRTemp s1, s0, d1, d0;
12296 s1 = s0 = d1 = d0 = IRTemp_INVALID;
12298 breakupV128to64s( sV, &s1, &s0 );
12299 breakupV128to64s( dV, &d1, &d0 );
12301 IRTemp res = newTemp(Ity_V128);
12302 assign( res,
12303 binop(Iop_64HLtoV128,
12304 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12305 "amd64g_calculate_mmx_psadbw",
12306 &amd64g_calculate_mmx_psadbw,
12307 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
12308 mkIRExprCCall(Ity_I64, 0/*regparms*/,
12309 "amd64g_calculate_mmx_psadbw",
12310 &amd64g_calculate_mmx_psadbw,
12311 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
12312 return res;
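/* For reference: PSADBW computes, independently for each 64-bit half,
      sum(|d.byte[i] - s.byte[i]|)  for i = 0..7
   (a value in 0 .. 2040), zero-extended to 64 bits; the per-half work is
   done by the amd64g_calculate_mmx_psadbw helper. */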
12316 static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
12318 IRTemp sHi, sLo, dHi, dLo;
12319 sHi = sLo = dHi = dLo = IRTemp_INVALID;
12320 breakupV256toV128s( dV, &dHi, &dLo);
12321 breakupV256toV128s( sV, &sHi, &sLo);
12322 IRTemp res = newTemp(Ity_V256);
12323 assign(res, binop(Iop_V128HLtoV256,
12324 mkexpr(math_PSADBW_128(dHi, sHi)),
12325 mkexpr(math_PSADBW_128(dLo, sLo))));
12326 return res;
12330 static Long dis_MASKMOVDQU ( const VexAbiInfo* vbi, Prefix pfx,
12331 Long delta, Bool isAvx )
12333 IRTemp regD = newTemp(Ity_V128);
12334 IRTemp mask = newTemp(Ity_V128);
12335 IRTemp olddata = newTemp(Ity_V128);
12336 IRTemp newdata = newTemp(Ity_V128);
12337 IRTemp addr = newTemp(Ity_I64);
12338 UChar modrm = getUChar(delta);
12339 UInt rG = gregOfRexRM(pfx,modrm);
12340 UInt rE = eregOfRexRM(pfx,modrm);
12342 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
12343 assign( regD, getXMMReg( rG ));
12345 /* Unfortunately can't do the obvious thing with SarN8x16
12346 here since that can't be re-emitted as SSE2 code - no such
12347 insn. */
12348 assign( mask,
12349 binop(Iop_64HLtoV128,
12350 binop(Iop_SarN8x8,
12351 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
12352 mkU8(7) ),
12353 binop(Iop_SarN8x8,
12354 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
12355 mkU8(7) ) ));
12356 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
12357 assign( newdata, binop(Iop_OrV128,
12358 binop(Iop_AndV128,
12359 mkexpr(regD),
12360 mkexpr(mask) ),
12361 binop(Iop_AndV128,
12362 mkexpr(olddata),
12363 unop(Iop_NotV128, mkexpr(mask)))) );
12364 storeLE( mkexpr(addr), mkexpr(newdata) );
12366 delta += 1;
12367 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
12368 nameXMMReg(rE), nameXMMReg(rG) );
12369 return delta;
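/* Note: the real instruction is a byte-granular masked store to [RDI]
   (bytes whose mask sign bit is clear are not written at all).  The
   read-modify-write above emulates that by loading the old 16 bytes,
   merging in the selected bytes of the source register, and storing the
   result back. */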
12373 static Long dis_MOVMSKPS_128 ( const VexAbiInfo* vbi, Prefix pfx,
12374 Long delta, Bool isAvx )
12376 UChar modrm = getUChar(delta);
12377 UInt rG = gregOfRexRM(pfx,modrm);
12378 UInt rE = eregOfRexRM(pfx,modrm);
12379 IRTemp t0 = newTemp(Ity_I32);
12380 IRTemp t1 = newTemp(Ity_I32);
12381 IRTemp t2 = newTemp(Ity_I32);
12382 IRTemp t3 = newTemp(Ity_I32);
12383 delta += 1;
12384 assign( t0, binop( Iop_And32,
12385 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
12386 mkU32(1) ));
12387 assign( t1, binop( Iop_And32,
12388 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
12389 mkU32(2) ));
12390 assign( t2, binop( Iop_And32,
12391 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
12392 mkU32(4) ));
12393 assign( t3, binop( Iop_And32,
12394 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
12395 mkU32(8) ));
12396 putIReg32( rG, binop(Iop_Or32,
12397 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12398 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12399 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
12400 nameXMMReg(rE), nameIReg32(rG));
12401 return delta;
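/* Reference sketch only: MOVMSKPS r32, xmm gathers the sign bits of the
   four 32-bit lanes into bits 3..0 of the destination and zeroes the
   rest, i.e.
      r32 = (sign(lane3) << 3) | (sign(lane2) << 2)
          | (sign(lane1) << 1) |  sign(lane0);
   which is what the shift/and/or network above computes. */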
12405 static Long dis_MOVMSKPS_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12407 UChar modrm = getUChar(delta);
12408 UInt rG = gregOfRexRM(pfx,modrm);
12409 UInt rE = eregOfRexRM(pfx,modrm);
12410 IRTemp t0 = newTemp(Ity_I32);
12411 IRTemp t1 = newTemp(Ity_I32);
12412 IRTemp t2 = newTemp(Ity_I32);
12413 IRTemp t3 = newTemp(Ity_I32);
12414 IRTemp t4 = newTemp(Ity_I32);
12415 IRTemp t5 = newTemp(Ity_I32);
12416 IRTemp t6 = newTemp(Ity_I32);
12417 IRTemp t7 = newTemp(Ity_I32);
12418 delta += 1;
12419 assign( t0, binop( Iop_And32,
12420 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
12421 mkU32(1) ));
12422 assign( t1, binop( Iop_And32,
12423 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
12424 mkU32(2) ));
12425 assign( t2, binop( Iop_And32,
12426 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
12427 mkU32(4) ));
12428 assign( t3, binop( Iop_And32,
12429 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
12430 mkU32(8) ));
12431 assign( t4, binop( Iop_And32,
12432 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
12433 mkU32(16) ));
12434 assign( t5, binop( Iop_And32,
12435 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
12436 mkU32(32) ));
12437 assign( t6, binop( Iop_And32,
12438 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
12439 mkU32(64) ));
12440 assign( t7, binop( Iop_And32,
12441 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
12442 mkU32(128) ));
12443 putIReg32( rG, binop(Iop_Or32,
12444 binop(Iop_Or32,
12445 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12446 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
12447 binop(Iop_Or32,
12448 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
12449 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
12450 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12451 return delta;
12455 static Long dis_MOVMSKPD_128 ( const VexAbiInfo* vbi, Prefix pfx,
12456 Long delta, Bool isAvx )
12458 UChar modrm = getUChar(delta);
12459 UInt rG = gregOfRexRM(pfx,modrm);
12460 UInt rE = eregOfRexRM(pfx,modrm);
12461 IRTemp t0 = newTemp(Ity_I32);
12462 IRTemp t1 = newTemp(Ity_I32);
12463 delta += 1;
12464 assign( t0, binop( Iop_And32,
12465 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
12466 mkU32(1) ));
12467 assign( t1, binop( Iop_And32,
12468 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
12469 mkU32(2) ));
12470 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
12471 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
12472 nameXMMReg(rE), nameIReg32(rG));
12473 return delta;
12477 static Long dis_MOVMSKPD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
12479 UChar modrm = getUChar(delta);
12480 UInt rG = gregOfRexRM(pfx,modrm);
12481 UInt rE = eregOfRexRM(pfx,modrm);
12482 IRTemp t0 = newTemp(Ity_I32);
12483 IRTemp t1 = newTemp(Ity_I32);
12484 IRTemp t2 = newTemp(Ity_I32);
12485 IRTemp t3 = newTemp(Ity_I32);
12486 delta += 1;
12487 assign( t0, binop( Iop_And32,
12488 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
12489 mkU32(1) ));
12490 assign( t1, binop( Iop_And32,
12491 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
12492 mkU32(2) ));
12493 assign( t2, binop( Iop_And32,
12494 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
12495 mkU32(4) ));
12496 assign( t3, binop( Iop_And32,
12497 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
12498 mkU32(8) ));
12499 putIReg32( rG, binop(Iop_Or32,
12500 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
12501 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
12502 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
12503 return delta;
12507 /* Note, this also handles SSE(1) insns. */
12508 __attribute__((noinline))
12509 static
12510 Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
12511 const VexArchInfo* archinfo,
12512 const VexAbiInfo* vbi,
12513 Prefix pfx, Int sz, Long deltaIN,
12514 DisResult* dres )
12516 IRTemp addr = IRTemp_INVALID;
12517 IRTemp t0 = IRTemp_INVALID;
12518 IRTemp t1 = IRTemp_INVALID;
12519 IRTemp t2 = IRTemp_INVALID;
12520 IRTemp t3 = IRTemp_INVALID;
12521 IRTemp t4 = IRTemp_INVALID;
12522 IRTemp t5 = IRTemp_INVALID;
12523 IRTemp t6 = IRTemp_INVALID;
12524 UChar modrm = 0;
12525 Int alen = 0;
12526 HChar dis_buf[50];
12528 *decode_OK = False;
12530 Long delta = deltaIN;
12531 UChar opc = getUChar(delta);
12532 delta++;
12533 switch (opc) {
12535 case 0x10:
12536 if (have66noF2noF3(pfx)
12537 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12538 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
12539 modrm = getUChar(delta);
12540 if (epartIsReg(modrm)) {
12541 putXMMReg( gregOfRexRM(pfx,modrm),
12542 getXMMReg( eregOfRexRM(pfx,modrm) ));
12543 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12544 nameXMMReg(gregOfRexRM(pfx,modrm)));
12545 delta += 1;
12546 } else {
12547 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12548 putXMMReg( gregOfRexRM(pfx,modrm),
12549 loadLE(Ity_V128, mkexpr(addr)) );
12550 DIP("movupd %s,%s\n", dis_buf,
12551 nameXMMReg(gregOfRexRM(pfx,modrm)));
12552 delta += alen;
12554 goto decode_success;
12556 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
12557 G (lo half xmm). If E is mem, upper half of G is zeroed out.
12558 If E is reg, upper half of G is unchanged. */
12559 if (haveF2no66noF3(pfx)
12560 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
12561 modrm = getUChar(delta);
12562 if (epartIsReg(modrm)) {
12563 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12564 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
12565 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12566 nameXMMReg(gregOfRexRM(pfx,modrm)));
12567 delta += 1;
12568 } else {
12569 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12570 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12571 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
12572 loadLE(Ity_I64, mkexpr(addr)) );
12573 DIP("movsd %s,%s\n", dis_buf,
12574 nameXMMReg(gregOfRexRM(pfx,modrm)));
12575 delta += alen;
12577 goto decode_success;
12579 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
12580 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
12581 if (haveF3no66noF2(pfx)
12582 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12583 modrm = getUChar(delta);
12584 if (epartIsReg(modrm)) {
12585 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12586 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
12587 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12588 nameXMMReg(gregOfRexRM(pfx,modrm)));
12589 delta += 1;
12590 } else {
12591 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12592 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
12593 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
12594 loadLE(Ity_I32, mkexpr(addr)) );
12595 DIP("movss %s,%s\n", dis_buf,
12596 nameXMMReg(gregOfRexRM(pfx,modrm)));
12597 delta += alen;
12599 goto decode_success;
12601 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
12602 if (haveNo66noF2noF3(pfx)
12603 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12604 modrm = getUChar(delta);
12605 if (epartIsReg(modrm)) {
12606 putXMMReg( gregOfRexRM(pfx,modrm),
12607 getXMMReg( eregOfRexRM(pfx,modrm) ));
12608 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12609 nameXMMReg(gregOfRexRM(pfx,modrm)));
12610 delta += 1;
12611 } else {
12612 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12613 putXMMReg( gregOfRexRM(pfx,modrm),
12614 loadLE(Ity_V128, mkexpr(addr)) );
12615 DIP("movups %s,%s\n", dis_buf,
12616 nameXMMReg(gregOfRexRM(pfx,modrm)));
12617 delta += alen;
12619 goto decode_success;
12621 break;
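      /* For the 0F 10/11 family (and most of the SSE/SSE2 map handled in
         this function), the mandatory prefix selects the variant:
         no prefix = packed single (PS), 66 = packed double (PD),
         F3 = scalar single (SS), F2 = scalar double (SD).  Hence the
         haveNo66noF2noF3 / have66noF2noF3 / haveF3no66noF2 /
         haveF2no66noF3 tests on each arm. */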
12623 case 0x11:
12624 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
12625 or lo half xmm). */
12626 if (haveF2no66noF3(pfx)
12627 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12628 modrm = getUChar(delta);
12629 if (epartIsReg(modrm)) {
12630 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
12631 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
12632 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12633 nameXMMReg(eregOfRexRM(pfx,modrm)));
12634 delta += 1;
12635 } else {
12636 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12637 storeLE( mkexpr(addr),
12638 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
12639 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12640 dis_buf);
12641 delta += alen;
12643 goto decode_success;
12645 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
12646 or lo 1/4 xmm). */
12647 if (haveF3no66noF2(pfx) && sz == 4) {
12648 modrm = getUChar(delta);
12649 if (epartIsReg(modrm)) {
12650 /* fall through, we don't yet have a test case */
12651 } else {
12652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12653 storeLE( mkexpr(addr),
12654 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
12655 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12656 dis_buf);
12657 delta += alen;
12658 goto decode_success;
12661 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
12662 if (have66noF2noF3(pfx)
12663 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12664 modrm = getUChar(delta);
12665 if (epartIsReg(modrm)) {
12666 putXMMReg( eregOfRexRM(pfx,modrm),
12667 getXMMReg( gregOfRexRM(pfx,modrm) ) );
12668 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12669 nameXMMReg(eregOfRexRM(pfx,modrm)));
12670 delta += 1;
12671 } else {
12672 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12673 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12674 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12675 dis_buf );
12676 delta += alen;
12678 goto decode_success;
12680 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
12681 if (haveNo66noF2noF3(pfx)
12682 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12683 modrm = getUChar(delta);
12684 if (epartIsReg(modrm)) {
12685 /* fall through; awaiting test case */
12686 } else {
12687 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12688 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12689 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12690 dis_buf );
12691 delta += alen;
12692 goto decode_success;
12695 break;
12697 case 0x12:
12698 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
12699 /* Identical to MOVLPS ? */
12700 if (have66noF2noF3(pfx)
12701 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12702 modrm = getUChar(delta);
12703 if (epartIsReg(modrm)) {
12704 /* fall through; apparently reg-reg is not possible */
12705 } else {
12706 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12707 delta += alen;
12708 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12709 0/*lower lane*/,
12710 loadLE(Ity_I64, mkexpr(addr)) );
12711 DIP("movlpd %s, %s\n",
12712 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12713 goto decode_success;
12716 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
12717       /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
12718 if (haveNo66noF2noF3(pfx)
12719 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12720 modrm = getUChar(delta);
12721 if (epartIsReg(modrm)) {
12722 delta += 1;
12723 putXMMRegLane64( gregOfRexRM(pfx,modrm),
12724 0/*lower lane*/,
12725 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
12726 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12727 nameXMMReg(gregOfRexRM(pfx,modrm)));
12728 } else {
12729 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12730 delta += alen;
12731 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
12732 loadLE(Ity_I64, mkexpr(addr)) );
12733 DIP("movlps %s, %s\n",
12734 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
12736 goto decode_success;
12738 break;
12740 case 0x13:
12741 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
12742 if (haveNo66noF2noF3(pfx)
12743 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12744 modrm = getUChar(delta);
12745 if (!epartIsReg(modrm)) {
12746 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12747 delta += alen;
12748 storeLE( mkexpr(addr),
12749 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12750 0/*lower lane*/ ) );
12751 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12752 dis_buf);
12753 goto decode_success;
12755 /* else fall through */
12757 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
12758 /* Identical to MOVLPS ? */
12759 if (have66noF2noF3(pfx)
12760 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12761 modrm = getUChar(delta);
12762 if (!epartIsReg(modrm)) {
12763 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12764 delta += alen;
12765 storeLE( mkexpr(addr),
12766 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12767 0/*lower lane*/ ) );
12768 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12769 dis_buf);
12770 goto decode_success;
12772 /* else fall through */
12774 break;
12776 case 0x14:
12777 case 0x15:
12778 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
12779 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
12780 /* These just appear to be special cases of SHUFPS */
12781 if (haveNo66noF2noF3(pfx) && sz == 4) {
12782 Bool hi = toBool(opc == 0x15);
12783 IRTemp sV = newTemp(Ity_V128);
12784 IRTemp dV = newTemp(Ity_V128);
12785 modrm = getUChar(delta);
12786 UInt rG = gregOfRexRM(pfx,modrm);
12787 assign( dV, getXMMReg(rG) );
12788 if (epartIsReg(modrm)) {
12789 UInt rE = eregOfRexRM(pfx,modrm);
12790 assign( sV, getXMMReg(rE) );
12791 delta += 1;
12792 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12793 nameXMMReg(rE), nameXMMReg(rG));
12794 } else {
12795 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12796 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12797 delta += alen;
12798 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12799 dis_buf, nameXMMReg(rG));
12801 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
12802 putXMMReg( rG, mkexpr(res) );
12803 goto decode_success;
12805 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12806 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
12807       /* These just appear to be special cases of SHUFPD */
12808 if (have66noF2noF3(pfx)
12809 && sz == 2 /* could be 8 if rex also present */) {
12810 Bool hi = toBool(opc == 0x15);
12811 IRTemp sV = newTemp(Ity_V128);
12812 IRTemp dV = newTemp(Ity_V128);
12813 modrm = getUChar(delta);
12814 UInt rG = gregOfRexRM(pfx,modrm);
12815 assign( dV, getXMMReg(rG) );
12816 if (epartIsReg(modrm)) {
12817 UInt rE = eregOfRexRM(pfx,modrm);
12818 assign( sV, getXMMReg(rE) );
12819 delta += 1;
12820 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12821 nameXMMReg(rE), nameXMMReg(rG));
12822 } else {
12823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12824 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12825 delta += alen;
12826 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
12827 dis_buf, nameXMMReg(rG));
12829 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12830 putXMMReg( rG, mkexpr(res) );
12831 goto decode_success;
12833 break;
12835 case 0x16:
12836 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12837       /* This seems identical to MOVHPS.  This instruction encoding is
12838 completely crazy. */
12839 if (have66noF2noF3(pfx)
12840 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12841 modrm = getUChar(delta);
12842 if (epartIsReg(modrm)) {
12843 /* fall through; apparently reg-reg is not possible */
12844 } else {
12845 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12846 delta += alen;
12847 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12848 loadLE(Ity_I64, mkexpr(addr)) );
12849 DIP("movhpd %s,%s\n", dis_buf,
12850 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12851 goto decode_success;
12854 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12855 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12856 if (haveNo66noF2noF3(pfx)
12857 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12858 modrm = getUChar(delta);
12859 if (epartIsReg(modrm)) {
12860 delta += 1;
12861 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12862 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12863 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12864 nameXMMReg(gregOfRexRM(pfx,modrm)));
12865 } else {
12866 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12867 delta += alen;
12868 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12869 loadLE(Ity_I64, mkexpr(addr)) );
12870 DIP("movhps %s,%s\n", dis_buf,
12871 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12873 goto decode_success;
12875 break;
12877 case 0x17:
12878 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12879 if (haveNo66noF2noF3(pfx)
12880 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12881 modrm = getUChar(delta);
12882 if (!epartIsReg(modrm)) {
12883 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12884 delta += alen;
12885 storeLE( mkexpr(addr),
12886 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12887 1/*upper lane*/ ) );
12888 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12889 dis_buf);
12890 goto decode_success;
12892 /* else fall through */
12894 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12895 /* Again, this seems identical to MOVHPS. */
12896 if (have66noF2noF3(pfx)
12897 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12898 modrm = getUChar(delta);
12899 if (!epartIsReg(modrm)) {
12900 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12901 delta += alen;
12902 storeLE( mkexpr(addr),
12903 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12904 1/*upper lane*/ ) );
12905 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12906 dis_buf);
12907 goto decode_success;
12909 /* else fall through */
12911 break;
12913 case 0x18:
12914 /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
12915       /* 0F 18 /1 = PREFETCHT0  -- with various different hints */
12916       /* 0F 18 /2 = PREFETCHT1 */
12917       /* 0F 18 /3 = PREFETCHT2 */
12918 if (haveNo66noF2noF3(pfx)
12919 && !epartIsReg(getUChar(delta))
12920 && gregLO3ofRM(getUChar(delta)) >= 0
12921 && gregLO3ofRM(getUChar(delta)) <= 3) {
12922 const HChar* hintstr = "??";
12924 modrm = getUChar(delta);
12925 vassert(!epartIsReg(modrm));
12927 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12928 delta += alen;
12930 switch (gregLO3ofRM(modrm)) {
12931 case 0: hintstr = "nta"; break;
12932 case 1: hintstr = "t0"; break;
12933 case 2: hintstr = "t1"; break;
12934 case 3: hintstr = "t2"; break;
12935 default: vassert(0);
12938 DIP("prefetch%s %s\n", hintstr, dis_buf);
12939 goto decode_success;
12941 break;
12943 case 0x28:
12944 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12945 if (have66noF2noF3(pfx)
12946 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12947 modrm = getUChar(delta);
12948 if (epartIsReg(modrm)) {
12949 putXMMReg( gregOfRexRM(pfx,modrm),
12950 getXMMReg( eregOfRexRM(pfx,modrm) ));
12951 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12952 nameXMMReg(gregOfRexRM(pfx,modrm)));
12953 delta += 1;
12954 } else {
12955 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12956 gen_SIGNAL_if_not_16_aligned( vbi, addr );
12957 putXMMReg( gregOfRexRM(pfx,modrm),
12958 loadLE(Ity_V128, mkexpr(addr)) );
12959 DIP("movapd %s,%s\n", dis_buf,
12960 nameXMMReg(gregOfRexRM(pfx,modrm)));
12961 delta += alen;
12963 goto decode_success;
12965 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12966 if (haveNo66noF2noF3(pfx)
12967 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12968 modrm = getUChar(delta);
12969 if (epartIsReg(modrm)) {
12970 putXMMReg( gregOfRexRM(pfx,modrm),
12971 getXMMReg( eregOfRexRM(pfx,modrm) ));
12972 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12973 nameXMMReg(gregOfRexRM(pfx,modrm)));
12974 delta += 1;
12975 } else {
12976 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12977 gen_SIGNAL_if_not_16_aligned( vbi, addr );
12978 putXMMReg( gregOfRexRM(pfx,modrm),
12979 loadLE(Ity_V128, mkexpr(addr)) );
12980 DIP("movaps %s,%s\n", dis_buf,
12981 nameXMMReg(gregOfRexRM(pfx,modrm)));
12982 delta += alen;
12984 goto decode_success;
12986 break;
12988 case 0x29:
12989 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12990 if (haveNo66noF2noF3(pfx)
12991 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12992 modrm = getUChar(delta);
12993 if (epartIsReg(modrm)) {
12994 putXMMReg( eregOfRexRM(pfx,modrm),
12995 getXMMReg( gregOfRexRM(pfx,modrm) ));
12996 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12997 nameXMMReg(eregOfRexRM(pfx,modrm)));
12998 delta += 1;
12999 } else {
13000 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13001 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13002 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13003 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13004 dis_buf );
13005 delta += alen;
13007 goto decode_success;
13009 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
13010 if (have66noF2noF3(pfx)
13011 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13012 modrm = getUChar(delta);
13013 if (epartIsReg(modrm)) {
13014 putXMMReg( eregOfRexRM(pfx,modrm),
13015 getXMMReg( gregOfRexRM(pfx,modrm) ) );
13016 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13017 nameXMMReg(eregOfRexRM(pfx,modrm)));
13018 delta += 1;
13019 } else {
13020 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13021 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13022 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13023 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13024 dis_buf );
13025 delta += alen;
13027 goto decode_success;
13029 break;
13031 case 0x2A:
13032 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
13033 half xmm */
13034 if (haveNo66noF2noF3(pfx) && sz == 4) {
13035 IRTemp arg64 = newTemp(Ity_I64);
13036 IRTemp rmode = newTemp(Ity_I32);
13038 modrm = getUChar(delta);
13039 if (epartIsReg(modrm)) {
13040             /* Only switch to MMX mode if the source is an MMX register.
13041 See comments on CVTPI2PD for details. Fixes #357059. */
13042 do_MMX_preamble();
13043 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13044 delta += 1;
13045 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13046 nameXMMReg(gregOfRexRM(pfx,modrm)));
13047 } else {
13048 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13049 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13050 delta += alen;
13051 DIP("cvtpi2ps %s,%s\n", dis_buf,
13052 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13055 assign( rmode, get_sse_roundingmode() );
13057 putXMMRegLane32F(
13058 gregOfRexRM(pfx,modrm), 0,
13059 binop(Iop_F64toF32,
13060 mkexpr(rmode),
13061 unop(Iop_I32StoF64,
13062 unop(Iop_64to32, mkexpr(arg64)) )) );
13064 putXMMRegLane32F(
13065 gregOfRexRM(pfx,modrm), 1,
13066 binop(Iop_F64toF32,
13067 mkexpr(rmode),
13068 unop(Iop_I32StoF64,
13069 unop(Iop_64HIto32, mkexpr(arg64)) )) );
13071 goto decode_success;
13073 /* F3 0F 2A = CVTSI2SS
13074 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
13075 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
13076 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13077 IRTemp rmode = newTemp(Ity_I32);
13078 assign( rmode, get_sse_roundingmode() );
13079 modrm = getUChar(delta);
13080 if (sz == 4) {
13081 IRTemp arg32 = newTemp(Ity_I32);
13082 if (epartIsReg(modrm)) {
13083 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13084 delta += 1;
13085 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13086 nameXMMReg(gregOfRexRM(pfx,modrm)));
13087 } else {
13088 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13089 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13090 delta += alen;
13091 DIP("cvtsi2ss %s,%s\n", dis_buf,
13092 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13094 putXMMRegLane32F(
13095 gregOfRexRM(pfx,modrm), 0,
13096 binop(Iop_F64toF32,
13097 mkexpr(rmode),
13098 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
13099 } else {
13100 /* sz == 8 */
13101 IRTemp arg64 = newTemp(Ity_I64);
13102 if (epartIsReg(modrm)) {
13103 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13104 delta += 1;
13105 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13106 nameXMMReg(gregOfRexRM(pfx,modrm)));
13107 } else {
13108 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13109 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13110 delta += alen;
13111 DIP("cvtsi2ssq %s,%s\n", dis_buf,
13112 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13114 putXMMRegLane32F(
13115 gregOfRexRM(pfx,modrm), 0,
13116 binop(Iop_F64toF32,
13117 mkexpr(rmode),
13118 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
13120 goto decode_success;
13122 /* F2 0F 2A = CVTSI2SD
13123 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
13124 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
13126 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13127 modrm = getUChar(delta);
13128 if (sz == 4) {
13129 IRTemp arg32 = newTemp(Ity_I32);
13130 if (epartIsReg(modrm)) {
13131 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
13132 delta += 1;
13133 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13134 nameXMMReg(gregOfRexRM(pfx,modrm)));
13135 } else {
13136 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13137 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
13138 delta += alen;
13139 DIP("cvtsi2sdl %s,%s\n", dis_buf,
13140 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13142 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13143 unop(Iop_I32StoF64, mkexpr(arg32))
13145 } else {
13146 /* sz == 8 */
13147 IRTemp arg64 = newTemp(Ity_I64);
13148 if (epartIsReg(modrm)) {
13149 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
13150 delta += 1;
13151 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13152 nameXMMReg(gregOfRexRM(pfx,modrm)));
13153 } else {
13154 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13155 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13156 delta += alen;
13157 DIP("cvtsi2sdq %s,%s\n", dis_buf,
13158 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13160 putXMMRegLane64F(
13161 gregOfRexRM(pfx,modrm),
13163 binop( Iop_I64StoF64,
13164 get_sse_roundingmode(),
13165 mkexpr(arg64)
13169 goto decode_success;
13171 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
13172 xmm(G) */
13173 if (have66noF2noF3(pfx) && sz == 2) {
13174 IRTemp arg64 = newTemp(Ity_I64);
13176 modrm = getUChar(delta);
13177 if (epartIsReg(modrm)) {
13178             /* Only switch to MMX mode if the source is an MMX register.
13179 This is inconsistent with all other instructions which
13180 convert between XMM and (M64 or MMX), which always switch
13181                to MMX mode even if the 64-bit operand is M64 and not MMX.  At
13182 least, that's what the Intel docs seem to me to say.
13183 Fixes #210264. */
13184 do_MMX_preamble();
13185 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
13186 delta += 1;
13187 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
13188 nameXMMReg(gregOfRexRM(pfx,modrm)));
13189 } else {
13190 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13191 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
13192 delta += alen;
13193 DIP("cvtpi2pd %s,%s\n", dis_buf,
13194 nameXMMReg(gregOfRexRM(pfx,modrm)) );
13197 putXMMRegLane64F(
13198 gregOfRexRM(pfx,modrm), 0,
13199 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
13202 putXMMRegLane64F(
13203 gregOfRexRM(pfx,modrm), 1,
13204 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
13207 goto decode_success;
13209 break;
13211 case 0x2B:
13212 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
13213 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
13214 if ( (haveNo66noF2noF3(pfx) && sz == 4)
13215 || (have66noF2noF3(pfx) && sz == 2) ) {
13216 modrm = getUChar(delta);
13217 if (!epartIsReg(modrm)) {
13218 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13219 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13220 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13221 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
13222 dis_buf,
13223 nameXMMReg(gregOfRexRM(pfx,modrm)));
13224 delta += alen;
13225 goto decode_success;
13227 /* else fall through */
13229 break;
13231 case 0x2C:
13232 case 0x2D:
13233 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13234 I32 in mmx, according to prevailing SSE rounding mode */
13235 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
13236 I32 in mmx, rounding towards zero */
13237 if (haveNo66noF2noF3(pfx) && sz == 4) {
13238 IRTemp dst64 = newTemp(Ity_I64);
13239 IRTemp rmode = newTemp(Ity_I32);
13240 IRTemp f32lo = newTemp(Ity_F32);
13241 IRTemp f32hi = newTemp(Ity_F32);
13242 Bool r2zero = toBool(opc == 0x2C);
13244 do_MMX_preamble();
13245 modrm = getUChar(delta);
13247 if (epartIsReg(modrm)) {
13248 delta += 1;
13249 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13250 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
13251 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13252 nameXMMReg(eregOfRexRM(pfx,modrm)),
13253 nameMMXReg(gregLO3ofRM(modrm)));
13254 } else {
13255 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13256 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13257 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
13258 mkexpr(addr),
13259 mkU64(4) )));
13260 delta += alen;
13261 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
13262 dis_buf,
13263 nameMMXReg(gregLO3ofRM(modrm)));
13266 if (r2zero) {
13267 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13268 } else {
13269 assign( rmode, get_sse_roundingmode() );
13272 assign(
13273 dst64,
13274 binop( Iop_32HLto64,
13275 binop( Iop_F64toI32S,
13276 mkexpr(rmode),
13277 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
13278 binop( Iop_F64toI32S,
13279 mkexpr(rmode),
13280 unop( Iop_F32toF64, mkexpr(f32lo) ) )
13284 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13285 goto decode_success;
13287 /* F3 0F 2D = CVTSS2SI
13288 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13289 according to prevailing SSE rounding mode
13290 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13291 according to prevailing SSE rounding mode
13293 /* F3 0F 2C = CVTTSS2SI
13294 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
13295 truncating towards zero
13296 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
13297 truncating towards zero
13299 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
13300 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13301 goto decode_success;
13303 /* F2 0F 2D = CVTSD2SI
13304 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13305 according to prevailing SSE rounding mode
13306 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13307 according to prevailing SSE rounding mode
13309 /* F2 0F 2C = CVTTSD2SI
13310 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
13311 truncating towards zero
13312 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
13313 truncating towards zero
13315 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
13316 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
13317 goto decode_success;
13319 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13320 I32 in mmx, according to prevailing SSE rounding mode */
13321 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
13322 I32 in mmx, rounding towards zero */
13323 if (have66noF2noF3(pfx) && sz == 2) {
13324 IRTemp dst64 = newTemp(Ity_I64);
13325 IRTemp rmode = newTemp(Ity_I32);
13326 IRTemp f64lo = newTemp(Ity_F64);
13327 IRTemp f64hi = newTemp(Ity_F64);
13328 Bool r2zero = toBool(opc == 0x2C);
13330 do_MMX_preamble();
13331 modrm = getUChar(delta);
13333 if (epartIsReg(modrm)) {
13334 delta += 1;
13335 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13336 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
13337 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
13338 nameXMMReg(eregOfRexRM(pfx,modrm)),
13339 nameMMXReg(gregLO3ofRM(modrm)));
13340 } else {
13341 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13342 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13343 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
13344 mkexpr(addr),
13345 mkU64(8) )));
13346 delta += alen;
13347 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
13348 dis_buf,
13349 nameMMXReg(gregLO3ofRM(modrm)));
13352 if (r2zero) {
13353 assign(rmode, mkU32((UInt)Irrm_ZERO) );
13354 } else {
13355 assign( rmode, get_sse_roundingmode() );
13358 assign(
13359 dst64,
13360 binop( Iop_32HLto64,
13361 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
13362 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
13366 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
13367 goto decode_success;
13369 break;
13371 case 0x2E:
13372 case 0x2F:
13373 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
13374 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
13375 if (have66noF2noF3(pfx) && sz == 2) {
13376 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
13377 goto decode_success;
13379 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
13380 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
13381 if (haveNo66noF2noF3(pfx) && sz == 4) {
13382 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
13383 goto decode_success;
13385 break;
13387 case 0x50:
13388 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
13389 to 4 lowest bits of ireg(G) */
13390 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13391 && epartIsReg(getUChar(delta))) {
13392 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13393 set to 1, which has been known to happen:
13395 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
13397 20071106: Intel docs say that REX.W isn't redundant: when
13398 present, a 64-bit register is written; when not present, only
13399 the 32-bit half is written. However, testing on a Core2
13400 machine suggests the entire 64 bit register is written
13401 irrespective of the status of REX.W. That could be because
13402 of the default rule that says "if the lower half of a 64-bit
13403 register is written, the upper half is zeroed". By using
13404 putIReg32 here we inadvertently produce the same behaviour as
13405 the Core2, for the same reason -- putIReg32 implements said
13406 rule.
13408 AMD docs give no indication that REX.W is even valid for this
13409 insn. */
13410 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
13411 goto decode_success;
13413 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
13414 2 lowest bits of ireg(G) */
13415 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13416 /* sz == 8 is a kludge to handle insns with REX.W redundantly
13417 set to 1, which has been known to happen:
13418 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
13419 20071106: see further comments on MOVMSKPS implementation above.
13421 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
13422 goto decode_success;
13424 break;
13426 case 0x51:
13427 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
13428 if (haveF3no66noF2(pfx) && sz == 4) {
13429 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13430 "sqrtss", Iop_Sqrt32F0x4 );
13431 goto decode_success;
13433 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
13434 if (haveNo66noF2noF3(pfx) && sz == 4) {
13435 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13436 "sqrtps", Iop_Sqrt32Fx4 );
13437 goto decode_success;
13439 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
13440 if (haveF2no66noF3(pfx) && sz == 4) {
13441 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
13442 "sqrtsd", Iop_Sqrt64F0x2 );
13443 goto decode_success;
13445 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
13446 if (have66noF2noF3(pfx) && sz == 2) {
13447 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13448 "sqrtpd", Iop_Sqrt64Fx2 );
13449 goto decode_success;
13451 break;
13453 case 0x52:
13454 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
13455 if (haveF3no66noF2(pfx) && sz == 4) {
13456 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13457 "rsqrtss", Iop_RSqrtEst32F0x4 );
13458 goto decode_success;
13460 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
13461 if (haveNo66noF2noF3(pfx) && sz == 4) {
13462 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13463 "rsqrtps", Iop_RSqrtEst32Fx4 );
13464 goto decode_success;
13466 break;
13468 case 0x53:
13469 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
13470 if (haveF3no66noF2(pfx) && sz == 4) {
13471 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
13472 "rcpss", Iop_RecipEst32F0x4 );
13473 goto decode_success;
13475 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
13476 if (haveNo66noF2noF3(pfx) && sz == 4) {
13477 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
13478 "rcpps", Iop_RecipEst32Fx4 );
13479 goto decode_success;
13481 break;
13483 case 0x54:
13484 /* 0F 54 = ANDPS -- G = G and E */
13485 if (haveNo66noF2noF3(pfx) && sz == 4) {
13486 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
13487 goto decode_success;
13489 /* 66 0F 54 = ANDPD -- G = G and E */
13490 if (have66noF2noF3(pfx) && sz == 2) {
13491 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
13492 goto decode_success;
13494 break;
13496 case 0x55:
13497 /* 0F 55 = ANDNPS -- G = (not G) and E */
13498 if (haveNo66noF2noF3(pfx) && sz == 4) {
13499 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
13500 Iop_AndV128 );
13501 goto decode_success;
13503 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
13504 if (have66noF2noF3(pfx) && sz == 2) {
13505 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
13506 Iop_AndV128 );
13507 goto decode_success;
13509 break;
13511 case 0x56:
13512 /* 0F 56 = ORPS -- G = G or E */
13513 if (haveNo66noF2noF3(pfx) && sz == 4) {
13514 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
13515 goto decode_success;
13517 /* 66 0F 56 = ORPD -- G = G or E */
13518 if (have66noF2noF3(pfx) && sz == 2) {
13519 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
13520 goto decode_success;
13522 break;
13524 case 0x57:
13525 /* 66 0F 57 = XORPD -- G = G xor E */
13526 if (have66noF2noF3(pfx) && sz == 2) {
13527 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
13528 goto decode_success;
13530 /* 0F 57 = XORPS -- G = G xor E */
13531 if (haveNo66noF2noF3(pfx) && sz == 4) {
13532 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
13533 goto decode_success;
13535 break;
13537 case 0x58:
13538 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
13539 if (haveNo66noF2noF3(pfx) && sz == 4) {
13540 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
13541 goto decode_success;
13543 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
13544 if (haveF3no66noF2(pfx) && sz == 4) {
13545 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
13546 goto decode_success;
13548 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
13549 if (haveF2no66noF3(pfx)
13550 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13551 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
13552 goto decode_success;
13554 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
13555 if (have66noF2noF3(pfx)
13556 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13557 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
13558 goto decode_success;
13560 break;
13562 case 0x59:
13563 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
13564 if (haveF2no66noF3(pfx)
13565 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13566 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
13567 goto decode_success;
13569 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
13570 if (haveF3no66noF2(pfx) && sz == 4) {
13571 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
13572 goto decode_success;
13574 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
13575 if (haveNo66noF2noF3(pfx) && sz == 4) {
13576 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
13577 goto decode_success;
13579 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
13580 if (have66noF2noF3(pfx)
13581 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13582 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
13583 goto decode_success;
13585 break;
13587 case 0x5A:
13588 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
13589 F64 in xmm(G). */
13590 if (haveNo66noF2noF3(pfx)
13591 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13592 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
13593 goto decode_success;
13595 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
13596 low half xmm(G) */
13597 if (haveF3no66noF2(pfx) && sz == 4) {
13598 IRTemp f32lo = newTemp(Ity_F32);
13600 modrm = getUChar(delta);
13601 if (epartIsReg(modrm)) {
13602 delta += 1;
13603 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
13604 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13605 nameXMMReg(gregOfRexRM(pfx,modrm)));
13606 } else {
13607 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13608 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
13609 delta += alen;
13610 DIP("cvtss2sd %s,%s\n", dis_buf,
13611 nameXMMReg(gregOfRexRM(pfx,modrm)));
13614 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
13615 unop( Iop_F32toF64, mkexpr(f32lo) ) );
13617 goto decode_success;
13619 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
13620 low 1/4 xmm(G), according to prevailing SSE rounding mode */
13621 if (haveF2no66noF3(pfx) && sz == 4) {
13622 IRTemp rmode = newTemp(Ity_I32);
13623 IRTemp f64lo = newTemp(Ity_F64);
13625 modrm = getUChar(delta);
13626 if (epartIsReg(modrm)) {
13627 delta += 1;
13628 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
13629 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13630 nameXMMReg(gregOfRexRM(pfx,modrm)));
13631 } else {
13632 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13633 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
13634 delta += alen;
13635 DIP("cvtsd2ss %s,%s\n", dis_buf,
13636 nameXMMReg(gregOfRexRM(pfx,modrm)));
13639 assign( rmode, get_sse_roundingmode() );
13640 putXMMRegLane32F(
13641 gregOfRexRM(pfx,modrm), 0,
13642 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
13645 goto decode_success;
13647 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
13648 lo half xmm(G), rounding according to prevailing SSE rounding
13649 mode, and zero upper half */
13650 /* Note, this is practically identical to CVTPD2DQ. It would have
13651 been nice to merge them together. */
13652 if (have66noF2noF3(pfx) && sz == 2) {
13653 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13654 goto decode_success;
13656 break;
13658 case 0x5B:
13659 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13660 xmm(G), rounding towards zero */
13661 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
13662 xmm(G), as per the prevailing rounding mode */
13663 if ( (have66noF2noF3(pfx) && sz == 2)
13664 || (haveF3no66noF2(pfx) && sz == 4) ) {
13665 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
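/* Given the guard above, sz == 4 can only arise from the F3-prefixed
(truncating CVTTPS2DQ) form, and sz == 2 from the 66-prefixed
CVTPS2DQ form, which uses the current rounding mode. */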
13666 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
13667 goto decode_success;
13669 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
13670 xmm(G) */
13671 if (haveNo66noF2noF3(pfx) && sz == 4) {
13672 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
13673 goto decode_success;
13675 break;
13677 case 0x5C:
13678 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
13679 if (haveF3no66noF2(pfx) && sz == 4) {
13680 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
13681 goto decode_success;
13683 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
13684 if (haveF2no66noF3(pfx)
13685 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13686 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
13687 goto decode_success;
13689 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
13690 if (haveNo66noF2noF3(pfx) && sz == 4) {
13691 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
13692 goto decode_success;
13694 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
13695 if (have66noF2noF3(pfx) && sz == 2) {
13696 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
13697 goto decode_success;
13699 break;
13701 case 0x5D:
13702 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
13703 if (haveNo66noF2noF3(pfx) && sz == 4) {
13704 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
13705 goto decode_success;
13707 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
13708 if (haveF3no66noF2(pfx) && sz == 4) {
13709 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
13710 goto decode_success;
13712 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
13713 if (haveF2no66noF3(pfx)
13714 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13715 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
13716 goto decode_success;
13718 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
13719 if (have66noF2noF3(pfx) && sz == 2) {
13720 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
13721 goto decode_success;
13723 break;
13725 case 0x5E:
13726 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
13727 if (haveF2no66noF3(pfx) && sz == 4) {
13728 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
13729 goto decode_success;
13731 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
13732 if (haveNo66noF2noF3(pfx) && sz == 4) {
13733 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
13734 goto decode_success;
13736 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
13737 if (haveF3no66noF2(pfx) && sz == 4) {
13738 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
13739 goto decode_success;
13741 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
13742 if (have66noF2noF3(pfx) && sz == 2) {
13743 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
13744 goto decode_success;
13746 break;
13748 case 0x5F:
13749 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
13750 if (haveNo66noF2noF3(pfx) && sz == 4) {
13751 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
13752 goto decode_success;
13754 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
13755 if (haveF3no66noF2(pfx) && sz == 4) {
13756 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
13757 goto decode_success;
13759 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
13760 if (haveF2no66noF3(pfx)
13761 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13762 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
13763 goto decode_success;
13765 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
13766 if (have66noF2noF3(pfx) && sz == 2) {
13767 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
13768 goto decode_success;
13770 break;
13772 case 0x60:
13773 /* 66 0F 60 = PUNPCKLBW */
13774 if (have66noF2noF3(pfx) && sz == 2) {
13775 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13776 "punpcklbw",
13777 Iop_InterleaveLO8x16, True );
13778 goto decode_success;
13780 break;
13782 case 0x61:
13783 /* 66 0F 61 = PUNPCKLWD */
13784 if (have66noF2noF3(pfx) && sz == 2) {
13785 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13786 "punpcklwd",
13787 Iop_InterleaveLO16x8, True );
13788 goto decode_success;
13790 break;
13792 case 0x62:
13793 /* 66 0F 62 = PUNPCKLDQ */
13794 if (have66noF2noF3(pfx) && sz == 2) {
13795 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13796 "punpckldq",
13797 Iop_InterleaveLO32x4, True );
13798 goto decode_success;
13800 break;
13802 case 0x63:
13803 /* 66 0F 63 = PACKSSWB */
13804 if (have66noF2noF3(pfx) && sz == 2) {
13805 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13806 "packsswb",
13807 Iop_QNarrowBin16Sto8Sx16, True );
13808 goto decode_success;
13810 break;
13812 case 0x64:
13813 /* 66 0F 64 = PCMPGTB */
13814 if (have66noF2noF3(pfx) && sz == 2) {
13815 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13816 "pcmpgtb", Iop_CmpGT8Sx16, False );
13817 goto decode_success;
13819 break;
13821 case 0x65:
13822 /* 66 0F 65 = PCMPGTW */
13823 if (have66noF2noF3(pfx) && sz == 2) {
13824 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13825 "pcmpgtw", Iop_CmpGT16Sx8, False );
13826 goto decode_success;
13828 break;
13830 case 0x66:
13831 /* 66 0F 66 = PCMPGTD */
13832 if (have66noF2noF3(pfx) && sz == 2) {
13833 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13834 "pcmpgtd", Iop_CmpGT32Sx4, False );
13835 goto decode_success;
13837 break;
13839 case 0x67:
13840 /* 66 0F 67 = PACKUSWB */
13841 if (have66noF2noF3(pfx) && sz == 2) {
13842 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13843 "packuswb",
13844 Iop_QNarrowBin16Sto8Ux16, True );
13845 goto decode_success;
13847 break;
13849 case 0x68:
13850 /* 66 0F 68 = PUNPCKHBW */
13851 if (have66noF2noF3(pfx) && sz == 2) {
13852 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13853 "punpckhbw",
13854 Iop_InterleaveHI8x16, True );
13855 goto decode_success;
13857 break;
13859 case 0x69:
13860 /* 66 0F 69 = PUNPCKHWD */
13861 if (have66noF2noF3(pfx) && sz == 2) {
13862 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13863 "punpckhwd",
13864 Iop_InterleaveHI16x8, True );
13865 goto decode_success;
13867 break;
13869 case 0x6A:
13870 /* 66 0F 6A = PUNPCKHDQ */
13871 if (have66noF2noF3(pfx) && sz == 2) {
13872 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13873 "punpckhdq",
13874 Iop_InterleaveHI32x4, True );
13875 goto decode_success;
13877 break;
13879 case 0x6B:
13880 /* 66 0F 6B = PACKSSDW */
13881 if (have66noF2noF3(pfx) && sz == 2) {
13882 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13883 "packssdw",
13884 Iop_QNarrowBin32Sto16Sx8, True );
13885 goto decode_success;
13887 break;
13889 case 0x6C:
13890 /* 66 0F 6C = PUNPCKLQDQ */
13891 if (have66noF2noF3(pfx) && sz == 2) {
13892 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13893 "punpcklqdq",
13894 Iop_InterleaveLO64x2, True );
13895 goto decode_success;
13897 break;
13899 case 0x6D:
13900 /* 66 0F 6D = PUNPCKHQDQ */
13901 if (have66noF2noF3(pfx) && sz == 2) {
13902 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13903 "punpckhqdq",
13904 Iop_InterleaveHI64x2, True );
13905 goto decode_success;
13907 break;
13909 case 0x6E:
13910 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13911 zeroing high 3/4 of xmm. */
13912 /* or from ireg64/m64 to xmm lo 1/2,
13913 zeroing high 1/2 of xmm. */
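/* With the 66 prefix sz arrives here as 2; it is treated as the
32-bit (MOVD) form and rewritten to 4 below.  REX.W gives sz == 8,
selecting the 64-bit (MOVQ) form. */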
13914 if (have66noF2noF3(pfx)) {
13915 vassert(sz == 2 || sz == 8);
13916 if (sz == 2) sz = 4;
13917 modrm = getUChar(delta);
13918 if (epartIsReg(modrm)) {
13919 delta += 1;
13920 if (sz == 4) {
13921 putXMMReg(
13922 gregOfRexRM(pfx,modrm),
13923 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13925 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13926 nameXMMReg(gregOfRexRM(pfx,modrm)));
13927 } else {
13928 putXMMReg(
13929 gregOfRexRM(pfx,modrm),
13930 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13932 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13933 nameXMMReg(gregOfRexRM(pfx,modrm)));
13935 } else {
13936 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13937 delta += alen;
13938 putXMMReg(
13939 gregOfRexRM(pfx,modrm),
13940 sz == 4
13941 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13942 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13944 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13945 nameXMMReg(gregOfRexRM(pfx,modrm)));
13947 goto decode_success;
13949 break;
13951 case 0x6F:
13952 if (have66noF2noF3(pfx)
13953 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13954 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13955 modrm = getUChar(delta);
13956 if (epartIsReg(modrm)) {
13957 putXMMReg( gregOfRexRM(pfx,modrm),
13958 getXMMReg( eregOfRexRM(pfx,modrm) ));
13959 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13960 nameXMMReg(gregOfRexRM(pfx,modrm)));
13961 delta += 1;
13962 } else {
13963 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13964 gen_SIGNAL_if_not_16_aligned( vbi, addr );
13965 putXMMReg( gregOfRexRM(pfx,modrm),
13966 loadLE(Ity_V128, mkexpr(addr)) );
13967 DIP("movdqa %s,%s\n", dis_buf,
13968 nameXMMReg(gregOfRexRM(pfx,modrm)));
13969 delta += alen;
13971 goto decode_success;
13973 if (haveF3no66noF2(pfx)
13974 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13975 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13976 modrm = getUChar(delta);
13977 if (epartIsReg(modrm)) {
13978 putXMMReg( gregOfRexRM(pfx,modrm),
13979 getXMMReg( eregOfRexRM(pfx,modrm) ));
13980 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13981 nameXMMReg(gregOfRexRM(pfx,modrm)));
13982 delta += 1;
13983 } else {
13984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13985 putXMMReg( gregOfRexRM(pfx,modrm),
13986 loadLE(Ity_V128, mkexpr(addr)) );
13987 DIP("movdqu %s,%s\n", dis_buf,
13988 nameXMMReg(gregOfRexRM(pfx,modrm)));
13989 delta += alen;
13991 goto decode_success;
13993 break;
13995 case 0x70:
13996 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13997 if (have66noF2noF3(pfx) && sz == 2) {
13998 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
13999 goto decode_success;
14001 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14002 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
14003 if (haveNo66noF2noF3(pfx) && sz == 4) {
14004 Int order;
14005 IRTemp sV, dV, s3, s2, s1, s0;
14006 s3 = s2 = s1 = s0 = IRTemp_INVALID;
14007 sV = newTemp(Ity_I64);
14008 dV = newTemp(Ity_I64);
14009 do_MMX_preamble();
14010 modrm = getUChar(delta);
14011 if (epartIsReg(modrm)) {
14012 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14013 order = (Int)getUChar(delta+1);
14014 delta += 1+1;
14015 DIP("pshufw $%d,%s,%s\n", order,
14016 nameMMXReg(eregLO3ofRM(modrm)),
14017 nameMMXReg(gregLO3ofRM(modrm)));
14018 } else {
14019 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14020 1/*extra byte after amode*/ );
14021 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14022 order = (Int)getUChar(delta+alen);
14023 delta += 1+alen;
14024 DIP("pshufw $%d,%s,%s\n", order,
14025 dis_buf,
14026 nameMMXReg(gregLO3ofRM(modrm)));
14028 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
14029 # define SEL(n) \
14030 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
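/* Each 2-bit field of the order byte selects one source lane.  For
example order == 0x1B (binary 00 01 10 11) yields a destination
whose lanes 3..0 are s0,s1,s2,s3 -- the source with its word order
reversed. */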
14031 assign(dV,
14032 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
14033 SEL((order>>2)&3), SEL((order>>0)&3) )
14035 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
14036 # undef SEL
14037 goto decode_success;
14039 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
14040 mem) to G(xmm), and copy upper half */
14041 if (haveF2no66noF3(pfx) && sz == 4) {
14042 delta = dis_PSHUFxW_128( vbi, pfx, delta,
14043 False/*!isAvx*/, False/*!xIsH*/ );
14044 goto decode_success;
14046 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
14047 mem) to G(xmm), and copy lower half */
14048 if (haveF3no66noF2(pfx) && sz == 4) {
14049 delta = dis_PSHUFxW_128( vbi, pfx, delta,
14050 False/*!isAvx*/, True/*xIsH*/ );
14051 goto decode_success;
14053 break;
14055 case 0x71:
14056 /* 66 0F 71 /2 ib = PSRLW by immediate */
14057 if (have66noF2noF3(pfx) && sz == 2
14058 && epartIsReg(getUChar(delta))
14059 && gregLO3ofRM(getUChar(delta)) == 2) {
14060 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
14061 goto decode_success;
14063 /* 66 0F 71 /4 ib = PSRAW by immediate */
14064 if (have66noF2noF3(pfx) && sz == 2
14065 && epartIsReg(getUChar(delta))
14066 && gregLO3ofRM(getUChar(delta)) == 4) {
14067 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
14068 goto decode_success;
14070 /* 66 0F 71 /6 ib = PSLLW by immediate */
14071 if (have66noF2noF3(pfx) && sz == 2
14072 && epartIsReg(getUChar(delta))
14073 && gregLO3ofRM(getUChar(delta)) == 6) {
14074 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
14075 goto decode_success;
14077 break;
14079 case 0x72:
14080 /* 66 0F 72 /2 ib = PSRLD by immediate */
14081 if (have66noF2noF3(pfx) && sz == 2
14082 && epartIsReg(getUChar(delta))
14083 && gregLO3ofRM(getUChar(delta)) == 2) {
14084 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
14085 goto decode_success;
14087 /* 66 0F 72 /4 ib = PSRAD by immediate */
14088 if (have66noF2noF3(pfx) && sz == 2
14089 && epartIsReg(getUChar(delta))
14090 && gregLO3ofRM(getUChar(delta)) == 4) {
14091 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
14092 goto decode_success;
14094 /* 66 0F 72 /6 ib = PSLLD by immediate */
14095 if (have66noF2noF3(pfx) && sz == 2
14096 && epartIsReg(getUChar(delta))
14097 && gregLO3ofRM(getUChar(delta)) == 6) {
14098 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
14099 goto decode_success;
14101 break;
14103 case 0x73:
14104 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
14105 /* note, if mem case ever filled in, 1 byte after amode */
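/* The immediate for PSRLDQ (and PSLLDQ below) is a byte count, not a
bit count: the whole 128-bit value is shifted by imm bytes, and
counts of 16 or more give zero. */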
14106 if (have66noF2noF3(pfx) && sz == 2
14107 && epartIsReg(getUChar(delta))
14108 && gregLO3ofRM(getUChar(delta)) == 3) {
14109 Int imm = (Int)getUChar(delta+1);
14110 Int reg = eregOfRexRM(pfx,getUChar(delta));
14111 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
14112 delta += 2;
14113 IRTemp sV = newTemp(Ity_V128);
14114 assign( sV, getXMMReg(reg) );
14115 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
14116 goto decode_success;
14118 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
14119 /* note, if mem case ever filled in, 1 byte after amode */
14120 if (have66noF2noF3(pfx) && sz == 2
14121 && epartIsReg(getUChar(delta))
14122 && gregLO3ofRM(getUChar(delta)) == 7) {
14123 Int imm = (Int)getUChar(delta+1);
14124 Int reg = eregOfRexRM(pfx,getUChar(delta));
14125 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
14126 vassert(imm >= 0 && imm <= 255);
14127 delta += 2;
14128 IRTemp sV = newTemp(Ity_V128);
14129 assign( sV, getXMMReg(reg) );
14130 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
14131 goto decode_success;
14133 /* 66 0F 73 /2 ib = PSRLQ by immediate */
14134 if (have66noF2noF3(pfx) && sz == 2
14135 && epartIsReg(getUChar(delta))
14136 && gregLO3ofRM(getUChar(delta)) == 2) {
14137 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
14138 goto decode_success;
14140 /* 66 0F 73 /6 ib = PSLLQ by immediate */
14141 if (have66noF2noF3(pfx) && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14142 && epartIsReg(getUChar(delta))
14143 && gregLO3ofRM(getUChar(delta)) == 6) {
14144 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
14145 goto decode_success;
14147 break;
14149 case 0x74:
14150 /* 66 0F 74 = PCMPEQB */
14151 if (have66noF2noF3(pfx) && sz == 2) {
14152 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14153 "pcmpeqb", Iop_CmpEQ8x16, False );
14154 goto decode_success;
14156 break;
14158 case 0x75:
14159 /* 66 0F 75 = PCMPEQW */
14160 if (have66noF2noF3(pfx) && sz == 2) {
14161 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14162 "pcmpeqw", Iop_CmpEQ16x8, False );
14163 goto decode_success;
14165 break;
14167 case 0x76:
14168 /* 66 0F 76 = PCMPEQD */
14169 if (have66noF2noF3(pfx) && sz == 2) {
14170 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14171 "pcmpeqd", Iop_CmpEQ32x4, False );
14172 goto decode_success;
14174 break;
14176 case 0x7E:
14177 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
14178 G (lo half xmm). Upper half of G is zeroed out. */
14179 if (haveF3no66noF2(pfx)
14180 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14181 modrm = getUChar(delta);
14182 if (epartIsReg(modrm)) {
14183 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14184 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14185 /* zero bits 127:64 */
14186 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
14187 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14188 nameXMMReg(gregOfRexRM(pfx,modrm)));
14189 delta += 1;
14190 } else {
14191 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14192 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
14193 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
14194 loadLE(Ity_I64, mkexpr(addr)) );
14195 DIP("movsd %s,%s\n", dis_buf,
14196 nameXMMReg(gregOfRexRM(pfx,modrm)));
14197 delta += alen;
14199 goto decode_success;
14201 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
14202 /* or from xmm low 1/2 to ireg64 or m64. */
14203 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
14204 if (sz == 2) sz = 4;
14205 modrm = getUChar(delta);
14206 if (epartIsReg(modrm)) {
14207 delta += 1;
14208 if (sz == 4) {
14209 putIReg32( eregOfRexRM(pfx,modrm),
14210 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
14211 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14212 nameIReg32(eregOfRexRM(pfx,modrm)));
14213 } else {
14214 putIReg64( eregOfRexRM(pfx,modrm),
14215 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
14216 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14217 nameIReg64(eregOfRexRM(pfx,modrm)));
14219 } else {
14220 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14221 delta += alen;
14222 storeLE( mkexpr(addr),
14223 sz == 4
14224 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
14225 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
14226 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
14227 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14229 goto decode_success;
14231 break;
14233 case 0x7F:
14234 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
14235 if (haveF3no66noF2(pfx)
14236 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14237 modrm = getUChar(delta);
14238 if (epartIsReg(modrm)) {
14239 goto decode_failure; /* awaiting test case */
14240 delta += 1;
14241 putXMMReg( eregOfRexRM(pfx,modrm),
14242 getXMMReg(gregOfRexRM(pfx,modrm)) );
14243 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14244 nameXMMReg(eregOfRexRM(pfx,modrm)));
14245 } else {
14246 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14247 delta += alen;
14248 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14249 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14251 goto decode_success;
14253 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
14254 if (have66noF2noF3(pfx) && sz == 2) {
14255 modrm = getUChar(delta);
14256 if (epartIsReg(modrm)) {
14257 delta += 1;
14258 putXMMReg( eregOfRexRM(pfx,modrm),
14259 getXMMReg(gregOfRexRM(pfx,modrm)) );
14260 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
14261 nameXMMReg(eregOfRexRM(pfx,modrm)));
14262 } else {
14263 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
14264 gen_SIGNAL_if_not_16_aligned( vbi, addr );
14265 delta += alen;
14266 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14267 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
14269 goto decode_success;
14271 break;
14273 case 0xAE:
14274 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
14275 if (haveNo66noF2noF3(pfx)
14276 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14277 && sz == 4) {
14278 delta += 1;
14279 /* Insert a memory fence. It's sometimes important that these
14280 are carried through to the generated code. */
14281 stmt( IRStmt_MBE(Imbe_Fence) );
14282 DIP("sfence\n");
14283 goto decode_success;
14285 /* mindless duplication follows .. */
14286 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
14287 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
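/* All three fences are modelled identically, as a single Imbe_Fence
statement; a full fence is at least as strong as any of
LFENCE/MFENCE/SFENCE requires. */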
14288 if (haveNo66noF2noF3(pfx)
14289 && epartIsReg(getUChar(delta))
14290 && (gregLO3ofRM(getUChar(delta)) == 5
14291 || gregLO3ofRM(getUChar(delta)) == 6)
14292 && sz == 4) {
14293 delta += 1;
14294 /* Insert a memory fence. It's sometimes important that these
14295 are carried through to the generated code. */
14296 stmt( IRStmt_MBE(Imbe_Fence) );
14297 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
14298 goto decode_success;
14301 /* 0F AE /7 = CLFLUSH -- flush cache line */
14302 if (haveNo66noF2noF3(pfx)
14303 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
14304 && sz == 4) {
14306 /* This is something of a hack. We need to know the size of
14307 the cache line containing addr. Since we don't (easily),
14308 assume 256 on the basis that no real cache would have a
14309 line that big. It's safe to invalidate more stuff than we
14310 need, just inefficient. */
14311 ULong lineszB = 256ULL;
14313 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14314 delta += alen;
14316 /* Round addr down to the start of the containing block. */
14317 stmt( IRStmt_Put(
14318 OFFB_CMSTART,
14319 binop( Iop_And64,
14320 mkexpr(addr),
14321 mkU64( ~(lineszB-1) ))) );
14323 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
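/* Writing CMSTART/CMLEN and exiting with Ijk_InvalICache (below)
asks the core to discard any cached translations overlapping that
guest address range. */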
14325 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
14327 DIP("clflush %s\n", dis_buf);
14328 goto decode_success;
14331 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
14332 if (haveNo66noF2noF3(pfx)
14333 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
14334 && sz == 4) {
14335 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14336 goto decode_success;
14338 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
14339 if (haveNo66noF2noF3(pfx)
14340 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
14341 && sz == 4) {
14342 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
14343 goto decode_success;
14345 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */
14346 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14347 && !epartIsReg(getUChar(delta))
14348 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
14349 delta = dis_FXSAVE(vbi, pfx, delta, sz);
14350 goto decode_success;
14352 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory */
14353 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14354 && !epartIsReg(getUChar(delta))
14355 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
14356 delta = dis_FXRSTOR(vbi, pfx, delta, sz);
14357 goto decode_success;
14359 /* 0F AE /4 = XSAVE mem -- write x87, SSE, AVX state to memory */
14360 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14361 && !epartIsReg(getUChar(delta))
14362 && gregOfRexRM(pfx,getUChar(delta)) == 4
14363 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14364 delta = dis_XSAVE(vbi, pfx, delta, sz);
14365 goto decode_success;
14367 /* 0F AE /5 = XRSTOR mem -- read x87, SSE, AVX state from memory */
14368 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
14369 && !epartIsReg(getUChar(delta))
14370 && gregOfRexRM(pfx,getUChar(delta)) == 5
14371 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
14372 delta = dis_XRSTOR(vbi, pfx, delta, sz);
14373 goto decode_success;
14375 break;
14377 case 0xC2:
14378 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
14379 if (haveNo66noF2noF3(pfx) && sz == 4) {
14380 Long delta0 = delta;
14381 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
14382 if (delta > delta0) goto decode_success;
14384 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
14385 if (haveF3no66noF2(pfx) && sz == 4) {
14386 Long delta0 = delta;
14387 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
14388 if (delta > delta0) goto decode_success;
14390 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
14391 if (haveF2no66noF3(pfx) && sz == 4) {
14392 Long delta0 = delta;
14393 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
14394 if (delta > delta0) goto decode_success;
14396 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
14397 if (have66noF2noF3(pfx) && sz == 2) {
14398 Long delta0 = delta;
14399 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
14400 if (delta > delta0) goto decode_success;
14402 break;
14404 case 0xC3:
14405 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
14406 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14407 modrm = getUChar(delta);
14408 if (!epartIsReg(modrm)) {
14409 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14410 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
14411 DIP("movnti %s,%s\n", dis_buf,
14412 nameIRegG(sz, pfx, modrm));
14413 delta += alen;
14414 goto decode_success;
14416 /* else fall through */
14418 break;
14420 case 0xC4:
14421 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14422 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14423 put it into the specified lane of mmx(G). */
14424 if (haveNo66noF2noF3(pfx)
14425 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14426 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
14427 mmx reg. t4 is the new lane value. t5 is the original
14428 mmx value. t6 is the new mmx value. */
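/* The low two bits of the immediate pick the 16-bit lane to replace;
e.g. "pinsrw $2, %ax, %mm3" overwrites bits 47:32 of %mm3. */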
14429 Int lane;
14430 t4 = newTemp(Ity_I16);
14431 t5 = newTemp(Ity_I64);
14432 t6 = newTemp(Ity_I64);
14433 modrm = getUChar(delta);
14434 do_MMX_preamble();
14436 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
14437 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
14439 if (epartIsReg(modrm)) {
14440 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
14441 delta += 1+1;
14442 lane = getUChar(delta-1);
14443 DIP("pinsrw $%d,%s,%s\n", lane,
14444 nameIReg16(eregOfRexRM(pfx,modrm)),
14445 nameMMXReg(gregLO3ofRM(modrm)));
14446 } else {
14447 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14448 delta += 1+alen;
14449 lane = getUChar(delta-1);
14450 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14451 DIP("pinsrw $%d,%s,%s\n", lane,
14452 dis_buf,
14453 nameMMXReg(gregLO3ofRM(modrm)));
14456 switch (lane & 3) {
14457 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
14458 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
14459 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
14460 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
14461 default: vassert(0);
14463 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
14464 goto decode_success;
14466 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
14467 put it into the specified lane of xmm(G). */
14468 if (have66noF2noF3(pfx)
14469 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14470 Int lane;
14471 t4 = newTemp(Ity_I16);
14472 modrm = getUChar(delta);
14473 UInt rG = gregOfRexRM(pfx,modrm);
14474 if (epartIsReg(modrm)) {
14475 UInt rE = eregOfRexRM(pfx,modrm);
14476 assign(t4, getIReg16(rE));
14477 delta += 1+1;
14478 lane = getUChar(delta-1);
14479 DIP("pinsrw $%d,%s,%s\n",
14480 lane, nameIReg16(rE), nameXMMReg(rG));
14481 } else {
14482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
14483 1/*byte after the amode*/ );
14484 delta += 1+alen;
14485 lane = getUChar(delta-1);
14486 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
14487 DIP("pinsrw $%d,%s,%s\n",
14488 lane, dis_buf, nameXMMReg(rG));
14490 IRTemp src_vec = newTemp(Ity_V128);
14491 assign(src_vec, getXMMReg(rG));
14492 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
14493 putXMMReg(rG, mkexpr(res_vec));
14494 goto decode_success;
14496 break;
14498 case 0xC5:
14499 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14500 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
14501 zero-extend of it in ireg(G). */
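/* The low two bits of the immediate select the lane; e.g.
"pextrw $3, %mm2, %eax" zero-extends bits 63:48 of %mm2 into %eax. */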
14502 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
14503 modrm = getUChar(delta);
14504 if (epartIsReg(modrm)) {
14505 IRTemp sV = newTemp(Ity_I64);
14506 t5 = newTemp(Ity_I16);
14507 do_MMX_preamble();
14508 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
14509 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
14510 switch (getUChar(delta+1) & 3) {
14511 case 0: assign(t5, mkexpr(t0)); break;
14512 case 1: assign(t5, mkexpr(t1)); break;
14513 case 2: assign(t5, mkexpr(t2)); break;
14514 case 3: assign(t5, mkexpr(t3)); break;
14515 default: vassert(0);
14517 if (sz == 8)
14518 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
14519 else
14520 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
14521 DIP("pextrw $%d,%s,%s\n",
14522 (Int)getUChar(delta+1),
14523 nameMMXReg(eregLO3ofRM(modrm)),
14524 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
14525 : nameIReg32(gregOfRexRM(pfx,modrm))
14527 delta += 2;
14528 goto decode_success;
14530 /* else fall through */
14531 /* note, for anyone filling in the mem case: this insn has one
14532 byte after the amode and therefore you must pass 1 as the
14533 last arg to disAMode */
14535 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
14536 zero-extend of it in ireg(G). */
14537 if (have66noF2noF3(pfx)
14538 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14539 Long delta0 = delta;
14540 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
14541 False/*!isAvx*/ );
14542 if (delta > delta0) goto decode_success;
14543 /* else fall through -- decoding has failed */
14545 break;
14547 case 0xC6:
14548 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
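/* imm8 is four 2-bit selectors: bits 1:0 and 3:2 choose which of G's
lanes become result lanes 0 and 1, and bits 5:4 and 7:6 choose which
of E's lanes become result lanes 2 and 3. */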
14549 if (haveNo66noF2noF3(pfx) && sz == 4) {
14550 Int imm8 = 0;
14551 IRTemp sV = newTemp(Ity_V128);
14552 IRTemp dV = newTemp(Ity_V128);
14553 modrm = getUChar(delta);
14554 UInt rG = gregOfRexRM(pfx,modrm);
14555 assign( dV, getXMMReg(rG) );
14556 if (epartIsReg(modrm)) {
14557 UInt rE = eregOfRexRM(pfx,modrm);
14558 assign( sV, getXMMReg(rE) );
14559 imm8 = (Int)getUChar(delta+1);
14560 delta += 1+1;
14561 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
14562 } else {
14563 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14564 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14565 imm8 = (Int)getUChar(delta+alen);
14566 delta += 1+alen;
14567 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
14569 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
14570 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14571 goto decode_success;
14573 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
14574 if (have66noF2noF3(pfx) && sz == 2) {
14575 Int select;
14576 IRTemp sV = newTemp(Ity_V128);
14577 IRTemp dV = newTemp(Ity_V128);
14579 modrm = getUChar(delta);
14580 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
14582 if (epartIsReg(modrm)) {
14583 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
14584 select = (Int)getUChar(delta+1);
14585 delta += 1+1;
14586 DIP("shufpd $%d,%s,%s\n", select,
14587 nameXMMReg(eregOfRexRM(pfx,modrm)),
14588 nameXMMReg(gregOfRexRM(pfx,modrm)));
14589 } else {
14590 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
14591 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14592 select = getUChar(delta+alen);
14593 delta += 1+alen;
14594 DIP("shufpd $%d,%s,%s\n", select,
14595 dis_buf,
14596 nameXMMReg(gregOfRexRM(pfx,modrm)));
14599 IRTemp res = math_SHUFPD_128( sV, dV, select );
14600 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
14601 goto decode_success;
14603 break;
14605 case 0xD1:
14606 /* 66 0F D1 = PSRLW by E */
14607 if (have66noF2noF3(pfx) && sz == 2) {
14608 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
14609 goto decode_success;
14611 break;
14613 case 0xD2:
14614 /* 66 0F D2 = PSRLD by E */
14615 if (have66noF2noF3(pfx) && sz == 2) {
14616 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
14617 goto decode_success;
14619 break;
14621 case 0xD3:
14622 /* 66 0F D3 = PSRLQ by E */
14623 if (have66noF2noF3(pfx) && sz == 2) {
14624 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
14625 goto decode_success;
14627 break;
14629 case 0xD4:
14630 /* 66 0F D4 = PADDQ */
14631 if (have66noF2noF3(pfx) && sz == 2) {
14632 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14633 "paddq", Iop_Add64x2, False );
14634 goto decode_success;
14636 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14637 /* 0F D4 = PADDQ -- add 64x1 */
14638 if (haveNo66noF2noF3(pfx) && sz == 4) {
14639 do_MMX_preamble();
14640 delta = dis_MMXop_regmem_to_reg (
14641 vbi, pfx, delta, opc, "paddq", False );
14642 goto decode_success;
14644 break;
14646 case 0xD5:
14647 /* 66 0F D5 = PMULLW -- 16x8 multiply */
14648 if (have66noF2noF3(pfx) && sz == 2) {
14649 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14650 "pmullw", Iop_Mul16x8, False );
14651 goto decode_success;
14653 break;
14655 case 0xD6:
14656 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14657 hi half). */
14658 if (haveF3no66noF2(pfx) && sz == 4) {
14659 modrm = getUChar(delta);
14660 if (epartIsReg(modrm)) {
14661 do_MMX_preamble();
14662 putXMMReg( gregOfRexRM(pfx,modrm),
14663 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14664 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14665 nameXMMReg(gregOfRexRM(pfx,modrm)));
14666 delta += 1;
14667 goto decode_success;
14669 /* apparently no mem case for this insn */
14671 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14672 or lo half xmm). */
14673 if (have66noF2noF3(pfx)
14674 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14675 modrm = getUChar(delta);
14676 if (epartIsReg(modrm)) {
14677 /* fall through, awaiting test case */
14678 /* dst: lo half copied, hi half zeroed */
14679 } else {
14680 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14681 storeLE( mkexpr(addr),
14682 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14683 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14684 delta += alen;
14685 goto decode_success;
14688 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14689 if (haveF2no66noF3(pfx) && sz == 4) {
14690 modrm = getUChar(delta);
14691 if (epartIsReg(modrm)) {
14692 do_MMX_preamble();
14693 putMMXReg( gregLO3ofRM(modrm),
14694 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14695 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14696 nameMMXReg(gregLO3ofRM(modrm)));
14697 delta += 1;
14698 goto decode_success;
14700 /* apparently no mem case for this insn */
14702 break;
14704 case 0xD7:
14705 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14706 lanes in xmm(E), turn them into a byte, and put
14707 zero-extend of it in ireg(G). Doing this directly is just
14708 too cumbersome; give up therefore and call a helper. */
14709 if (have66noF2noF3(pfx)
14710 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14711 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14712 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14713 goto decode_success;
14715 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14716 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
14717 mmx(E), turn them into a byte, and put zero-extend of it in
14718 ireg(G). */
14719 if (haveNo66noF2noF3(pfx)
14720 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
14721 modrm = getUChar(delta);
14722 if (epartIsReg(modrm)) {
14723 do_MMX_preamble();
14724 t0 = newTemp(Ity_I64);
14725 t1 = newTemp(Ity_I32);
14726 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
14727 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
14728 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
14729 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14730 nameIReg32(gregOfRexRM(pfx,modrm)));
14731 delta += 1;
14732 goto decode_success;
14734 /* else fall through */
14736 break;
14738 case 0xD8:
14739 /* 66 0F D8 = PSUBUSB */
14740 if (have66noF2noF3(pfx) && sz == 2) {
14741 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14742 "psubusb", Iop_QSub8Ux16, False );
14743 goto decode_success;
14745 break;
14747 case 0xD9:
14748 /* 66 0F D9 = PSUBUSW */
14749 if (have66noF2noF3(pfx) && sz == 2) {
14750 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14751 "psubusw", Iop_QSub16Ux8, False );
14752 goto decode_success;
14754 break;
14756 case 0xDA:
14757 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14758 /* 0F DA = PMINUB -- 8x8 unsigned min */
14759 if (haveNo66noF2noF3(pfx) && sz == 4) {
14760 do_MMX_preamble();
14761 delta = dis_MMXop_regmem_to_reg (
14762 vbi, pfx, delta, opc, "pminub", False );
14763 goto decode_success;
14765 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14766 if (have66noF2noF3(pfx) && sz == 2) {
14767 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14768 "pminub", Iop_Min8Ux16, False );
14769 goto decode_success;
14771 break;
14773 case 0xDB:
14774 /* 66 0F DB = PAND */
14775 if (have66noF2noF3(pfx) && sz == 2) {
14776 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14777 goto decode_success;
14779 break;
14781 case 0xDC:
14782 /* 66 0F DC = PADDUSB */
14783 if (have66noF2noF3(pfx) && sz == 2) {
14784 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14785 "paddusb", Iop_QAdd8Ux16, False );
14786 goto decode_success;
14788 break;
14790 case 0xDD:
14791 /* 66 0F DD = PADDUSW */
14792 if (have66noF2noF3(pfx) && sz == 2) {
14793 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14794 "paddusw", Iop_QAdd16Ux8, False );
14795 goto decode_success;
14797 break;
14799 case 0xDE:
14800 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14801 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14802 if (haveNo66noF2noF3(pfx) && sz == 4) {
14803 do_MMX_preamble();
14804 delta = dis_MMXop_regmem_to_reg (
14805 vbi, pfx, delta, opc, "pmaxub", False );
14806 goto decode_success;
14808 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14809 if (have66noF2noF3(pfx) && sz == 2) {
14810 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14811 "pmaxub", Iop_Max8Ux16, False );
14812 goto decode_success;
14814 break;
14816 case 0xDF:
14817 /* 66 0F DF = PANDN */
14818 if (have66noF2noF3(pfx) && sz == 2) {
14819 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14820 goto decode_success;
14822 break;
14824 case 0xE0:
14825 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14826 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14827 if (haveNo66noF2noF3(pfx) && sz == 4) {
14828 do_MMX_preamble();
14829 delta = dis_MMXop_regmem_to_reg (
14830 vbi, pfx, delta, opc, "pavgb", False );
14831 goto decode_success;
14833 /* 66 0F E0 = PAVGB */
14834 if (have66noF2noF3(pfx) && sz == 2) {
14835 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14836 "pavgb", Iop_Avg8Ux16, False );
14837 goto decode_success;
14839 break;
14841 case 0xE1:
14842 /* 66 0F E1 = PSRAW by E */
14843 if (have66noF2noF3(pfx) && sz == 2) {
14844 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14845 goto decode_success;
14847 break;
14849 case 0xE2:
14850 /* 66 0F E2 = PSRAD by E */
14851 if (have66noF2noF3(pfx) && sz == 2) {
14852 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14853 goto decode_success;
14855 break;
14857 case 0xE3:
14858 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14859 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14860 if (haveNo66noF2noF3(pfx) && sz == 4) {
14861 do_MMX_preamble();
14862 delta = dis_MMXop_regmem_to_reg (
14863 vbi, pfx, delta, opc, "pavgw", False );
14864 goto decode_success;
14866 /* 66 0F E3 = PAVGW */
14867 if (have66noF2noF3(pfx) && sz == 2) {
14868 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14869 "pavgw", Iop_Avg16Ux8, False );
14870 goto decode_success;
14872 break;
14874 case 0xE4:
14875 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14876 /* 0F E4 = PMULHUW -- 16x4 hi-half of unsigned widening multiply */
14877 if (haveNo66noF2noF3(pfx) && sz == 4) {
14878 do_MMX_preamble();
14879 delta = dis_MMXop_regmem_to_reg (
14880 vbi, pfx, delta, opc, "pmuluh", False );
14881 goto decode_success;
14883 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14884 if (have66noF2noF3(pfx) && sz == 2) {
14885 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14886 "pmulhuw", Iop_MulHi16Ux8, False );
14887 goto decode_success;
14889 break;
14891 case 0xE5:
14892 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14893 if (have66noF2noF3(pfx) && sz == 2) {
14894 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14895 "pmulhw", Iop_MulHi16Sx8, False );
14896 goto decode_success;
14898 break;
14900 case 0xE6:
14901 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14902 lo half xmm(G), and zero upper half, rounding towards zero */
14903 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14904 lo half xmm(G), according to prevailing rounding mode, and zero
14905 upper half */
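/* Both are handled by dis_CVTxPD2DQ_128; the r2zero flag below is
true exactly for the 66-prefixed (sz == 2) CVTTPD2DQ case. */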
14906 if ( (haveF2no66noF3(pfx) && sz == 4)
14907 || (have66noF2noF3(pfx) && sz == 2) ) {
14908 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14909 toBool(sz == 2)/*r2zero*/);
14910 goto decode_success;
14912 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14913 F64 in xmm(G) */
14914 if (haveF3no66noF2(pfx) && sz == 4) {
14915 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
14916 goto decode_success;
14918 break;
14920 case 0xE7:
14921 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14922 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14923 Intel manual does not say anything about the usual business of
14924 the FP reg tags getting trashed whenever an MMX insn happens.
14925 So we just leave them alone.
14927 if (haveNo66noF2noF3(pfx) && sz == 4) {
14928 modrm = getUChar(delta);
14929 if (!epartIsReg(modrm)) {
14930 /* do_MMX_preamble(); Intel docs don't specify this */
14931 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14932 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14933 DIP("movntq %s,%s\n", dis_buf,
14934 nameMMXReg(gregLO3ofRM(modrm)));
14935 delta += alen;
14936 goto decode_success;
14938 /* else fall through */
14940 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14941 if (have66noF2noF3(pfx) && sz == 2) {
14942 modrm = getUChar(delta);
14943 if (!epartIsReg(modrm)) {
14944 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14945 gen_SIGNAL_if_not_16_aligned( vbi, addr );
14946 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14947 DIP("movntdq %s,%s\n", dis_buf,
14948 nameXMMReg(gregOfRexRM(pfx,modrm)));
14949 delta += alen;
14950 goto decode_success;
14952 /* else fall through */
14954 break;
14956 case 0xE8:
14957 /* 66 0F E8 = PSUBSB */
14958 if (have66noF2noF3(pfx) && sz == 2) {
14959 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14960 "psubsb", Iop_QSub8Sx16, False );
14961 goto decode_success;
14963 break;
14965 case 0xE9:
14966 /* 66 0F E9 = PSUBSW */
14967 if (have66noF2noF3(pfx) && sz == 2) {
14968 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14969 "psubsw", Iop_QSub16Sx8, False );
14970 goto decode_success;
14972 break;
14974 case 0xEA:
14975 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14976 /* 0F EA = PMINSW -- 16x4 signed min */
14977 if (haveNo66noF2noF3(pfx) && sz == 4) {
14978 do_MMX_preamble();
14979 delta = dis_MMXop_regmem_to_reg (
14980 vbi, pfx, delta, opc, "pminsw", False );
14981 goto decode_success;
14983 /* 66 0F EA = PMINSW -- 16x8 signed min */
14984 if (have66noF2noF3(pfx) && sz == 2) {
14985 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14986 "pminsw", Iop_Min16Sx8, False );
14987 goto decode_success;
14989 break;
14991 case 0xEB:
14992 /* 66 0F EB = POR */
14993 if (have66noF2noF3(pfx) && sz == 2) {
14994 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14995 goto decode_success;
14997 break;
14999 case 0xEC:
15000 /* 66 0F EC = PADDSB */
15001 if (have66noF2noF3(pfx) && sz == 2) {
15002 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15003 "paddsb", Iop_QAdd8Sx16, False );
15004 goto decode_success;
15006 break;
15008 case 0xED:
15009 /* 66 0F ED = PADDSW */
15010 if (have66noF2noF3(pfx) && sz == 2) {
15011 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15012 "paddsw", Iop_QAdd16Sx8, False );
15013 goto decode_success;
15015 break;
15017 case 0xEE:
15018 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15019 /* 0F EE = PMAXSW -- 16x4 signed max */
15020 if (haveNo66noF2noF3(pfx) && sz == 4) {
15021 do_MMX_preamble();
15022 delta = dis_MMXop_regmem_to_reg (
15023 vbi, pfx, delta, opc, "pmaxsw", False );
15024 goto decode_success;
15026 /* 66 0F EE = PMAXSW -- 16x8 signed max */
15027 if (have66noF2noF3(pfx) && sz == 2) {
15028 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15029 "pmaxsw", Iop_Max16Sx8, False );
15030 goto decode_success;
15032 break;
15034 case 0xEF:
15035 /* 66 0F EF = PXOR */
15036 if (have66noF2noF3(pfx) && sz == 2) {
15037 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
15038 goto decode_success;
15040 break;
15042 case 0xF1:
15043 /* 66 0F F1 = PSLLW by E */
15044 if (have66noF2noF3(pfx) && sz == 2) {
15045 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
15046 goto decode_success;
15048 break;
15050 case 0xF2:
15051 /* 66 0F F2 = PSLLD by E */
15052 if (have66noF2noF3(pfx) && sz == 2) {
15053 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
15054 goto decode_success;
15056 break;
15058 case 0xF3:
15059 /* 66 0F F3 = PSLLQ by E */
15060 if (have66noF2noF3(pfx) && sz == 2) {
15061 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
15062 goto decode_success;
15064 break;
15066 case 0xF4:
15067 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
15068 0 x 0 to form the lower 64-bit half and lanes 2 x 2 to form the
15069 upper 64-bit half */
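      /* Sketch of the lane arithmetic described above: writing each
         128-bit operand as four 32-bit lanes [3:2:1:0],
            res[ 63:0 ] = ZeroExt64(d.lane0) * ZeroExt64(s.lane0)
            res[127:64] = ZeroExt64(d.lane2) * ZeroExt64(s.lane2)
         and lanes 1 and 3 of both inputs are ignored; this is the pairing
         the math_PMULUDQ_128 helper used below is meant to produce. */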
15070 if (have66noF2noF3(pfx) && sz == 2) {
15071 IRTemp sV = newTemp(Ity_V128);
15072 IRTemp dV = newTemp(Ity_V128);
15073 modrm = getUChar(delta);
15074 UInt rG = gregOfRexRM(pfx,modrm);
15075 assign( dV, getXMMReg(rG) );
15076 if (epartIsReg(modrm)) {
15077 UInt rE = eregOfRexRM(pfx,modrm);
15078 assign( sV, getXMMReg(rE) );
15079 delta += 1;
15080 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15081 } else {
15082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15083 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15084 delta += alen;
15085 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
15087 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
15088 goto decode_success;
15090 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15091 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-bit lanes
15092 0 x 0 to form a 64-bit result */
15093 if (haveNo66noF2noF3(pfx) && sz == 4) {
15094 IRTemp sV = newTemp(Ity_I64);
15095 IRTemp dV = newTemp(Ity_I64);
15096 t1 = newTemp(Ity_I32);
15097 t0 = newTemp(Ity_I32);
15098 modrm = getUChar(delta);
15100 do_MMX_preamble();
15101 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15103 if (epartIsReg(modrm)) {
15104 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15105 delta += 1;
15106 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15107 nameMMXReg(gregLO3ofRM(modrm)));
15108 } else {
15109 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15110 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15111 delta += alen;
15112 DIP("pmuludq %s,%s\n", dis_buf,
15113 nameMMXReg(gregLO3ofRM(modrm)));
15116 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
15117 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
15118 putMMXReg( gregLO3ofRM(modrm),
15119 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
15120 goto decode_success;
15122 break;
15124 case 0xF5:
15125 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
15126 E(xmm or mem) to G(xmm) */
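      /* Worked example of the per-lane arithmetic: treating each operand
         as eight signed 16-bit lanes a[7..0] (from G) and b[7..0] (from E),
         the result is four signed 32-bit lanes
            res32[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1]     for i = 0..3
         with the multiplies and the final add done at 32-bit width. */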
15127 if (have66noF2noF3(pfx) && sz == 2) {
15128 IRTemp sV = newTemp(Ity_V128);
15129 IRTemp dV = newTemp(Ity_V128);
15130 modrm = getUChar(delta);
15131 UInt rG = gregOfRexRM(pfx,modrm);
15132 if (epartIsReg(modrm)) {
15133 UInt rE = eregOfRexRM(pfx,modrm);
15134 assign( sV, getXMMReg(rE) );
15135 delta += 1;
15136 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15137 } else {
15138 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15139 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15140 delta += alen;
15141 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
15143 assign( dV, getXMMReg(rG) );
15144 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
15145 goto decode_success;
15147 break;
15149 case 0xF6:
15150 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15151 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
15152 if (haveNo66noF2noF3(pfx) && sz == 4) {
15153 do_MMX_preamble();
15154 delta = dis_MMXop_regmem_to_reg (
15155 vbi, pfx, delta, opc, "psadbw", False );
15156 goto decode_success;
15158 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
15159 from E(xmm or mem) to G(xmm) */
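      /* Roughly, for each 64-bit half of the operands independently:
            sad = |a0-b0| + |a1-b1| + ... + |a7-b7|    (unsigned bytes)
         and the 64-bit result half is sad zero-extended, i.e. the sum
         lands in bits [15:0] and bits [63:16] are zero, matching the
         "48 zeroes ++ u16" shape described above. */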
15160 if (have66noF2noF3(pfx) && sz == 2) {
15161 IRTemp sV = newTemp(Ity_V128);
15162 IRTemp dV = newTemp(Ity_V128);
15163 modrm = getUChar(delta);
15164 UInt rG = gregOfRexRM(pfx,modrm);
15165 if (epartIsReg(modrm)) {
15166 UInt rE = eregOfRexRM(pfx,modrm);
15167 assign( sV, getXMMReg(rE) );
15168 delta += 1;
15169 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15170 } else {
15171 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15172 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15173 delta += alen;
15174 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
15176 assign( dV, getXMMReg(rG) );
15177 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
15179 goto decode_success;
15181 break;
15183 case 0xF7:
15184 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
15185 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
15186 if (haveNo66noF2noF3(pfx) && sz == 4) {
15187 Bool ok = False;
15188 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
15189 if (ok) goto decode_success;
15191 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
15192 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
15193 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
15194 goto decode_success;
15196 break;
15198 case 0xF8:
15199 /* 66 0F F8 = PSUBB */
15200 if (have66noF2noF3(pfx) && sz == 2) {
15201 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15202 "psubb", Iop_Sub8x16, False );
15203 goto decode_success;
15205 break;
15207 case 0xF9:
15208 /* 66 0F F9 = PSUBW */
15209 if (have66noF2noF3(pfx) && sz == 2) {
15210 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15211 "psubw", Iop_Sub16x8, False );
15212 goto decode_success;
15214 break;
15216 case 0xFA:
15217 /* 66 0F FA = PSUBD */
15218 if (have66noF2noF3(pfx) && sz == 2) {
15219 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15220 "psubd", Iop_Sub32x4, False );
15221 goto decode_success;
15223 break;
15225 case 0xFB:
15226 /* 66 0F FB = PSUBQ */
15227 if (have66noF2noF3(pfx) && sz == 2) {
15228 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15229 "psubq", Iop_Sub64x2, False );
15230 goto decode_success;
15232 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
15233 /* 0F FB = PSUBQ -- sub 64x1 */
15234 if (haveNo66noF2noF3(pfx) && sz == 4) {
15235 do_MMX_preamble();
15236 delta = dis_MMXop_regmem_to_reg (
15237 vbi, pfx, delta, opc, "psubq", False );
15238 goto decode_success;
15240 break;
15242 case 0xFC:
15243 /* 66 0F FC = PADDB */
15244 if (have66noF2noF3(pfx) && sz == 2) {
15245 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15246 "paddb", Iop_Add8x16, False );
15247 goto decode_success;
15249 break;
15251 case 0xFD:
15252 /* 66 0F FD = PADDW */
15253 if (have66noF2noF3(pfx) && sz == 2) {
15254 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15255 "paddw", Iop_Add16x8, False );
15256 goto decode_success;
15258 break;
15260 case 0xFE:
15261 /* 66 0F FE = PADDD */
15262 if (have66noF2noF3(pfx) && sz == 2) {
15263 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
15264 "paddd", Iop_Add32x4, False );
15265 goto decode_success;
15267 break;
15269 default:
15270 goto decode_failure;
15274 decode_failure:
15275 *decode_OK = False;
15276 return deltaIN;
15278 decode_success:
15279 *decode_OK = True;
15280 return delta;
15284 /*------------------------------------------------------------*/
15285 /*--- ---*/
15286 /*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3 ---*/
15287 /*--- ---*/
15288 /*------------------------------------------------------------*/
15290 static Long dis_MOVDDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15291 Long delta, Bool isAvx )
15293 IRTemp addr = IRTemp_INVALID;
15294 Int alen = 0;
15295 HChar dis_buf[50];
15296 IRTemp sV = newTemp(Ity_V128);
15297 IRTemp d0 = newTemp(Ity_I64);
15298 UChar modrm = getUChar(delta);
15299 UInt rG = gregOfRexRM(pfx,modrm);
15300 if (epartIsReg(modrm)) {
15301 UInt rE = eregOfRexRM(pfx,modrm);
15302 assign( sV, getXMMReg(rE) );
15303 DIP("%smovddup %s,%s\n",
15304 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
15305 delta += 1;
15306 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
15307 } else {
15308 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15309 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15310 DIP("%smovddup %s,%s\n",
15311 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
15312 delta += alen;
15314 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15315 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
15316 return delta;
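   /* Usage sketch: movddup with a register source [hi64 : lo64] yields
      [lo64 : lo64]; with a memory source only 8 bytes are read and
      duplicated into both halves, which is why the memory case above
      loads Ity_I64 rather than Ity_V128. */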
15320 static Long dis_MOVDDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15321 Long delta )
15323 IRTemp addr = IRTemp_INVALID;
15324 Int alen = 0;
15325 HChar dis_buf[50];
15326 IRTemp d0 = newTemp(Ity_I64);
15327 IRTemp d1 = newTemp(Ity_I64);
15328 UChar modrm = getUChar(delta);
15329 UInt rG = gregOfRexRM(pfx,modrm);
15330 if (epartIsReg(modrm)) {
15331 UInt rE = eregOfRexRM(pfx,modrm);
15332 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
15333 delta += 1;
15334 assign ( d0, getYMMRegLane64(rE, 0) );
15335 assign ( d1, getYMMRegLane64(rE, 2) );
15336 } else {
15337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15338 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
15339 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
15340 mkexpr(addr), mkU64(16))) );
15341 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
15342 delta += alen;
15344 putYMMRegLane64( rG, 0, mkexpr(d0) );
15345 putYMMRegLane64( rG, 1, mkexpr(d0) );
15346 putYMMRegLane64( rG, 2, mkexpr(d1) );
15347 putYMMRegLane64( rG, 3, mkexpr(d1) );
15348 return delta;
15352 static Long dis_MOVSxDUP_128 ( const VexAbiInfo* vbi, Prefix pfx,
15353 Long delta, Bool isAvx, Bool isL )
15355 IRTemp addr = IRTemp_INVALID;
15356 Int alen = 0;
15357 HChar dis_buf[50];
15358 IRTemp sV = newTemp(Ity_V128);
15359 UChar modrm = getUChar(delta);
15360 UInt rG = gregOfRexRM(pfx,modrm);
15361 IRTemp s3, s2, s1, s0;
15362 s3 = s2 = s1 = s0 = IRTemp_INVALID;
15363 if (epartIsReg(modrm)) {
15364 UInt rE = eregOfRexRM(pfx,modrm);
15365 assign( sV, getXMMReg(rE) );
15366 DIP("%smovs%cdup %s,%s\n",
15367 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
15368 delta += 1;
15369 } else {
15370 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15371 if (!isAvx)
15372 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15373 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15374 DIP("%smovs%cdup %s,%s\n",
15375 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
15376 delta += alen;
15378 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15379 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15380 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
15381 : mkV128from32s( s3, s3, s1, s1 ) );
15382 return delta;
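   /* Usage sketch: with source 32-bit lanes [s3:s2:s1:s0],
         movsldup (isL == True)  gives [s2:s2:s0:s0]
         movshdup (isL == False) gives [s3:s3:s1:s1]
      matching the (2:2:0:0) / (3:3:1:1) descriptions at the decode
      sites further down. */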
15386 static Long dis_MOVSxDUP_256 ( const VexAbiInfo* vbi, Prefix pfx,
15387 Long delta, Bool isL )
15389 IRTemp addr = IRTemp_INVALID;
15390 Int alen = 0;
15391 HChar dis_buf[50];
15392 IRTemp sV = newTemp(Ity_V256);
15393 UChar modrm = getUChar(delta);
15394 UInt rG = gregOfRexRM(pfx,modrm);
15395 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
15396 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
15397 if (epartIsReg(modrm)) {
15398 UInt rE = eregOfRexRM(pfx,modrm);
15399 assign( sV, getYMMReg(rE) );
15400 DIP("vmovs%cdup %s,%s\n",
15401 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
15402 delta += 1;
15403 } else {
15404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15405 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15406 DIP("vmovs%cdup %s,%s\n",
15407 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
15408 delta += alen;
15410 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
15411 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
15412 : mkV128from32s( s7, s7, s5, s5 ) );
15413 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
15414 : mkV128from32s( s3, s3, s1, s1 ) );
15415 return delta;
15419 static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15421 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15422 IRTemp leftV = newTemp(Ity_V128);
15423 IRTemp rightV = newTemp(Ity_V128);
15424 IRTemp rm = newTemp(Ity_I32);
15425 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15427 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
15428 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
15430 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
15431 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
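   /* With this pairing, leftV (+|-) rightV gives, lane by lane,
         add: [ s3+s2 : s1+s0 : d3+d2 : d1+d0 ]
         sub: [ s2-s3 : s0-s1 : d2-d3 : d0-d1 ]
      so the horizontal results of the E operand land in the upper two
      lanes and those of the G operand in the lower two lanes, as
      HADDPS/HSUBPS require. */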
15433 IRTemp res = newTemp(Ity_V128);
15434 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15435 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
15436 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15437 return res;
15441 static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
15443 IRTemp s1, s0, d1, d0;
15444 IRTemp leftV = newTemp(Ity_V128);
15445 IRTemp rightV = newTemp(Ity_V128);
15446 IRTemp rm = newTemp(Ity_I32);
15447 s1 = s0 = d1 = d0 = IRTemp_INVALID;
15449 breakupV128to64s( sV, &s1, &s0 );
15450 breakupV128to64s( dV, &d1, &d0 );
15452 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
15453 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
15455 IRTemp res = newTemp(Ity_V128);
15456 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
15457 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
15458 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
15459 return res;
15463 __attribute__((noinline))
15464 static
15465 Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
15466 const VexAbiInfo* vbi,
15467 Prefix pfx, Int sz, Long deltaIN )
15469 IRTemp addr = IRTemp_INVALID;
15470 UChar modrm = 0;
15471 Int alen = 0;
15472 HChar dis_buf[50];
15474 *decode_OK = False;
15476 Long delta = deltaIN;
15477 UChar opc = getUChar(delta);
15478 delta++;
15479 switch (opc) {
15481 case 0x12:
15482 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
15483 duplicating some lanes (2:2:0:0). */
15484 if (haveF3no66noF2(pfx) && sz == 4) {
15485 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15486 True/*isL*/ );
15487 goto decode_success;
15489 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
15490 duplicating some lanes (1:0:1:0). */
15491 if (haveF2no66noF3(pfx)
15492 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
15493 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
15494 goto decode_success;
15496 break;
15498 case 0x16:
15499 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
15500 duplicating some lanes (3:3:1:1). */
15501 if (haveF3no66noF2(pfx) && sz == 4) {
15502 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
15503 False/*!isL*/ );
15504 goto decode_success;
15506 break;
15508 case 0x7C:
15509 case 0x7D:
15510 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
15511 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
15512 if (haveF2no66noF3(pfx) && sz == 4) {
15513 IRTemp eV = newTemp(Ity_V128);
15514 IRTemp gV = newTemp(Ity_V128);
15515 Bool isAdd = opc == 0x7C;
15516 const HChar* str = isAdd ? "add" : "sub";
15517 modrm = getUChar(delta);
15518 UInt rG = gregOfRexRM(pfx,modrm);
15519 if (epartIsReg(modrm)) {
15520 UInt rE = eregOfRexRM(pfx,modrm);
15521 assign( eV, getXMMReg(rE) );
15522 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15523 delta += 1;
15524 } else {
15525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15526 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15527 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
15528 delta += alen;
15531 assign( gV, getXMMReg(rG) );
15532 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
15533 goto decode_success;
15535 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
15536 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
15537 if (have66noF2noF3(pfx) && sz == 2) {
15538 IRTemp eV = newTemp(Ity_V128);
15539 IRTemp gV = newTemp(Ity_V128);
15540 Bool isAdd = opc == 0x7C;
15541 const HChar* str = isAdd ? "add" : "sub";
15542 modrm = getUChar(delta);
15543 UInt rG = gregOfRexRM(pfx,modrm);
15544 if (epartIsReg(modrm)) {
15545 UInt rE = eregOfRexRM(pfx,modrm);
15546 assign( eV, getXMMReg(rE) );
15547 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
15548 delta += 1;
15549 } else {
15550 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15551 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15552 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
15553 delta += alen;
15556 assign( gV, getXMMReg(rG) );
15557 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
15558 goto decode_success;
15560 break;
15562 case 0xD0:
15563 /* 66 0F D0 = ADDSUBPD -- 64x2 +/- from E (mem or xmm) to G (xmm). */
15564 if (have66noF2noF3(pfx) && sz == 2) {
15565 IRTemp eV = newTemp(Ity_V128);
15566 IRTemp gV = newTemp(Ity_V128);
15567 modrm = getUChar(delta);
15568 UInt rG = gregOfRexRM(pfx,modrm);
15569 if (epartIsReg(modrm)) {
15570 UInt rE = eregOfRexRM(pfx,modrm);
15571 assign( eV, getXMMReg(rE) );
15572 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15573 delta += 1;
15574 } else {
15575 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15576 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15577 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
15578 delta += alen;
15581 assign( gV, getXMMReg(rG) );
15582 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
15583 goto decode_success;
15585 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
15586 if (haveF2no66noF3(pfx) && sz == 4) {
15587 IRTemp eV = newTemp(Ity_V128);
15588 IRTemp gV = newTemp(Ity_V128);
15589 modrm = getUChar(delta);
15590 UInt rG = gregOfRexRM(pfx,modrm);
15593 if (epartIsReg(modrm)) {
15594 UInt rE = eregOfRexRM(pfx,modrm);
15595 assign( eV, getXMMReg(rE) );
15596 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
15597 delta += 1;
15598 } else {
15599 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15600 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
15601 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
15602 delta += alen;
15605 assign( gV, getXMMReg(rG) );
15606 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
15607 goto decode_success;
15609 break;
15611 case 0xF0:
15612 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
15613 if (haveF2no66noF3(pfx) && sz == 4) {
15614 modrm = getUChar(delta);
15615 if (epartIsReg(modrm)) {
15616 goto decode_failure;
15617 } else {
15618 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15619 putXMMReg( gregOfRexRM(pfx,modrm),
15620 loadLE(Ity_V128, mkexpr(addr)) );
15621 DIP("lddqu %s,%s\n", dis_buf,
15622 nameXMMReg(gregOfRexRM(pfx,modrm)));
15623 delta += alen;
15625 goto decode_success;
15627 break;
15629 default:
15630 goto decode_failure;
15634 decode_failure:
15635 *decode_OK = False;
15636 return deltaIN;
15638 decode_success:
15639 *decode_OK = True;
15640 return delta;
15644 /*------------------------------------------------------------*/
15645 /*--- ---*/
15646 /*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
15647 /*--- ---*/
15648 /*------------------------------------------------------------*/
15650 static
15651 IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15653 IRTemp halfMask = newTemp(Ity_I64);
15654 assign(halfMask, mkU64(0x8F8F8F8F8F8F8F8FULL));
15655 IRExpr* mask = binop(Iop_64HLtoV128, mkexpr(halfMask), mkexpr(halfMask));
15656 IRTemp res = newTemp(Ity_V128);
15657 assign(res,
15658 binop(Iop_PermOrZero8x16,
15659 mkexpr(dV),
15660 // Keep only bit 7 and bits [3:0] of each source operand lane
15661 binop(Iop_AndV128, mkexpr(sV), mask)
15663 return res;
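   /* Worked example of the intended semantics: for each byte i of the
      perm vector sV, if bit 7 is set the result byte is 0, otherwise the
      result byte is dV[sV[i] & 0x0F].  So a perm byte of 0x83 zeroes that
      lane, while 0x05 selects byte 5 of dV.  Masking with 0x8F keeps
      exactly the bits (7 and 3:0) that PSHUFB itself honours. */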
15667 static
15668 IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15670 IRTemp sHi, sLo, dHi, dLo;
15671 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15672 breakupV256toV128s( dV, &dHi, &dLo);
15673 breakupV256toV128s( sV, &sHi, &sLo);
15674 IRTemp res = newTemp(Ity_V256);
15675 assign(res, binop(Iop_V128HLtoV256,
15676 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15677 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15678 return res;
15682 static Long dis_PHADD_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15683 Bool isAvx, UChar opc )
15685 IRTemp addr = IRTemp_INVALID;
15686 Int alen = 0;
15687 HChar dis_buf[50];
15688 const HChar* str = "???";
15689 IROp opV64 = Iop_INVALID;
15690 IROp opCatO = Iop_CatOddLanes16x4;
15691 IROp opCatE = Iop_CatEvenLanes16x4;
15692 IRTemp sV = newTemp(Ity_V128);
15693 IRTemp dV = newTemp(Ity_V128);
15694 IRTemp sHi = newTemp(Ity_I64);
15695 IRTemp sLo = newTemp(Ity_I64);
15696 IRTemp dHi = newTemp(Ity_I64);
15697 IRTemp dLo = newTemp(Ity_I64);
15698 UChar modrm = getUChar(delta);
15699 UInt rG = gregOfRexRM(pfx,modrm);
15700 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15702 switch (opc) {
15703 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15704 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15705 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15706 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15707 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15708 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15709 default: vassert(0);
15711 if (opc == 0x02 || opc == 0x06) {
15712 opCatO = Iop_InterleaveHI32x2;
15713 opCatE = Iop_InterleaveLO32x2;
15716 assign( dV, getXMMReg(rV) );
15718 if (epartIsReg(modrm)) {
15719 UInt rE = eregOfRexRM(pfx,modrm);
15720 assign( sV, getXMMReg(rE) );
15721 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15722 nameXMMReg(rE), nameXMMReg(rG));
15723 delta += 1;
15724 } else {
15725 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15726 if (!isAvx)
15727 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15728 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15729 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15730 dis_buf, nameXMMReg(rG));
15731 delta += alen;
15734 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15735 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15736 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15737 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15739 /* This isn't a particularly efficient way to compute the
15740 result, but at least it avoids a proliferation of IROps,
15741 hence avoids complicating all the backends. */
15743 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15744 ( rG,
15745 binop(Iop_64HLtoV128,
15746 binop(opV64,
15747 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15748 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15749 binop(opV64,
15750 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15751 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15752 return delta;
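   /* Worked example of the Cat trick for PHADDW: writing sHi:sLo as
      16-bit lanes [s7..s4]:[s3..s0],
         CatEvenLanes16x4(sHi,sLo) = [s6:s4:s2:s0]
         CatOddLanes16x4 (sHi,sLo) = [s7:s5:s3:s1]
      and adding them lane-wise yields [s7+s6 : s5+s4 : s3+s2 : s1+s0],
      the upper 64 bits of the PHADDW result; the same construction on
      dHi:dLo gives the lower 64 bits.  For the 32-bit forms the
      Interleave ops play the same even/odd selection role. */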
15756 static Long dis_PHADD_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
15757 UChar opc )
15759 IRTemp addr = IRTemp_INVALID;
15760 Int alen = 0;
15761 HChar dis_buf[50];
15762 const HChar* str = "???";
15763 IROp opV64 = Iop_INVALID;
15764 IROp opCatO = Iop_CatOddLanes16x4;
15765 IROp opCatE = Iop_CatEvenLanes16x4;
15766 IRTemp sV = newTemp(Ity_V256);
15767 IRTemp dV = newTemp(Ity_V256);
15768 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15769 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15770 UChar modrm = getUChar(delta);
15771 UInt rG = gregOfRexRM(pfx,modrm);
15772 UInt rV = getVexNvvvv(pfx);
15774 switch (opc) {
15775 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15776 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15777 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15778 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15779 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15780 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15781 default: vassert(0);
15783 if (opc == 0x02 || opc == 0x06) {
15784 opCatO = Iop_InterleaveHI32x2;
15785 opCatE = Iop_InterleaveLO32x2;
15788 assign( dV, getYMMReg(rV) );
15790 if (epartIsReg(modrm)) {
15791 UInt rE = eregOfRexRM(pfx,modrm);
15792 assign( sV, getYMMReg(rE) );
15793 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15794 delta += 1;
15795 } else {
15796 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15797 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15798 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15799 delta += alen;
15802 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15803 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15805 /* This isn't a particularly efficient way to compute the
15806 result, but at least it avoids a proliferation of IROps,
15807 hence avoids complicating all the backends. */
15809 putYMMReg( rG,
15810 binop(Iop_V128HLtoV256,
15811 binop(Iop_64HLtoV128,
15812 binop(opV64,
15813 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15814 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15815 binop(opV64,
15816 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15817 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15818 binop(Iop_64HLtoV128,
15819 binop(opV64,
15820 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15821 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15822 binop(opV64,
15823 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15824 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15825 return delta;
15829 static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15831 IRTemp res = newTemp(Ity_V128);
15832 assign(res, binop(Iop_PwExtUSMulQAdd8x16, mkexpr(dV), mkexpr(sV)));
15833 return res;
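   /* In outline, Iop_PwExtUSMulQAdd8x16(dV, sV) is intended to compute,
      for each 16-bit result lane i,
         res16[i] = SatS16( ZeroExt(d8[2i])   * SignExt(s8[2i])
                          + ZeroExt(d8[2i+1]) * SignExt(s8[2i+1]) )
      i.e. unsigned bytes come from the destination/G operand and signed
      bytes from the source/E operand, with signed saturation on the final
      add, as PMADDUBSW requires.  The MMX fallback further down spells
      the same calculation out with explicit shifts and Iop_QAdd16Sx4. */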
15837 static
15838 IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15840 IRTemp sHi, sLo, dHi, dLo;
15841 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15842 breakupV256toV128s( dV, &dHi, &dLo);
15843 breakupV256toV128s( sV, &sHi, &sLo);
15844 IRTemp res = newTemp(Ity_V256);
15845 assign(res, binop(Iop_V128HLtoV256,
15846 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15847 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15848 return res;
15852 __attribute__((noinline))
15853 static
15854 Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15855 const VexAbiInfo* vbi,
15856 Prefix pfx, Int sz, Long deltaIN )
15858 IRTemp addr = IRTemp_INVALID;
15859 UChar modrm = 0;
15860 Int alen = 0;
15861 HChar dis_buf[50];
15863 *decode_OK = False;
15865 Long delta = deltaIN;
15866 UChar opc = getUChar(delta);
15867 delta++;
15868 switch (opc) {
15870 case 0x00:
15871 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15872 if (have66noF2noF3(pfx)
15873 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15874 IRTemp sV = newTemp(Ity_V128);
15875 IRTemp dV = newTemp(Ity_V128);
15877 modrm = getUChar(delta);
15878 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15880 if (epartIsReg(modrm)) {
15881 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15882 delta += 1;
15883 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15884 nameXMMReg(gregOfRexRM(pfx,modrm)));
15885 } else {
15886 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15887 gen_SIGNAL_if_not_16_aligned( vbi, addr );
15888 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15889 delta += alen;
15890 DIP("pshufb %s,%s\n", dis_buf,
15891 nameXMMReg(gregOfRexRM(pfx,modrm)));
15894 IRTemp res = math_PSHUFB_XMM( dV, sV );
15895 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
15896 goto decode_success;
15898 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15899 if (haveNo66noF2noF3(pfx) && sz == 4) {
15900 IRTemp sV = newTemp(Ity_I64);
15901 IRTemp dV = newTemp(Ity_I64);
15903 modrm = getUChar(delta);
15904 do_MMX_preamble();
15905 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15907 if (epartIsReg(modrm)) {
15908 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15909 delta += 1;
15910 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15911 nameMMXReg(gregLO3ofRM(modrm)));
15912 } else {
15913 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15914 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15915 delta += alen;
15916 DIP("pshufb %s,%s\n", dis_buf,
15917 nameMMXReg(gregLO3ofRM(modrm)));
15920 putMMXReg(
15921 gregLO3ofRM(modrm),
15922 binop(
15923 Iop_PermOrZero8x8,
15924 mkexpr(dV),
15925 // Mask off bits [6:3] of each source operand lane
15926 binop(Iop_And64, mkexpr(sV), mkU64(0x8787878787878787ULL))
15929 goto decode_success;
15931 break;
15933 case 0x01:
15934 case 0x02:
15935 case 0x03:
15936 case 0x05:
15937 case 0x06:
15938 case 0x07:
15939 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15940 G to G (xmm). */
15941 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15942 G to G (xmm). */
15943 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15944 xmm) and G to G (xmm). */
15945 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15946 G to G (xmm). */
15947 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15948 G to G (xmm). */
15949 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15950 xmm) and G to G (xmm). */
15951 if (have66noF2noF3(pfx)
15952 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15953 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
15954 goto decode_success;
15956 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15957 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15958 to G (mmx). */
15959 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15960 to G (mmx). */
15961 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15962 mmx) and G to G (mmx). */
15963 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15964 to G (mmx). */
15965 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15966 to G (mmx). */
15967 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15968 mmx) and G to G (mmx). */
15969 if (haveNo66noF2noF3(pfx) && sz == 4) {
15970 const HChar* str = "???";
15971 IROp opV64 = Iop_INVALID;
15972 IROp opCatO = Iop_CatOddLanes16x4;
15973 IROp opCatE = Iop_CatEvenLanes16x4;
15974 IRTemp sV = newTemp(Ity_I64);
15975 IRTemp dV = newTemp(Ity_I64);
15977 modrm = getUChar(delta);
15979 switch (opc) {
15980 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15981 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15982 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15983 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15984 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15985 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15986 default: vassert(0);
15988 if (opc == 0x02 || opc == 0x06) {
15989 opCatO = Iop_InterleaveHI32x2;
15990 opCatE = Iop_InterleaveLO32x2;
15993 do_MMX_preamble();
15994 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15996 if (epartIsReg(modrm)) {
15997 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15998 delta += 1;
15999 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16000 nameMMXReg(gregLO3ofRM(modrm)));
16001 } else {
16002 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16003 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16004 delta += alen;
16005 DIP("ph%s %s,%s\n", str, dis_buf,
16006 nameMMXReg(gregLO3ofRM(modrm)));
16009 putMMXReg(
16010 gregLO3ofRM(modrm),
16011 binop(opV64,
16012 binop(opCatE,mkexpr(sV),mkexpr(dV)),
16013 binop(opCatO,mkexpr(sV),mkexpr(dV))
16016 goto decode_success;
16018 break;
16020 case 0x04:
16021 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16022 Unsigned Bytes (XMM) */
16023 if (have66noF2noF3(pfx)
16024 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16025 IRTemp sV = newTemp(Ity_V128);
16026 IRTemp dV = newTemp(Ity_V128);
16027 modrm = getUChar(delta);
16028 UInt rG = gregOfRexRM(pfx,modrm);
16030 assign( dV, getXMMReg(rG) );
16032 if (epartIsReg(modrm)) {
16033 UInt rE = eregOfRexRM(pfx,modrm);
16034 assign( sV, getXMMReg(rE) );
16035 delta += 1;
16036 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
16037 } else {
16038 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16039 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16040 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16041 delta += alen;
16042 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
16045 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
16046 goto decode_success;
16048 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
16049 Unsigned Bytes (MMX) */
16050 if (haveNo66noF2noF3(pfx) && sz == 4) {
16051 IRTemp sV = newTemp(Ity_I64);
16052 IRTemp dV = newTemp(Ity_I64);
16053 IRTemp sVoddsSX = newTemp(Ity_I64);
16054 IRTemp sVevensSX = newTemp(Ity_I64);
16055 IRTemp dVoddsZX = newTemp(Ity_I64);
16056 IRTemp dVevensZX = newTemp(Ity_I64);
16058 modrm = getUChar(delta);
16059 do_MMX_preamble();
16060 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16062 if (epartIsReg(modrm)) {
16063 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16064 delta += 1;
16065 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16066 nameMMXReg(gregLO3ofRM(modrm)));
16067 } else {
16068 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16069 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16070 delta += alen;
16071 DIP("pmaddubsw %s,%s\n", dis_buf,
16072 nameMMXReg(gregLO3ofRM(modrm)));
16075 /* compute dV unsigned x sV signed */
16076 assign( sVoddsSX,
16077 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
16078 assign( sVevensSX,
16079 binop(Iop_SarN16x4,
16080 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
16081 mkU8(8)) );
16082 assign( dVoddsZX,
16083 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
16084 assign( dVevensZX,
16085 binop(Iop_ShrN16x4,
16086 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
16087 mkU8(8)) );
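         /* The four temporaries above pick apart the 16-bit lanes into
            their constituent bytes: a plain right shift by 8 isolates the
            odd (high) byte of each lane, while shift-left-8-then-right-8
            isolates the even (low) byte.  The arithmetic form (SarN16x4)
            sign-extends the byte taken from sV and the logical form
            (ShrN16x4) zero-extends the byte taken from dV, giving the
            signed x unsigned pairing that PMADDUBSW needs. */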
16089 putMMXReg(
16090 gregLO3ofRM(modrm),
16091 binop(Iop_QAdd16Sx4,
16092 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
16093 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
16096 goto decode_success;
16098 break;
16100 case 0x08:
16101 case 0x09:
16102 case 0x0A:
16103 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
16104 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
16105 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
16106 if (have66noF2noF3(pfx)
16107 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16108 IRTemp sV = newTemp(Ity_V128);
16109 IRTemp dV = newTemp(Ity_V128);
16110 IRTemp sHi = newTemp(Ity_I64);
16111 IRTemp sLo = newTemp(Ity_I64);
16112 IRTemp dHi = newTemp(Ity_I64);
16113 IRTemp dLo = newTemp(Ity_I64);
16114 const HChar* str = "???";
16115 Int laneszB = 0;
16117 switch (opc) {
16118 case 0x08: laneszB = 1; str = "b"; break;
16119 case 0x09: laneszB = 2; str = "w"; break;
16120 case 0x0A: laneszB = 4; str = "d"; break;
16121 default: vassert(0);
16124 modrm = getUChar(delta);
16125 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16127 if (epartIsReg(modrm)) {
16128 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16129 delta += 1;
16130 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16131 nameXMMReg(gregOfRexRM(pfx,modrm)));
16132 } else {
16133 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16134 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16135 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16136 delta += alen;
16137 DIP("psign%s %s,%s\n", str, dis_buf,
16138 nameXMMReg(gregOfRexRM(pfx,modrm)));
16141 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16142 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16143 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16144 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16146 putXMMReg(
16147 gregOfRexRM(pfx,modrm),
16148 binop(Iop_64HLtoV128,
16149 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
16150 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
16153 goto decode_success;
16155 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
16156 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
16157 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
16158 if (haveNo66noF2noF3(pfx) && sz == 4) {
16159 IRTemp sV = newTemp(Ity_I64);
16160 IRTemp dV = newTemp(Ity_I64);
16161 const HChar* str = "???";
16162 Int laneszB = 0;
16164 switch (opc) {
16165 case 0x08: laneszB = 1; str = "b"; break;
16166 case 0x09: laneszB = 2; str = "w"; break;
16167 case 0x0A: laneszB = 4; str = "d"; break;
16168 default: vassert(0);
16171 modrm = getUChar(delta);
16172 do_MMX_preamble();
16173 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16175 if (epartIsReg(modrm)) {
16176 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16177 delta += 1;
16178 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16179 nameMMXReg(gregLO3ofRM(modrm)));
16180 } else {
16181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16182 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16183 delta += alen;
16184 DIP("psign%s %s,%s\n", str, dis_buf,
16185 nameMMXReg(gregLO3ofRM(modrm)));
16188 putMMXReg(
16189 gregLO3ofRM(modrm),
16190 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
16192 goto decode_success;
16194 break;
16196 case 0x0B:
16197 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
16198 Scale (XMM) */
16199 if (have66noF2noF3(pfx)
16200 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16201 IRTemp sV = newTemp(Ity_V128);
16202 IRTemp dV = newTemp(Ity_V128);
16203 IRTemp sHi = newTemp(Ity_I64);
16204 IRTemp sLo = newTemp(Ity_I64);
16205 IRTemp dHi = newTemp(Ity_I64);
16206 IRTemp dLo = newTemp(Ity_I64);
16208 modrm = getUChar(delta);
16209 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16211 if (epartIsReg(modrm)) {
16212 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16213 delta += 1;
16214 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
16215 nameXMMReg(gregOfRexRM(pfx,modrm)));
16216 } else {
16217 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16218 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16219 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16220 delta += alen;
16221 DIP("pmulhrsw %s,%s\n", dis_buf,
16222 nameXMMReg(gregOfRexRM(pfx,modrm)));
16225 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
16226 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
16227 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
16228 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
16230 putXMMReg(
16231 gregOfRexRM(pfx,modrm),
16232 binop(Iop_64HLtoV128,
16233 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
16234 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
16237 goto decode_success;
16239 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
16240 (MMX) */
16241 if (haveNo66noF2noF3(pfx) && sz == 4) {
16242 IRTemp sV = newTemp(Ity_I64);
16243 IRTemp dV = newTemp(Ity_I64);
16245 modrm = getUChar(delta);
16246 do_MMX_preamble();
16247 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16249 if (epartIsReg(modrm)) {
16250 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16251 delta += 1;
16252 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
16253 nameMMXReg(gregLO3ofRM(modrm)));
16254 } else {
16255 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16256 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16257 delta += alen;
16258 DIP("pmulhrsw %s,%s\n", dis_buf,
16259 nameMMXReg(gregLO3ofRM(modrm)));
16262 putMMXReg(
16263 gregLO3ofRM(modrm),
16264 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
16266 goto decode_success;
16268 break;
16270 case 0x1C:
16271 case 0x1D:
16272 case 0x1E:
16273 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
16274 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
16275 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
16276 if (have66noF2noF3(pfx)
16277 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16278 IRTemp sV = newTemp(Ity_V128);
16279 const HChar* str = "???";
16280 Int laneszB = 0;
16282 switch (opc) {
16283 case 0x1C: laneszB = 1; str = "b"; break;
16284 case 0x1D: laneszB = 2; str = "w"; break;
16285 case 0x1E: laneszB = 4; str = "d"; break;
16286 default: vassert(0);
16289 modrm = getUChar(delta);
16290 if (epartIsReg(modrm)) {
16291 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16292 delta += 1;
16293 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
16294 nameXMMReg(gregOfRexRM(pfx,modrm)));
16295 } else {
16296 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16297 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16298 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16299 delta += alen;
16300 DIP("pabs%s %s,%s\n", str, dis_buf,
16301 nameXMMReg(gregOfRexRM(pfx,modrm)));
16304 putXMMReg( gregOfRexRM(pfx,modrm),
16305 mkexpr(math_PABS_XMM(sV, laneszB)) );
16306 goto decode_success;
16308 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
16309 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
16310 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
16311 if (haveNo66noF2noF3(pfx) && sz == 4) {
16312 IRTemp sV = newTemp(Ity_I64);
16313 const HChar* str = "???";
16314 Int laneszB = 0;
16316 switch (opc) {
16317 case 0x1C: laneszB = 1; str = "b"; break;
16318 case 0x1D: laneszB = 2; str = "w"; break;
16319 case 0x1E: laneszB = 4; str = "d"; break;
16320 default: vassert(0);
16323 modrm = getUChar(delta);
16324 do_MMX_preamble();
16326 if (epartIsReg(modrm)) {
16327 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16328 delta += 1;
16329 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
16330 nameMMXReg(gregLO3ofRM(modrm)));
16331 } else {
16332 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
16333 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16334 delta += alen;
16335 DIP("pabs%s %s,%s\n", str, dis_buf,
16336 nameMMXReg(gregLO3ofRM(modrm)));
16339 putMMXReg( gregLO3ofRM(modrm),
16340 mkexpr(math_PABS_MMX( sV, laneszB )) );
16341 goto decode_success;
16343 break;
16345 default:
16346 break;
16350 //decode_failure:
16351 *decode_OK = False;
16352 return deltaIN;
16354 decode_success:
16355 *decode_OK = True;
16356 return delta;
16360 /*------------------------------------------------------------*/
16361 /*--- ---*/
16362 /*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
16363 /*--- ---*/
16364 /*------------------------------------------------------------*/
16366 __attribute__((noinline))
16367 static
16368 Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
16369 const VexAbiInfo* vbi,
16370 Prefix pfx, Int sz, Long deltaIN )
16372 Long d64 = 0;
16373 IRTemp addr = IRTemp_INVALID;
16374 UChar modrm = 0;
16375 Int alen = 0;
16376 HChar dis_buf[50];
16378 *decode_OK = False;
16380 Long delta = deltaIN;
16381 UChar opc = getUChar(delta);
16382 delta++;
16383 switch (opc) {
16385 case 0x0F:
16386 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
16387 if (have66noF2noF3(pfx)
16388 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
16389 IRTemp sV = newTemp(Ity_V128);
16390 IRTemp dV = newTemp(Ity_V128);
16392 modrm = getUChar(delta);
16393 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
16395 if (epartIsReg(modrm)) {
16396 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
16397 d64 = (Long)getUChar(delta+1);
16398 delta += 1+1;
16399 DIP("palignr $%lld,%s,%s\n", d64,
16400 nameXMMReg(eregOfRexRM(pfx,modrm)),
16401 nameXMMReg(gregOfRexRM(pfx,modrm)));
16402 } else {
16403 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16404 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16405 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
16406 d64 = (Long)getUChar(delta+alen);
16407 delta += alen+1;
16408 DIP("palignr $%lld,%s,%s\n", d64,
16409 dis_buf,
16410 nameXMMReg(gregOfRexRM(pfx,modrm)));
16413 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
16414 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
16415 goto decode_success;
16417 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
16418 if (haveNo66noF2noF3(pfx) && sz == 4) {
16419 IRTemp sV = newTemp(Ity_I64);
16420 IRTemp dV = newTemp(Ity_I64);
16421 IRTemp res = newTemp(Ity_I64);
16423 modrm = getUChar(delta);
16424 do_MMX_preamble();
16425 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
16427 if (epartIsReg(modrm)) {
16428 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
16429 d64 = (Long)getUChar(delta+1);
16430 delta += 1+1;
16431 DIP("palignr $%lld,%s,%s\n", d64,
16432 nameMMXReg(eregLO3ofRM(modrm)),
16433 nameMMXReg(gregLO3ofRM(modrm)));
16434 } else {
16435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
16436 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
16437 d64 = (Long)getUChar(delta+alen);
16438 delta += alen+1;
16439 DIP("palignr $%lld,%s,%s\n", d64,
16440 dis_buf,
16441 nameMMXReg(gregLO3ofRM(modrm)));
16444 if (d64 == 0) {
16445 assign( res, mkexpr(sV) );
16447 else if (d64 >= 1 && d64 <= 7) {
16448 assign(res,
16449 binop(Iop_Or64,
16450 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
16451 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
16452 )));
16454 else if (d64 == 8) {
16455 assign( res, mkexpr(dV) );
16457 else if (d64 >= 9 && d64 <= 15) {
16458 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
16460 else if (d64 >= 16 && d64 <= 255) {
16461 assign( res, mkU64(0) );
16463 else
16464 vassert(0);
16466 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
16467 goto decode_success;
16469 break;
16471 default:
16472 break;
16476 //decode_failure:
16477 *decode_OK = False;
16478 return deltaIN;
16480 decode_success:
16481 *decode_OK = True;
16482 return delta;
16486 /*------------------------------------------------------------*/
16487 /*--- ---*/
16488 /*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
16489 /*--- ---*/
16490 /*------------------------------------------------------------*/
16492 __attribute__((noinline))
16493 static
16494 Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
16495 const VexArchInfo* archinfo,
16496 const VexAbiInfo* vbi,
16497 Prefix pfx, Int sz, Long deltaIN )
16499 IRTemp addr = IRTemp_INVALID;
16500 IRType ty = Ity_INVALID;
16501 UChar modrm = 0;
16502 Int alen = 0;
16503 HChar dis_buf[50];
16505 *decode_OK = False;
16507 Long delta = deltaIN;
16508 UChar opc = getUChar(delta);
16509 delta++;
16510 switch (opc) {
16512 case 0xB8:
16513 /* F3 0F B8 = POPCNT{W,L,Q}
16514 Count the number of 1 bits in a register. */
16516 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
16517 && (sz == 2 || sz == 4 || sz == 8)) {
16518 /*IRType*/ ty = szToITy(sz);
16519 IRTemp src = newTemp(ty);
16520 modrm = getUChar(delta);
16521 if (epartIsReg(modrm)) {
16522 assign(src, getIRegE(sz, pfx, modrm));
16523 delta += 1;
16524 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16525 nameIRegG(sz, pfx, modrm));
16526 } else {
16527 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16528 assign(src, loadLE(ty, mkexpr(addr)));
16529 delta += alen;
16530 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
16531 nameIRegG(sz, pfx, modrm));
16534 IRTemp result = gen_POPCOUNT(ty, src);
16535 putIRegG(sz, pfx, modrm, mkexpr(result));
16537 // Update flags. This is pretty lame .. perhaps can do better
16538 // if this turns out to be performance critical.
16539 // O S A C P are cleared. Z is set if SRC == 0.
16540 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16541 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16542 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16543 stmt( IRStmt_Put( OFFB_CC_DEP1,
16544 binop(Iop_Shl64,
16545 unop(Iop_1Uto64,
16546 binop(Iop_CmpEQ64,
16547 widenUto64(mkexpr(src)),
16548 mkU64(0))),
16549 mkU8(AMD64G_CC_SHIFT_Z))));
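         /* A note on the flag update, assuming the usual meaning of
            AMD64G_CC_OP_COPY: with OP_COPY the flag thunk treats DEP1 as
            a ready-made OSZACP bit vector rather than as an operand to
            recompute flags from.  DEP1 here is either 0 or
            (1 << AMD64G_CC_SHIFT_Z), so Z ends up set exactly when the
            source was zero and every other flag reads as 0, which is the
            POPCNT behaviour described above. */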
16551 goto decode_success;
16553 break;
16555 case 0xBC:
16556 /* F3 0F BC -- TZCNT (count trailing zeroes). A BMI1 extension,
16557 which we can only decode if we're sure this is a BMI1-capable cpu
16558 that supports TZCNT, since otherwise it's BSF, which behaves
16559 differently on a zero source. */
16560 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16561 && (sz == 2 || sz == 4 || sz == 8)
16562 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16563 /*IRType*/ ty = szToITy(sz);
16564 IRTemp src = newTemp(ty);
16565 modrm = getUChar(delta);
16566 if (epartIsReg(modrm)) {
16567 assign(src, getIRegE(sz, pfx, modrm));
16568 delta += 1;
16569 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16570 nameIRegG(sz, pfx, modrm));
16571 } else {
16572 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16573 assign(src, loadLE(ty, mkexpr(addr)));
16574 delta += alen;
16575 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16576 nameIRegG(sz, pfx, modrm));
16579 IRTemp res = gen_TZCNT(ty, src);
16580 putIRegG(sz, pfx, modrm, mkexpr(res));
16582 // Update flags. This is pretty lame .. perhaps can do better
16583 // if this turns out to be performance critical.
16584 // O S A P are cleared. Z is set if RESULT == 0.
16585 // C is set if SRC is zero.
16586 IRTemp src64 = newTemp(Ity_I64);
16587 IRTemp res64 = newTemp(Ity_I64);
16588 assign(src64, widenUto64(mkexpr(src)));
16589 assign(res64, widenUto64(mkexpr(res)));
16591 IRTemp oszacp = newTemp(Ity_I64);
16592 assign(
16593 oszacp,
16594 binop(Iop_Or64,
16595 binop(Iop_Shl64,
16596 unop(Iop_1Uto64,
16597 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16598 mkU8(AMD64G_CC_SHIFT_Z)),
16599 binop(Iop_Shl64,
16600 unop(Iop_1Uto64,
16601 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16602 mkU8(AMD64G_CC_SHIFT_C))
16606 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16607 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16608 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16609 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16611 goto decode_success;
16613 break;
16615 case 0xBD:
16616 /* F3 0F BD -- LZCNT (count leading zeroes). An AMD extension,
16617 which we can only decode if we're sure this is an AMD cpu
16618 that supports LZCNT, since otherwise it's BSR, which behaves
16619 differently. Bizarrely, my Sandy Bridge also accepts these
16620 instructions but produces different results. */
16621 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16622 && (sz == 2 || sz == 4 || sz == 8)
16623 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16624 /*IRType*/ ty = szToITy(sz);
16625 IRTemp src = newTemp(ty);
16626 modrm = getUChar(delta);
16627 if (epartIsReg(modrm)) {
16628 assign(src, getIRegE(sz, pfx, modrm));
16629 delta += 1;
16630 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16631 nameIRegG(sz, pfx, modrm));
16632 } else {
16633 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16634 assign(src, loadLE(ty, mkexpr(addr)));
16635 delta += alen;
16636 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16637 nameIRegG(sz, pfx, modrm));
16640 IRTemp res = gen_LZCNT(ty, src);
16641 putIRegG(sz, pfx, modrm, mkexpr(res));
16643 // Update flags. This is pretty lame .. perhaps can do better
16644 // if this turns out to be performance critical.
16645 // O S A P are cleared. Z is set if RESULT == 0.
16646 // C is set if SRC is zero.
16647 IRTemp src64 = newTemp(Ity_I64);
16648 IRTemp res64 = newTemp(Ity_I64);
16649 assign(src64, widenUto64(mkexpr(src)));
16650 assign(res64, widenUto64(mkexpr(res)));
16652 IRTemp oszacp = newTemp(Ity_I64);
16653 assign(
16654 oszacp,
16655 binop(Iop_Or64,
16656 binop(Iop_Shl64,
16657 unop(Iop_1Uto64,
16658 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16659 mkU8(AMD64G_CC_SHIFT_Z)),
16660 binop(Iop_Shl64,
16661 unop(Iop_1Uto64,
16662 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16663 mkU8(AMD64G_CC_SHIFT_C))
16667 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16668 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16669 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16670 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16672 goto decode_success;
16674 break;
16676 default:
16677 break;
16681 //decode_failure:
16682 *decode_OK = False;
16683 return deltaIN;
16685 decode_success:
16686 *decode_OK = True;
16687 return delta;
16691 /*------------------------------------------------------------*/
16692 /*--- ---*/
16693 /*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16694 /*--- ---*/
16695 /*------------------------------------------------------------*/
16697 static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16698 IRTemp vec0/*controlling mask*/,
16699 UInt gran, IROp opSAR )
16701 /* The tricky bit is to convert vec0 into a suitable mask, by
16702 copying the most significant bit of each lane into all positions
16703 in the lane. */
16704 IRTemp sh = newTemp(Ity_I8);
16705 assign(sh, mkU8(8 * gran - 1));
16707 IRTemp mask = newTemp(Ity_V128);
16708 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16710 IRTemp notmask = newTemp(Ity_V128);
16711 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16713 IRTemp res = newTemp(Ity_V128);
16714 assign(res, binop(Iop_OrV128,
16715 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16716 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16717 return res;
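   /* Worked example, for gran == 1 (byte lanes): opSAR is an 8x16
      arithmetic shift right by 7, so a control byte of 0x80..0xFF becomes
      0xFF and 0x00..0x7F becomes 0x00.  The OR/AND combination then takes
      each lane from vecE where the control lane's top bit was set and
      from vecG otherwise, which is the BLENDV selection rule.  Wider
      granularities replicate the sign bit across 16/32/64-bit lanes
      instead. */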
16720 static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16721 IRTemp vec0/*controlling mask*/,
16722 UInt gran, IROp opSAR128 )
16724 /* The tricky bit is to convert vec0 into a suitable mask, by
16725 copying the most significant bit of each lane into all positions
16726 in the lane. */
16727 IRTemp sh = newTemp(Ity_I8);
16728 assign(sh, mkU8(8 * gran - 1));
16730 IRTemp vec0Hi = IRTemp_INVALID;
16731 IRTemp vec0Lo = IRTemp_INVALID;
16732 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16734 IRTemp mask = newTemp(Ity_V256);
16735 assign(mask, binop(Iop_V128HLtoV256,
16736 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16737 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16739 IRTemp notmask = newTemp(Ity_V256);
16740 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16742 IRTemp res = newTemp(Ity_V256);
16743 assign(res, binop(Iop_OrV256,
16744 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16745 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16746 return res;
16749 static Long dis_VBLENDV_128 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16750 const HChar *name, UInt gran, IROp opSAR )
16752 IRTemp addr = IRTemp_INVALID;
16753 Int alen = 0;
16754 HChar dis_buf[50];
16755 UChar modrm = getUChar(delta);
16756 UInt rG = gregOfRexRM(pfx, modrm);
16757 UInt rV = getVexNvvvv(pfx);
16758 UInt rIS4 = 0xFF; /* invalid */
16759 IRTemp vecE = newTemp(Ity_V128);
16760 IRTemp vecV = newTemp(Ity_V128);
16761 IRTemp vecIS4 = newTemp(Ity_V128);
16762 if (epartIsReg(modrm)) {
16763 delta++;
16764 UInt rE = eregOfRexRM(pfx, modrm);
16765 assign(vecE, getXMMReg(rE));
16766 UChar ib = getUChar(delta);
16767 rIS4 = (ib >> 4) & 0xF;
16768 DIP("%s %s,%s,%s,%s\n",
16769 name, nameXMMReg(rIS4), nameXMMReg(rE),
16770 nameXMMReg(rV), nameXMMReg(rG));
16771 } else {
16772 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16773 delta += alen;
16774 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16775 UChar ib = getUChar(delta);
16776 rIS4 = (ib >> 4) & 0xF;
16777 DIP("%s %s,%s,%s,%s\n",
16778 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16780 delta++;
16781 assign(vecV, getXMMReg(rV));
16782 assign(vecIS4, getXMMReg(rIS4));
16783 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16784 putYMMRegLoAndZU( rG, mkexpr(res) );
16785 return delta;
16788 static Long dis_VBLENDV_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta,
16789 const HChar *name, UInt gran, IROp opSAR128 )
16791 IRTemp addr = IRTemp_INVALID;
16792 Int alen = 0;
16793 HChar dis_buf[50];
16794 UChar modrm = getUChar(delta);
16795 UInt rG = gregOfRexRM(pfx, modrm);
16796 UInt rV = getVexNvvvv(pfx);
16797 UInt rIS4 = 0xFF; /* invalid */
16798 IRTemp vecE = newTemp(Ity_V256);
16799 IRTemp vecV = newTemp(Ity_V256);
16800 IRTemp vecIS4 = newTemp(Ity_V256);
16801 if (epartIsReg(modrm)) {
16802 delta++;
16803 UInt rE = eregOfRexRM(pfx, modrm);
16804 assign(vecE, getYMMReg(rE));
16805 UChar ib = getUChar(delta);
16806 rIS4 = (ib >> 4) & 0xF;
16807 DIP("%s %s,%s,%s,%s\n",
16808 name, nameYMMReg(rIS4), nameYMMReg(rE),
16809 nameYMMReg(rV), nameYMMReg(rG));
16810 } else {
16811 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16812 delta += alen;
16813 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16814 UChar ib = getUChar(delta);
16815 rIS4 = (ib >> 4) & 0xF;
16816 DIP("%s %s,%s,%s,%s\n",
16817 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16819 delta++;
16820 assign(vecV, getYMMReg(rV));
16821 assign(vecIS4, getYMMReg(rIS4));
16822 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16823 putYMMReg( rG, mkexpr(res) );
16824 return delta;
16827 static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16829 /* Set Z=1 iff (vecE & vecG) == 0--(128)--0
16830 Set C=1 iff (vecE & not vecG) == 0--(128)--0
16832 For the case `sign == 0`, be careful to use only IROps that can be
16833 instrumented exactly by memcheck. This is because PTEST is used for
16834 __builtin_strcmp in gcc 14. See
16835 https://bugzilla.redhat.com/show_bug.cgi?id=2257546
16838 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16840 /* andV resp. andnV are reduced to 64-bit values by OR-ing the top
16841 and bottom 64 bits together. This relies on the following trick:
16843 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16845 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16846 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16848 and so the OR of the above 2 exprs produces
16849 [a OR b, a OR b], from which we simply take the lower half.
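   /* Concretely: if andV is [a,b] (two 64-bit halves), the LO and HI
      interleaves give [b,b] and [a,a], their OR is [a OR b, a OR b],
      and V128to64 extracts a OR b -- which is zero exactly when the
      whole 128-bit value was zero. */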
16851 IRTemp and64 = newTemp(Ity_I64);
16852 IRTemp andn64 = newTemp(Ity_I64);
16854 assign(and64,
16855 unop(Iop_V128to64,
16856 binop(Iop_OrV128,
16857 binop(Iop_InterleaveLO64x2,
16858 mkexpr(andV), mkexpr(andV)),
16859 binop(Iop_InterleaveHI64x2,
16860 mkexpr(andV), mkexpr(andV)))));
16862 assign(andn64,
16863 unop(Iop_V128to64,
16864 binop(Iop_OrV128,
16865 binop(Iop_InterleaveLO64x2,
16866 mkexpr(andnV), mkexpr(andnV)),
16867 binop(Iop_InterleaveHI64x2,
16868 mkexpr(andnV), mkexpr(andnV)))));
16870 // Make z64 and c64 be either all-0s or all-1s
16871 IRTemp z64 = newTemp(Ity_I64);
16872 IRTemp c64 = newTemp(Ity_I64);
16874 if (sign == 64) {
16875 /* When only interested in the most significant bit, just copy bit 63
16876 into all bit positions, then invert. */
16877 assign(z64,
16878 unop(Iop_Not64,
16879 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16881 assign(c64,
16882 unop(Iop_Not64,
16883 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16884 } else if (sign == 32) {
16885 /* Since we're interested in bits 63 and 31, OR bit 31 into bit 63, copy
16886 bit 63 into all bit positions, then invert. */
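      /* For example: if bit 31 of and64 is set, the Shl64 by 32 moves it
         to bit 63, the arithmetic Sar64 by 63 smears it across the whole
         word, and the Not64 then yields 0, i.e. Z ends up clear. */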
16887 IRTemp and3264 = newTemp(Ity_I64);
16888 assign(and3264, binop(Iop_Or64, mkexpr(and64),
16889 binop(Iop_Shl64, mkexpr(and64), mkU8(32))));
16890 assign(z64,
16891 unop(Iop_Not64,
16892 binop(Iop_Sar64, mkexpr(and3264), mkU8(63))));
16894 IRTemp andn3264 = newTemp(Ity_I64);
16895 assign(andn3264, binop(Iop_Or64, mkexpr(andn64),
16896 binop(Iop_Shl64, mkexpr(andn64), mkU8(32))));
16897 assign(c64,
16898 unop(Iop_Not64,
16899 binop(Iop_Sar64, mkexpr(andn3264), mkU8(63))));
16900 } else {
16901 vassert(sign == 0);
16902 assign(z64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(and64), mkU64(0)),
16903 mkU64(~0ULL), mkU64(0ULL)));
16904 assign(c64, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(andn64), mkU64(0)),
16905 mkU64(~0ULL), mkU64(0ULL)));
16908 /* And finally, slice out the Z and C flags and set the flags
16909 thunk to COPY for them. OSAP are set to zero. */
16910 IRTemp newOSZACP = newTemp(Ity_I64);
16911 assign(newOSZACP,
16912 binop(Iop_Or64,
16913 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16914 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16916 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16917 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16918 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16919 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16923 /* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16924 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16925 static Long dis_xTESTy_128 ( const VexAbiInfo* vbi, Prefix pfx,
16926 Long delta, Bool isAvx, Int sign )
16928 IRTemp addr = IRTemp_INVALID;
16929 Int alen = 0;
16930 HChar dis_buf[50];
16931 UChar modrm = getUChar(delta);
16932 UInt rG = gregOfRexRM(pfx, modrm);
16933 IRTemp vecE = newTemp(Ity_V128);
16934 IRTemp vecG = newTemp(Ity_V128);
16936 if ( epartIsReg(modrm) ) {
16937 UInt rE = eregOfRexRM(pfx, modrm);
16938 assign(vecE, getXMMReg(rE));
16939 delta += 1;
16940 DIP( "%s%stest%s %s,%s\n",
16941 isAvx ? "v" : "", sign == 0 ? "p" : "",
16942 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16943 nameXMMReg(rE), nameXMMReg(rG) );
16944 } else {
16945 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16946 if (!isAvx)
16947 gen_SIGNAL_if_not_16_aligned( vbi, addr );
16948 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16949 delta += alen;
16950 DIP( "%s%stest%s %s,%s\n",
16951 isAvx ? "v" : "", sign == 0 ? "p" : "",
16952 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16953 dis_buf, nameXMMReg(rG) );
16956 assign(vecG, getXMMReg(rG));
16958 /* Set Z=1 iff (vecE & vecG) == 0
16959 Set C=1 iff (vecE & not vecG) == 0
16962 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16963 IRTemp andV = newTemp(Ity_V128);
16964 IRTemp andnV = newTemp(Ity_V128);
16965 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16966 assign(andnV, binop(Iop_AndV128,
16967 mkexpr(vecE), unop(Iop_NotV128, mkexpr(vecG))));
16969 finish_xTESTy ( andV, andnV, sign );
16970 return delta;
16974 /* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16975 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16976 static Long dis_xTESTy_256 ( const VexAbiInfo* vbi, Prefix pfx,
16977 Long delta, Int sign )
16979 IRTemp addr = IRTemp_INVALID;
16980 Int alen = 0;
16981 HChar dis_buf[50];
16982 UChar modrm = getUChar(delta);
16983 UInt rG = gregOfRexRM(pfx, modrm);
16984 IRTemp vecE = newTemp(Ity_V256);
16985 IRTemp vecG = newTemp(Ity_V256);
16987 if ( epartIsReg(modrm) ) {
16988 UInt rE = eregOfRexRM(pfx, modrm);
16989 assign(vecE, getYMMReg(rE));
16990 delta += 1;
16991 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16992 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16993 nameYMMReg(rE), nameYMMReg(rG) );
16994 } else {
16995 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16996 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
16997 delta += alen;
16998 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16999 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
17000 dis_buf, nameYMMReg(rG) );
17003 assign(vecG, getYMMReg(rG));
17005 /* Set Z=1 iff (vecE & vecG) == 0
17006 Set C=1 iff (vecE & not vecG) == 0
17009 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
17010 IRTemp andV = newTemp(Ity_V256);
17011 IRTemp andnV = newTemp(Ity_V256);
17012 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
17013 assign(andnV, binop(Iop_AndV256,
17014 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
17016 IRTemp andVhi = IRTemp_INVALID;
17017 IRTemp andVlo = IRTemp_INVALID;
17018 IRTemp andnVhi = IRTemp_INVALID;
17019 IRTemp andnVlo = IRTemp_INVALID;
17020 breakupV256toV128s( andV, &andVhi, &andVlo );
17021 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
17023 IRTemp andV128 = newTemp(Ity_V128);
17024 IRTemp andnV128 = newTemp(Ity_V128);
17025 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
17026 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
17028 finish_xTESTy ( andV128, andnV128, sign );
17029 return delta;
17033 /* Handles 128 and 256 bit versions of VCVTPH2PS. */
17034 static Long dis_VCVTPH2PS ( const VexAbiInfo* vbi, Prefix pfx,
17035 Long delta, Bool is256bit )
17037 /* This is a width-doubling load or reg-reg move that does conversion on the
17038 transferred data. */
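   /* Roughly: the 128-bit form turns 4 half-precision values held in the
      low 64 bits of the source into 4 single-precision values filling an
      XMM register; the 256-bit form widens 8 halves from a full 128-bit
      source into a YMM register. */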
17039 UChar modrm = getUChar(delta);
17040 UInt rG = gregOfRexRM(pfx, modrm);
17041 IRTemp srcE = newTemp(is256bit ? Ity_V128 : Ity_I64);
17043 if (epartIsReg(modrm)) {
17044 UInt rE = eregOfRexRM(pfx, modrm);
17045 assign(srcE, is256bit ? unop(Iop_V256toV128_0, getYMMReg(rE))
17046 : unop(Iop_V128to64, getXMMReg(rE)));
17047 delta += 1;
17048 DIP("vcvtph2ps %s,%s\n", nameXMMReg(rE),
17049 (is256bit ? nameYMMReg: nameXMMReg)(rG));
17050 } else {
17051 Int alen = 0;
17052 HChar dis_buf[50];
17053 IRTemp addr = disAMode(&alen, vbi, pfx, delta, dis_buf, 0);
17054 // I don't think we need an alignment check here (not 100% sure, though).
17055 assign(srcE, loadLE(is256bit ? Ity_V128 : Ity_I64, mkexpr(addr)));
17056 delta += alen;
17057 DIP( "vcvtph2ps %s,%s\n", dis_buf,
17058 (is256bit ? nameYMMReg: nameXMMReg)(rG));
17061 IRExpr* res = unop(is256bit ? Iop_F16toF32x8 : Iop_F16toF32x4, mkexpr(srcE));
17062 (is256bit ? putYMMReg : putYMMRegLoAndZU)(rG, res);
17064 return delta;
17068 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
17069 static Long dis_PMOVxXBW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17070 Long delta, Bool isAvx, Bool xIsZ )
17072 IRTemp addr = IRTemp_INVALID;
17073 Int alen = 0;
17074 HChar dis_buf[50];
17075 IRTemp srcVec = newTemp(Ity_V128);
17076 UChar modrm = getUChar(delta);
17077 const HChar* mbV = isAvx ? "v" : "";
17078 const HChar how = xIsZ ? 'z' : 's';
17079 UInt rG = gregOfRexRM(pfx, modrm);
17080 if ( epartIsReg(modrm) ) {
17081 UInt rE = eregOfRexRM(pfx, modrm);
17082 assign( srcVec, getXMMReg(rE) );
17083 delta += 1;
17084 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17085 } else {
17086 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17087 assign( srcVec,
17088 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17089 delta += alen;
17090 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17093 IRExpr* res
17094 = xIsZ /* do math for either zero or sign extend */
17095 ? binop( Iop_InterleaveLO8x16,
17096 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17097 : binop( Iop_SarN16x8,
17098 binop( Iop_ShlN16x8,
17099 binop( Iop_InterleaveLO8x16,
17100 IRExpr_Const( IRConst_V128(0) ),
17101 mkexpr(srcVec) ),
17102 mkU8(8) ),
17103 mkU8(8) );
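   /* Worked example for the sign-extend path (illustrative byte 0x80):
      interleaving with zero gives the 16-bit lane 0x0080; ShlN16x8 by 8
      makes it 0x8000, and SarN16x8 by 8 then gives 0xFF80, i.e. the byte
      sign-extended to 16 bits. */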
17105 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17107 return delta;
17111 /* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
17112 static Long dis_PMOVxXBW_256 ( const VexAbiInfo* vbi, Prefix pfx,
17113 Long delta, Bool xIsZ )
17115 IRTemp addr = IRTemp_INVALID;
17116 Int alen = 0;
17117 HChar dis_buf[50];
17118 IRTemp srcVec = newTemp(Ity_V128);
17119 UChar modrm = getUChar(delta);
17120 UChar how = xIsZ ? 'z' : 's';
17121 UInt rG = gregOfRexRM(pfx, modrm);
17122 if ( epartIsReg(modrm) ) {
17123 UInt rE = eregOfRexRM(pfx, modrm);
17124 assign( srcVec, getXMMReg(rE) );
17125 delta += 1;
17126 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17127 } else {
17128 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17129 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17130 delta += alen;
17131 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17134 /* First do zero extend. */
17135 IRExpr* res
17136 = binop( Iop_V128HLtoV256,
17137 binop( Iop_InterleaveHI8x16,
17138 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17139 binop( Iop_InterleaveLO8x16,
17140 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17141 /* And if needed sign extension as well. */
17142 if (!xIsZ)
17143 res = binop( Iop_SarN16x16,
17144 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
17146 putYMMReg ( rG, res );
17148 return delta;
17152 static Long dis_PMOVxXWD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17153 Long delta, Bool isAvx, Bool xIsZ )
17155 IRTemp addr = IRTemp_INVALID;
17156 Int alen = 0;
17157 HChar dis_buf[50];
17158 IRTemp srcVec = newTemp(Ity_V128);
17159 UChar modrm = getUChar(delta);
17160 const HChar* mbV = isAvx ? "v" : "";
17161 const HChar how = xIsZ ? 'z' : 's';
17162 UInt rG = gregOfRexRM(pfx, modrm);
17164 if ( epartIsReg(modrm) ) {
17165 UInt rE = eregOfRexRM(pfx, modrm);
17166 assign( srcVec, getXMMReg(rE) );
17167 delta += 1;
17168 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17169 } else {
17170 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17171 assign( srcVec,
17172 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17173 delta += alen;
17174 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17177 IRExpr* res
17178 = binop( Iop_InterleaveLO16x8,
17179 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
17180 if (!xIsZ)
17181 res = binop(Iop_SarN32x4,
17182 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
17184 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17185 ( gregOfRexRM(pfx, modrm), res );
17187 return delta;
17191 static Long dis_PMOVxXWD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17192 Long delta, Bool xIsZ )
17194 IRTemp addr = IRTemp_INVALID;
17195 Int alen = 0;
17196 HChar dis_buf[50];
17197 IRTemp srcVec = newTemp(Ity_V128);
17198 UChar modrm = getUChar(delta);
17199 UChar how = xIsZ ? 'z' : 's';
17200 UInt rG = gregOfRexRM(pfx, modrm);
17202 if ( epartIsReg(modrm) ) {
17203 UInt rE = eregOfRexRM(pfx, modrm);
17204 assign( srcVec, getXMMReg(rE) );
17205 delta += 1;
17206 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17207 } else {
17208 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17209 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
17210 delta += alen;
17211 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17214 IRExpr* res
17215 = binop( Iop_V128HLtoV256,
17216 binop( Iop_InterleaveHI16x8,
17217 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17218 binop( Iop_InterleaveLO16x8,
17219 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17220 if (!xIsZ)
17221 res = binop(Iop_SarN32x8,
17222 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
17224 putYMMReg ( rG, res );
17226 return delta;
17230 static Long dis_PMOVSXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17231 Long delta, Bool isAvx )
17233 IRTemp addr = IRTemp_INVALID;
17234 Int alen = 0;
17235 HChar dis_buf[50];
17236 IRTemp srcBytes = newTemp(Ity_I32);
17237 UChar modrm = getUChar(delta);
17238 const HChar* mbV = isAvx ? "v" : "";
17239 UInt rG = gregOfRexRM(pfx, modrm);
17241 if ( epartIsReg( modrm ) ) {
17242 UInt rE = eregOfRexRM(pfx, modrm);
17243 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17244 delta += 1;
17245 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17246 } else {
17247 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17248 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17249 delta += alen;
17250 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17253 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17254 ( rG, binop( Iop_64HLtoV128,
17255 unop( Iop_16Sto64,
17256 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
17257 unop( Iop_16Sto64,
17258 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
17259 return delta;
17263 static Long dis_PMOVSXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx, Long delta )
17265 IRTemp addr = IRTemp_INVALID;
17266 Int alen = 0;
17267 HChar dis_buf[50];
17268 IRTemp srcBytes = newTemp(Ity_I64);
17269 UChar modrm = getUChar(delta);
17270 UInt rG = gregOfRexRM(pfx, modrm);
17271 IRTemp s3, s2, s1, s0;
17272 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17274 if ( epartIsReg( modrm ) ) {
17275 UInt rE = eregOfRexRM(pfx, modrm);
17276 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
17277 delta += 1;
17278 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17279 } else {
17280 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17281 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
17282 delta += alen;
17283 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17286 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
17287 putYMMReg( rG, binop( Iop_V128HLtoV256,
17288 binop( Iop_64HLtoV128,
17289 unop( Iop_16Sto64, mkexpr(s3) ),
17290 unop( Iop_16Sto64, mkexpr(s2) ) ),
17291 binop( Iop_64HLtoV128,
17292 unop( Iop_16Sto64, mkexpr(s1) ),
17293 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
17294 return delta;
17298 static Long dis_PMOVZXWQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17299 Long delta, Bool isAvx )
17301 IRTemp addr = IRTemp_INVALID;
17302 Int alen = 0;
17303 HChar dis_buf[50];
17304 IRTemp srcVec = newTemp(Ity_V128);
17305 UChar modrm = getUChar(delta);
17306 const HChar* mbV = isAvx ? "v" : "";
17307 UInt rG = gregOfRexRM(pfx, modrm);
17309 if ( epartIsReg( modrm ) ) {
17310 UInt rE = eregOfRexRM(pfx, modrm);
17311 assign( srcVec, getXMMReg(rE) );
17312 delta += 1;
17313 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17314 } else {
17315 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17316 assign( srcVec,
17317 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17318 delta += alen;
17319 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17322 IRTemp zeroVec = newTemp( Ity_V128 );
17323 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17325 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17326 ( rG, binop( Iop_InterleaveLO16x8,
17327 mkexpr(zeroVec),
17328 binop( Iop_InterleaveLO16x8,
17329 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
17330 return delta;
17334 static Long dis_PMOVZXWQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17335 Long delta )
17337 IRTemp addr = IRTemp_INVALID;
17338 Int alen = 0;
17339 HChar dis_buf[50];
17340 IRTemp srcVec = newTemp(Ity_V128);
17341 UChar modrm = getUChar(delta);
17342 UInt rG = gregOfRexRM(pfx, modrm);
17344 if ( epartIsReg( modrm ) ) {
17345 UInt rE = eregOfRexRM(pfx, modrm);
17346 assign( srcVec, getXMMReg(rE) );
17347 delta += 1;
17348 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17349 } else {
17350 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17351 assign( srcVec,
17352 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17353 delta += alen;
17354 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
17357 IRTemp zeroVec = newTemp( Ity_V128 );
17358 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17360 putYMMReg( rG, binop( Iop_V128HLtoV256,
17361 binop( Iop_InterleaveHI16x8,
17362 mkexpr(zeroVec),
17363 binop( Iop_InterleaveLO16x8,
17364 mkexpr(zeroVec), mkexpr(srcVec) ) ),
17365 binop( Iop_InterleaveLO16x8,
17366 mkexpr(zeroVec),
17367 binop( Iop_InterleaveLO16x8,
17368 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17369 return delta;
17373 /* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
17374 static Long dis_PMOVxXDQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17375 Long delta, Bool isAvx, Bool xIsZ )
17377 IRTemp addr = IRTemp_INVALID;
17378 Int alen = 0;
17379 HChar dis_buf[50];
17380 IRTemp srcI64 = newTemp(Ity_I64);
17381 IRTemp srcVec = newTemp(Ity_V128);
17382 UChar modrm = getUChar(delta);
17383 const HChar* mbV = isAvx ? "v" : "";
17384 const HChar how = xIsZ ? 'z' : 's';
17385 UInt rG = gregOfRexRM(pfx, modrm);
17386 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
17387 thing in a V128, with arbitrary junk in the top 64 bits. Use
17388 one or both of them and let iropt clean up afterwards (as
17389 usual). */
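   /* In practice only one of them is used per path: the zero-extend case
      interleaves srcVec with zero, while the sign-extend case splits
      srcI64 into two 32-bit halves and widens each with Iop_32Sto64; the
      unused temp is dropped by iropt. */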
17390 if ( epartIsReg(modrm) ) {
17391 UInt rE = eregOfRexRM(pfx, modrm);
17392 assign( srcVec, getXMMReg(rE) );
17393 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
17394 delta += 1;
17395 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17396 } else {
17397 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17398 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
17399 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
17400 delta += alen;
17401 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17404 IRExpr* res
17405 = xIsZ /* do math for either zero or sign extend */
17406 ? binop( Iop_InterleaveLO32x4,
17407 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
17408 : binop( Iop_64HLtoV128,
17409 unop( Iop_32Sto64,
17410 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
17411 unop( Iop_32Sto64,
17412 unop( Iop_64to32, mkexpr(srcI64) ) ) );
17414 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17416 return delta;
17420 /* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
17421 static Long dis_PMOVxXDQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17422 Long delta, Bool xIsZ )
17424 IRTemp addr = IRTemp_INVALID;
17425 Int alen = 0;
17426 HChar dis_buf[50];
17427 IRTemp srcVec = newTemp(Ity_V128);
17428 UChar modrm = getUChar(delta);
17429 UChar how = xIsZ ? 'z' : 's';
17430 UInt rG = gregOfRexRM(pfx, modrm);
17431 /* Compute srcVec -- the value to expand -- as a V128. Unlike the
17432 128-bit case above there is no separate srcI64 here; the
17433 sign-extending path below instead splits srcVec into four 32-bit
17434 lanes and widens each with Iop_32Sto64. */
17435 if ( epartIsReg(modrm) ) {
17436 UInt rE = eregOfRexRM(pfx, modrm);
17437 assign( srcVec, getXMMReg(rE) );
17438 delta += 1;
17439 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17440 } else {
17441 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17442 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
17443 delta += alen;
17444 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17447 IRExpr* res;
17448 if (xIsZ)
17449 res = binop( Iop_V128HLtoV256,
17450 binop( Iop_InterleaveHI32x4,
17451 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
17452 binop( Iop_InterleaveLO32x4,
17453 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
17454 else {
17455 IRTemp s3, s2, s1, s0;
17456 s3 = s2 = s1 = s0 = IRTemp_INVALID;
17457 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
17458 res = binop( Iop_V128HLtoV256,
17459 binop( Iop_64HLtoV128,
17460 unop( Iop_32Sto64, mkexpr(s3) ),
17461 unop( Iop_32Sto64, mkexpr(s2) ) ),
17462 binop( Iop_64HLtoV128,
17463 unop( Iop_32Sto64, mkexpr(s1) ),
17464 unop( Iop_32Sto64, mkexpr(s0) ) ) );
17467 putYMMReg ( rG, res );
17469 return delta;
17473 /* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
17474 static Long dis_PMOVxXBD_128 ( const VexAbiInfo* vbi, Prefix pfx,
17475 Long delta, Bool isAvx, Bool xIsZ )
17477 IRTemp addr = IRTemp_INVALID;
17478 Int alen = 0;
17479 HChar dis_buf[50];
17480 IRTemp srcVec = newTemp(Ity_V128);
17481 UChar modrm = getUChar(delta);
17482 const HChar* mbV = isAvx ? "v" : "";
17483 const HChar how = xIsZ ? 'z' : 's';
17484 UInt rG = gregOfRexRM(pfx, modrm);
17485 if ( epartIsReg(modrm) ) {
17486 UInt rE = eregOfRexRM(pfx, modrm);
17487 assign( srcVec, getXMMReg(rE) );
17488 delta += 1;
17489 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
17490 } else {
17491 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17492 assign( srcVec,
17493 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
17494 delta += alen;
17495 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
17498 IRTemp zeroVec = newTemp(Ity_V128);
17499 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17501 IRExpr* res
17502 = binop(Iop_InterleaveLO8x16,
17503 mkexpr(zeroVec),
17504 binop(Iop_InterleaveLO8x16,
17505 mkexpr(zeroVec), mkexpr(srcVec)));
17506 if (!xIsZ)
17507 res = binop(Iop_SarN32x4,
17508 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
17510 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
17512 return delta;
17516 /* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
17517 static Long dis_PMOVxXBD_256 ( const VexAbiInfo* vbi, Prefix pfx,
17518 Long delta, Bool xIsZ )
17520 IRTemp addr = IRTemp_INVALID;
17521 Int alen = 0;
17522 HChar dis_buf[50];
17523 IRTemp srcVec = newTemp(Ity_V128);
17524 UChar modrm = getUChar(delta);
17525 UChar how = xIsZ ? 'z' : 's';
17526 UInt rG = gregOfRexRM(pfx, modrm);
17527 if ( epartIsReg(modrm) ) {
17528 UInt rE = eregOfRexRM(pfx, modrm);
17529 assign( srcVec, getXMMReg(rE) );
17530 delta += 1;
17531 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
17532 } else {
17533 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17534 assign( srcVec,
17535 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
17536 delta += alen;
17537 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
17540 IRTemp zeroVec = newTemp(Ity_V128);
17541 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17543 IRExpr* res
17544 = binop( Iop_V128HLtoV256,
17545 binop(Iop_InterleaveHI8x16,
17546 mkexpr(zeroVec),
17547 binop(Iop_InterleaveLO8x16,
17548 mkexpr(zeroVec), mkexpr(srcVec)) ),
17549 binop(Iop_InterleaveLO8x16,
17550 mkexpr(zeroVec),
17551 binop(Iop_InterleaveLO8x16,
17552 mkexpr(zeroVec), mkexpr(srcVec)) ) );
17553 if (!xIsZ)
17554 res = binop(Iop_SarN32x8,
17555 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
17557 putYMMReg ( rG, res );
17559 return delta;
17563 /* Handles 128 bit versions of PMOVSXBQ. */
17564 static Long dis_PMOVSXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17565 Long delta, Bool isAvx )
17567 IRTemp addr = IRTemp_INVALID;
17568 Int alen = 0;
17569 HChar dis_buf[50];
17570 IRTemp srcBytes = newTemp(Ity_I16);
17571 UChar modrm = getUChar(delta);
17572 const HChar* mbV = isAvx ? "v" : "";
17573 UInt rG = gregOfRexRM(pfx, modrm);
17574 if ( epartIsReg(modrm) ) {
17575 UInt rE = eregOfRexRM(pfx, modrm);
17576 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
17577 delta += 1;
17578 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17579 } else {
17580 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17581 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
17582 delta += alen;
17583 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17586 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17587 ( rG, binop( Iop_64HLtoV128,
17588 unop( Iop_8Sto64,
17589 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
17590 unop( Iop_8Sto64,
17591 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17592 return delta;
17596 /* Handles 256 bit versions of PMOVSXBQ. */
17597 static Long dis_PMOVSXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17598 Long delta )
17600 IRTemp addr = IRTemp_INVALID;
17601 Int alen = 0;
17602 HChar dis_buf[50];
17603 IRTemp srcBytes = newTemp(Ity_I32);
17604 UChar modrm = getUChar(delta);
17605 UInt rG = gregOfRexRM(pfx, modrm);
17606 if ( epartIsReg(modrm) ) {
17607 UInt rE = eregOfRexRM(pfx, modrm);
17608 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17609 delta += 1;
17610 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17611 } else {
17612 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17613 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17614 delta += alen;
17615 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17618 putYMMReg
17619 ( rG, binop( Iop_V128HLtoV256,
17620 binop( Iop_64HLtoV128,
17621 unop( Iop_8Sto64,
17622 unop( Iop_16HIto8,
17623 unop( Iop_32HIto16,
17624 mkexpr(srcBytes) ) ) ),
17625 unop( Iop_8Sto64,
17626 unop( Iop_16to8,
17627 unop( Iop_32HIto16,
17628 mkexpr(srcBytes) ) ) ) ),
17629 binop( Iop_64HLtoV128,
17630 unop( Iop_8Sto64,
17631 unop( Iop_16HIto8,
17632 unop( Iop_32to16,
17633 mkexpr(srcBytes) ) ) ),
17634 unop( Iop_8Sto64,
17635 unop( Iop_16to8,
17636 unop( Iop_32to16,
17637 mkexpr(srcBytes) ) ) ) ) ) );
17638 return delta;
17642 /* Handles 128 bit versions of PMOVZXBQ. */
17643 static Long dis_PMOVZXBQ_128 ( const VexAbiInfo* vbi, Prefix pfx,
17644 Long delta, Bool isAvx )
17646 IRTemp addr = IRTemp_INVALID;
17647 Int alen = 0;
17648 HChar dis_buf[50];
17649 IRTemp srcVec = newTemp(Ity_V128);
17650 UChar modrm = getUChar(delta);
17651 const HChar* mbV = isAvx ? "v" : "";
17652 UInt rG = gregOfRexRM(pfx, modrm);
17653 if ( epartIsReg(modrm) ) {
17654 UInt rE = eregOfRexRM(pfx, modrm);
17655 assign( srcVec, getXMMReg(rE) );
17656 delta += 1;
17657 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17658 } else {
17659 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17660 assign( srcVec,
17661 unop( Iop_32UtoV128,
17662 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17663 delta += alen;
17664 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17667 IRTemp zeroVec = newTemp(Ity_V128);
17668 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17670 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17671 ( rG, binop( Iop_InterleaveLO8x16,
17672 mkexpr(zeroVec),
17673 binop( Iop_InterleaveLO8x16,
17674 mkexpr(zeroVec),
17675 binop( Iop_InterleaveLO8x16,
17676 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17677 return delta;
17681 /* Handles 256 bit versions of PMOVZXBQ. */
17682 static Long dis_PMOVZXBQ_256 ( const VexAbiInfo* vbi, Prefix pfx,
17683 Long delta )
17685 IRTemp addr = IRTemp_INVALID;
17686 Int alen = 0;
17687 HChar dis_buf[50];
17688 IRTemp srcVec = newTemp(Ity_V128);
17689 UChar modrm = getUChar(delta);
17690 UInt rG = gregOfRexRM(pfx, modrm);
17691 if ( epartIsReg(modrm) ) {
17692 UInt rE = eregOfRexRM(pfx, modrm);
17693 assign( srcVec, getXMMReg(rE) );
17694 delta += 1;
17695 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17696 } else {
17697 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17698 assign( srcVec,
17699 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17700 delta += alen;
17701 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17704 IRTemp zeroVec = newTemp(Ity_V128);
17705 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17707 putYMMReg
17708 ( rG, binop( Iop_V128HLtoV256,
17709 binop( Iop_InterleaveHI8x16,
17710 mkexpr(zeroVec),
17711 binop( Iop_InterleaveLO8x16,
17712 mkexpr(zeroVec),
17713 binop( Iop_InterleaveLO8x16,
17714 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17715 binop( Iop_InterleaveLO8x16,
17716 mkexpr(zeroVec),
17717 binop( Iop_InterleaveLO8x16,
17718 mkexpr(zeroVec),
17719 binop( Iop_InterleaveLO8x16,
17720 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17721 ) );
17722 return delta;
17726 static Long dis_PHMINPOSUW_128 ( const VexAbiInfo* vbi, Prefix pfx,
17727 Long delta, Bool isAvx )
17729 IRTemp addr = IRTemp_INVALID;
17730 Int alen = 0;
17731 HChar dis_buf[50];
17732 UChar modrm = getUChar(delta);
17733 const HChar* mbV = isAvx ? "v" : "";
17734 IRTemp sV = newTemp(Ity_V128);
17735 IRTemp sHi = newTemp(Ity_I64);
17736 IRTemp sLo = newTemp(Ity_I64);
17737 IRTemp dLo = newTemp(Ity_I64);
17738 UInt rG = gregOfRexRM(pfx,modrm);
17739 if (epartIsReg(modrm)) {
17740 UInt rE = eregOfRexRM(pfx,modrm);
17741 assign( sV, getXMMReg(rE) );
17742 delta += 1;
17743 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17744 } else {
17745 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17746 if (!isAvx)
17747 gen_SIGNAL_if_not_16_aligned(vbi, addr);
17748 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17749 delta += alen;
17750 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17752 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17753 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
17754 assign( dLo, mkIRExprCCall(
17755 Ity_I64, 0/*regparms*/,
17756 "amd64g_calculate_sse_phminposuw",
17757 &amd64g_calculate_sse_phminposuw,
17758 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17760 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17761 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17762 return delta;
17766 static Long dis_AESx ( const VexAbiInfo* vbi, Prefix pfx,
17767 Long delta, Bool isAvx, UChar opc )
17769 IRTemp addr = IRTemp_INVALID;
17770 Int alen = 0;
17771 HChar dis_buf[50];
17772 UChar modrm = getUChar(delta);
17773 UInt rG = gregOfRexRM(pfx, modrm);
17774 UInt regNoL = 0;
17775 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17777 /* This is a nasty kludge. We need to pass 2 x V128 to the
17778 helper. Since we can't do that, use a dirty
17779 helper to compute the results directly from the XMM regs in
17780 the guest state. That means for the memory case, we need to
17781 move the left operand into a pseudo-register (XMM16, let's
17782 call it). */
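   /* Sketch of the resulting scheme: the dirty helper is handed the
      guest-state pointer, the opcode, and the guest-state offsets of the
      destination, left and right V128s, and operates on the registers in
      place, since V128 values cannot be passed to helpers by value. */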
17783 if (epartIsReg(modrm)) {
17784 regNoL = eregOfRexRM(pfx, modrm);
17785 delta += 1;
17786 } else {
17787 regNoL = 16; /* use XMM16 as an intermediary */
17788 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17789 /* alignment check needed ???? */
17790 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17791 delta += alen;
17794 void* fn = &amd64g_dirtyhelper_AES;
17795 const HChar* nm = "amd64g_dirtyhelper_AES";
17797 /* Round up the arguments. Note that this is a kludge -- the
17798 use of mkU64 rather than mkIRExpr_HWord implies the
17799 assumption that the host's word size is 64-bit. */
17800 UInt gstOffD = ymmGuestRegOffset(rG);
17801 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17802 UInt gstOffR = ymmGuestRegOffset(regNoR);
17803 IRExpr* opc4 = mkU64(opc);
17804 IRExpr* gstOffDe = mkU64(gstOffD);
17805 IRExpr* gstOffLe = mkU64(gstOffL);
17806 IRExpr* gstOffRe = mkU64(gstOffR);
17807 IRExpr** args
17808 = mkIRExprVec_5( IRExpr_GSPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
17810 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17811 /* It's not really a dirty call, but we can't use the clean helper
17812 mechanism here for the very lame reason that we can't pass 2 x
17813 V128s by value to a helper. Hence this roundabout scheme. */
17814 d->nFxState = 2;
17815 vex_bzero(&d->fxState, sizeof(d->fxState));
17816 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers and write
17817 the second for !isAvx, or the third for isAvx.
17818 AESIMC (0xDB) reads the first register and writes the second. */
17819 d->fxState[0].fx = Ifx_Read;
17820 d->fxState[0].offset = gstOffL;
17821 d->fxState[0].size = sizeof(U128);
17822 d->fxState[1].offset = gstOffR;
17823 d->fxState[1].size = sizeof(U128);
17824 if (opc == 0xDB)
17825 d->fxState[1].fx = Ifx_Write;
17826 else if (!isAvx || rG == regNoR)
17827 d->fxState[1].fx = Ifx_Modify;
17828 else {
17829 d->fxState[1].fx = Ifx_Read;
17830 d->nFxState++;
17831 d->fxState[2].fx = Ifx_Write;
17832 d->fxState[2].offset = gstOffD;
17833 d->fxState[2].size = sizeof(U128);
17836 stmt( IRStmt_Dirty(d) );
17838 const HChar* opsuf;
17839 switch (opc) {
17840 case 0xDC: opsuf = "enc"; break;
17841 case 0xDD: opsuf = "enclast"; break;
17842 case 0xDE: opsuf = "dec"; break;
17843 case 0xDF: opsuf = "declast"; break;
17844 case 0xDB: opsuf = "imc"; break;
17845 default: vassert(0);
17847 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17848 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17849 nameXMMReg(regNoR),
17850 (isAvx && opc != 0xDB) ? "," : "",
17851 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17853 if (isAvx)
17854 putYMMRegLane128( rG, 1, mkV128(0) );
17855 return delta;
17858 static Long dis_AESKEYGENASSIST ( const VexAbiInfo* vbi, Prefix pfx,
17859 Long delta, Bool isAvx )
17861 IRTemp addr = IRTemp_INVALID;
17862 Int alen = 0;
17863 HChar dis_buf[50];
17864 UChar modrm = getUChar(delta);
17865 UInt regNoL = 0;
17866 UInt regNoR = gregOfRexRM(pfx, modrm);
17867 UChar imm = 0;
17869 /* This is a nasty kludge. See AESENC et al. instructions. */
17870 modrm = getUChar(delta);
17871 if (epartIsReg(modrm)) {
17872 regNoL = eregOfRexRM(pfx, modrm);
17873 imm = getUChar(delta+1);
17874 delta += 1+1;
17875 } else {
17876 regNoL = 16; /* use XMM16 as an intermediary */
17877 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17878 /* alignment check needed ???? */
17879 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17880 imm = getUChar(delta+alen);
17881 delta += alen+1;
17884 /* Who ya gonna call? Presumably not Ghostbusters. */
17885 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
17886 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
17888 /* Round up the arguments. Note that this is a kludge -- the
17889 use of mkU64 rather than mkIRExpr_HWord implies the
17890 assumption that the host's word size is 64-bit. */
17891 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17892 UInt gstOffR = ymmGuestRegOffset(regNoR);
17894 IRExpr* imme = mkU64(imm & 0xFF);
17895 IRExpr* gstOffLe = mkU64(gstOffL);
17896 IRExpr* gstOffRe = mkU64(gstOffR);
17897 IRExpr** args
17898 = mkIRExprVec_4( IRExpr_GSPTR(), imme, gstOffLe, gstOffRe );
17900 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
17901 /* It's not really a dirty call, but we can't use the clean helper
17902 mechanism here for the very lame reason that we can't pass 2 x
17903 V128s by value to a helper. Hence this roundabout scheme. */
17904 d->nFxState = 2;
17905 vex_bzero(&d->fxState, sizeof(d->fxState));
17906 d->fxState[0].fx = Ifx_Read;
17907 d->fxState[0].offset = gstOffL;
17908 d->fxState[0].size = sizeof(U128);
17909 d->fxState[1].fx = Ifx_Write;
17910 d->fxState[1].offset = gstOffR;
17911 d->fxState[1].size = sizeof(U128);
17912 stmt( IRStmt_Dirty(d) );
17914 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17915 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17916 nameXMMReg(regNoR));
17917 if (isAvx)
17918 putYMMRegLane128( regNoR, 1, mkV128(0) );
17919 return delta;
17923 __attribute__((noinline))
17924 static
17925 Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17926 const VexAbiInfo* vbi,
17927 Prefix pfx, Int sz, Long deltaIN )
17929 IRTemp addr = IRTemp_INVALID;
17930 UChar modrm = 0;
17931 Int alen = 0;
17932 HChar dis_buf[50];
17934 *decode_OK = False;
17936 Long delta = deltaIN;
17937 UChar opc = getUChar(delta);
17938 delta++;
17939 switch (opc) {
17941 case 0x10:
17942 case 0x14:
17943 case 0x15:
17944 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17945 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17946 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17947 Blend at various granularities, with XMM0 (implicit operand)
17948 providing the controlling mask.
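      /* For example (illustrative): for blendvps, each 32-bit lane of the
         destination is taken from xmm2/m128 when the sign bit of the
         corresponding XMM0 lane is set, and left unchanged otherwise. */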
17950 if (have66noF2noF3(pfx) && sz == 2) {
17951 modrm = getUChar(delta);
17953 const HChar* nm = NULL;
17954 UInt gran = 0;
17955 IROp opSAR = Iop_INVALID;
17956 switch (opc) {
17957 case 0x10:
17958 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17959 break;
17960 case 0x14:
17961 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17962 break;
17963 case 0x15:
17964 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17965 break;
17967 vassert(nm);
17969 IRTemp vecE = newTemp(Ity_V128);
17970 IRTemp vecG = newTemp(Ity_V128);
17971 IRTemp vec0 = newTemp(Ity_V128);
17973 if ( epartIsReg(modrm) ) {
17974 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17975 delta += 1;
17976 DIP( "%s %s,%s\n", nm,
17977 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17978 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17979 } else {
17980 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17981 gen_SIGNAL_if_not_16_aligned( vbi, addr );
17982 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17983 delta += alen;
17984 DIP( "%s %s,%s\n", nm,
17985 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17988 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17989 assign(vec0, getXMMReg(0));
17991 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
17992 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
17994 goto decode_success;
17996 break;
17998 case 0x17:
17999 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
18000 Logical compare (set ZF and CF from AND/ANDN of the operands) */
18001 if (have66noF2noF3(pfx)
18002 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
18003 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
18004 goto decode_success;
18006 break;
18008 case 0x20:
18009 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
18010 Packed Move with Sign Extend from Byte to Word (XMM) */
18011 if (have66noF2noF3(pfx) && sz == 2) {
18012 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18013 False/*!isAvx*/, False/*!xIsZ*/ );
18014 goto decode_success;
18016 break;
18018 case 0x21:
18019 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
18020 Packed Move with Sign Extend from Byte to DWord (XMM) */
18021 if (have66noF2noF3(pfx) && sz == 2) {
18022 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18023 False/*!isAvx*/, False/*!xIsZ*/ );
18024 goto decode_success;
18026 break;
18028 case 0x22:
18029 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
18030 Packed Move with Sign Extend from Byte to QWord (XMM) */
18031 if (have66noF2noF3(pfx) && sz == 2) {
18032 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18033 goto decode_success;
18035 break;
18037 case 0x23:
18038 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
18039 Packed Move with Sign Extend from Word to DWord (XMM) */
18040 if (have66noF2noF3(pfx) && sz == 2) {
18041 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
18042 False/*!isAvx*/, False/*!xIsZ*/);
18043 goto decode_success;
18045 break;
18047 case 0x24:
18048 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
18049 Packed Move with Sign Extend from Word to QWord (XMM) */
18050 if (have66noF2noF3(pfx) && sz == 2) {
18051 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18052 goto decode_success;
18054 break;
18056 case 0x25:
18057 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
18058 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
18059 if (have66noF2noF3(pfx) && sz == 2) {
18060 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18061 False/*!isAvx*/, False/*!xIsZ*/ );
18062 goto decode_success;
18064 break;
18066 case 0x28:
18067 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit lanes
18068 0 x 0 to form the lower 64-bit half and lanes 2 x 2 to form the upper
18069 64-bit half */
18070 /* This is a really poor translation -- could be improved if
18071 performance critical. It's a copy-paste of PMULUDQ, too. */
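      /* Worked example (made-up lanes): with dV = {d3,d2,d1,d0} and
         sV = {s3,s2,s1,s0} as 32-bit lanes, the result holds the two
         signed 64-bit products d2*s2 (upper half) and d0*s0 (lower
         half); lanes 1 and 3 are ignored. */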
18072 if (have66noF2noF3(pfx) && sz == 2) {
18073 IRTemp sV = newTemp(Ity_V128);
18074 IRTemp dV = newTemp(Ity_V128);
18075 modrm = getUChar(delta);
18076 UInt rG = gregOfRexRM(pfx,modrm);
18077 assign( dV, getXMMReg(rG) );
18078 if (epartIsReg(modrm)) {
18079 UInt rE = eregOfRexRM(pfx,modrm);
18080 assign( sV, getXMMReg(rE) );
18081 delta += 1;
18082 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
18083 } else {
18084 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18085 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
18086 delta += alen;
18087 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
18090 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
18091 goto decode_success;
18093 break;
18095 case 0x29:
18096 /* 66 0F 38 29 = PCMPEQQ
18097 64x2 equality comparison */
18098 if (have66noF2noF3(pfx) && sz == 2) {
18099 /* FIXME: this needs an alignment check */
18100 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18101 "pcmpeqq", Iop_CmpEQ64x2, False );
18102 goto decode_success;
18104 break;
18106 case 0x2A:
18107 /* 66 0F 38 2A = MOVNTDQA
18108 "non-temporal" "streaming" load
18109 Handle like MOVDQA but only memory operand is allowed */
18110 if (have66noF2noF3(pfx) && sz == 2) {
18111 modrm = getUChar(delta);
18112 if (!epartIsReg(modrm)) {
18113 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
18114 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18115 putXMMReg( gregOfRexRM(pfx,modrm),
18116 loadLE(Ity_V128, mkexpr(addr)) );
18117 DIP("movntdqa %s,%s\n", dis_buf,
18118 nameXMMReg(gregOfRexRM(pfx,modrm)));
18119 delta += alen;
18120 goto decode_success;
18123 break;
18125 case 0x2B:
18126 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
18127 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
18128 if (have66noF2noF3(pfx) && sz == 2) {
18130 modrm = getUChar(delta);
18132 IRTemp argL = newTemp(Ity_V128);
18133 IRTemp argR = newTemp(Ity_V128);
18135 if ( epartIsReg(modrm) ) {
18136 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18137 delta += 1;
18138 DIP( "packusdw %s,%s\n",
18139 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18140 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18141 } else {
18142 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18143 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18144 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18145 delta += alen;
18146 DIP( "packusdw %s,%s\n",
18147 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18150 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18152 putXMMReg( gregOfRexRM(pfx, modrm),
18153 binop( Iop_QNarrowBin32Sto16Ux8,
18154 mkexpr(argL), mkexpr(argR)) );
18156 goto decode_success;
18158 break;
18160 case 0x30:
18161 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
18162 Packed Move with Zero Extend from Byte to Word (XMM) */
18163 if (have66noF2noF3(pfx) && sz == 2) {
18164 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
18165 False/*!isAvx*/, True/*xIsZ*/ );
18166 goto decode_success;
18168 break;
18170 case 0x31:
18171 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
18172 Packed Move with Zero Extend from Byte to DWord (XMM) */
18173 if (have66noF2noF3(pfx) && sz == 2) {
18174 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
18175 False/*!isAvx*/, True/*xIsZ*/ );
18176 goto decode_success;
18178 break;
18180 case 0x32:
18181 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
18182 Packed Move with Zero Extend from Byte to QWord (XMM) */
18183 if (have66noF2noF3(pfx) && sz == 2) {
18184 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18185 goto decode_success;
18187 break;
18189 case 0x33:
18190 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
18191 Packed Move with Zero Extend from Word to DWord (XMM) */
18192 if (have66noF2noF3(pfx) && sz == 2) {
18193 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
18194 False/*!isAvx*/, True/*xIsZ*/ );
18195 goto decode_success;
18197 break;
18199 case 0x34:
18200 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
18201 Packed Move with Zero Extend from Word to QWord (XMM) */
18202 if (have66noF2noF3(pfx) && sz == 2) {
18203 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
18204 goto decode_success;
18206 break;
18208 case 0x35:
18209 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
18210 Packed Move with Zero Extend from DWord to QWord (XMM) */
18211 if (have66noF2noF3(pfx) && sz == 2) {
18212 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
18213 False/*!isAvx*/, True/*xIsZ*/ );
18214 goto decode_success;
18216 break;
18218 case 0x37:
18219 /* 66 0F 38 37 = PCMPGTQ
18220 64x2 comparison (signed, presumably; the Intel docs don't say :-)
18222 if (have66noF2noF3(pfx) && sz == 2) {
18223 /* FIXME: this needs an alignment check */
18224 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
18225 "pcmpgtq", Iop_CmpGT64Sx2, False );
18226 goto decode_success;
18228 break;
18230 case 0x38:
18231 case 0x3C:
18232 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
18233 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
18235 if (have66noF2noF3(pfx) && sz == 2) {
18236 /* FIXME: this needs an alignment check */
18237 Bool isMAX = opc == 0x3C;
18238 delta = dis_SSEint_E_to_G(
18239 vbi, pfx, delta,
18240 isMAX ? "pmaxsb" : "pminsb",
18241 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
18242 False
18244 goto decode_success;
18246 break;
18248 case 0x39:
18249 case 0x3D:
18250 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
18251 Minimum of Packed Signed Double Word Integers (XMM)
18252 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
18253 Maximum of Packed Signed Double Word Integers (XMM)
18255 if (have66noF2noF3(pfx) && sz == 2) {
18256 /* FIXME: this needs an alignment check */
18257 Bool isMAX = opc == 0x3D;
18258 delta = dis_SSEint_E_to_G(
18259 vbi, pfx, delta,
18260 isMAX ? "pmaxsd" : "pminsd",
18261 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
18262 False
18264 goto decode_success;
18266 break;
18268 case 0x3A:
18269 case 0x3E:
18270 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
18271 Minimum of Packed Unsigned Word Integers (XMM)
18272 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
18273 Maximum of Packed Unsigned Word Integers (XMM)
18275 if (have66noF2noF3(pfx) && sz == 2) {
18276 /* FIXME: this needs an alignment check */
18277 Bool isMAX = opc == 0x3E;
18278 delta = dis_SSEint_E_to_G(
18279 vbi, pfx, delta,
18280 isMAX ? "pmaxuw" : "pminuw",
18281 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
18282 False
18284 goto decode_success;
18286 break;
18288 case 0x3B:
18289 case 0x3F:
18290 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
18291 Minimum of Packed Unsigned Doubleword Integers (XMM)
18292 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
18293 Maximum of Packed Unsigned Doubleword Integers (XMM)
18295 if (have66noF2noF3(pfx) && sz == 2) {
18296 /* FIXME: this needs an alignment check */
18297 Bool isMAX = opc == 0x3F;
18298 delta = dis_SSEint_E_to_G(
18299 vbi, pfx, delta,
18300 isMAX ? "pmaxud" : "pminud",
18301 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
18302 False
18304 goto decode_success;
18306 break;
18308 case 0x40:
18309 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
18310 32x4 integer multiply from xmm2/m128 to xmm1 */
18311 if (have66noF2noF3(pfx) && sz == 2) {
18313 modrm = getUChar(delta);
18315 IRTemp argL = newTemp(Ity_V128);
18316 IRTemp argR = newTemp(Ity_V128);
18318 if ( epartIsReg(modrm) ) {
18319 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18320 delta += 1;
18321 DIP( "pmulld %s,%s\n",
18322 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18323 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18324 } else {
18325 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18326 gen_SIGNAL_if_not_16_aligned( vbi, addr );
18327 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
18328 delta += alen;
18329 DIP( "pmulld %s,%s\n",
18330 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18333 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
18335 putXMMReg( gregOfRexRM(pfx, modrm),
18336 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
18338 goto decode_success;
18340 break;
18342 case 0x41:
18343 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
18344 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
18345 if (have66noF2noF3(pfx) && sz == 2) {
18346 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
18347 goto decode_success;
18349 break;
18351 case 0xDC:
18352 case 0xDD:
18353 case 0xDE:
18354 case 0xDF:
18355 case 0xDB:
18356 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
18357 DD /r = AESENCLAST xmm1, xmm2/m128
18358 DE /r = AESDEC xmm1, xmm2/m128
18359 DF /r = AESDECLAST xmm1, xmm2/m128
18361 DB /r = AESIMC xmm1, xmm2/m128 */
18362 if (have66noF2noF3(pfx) && sz == 2) {
18363 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
18364 goto decode_success;
18366 break;
18368 case 0xF0:
18369 case 0xF1:
18370 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
18371 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
18372 The decoding on this is a bit unusual.
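      /* Roughly: in this decoder F0 is only accepted without a 66 prefix
         and always takes an 8-bit source, while F1 takes a 16/32/64-bit
         source depending on 66 and REX.W; the result is written back as
         a 32-bit GPR value, which zero-extends to 64 bits. */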
18374 if (haveF2noF3(pfx)
18375 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
18376 modrm = getUChar(delta);
18378 if (opc == 0xF0)
18379 sz = 1;
18380 else
18381 vassert(sz == 2 || sz == 4 || sz == 8);
18383 IRType tyE = szToITy(sz);
18384 IRTemp valE = newTemp(tyE);
18386 if (epartIsReg(modrm)) {
18387 assign(valE, getIRegE(sz, pfx, modrm));
18388 delta += 1;
18389 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
18390 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18391 } else {
18392 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
18393 assign(valE, loadLE(tyE, mkexpr(addr)));
18394 delta += alen;
18395 DIP("crc32b %s,%s\n", dis_buf,
18396 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
18399 /* Somewhat funny getting/putting of the crc32 value, in order
18400 to ensure that it turns into 64-bit gets and puts. However,
18401 mask off the upper 32 bits so as to not get memcheck false
18402 +ves around the helper call. */
18403 IRTemp valG0 = newTemp(Ity_I64);
18404 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
18405 mkU64(0xFFFFFFFF)));
18407 const HChar* nm = NULL;
18408 void* fn = NULL;
18409 switch (sz) {
18410 case 1: nm = "amd64g_calc_crc32b";
18411 fn = &amd64g_calc_crc32b; break;
18412 case 2: nm = "amd64g_calc_crc32w";
18413 fn = &amd64g_calc_crc32w; break;
18414 case 4: nm = "amd64g_calc_crc32l";
18415 fn = &amd64g_calc_crc32l; break;
18416 case 8: nm = "amd64g_calc_crc32q";
18417 fn = &amd64g_calc_crc32q; break;
18419 vassert(nm && fn);
18420 IRTemp valG1 = newTemp(Ity_I64);
18421 assign(valG1,
18422 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
18423 mkIRExprVec_2(mkexpr(valG0),
18424 widenUto64(mkexpr(valE)))));
18426 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
18427 goto decode_success;
18429 break;
18431 default:
18432 break;
18436 //decode_failure:
18437 *decode_OK = False;
18438 return deltaIN;
18440 decode_success:
18441 *decode_OK = True;
18442 return delta;
18446 /*------------------------------------------------------------*/
18447 /*--- ---*/
18448 /*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
18449 /*--- ---*/
18450 /*------------------------------------------------------------*/
18452 static Long dis_PEXTRW ( const VexAbiInfo* vbi, Prefix pfx,
18453 Long delta, Bool isAvx )
18455 IRTemp addr = IRTemp_INVALID;
18456 IRTemp t0 = IRTemp_INVALID;
18457 IRTemp t1 = IRTemp_INVALID;
18458 IRTemp t2 = IRTemp_INVALID;
18459 IRTemp t3 = IRTemp_INVALID;
18460 UChar modrm = getUChar(delta);
18461 Int alen = 0;
18462 HChar dis_buf[50];
18463 UInt rG = gregOfRexRM(pfx,modrm);
18464 Int imm8_20;
18465 IRTemp xmm_vec = newTemp(Ity_V128);
18466 IRTemp d16 = newTemp(Ity_I16);
18467 const HChar* mbV = isAvx ? "v" : "";
18469 vassert(0==getRexW(pfx)); /* ensured by caller */
18470 assign( xmm_vec, getXMMReg(rG) );
18471 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18473 if ( epartIsReg( modrm ) ) {
18474 imm8_20 = (Int)(getUChar(delta+1) & 7);
18475 } else {
18476 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18477 imm8_20 = (Int)(getUChar(delta+alen) & 7);
18480 switch (imm8_20) {
18481 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
18482 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
18483 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
18484 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
18485 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
18486 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
18487 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
18488 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
18489 default: vassert(0);
18492 if ( epartIsReg( modrm ) ) {
18493 UInt rE = eregOfRexRM(pfx,modrm);
18494 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
18495 delta += 1+1;
18496 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
18497 nameXMMReg( rG ), nameIReg32( rE ) );
18498 } else {
18499 storeLE( mkexpr(addr), mkexpr(d16) );
18500 delta += alen+1;
18501 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
18503 return delta;
18507 static Long dis_PEXTRD ( const VexAbiInfo* vbi, Prefix pfx,
18508 Long delta, Bool isAvx )
18510 IRTemp addr = IRTemp_INVALID;
18511 IRTemp t0 = IRTemp_INVALID;
18512 IRTemp t1 = IRTemp_INVALID;
18513 IRTemp t2 = IRTemp_INVALID;
18514 IRTemp t3 = IRTemp_INVALID;
18515 UChar modrm = 0;
18516 Int alen = 0;
18517 HChar dis_buf[50];
18519 Int imm8_10;
18520 IRTemp xmm_vec = newTemp(Ity_V128);
18521 IRTemp src_dword = newTemp(Ity_I32);
18522 const HChar* mbV = isAvx ? "v" : "";
18524 vassert(0==getRexW(pfx)); /* ensured by caller */
18525 modrm = getUChar(delta);
18526 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18527 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18529 if ( epartIsReg( modrm ) ) {
18530 imm8_10 = (Int)(getUChar(delta+1) & 3);
18531 } else {
18532 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18533 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18536 switch ( imm8_10 ) {
18537 case 0: assign( src_dword, mkexpr(t0) ); break;
18538 case 1: assign( src_dword, mkexpr(t1) ); break;
18539 case 2: assign( src_dword, mkexpr(t2) ); break;
18540 case 3: assign( src_dword, mkexpr(t3) ); break;
18541 default: vassert(0);
18544 if ( epartIsReg( modrm ) ) {
18545 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
18546 delta += 1+1;
18547 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
18548 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18549 nameIReg32( eregOfRexRM(pfx, modrm) ) );
18550 } else {
18551 storeLE( mkexpr(addr), mkexpr(src_dword) );
18552 delta += alen+1;
18553 DIP( "%spextrd $%d, %s,%s\n", mbV,
18554 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18556 return delta;
18560 static Long dis_PEXTRQ ( const VexAbiInfo* vbi, Prefix pfx,
18561 Long delta, Bool isAvx )
18563 IRTemp addr = IRTemp_INVALID;
18564 UChar modrm = 0;
18565 Int alen = 0;
18566 HChar dis_buf[50];
18568 Int imm8_0;
18569 IRTemp xmm_vec = newTemp(Ity_V128);
18570 IRTemp src_qword = newTemp(Ity_I64);
18571 const HChar* mbV = isAvx ? "v" : "";
18573 vassert(1==getRexW(pfx)); /* ensured by caller */
18574 modrm = getUChar(delta);
18575 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18577 if ( epartIsReg( modrm ) ) {
18578 imm8_0 = (Int)(getUChar(delta+1) & 1);
18579 } else {
18580 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18581 imm8_0 = (Int)(getUChar(delta+alen) & 1);
18584 switch ( imm8_0 ) {
18585 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
18586 break;
18587 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
18588 break;
18589 default: vassert(0);
18592 if ( epartIsReg( modrm ) ) {
18593 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18594 delta += 1+1;
18595 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18596 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18597 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18598 } else {
18599 storeLE( mkexpr(addr), mkexpr(src_qword) );
18600 delta += alen+1;
18601 DIP( "%spextrq $%d, %s,%s\n", mbV,
18602 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18604 return delta;
18607 static IRExpr* math_CTZ32(IRExpr *exp)
18609 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
18610 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18613 static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18614 Long delta, UChar opc, UChar imm,
18615 HChar dis_buf[])
18617 /* We only handle PCMPISTRI for now */
18618 vassert((opc & 0x03) == 0x03);
18619 /* And only an immediate byte of 0x38 or 0x3A */
18620 vassert((imm & ~0x02) == 0x38);
18622 /* FIXME: Is this correct when RegNoL == 16 ? */
18623 IRTemp argL = newTemp(Ity_V128);
18624 assign(argL, getXMMReg(regNoL));
18625 IRTemp argR = newTemp(Ity_V128);
18626 assign(argR, getXMMReg(regNoR));
18628 IRTemp zmaskL = newTemp(Ity_I32);
18629 assign(zmaskL, unop(Iop_16Uto32,
18630 unop(Iop_GetMSBs8x16,
18631 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
18632 IRTemp zmaskR = newTemp(Ity_I32);
18633 assign(zmaskR, unop(Iop_16Uto32,
18634 unop(Iop_GetMSBs8x16,
18635 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
18637 /* We want validL = ~(zmaskL | -zmaskL)
18639 But this formulation kills memcheck's validity tracking when any
18640 bits above the first "1" are invalid. So reformulate as:
18642 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
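   /* Worked example (illustrative): if the first zero byte of argL
      is in lane 2, then zmaskL = 0x0004, ctz(zmaskL) = 2 and
      validL = (1 << 2) - 1 = 0x0003, marking lanes 0 and 1 as valid.
      If argL has no zero byte at all, zmaskL = 0 and validL becomes
      0 - 1 = 0xFFFFFFFF, i.e. every lane valid; the excess upper
      bits are masked off further down. */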
18645 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
18647 /* Generate a bool expression which is zero iff the original is
18648 zero. Do this carefully so memcheck can propagate validity bits
18649 correctly.
18651 IRTemp zmaskL_zero = newTemp(Ity_I1);
18652 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
18654 IRTemp validL = newTemp(Ity_I32);
18655 assign(validL, binop(Iop_Sub32,
18656 IRExpr_ITE(mkexpr(zmaskL_zero),
18657 binop(Iop_Shl32, mkU32(1), ctzL),
18658 mkU32(0)),
18659 mkU32(1)));
18661 /* And similarly for validR. */
18662 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18663 IRTemp zmaskR_zero = newTemp(Ity_I1);
18664 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
18665 IRTemp validR = newTemp(Ity_I32);
18666 assign(validR, binop(Iop_Sub32,
18667 IRExpr_ITE(mkexpr(zmaskR_zero),
18668 binop(Iop_Shl32, mkU32(1), ctzR),
18669 mkU32(0)),
18670 mkU32(1)));
18672 /* Do the actual comparison. */
18673 IRExpr *boolResII = unop(Iop_16Uto32,
18674 unop(Iop_GetMSBs8x16,
18675 binop(Iop_CmpEQ8x16, mkexpr(argL),
18676 mkexpr(argR))));
18678 /* Compute boolResII & validL & validR (i.e., if both valid, use
18679 comparison result) */
18680 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18681 binop(Iop_And32,
18682 mkexpr(validL), mkexpr(validR)));
18684 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18685 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18686 mkexpr(validL), mkexpr(validR)));
18687 /* Otherwise, zero. */
18688 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18689 binop(Iop_Or32, intRes1_a, intRes1_b));
18691 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
18692 result. */
18693 IRTemp intRes2 = newTemp(Ity_I32);
18694 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18695 binop(Iop_Xor32, intRes1, mkexpr(validL))));
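   /* Effect of the XOR: in lanes where argL's byte is valid, IntRes1
      is inverted, so a valid-but-unequal pair now yields 1 and a
      valid-and-equal pair yields 0; lanes beyond the end of argL are
      left as computed above. */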
18697 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18698 of the msb. Since it is clear, we return the index of the
18699 lsb. */
18700 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18701 mkexpr(intRes2), mkU32(0x10000)));
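   /* ORing in 0x10000 guarantees the count-trailing-zeroes always has
      a set bit to find: if intRes2 is zero (nothing selected), the
      result is 16, the architected "no match" value for byte-sized
      elements. */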
18703 /* And that's our rcx. */
18704 putIReg32(R_RCX, newECX);
18706 /* Now for the condition codes... */
18708 /* C == 0 iff intRes2 == 0 */
18709 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18710 mkU32(0)),
18711 mkU32(1 << AMD64G_CC_SHIFT_C),
18712 mkU32(0));
18713 /* Z == 1 iff any in argL is 0 */
18714 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18715 mkU32(1 << AMD64G_CC_SHIFT_Z),
18716 mkU32(0));
18717 /* S == 1 iff any in argR is 0 */
18718 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18719 mkU32(1 << AMD64G_CC_SHIFT_S),
18720 mkU32(0));
18721 /* O == IntRes2[0] */
18722 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18723 mkU32(0x01)),
18724 mkU8(AMD64G_CC_SHIFT_O));
18726 /* Put them all together */
18727 IRTemp cc = newTemp(Ity_I64);
18728 assign(cc, widenUto64(binop(Iop_Or32,
18729 binop(Iop_Or32, c_bit, z_bit),
18730 binop(Iop_Or32, s_bit, o_bit))));
18731 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18732 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18733 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18734 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
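   /* With AMD64G_CC_OP_COPY the flags are taken directly from
      CC_DEP1, so placing C/Z/S/O at their AMD64G_CC_SHIFT_* positions
      gives the architected OSZACP result; A and P are simply left at
      zero. */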
18736 return delta;
18739 /* This can fail, in which case it returns the original (unchanged)
18740 delta. */
18741 static Long dis_PCMPxSTRx ( const VexAbiInfo* vbi, Prefix pfx,
18742 Long delta, Bool isAvx, UChar opc )
18744 Long delta0 = delta;
18745 UInt isISTRx = opc & 2;
18746 UInt isxSTRM = (opc & 1) ^ 1;
18747 UInt regNoL = 0;
18748 UInt regNoR = 0;
18749 UChar imm = 0;
18750 IRTemp addr = IRTemp_INVALID;
18751 Int alen = 0;
18752 HChar dis_buf[50];
18754 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18755 (which is clean). Since we can't do that, use a dirty helper to
18756 compute the results directly from the XMM regs in the guest
18757 state. That means for the memory case, we need to move the left
18758 operand into a pseudo-register (XMM16, let's call it). */
18759 UChar modrm = getUChar(delta);
18760 if (epartIsReg(modrm)) {
18761 regNoL = eregOfRexRM(pfx, modrm);
18762 regNoR = gregOfRexRM(pfx, modrm);
18763 imm = getUChar(delta+1);
18764 delta += 1+1;
18765 } else {
18766 regNoL = 16; /* use XMM16 as an intermediary */
18767 regNoR = gregOfRexRM(pfx, modrm);
18768 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18769 /* No alignment check; I guess that makes sense, given that
18770 these insns are for dealing with C style strings. */
18771 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18772 imm = getUChar(delta+alen);
18773 delta += alen+1;
18776 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18777 itself. */
18778 if (regNoL == 16) {
18779 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18780 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18781 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18782 } else {
18783 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18784 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18785 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18788 /* Handle special case(s). */
18789 if (imm == 0x3A && isISTRx && !isxSTRM) {
18790 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18791 opc, imm, dis_buf);
18794 /* Now we know the XMM reg numbers for the operands, and the
18795 immediate byte. Is it one we can actually handle? Throw out any
18796 cases for which the helper function has not been verified. */
18797 switch (imm) {
18798 case 0x00: case 0x02:
18799 case 0x08: case 0x0A: case 0x0C: case 0x0E:
18800 case 0x10: case 0x12: case 0x14:
18801 case 0x18: case 0x1A:
18802 case 0x30: case 0x34:
18803 case 0x38: case 0x3A:
18804 case 0x40: case 0x42: case 0x44: case 0x46:
18805 case 0x4A:
18806 case 0x62:
18807 case 0x70: case 0x72:
18808 break;
18809 // the 16-bit character versions of the above
18810 case 0x01: case 0x03:
18811 case 0x09: case 0x0B: case 0x0D:
18812 case 0x13:
18813 case 0x19: case 0x1B:
18814 case 0x39: case 0x3B:
18815 case 0x41: case 0x45:
18816 case 0x4B:
18817 break;
18818 default:
18819 return delta0; /*FAIL*/
18822 /* Who ya gonna call? Presumably not Ghostbusters. */
18823 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
18824 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
18826 /* Round up the arguments. Note that this is a kludge -- the use
18827 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18828 the host's word size is 64-bit. */
18829 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18830 UInt gstOffR = ymmGuestRegOffset(regNoR);
18832 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
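   /* The opcode and the immediate travel to the helper packed into a
      single word; e.g. PCMPISTRI (opc 0x63) with imm8 0x0C is passed
      as 0x630C. */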
18833 IRExpr* gstOffLe = mkU64(gstOffL);
18834 IRExpr* gstOffRe = mkU64(gstOffR);
18835 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18836 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18837 IRExpr** args
18838 = mkIRExprVec_6( IRExpr_GSPTR(),
18839 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
18841 IRTemp resT = newTemp(Ity_I64);
18842 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18843 /* It's not really a dirty call, but we can't use the clean helper
18844 mechanism here for the very lame reason that we can't pass 2 x
18845 V128s by value to a helper. Hence this roundabout scheme. */
18846 d->nFxState = 2;
18847 vex_bzero(&d->fxState, sizeof(d->fxState));
18848 d->fxState[0].fx = Ifx_Read;
18849 d->fxState[0].offset = gstOffL;
18850 d->fxState[0].size = sizeof(U128);
18851 d->fxState[1].fx = Ifx_Read;
18852 d->fxState[1].offset = gstOffR;
18853 d->fxState[1].size = sizeof(U128);
18854 if (isxSTRM) {
18855 /* Declare that the helper writes XMM0. */
18856 d->nFxState = 3;
18857 d->fxState[2].fx = Ifx_Write;
18858 d->fxState[2].offset = ymmGuestRegOffset(0);
18859 d->fxState[2].size = sizeof(U128);
18862 stmt( IRStmt_Dirty(d) );
18864 /* Now resT[15:0] holds the new OSZACP values, so the condition
18865 codes must be updated. And for a xSTRI case, resT[31:16] holds
18866 the new ECX value, so stash that too. */
18867 if (!isxSTRM) {
18868 putIReg64(R_RCX, binop(Iop_And64,
18869 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18870 mkU64(0xFFFF)));
18873 /* Zap the upper half of the dest reg as per AVX conventions. */
18874 if (isxSTRM && isAvx)
18875 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18877 stmt( IRStmt_Put(
18878 OFFB_CC_DEP1,
18879 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18881 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18882 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18883 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18885 return delta;
18889 static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18891 vassert(imm8 <= 15);
18893 // Create a V128 value which has the selected byte in the
18894 // specified lane, and zeroes everywhere else.
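   // Illustration: for imm8 == 10 the byte belongs in the high 64-bit
   // half at byte offset 10 & 7 == 2, so halfshift is u8 << 16 and
   // tmp128 is [halfshift : 0].  The mask computed below,
   // ~(1 << 10) == 0xFBFF, then keeps every byte of the original
   // vector except lane 10.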
18895 IRTemp tmp128 = newTemp(Ity_V128);
18896 IRTemp halfshift = newTemp(Ity_I64);
18897 assign(halfshift, binop(Iop_Shl64,
18898 unop(Iop_8Uto64, mkexpr(u8)),
18899 mkU8(8 * (imm8 & 7))));
18900 if (imm8 < 8) {
18901 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18902 } else {
18903 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18906 UShort mask = ~(1 << imm8);
18907 IRTemp res = newTemp(Ity_V128);
18908 assign( res, binop(Iop_OrV128,
18909 mkexpr(tmp128),
18910 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
18911 return res;
18915 static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18917 IRTemp z32 = newTemp(Ity_I32);
18918 assign(z32, mkU32(0));
18920 /* Surround u32 with zeroes as per imm, giving us something we can
18921 OR into a suitably masked-out v128.*/
18922 IRTemp withZs = newTemp(Ity_V128);
18923 UShort mask = 0;
18924 switch (imm8) {
18925 case 3: mask = 0x0FFF;
18926 assign(withZs, mkV128from32s(u32, z32, z32, z32));
18927 break;
18928 case 2: mask = 0xF0FF;
18929 assign(withZs, mkV128from32s(z32, u32, z32, z32));
18930 break;
18931 case 1: mask = 0xFF0F;
18932 assign(withZs, mkV128from32s(z32, z32, u32, z32));
18933 break;
18934 case 0: mask = 0xFFF0;
18935 assign(withZs, mkV128from32s(z32, z32, z32, u32));
18936 break;
18937 default: vassert(0);
18940 IRTemp res = newTemp(Ity_V128);
18941 assign(res, binop( Iop_OrV128,
18942 mkexpr(withZs),
18943 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18944 return res;
18948 static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18950 /* Surround u64 with zeroes as per imm, giving us something we can
18951 OR into a suitably masked-out v128.*/
18952 IRTemp withZs = newTemp(Ity_V128);
18953 UShort mask = 0;
18954 if (imm8 == 0) {
18955 mask = 0xFF00;
18956 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18957 } else {
18958 vassert(imm8 == 1);
18959 mask = 0x00FF;
18960 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18963 IRTemp res = newTemp(Ity_V128);
18964 assign( res, binop( Iop_OrV128,
18965 mkexpr(withZs),
18966 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18967 return res;
18971 static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18973 const IRTemp inval = IRTemp_INVALID;
18974 IRTemp dstDs[4] = { inval, inval, inval, inval };
18975 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
18977 vassert(imm8 <= 255);
18978 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
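   /* imm8 layout: bits 7:6 select the source dword (used by the
      caller in the register case), bits 5:4 select the destination
      dword overwritten here, and bits 3:0 are a zero-mask.  E.g.
      imm8 == 0x55 copies dword 1 of the source into dword 1 of the
      destination and then zeroes dwords 0 and 2. */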
18980 UInt imm8_zmask = (imm8 & 15);
18981 IRTemp zero_32 = newTemp(Ity_I32);
18982 assign( zero_32, mkU32(0) );
18983 IRTemp resV = newTemp(Ity_V128);
18984 assign( resV, mkV128from32s(
18985 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18986 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18987 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18988 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18989 return resV;
18993 static Long dis_PEXTRB_128_GtoE ( const VexAbiInfo* vbi, Prefix pfx,
18994 Long delta, Bool isAvx )
18996 IRTemp addr = IRTemp_INVALID;
18997 Int alen = 0;
18998 HChar dis_buf[50];
18999 IRTemp xmm_vec = newTemp(Ity_V128);
19000 IRTemp sel_lane = newTemp(Ity_I32);
19001 IRTemp shr_lane = newTemp(Ity_I32);
19002 const HChar* mbV = isAvx ? "v" : "";
19003 UChar modrm = getUChar(delta);
19004 IRTemp t3, t2, t1, t0;
19005 Int imm8;
19006 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
19007 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19008 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19010 if ( epartIsReg( modrm ) ) {
19011 imm8 = (Int)getUChar(delta+1);
19012 } else {
19013 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19014 imm8 = (Int)getUChar(delta+alen);
19016 switch ( (imm8 >> 2) & 3 ) {
19017 case 0: assign( sel_lane, mkexpr(t0) ); break;
19018 case 1: assign( sel_lane, mkexpr(t1) ); break;
19019 case 2: assign( sel_lane, mkexpr(t2) ); break;
19020 case 3: assign( sel_lane, mkexpr(t3) ); break;
19021 default: vassert(0);
19023 assign( shr_lane,
19024 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
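   /* E.g. imm8 == 6 selects dword 1 (6 >> 2) and shifts it right by
      (6 & 3) * 8 == 16 bits, leaving byte lane 6 of the vector in the
      low 8 bits of shr_lane. */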
19026 if ( epartIsReg( modrm ) ) {
19027 putIReg64( eregOfRexRM(pfx,modrm),
19028 unop( Iop_32Uto64,
19029 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
19030 delta += 1+1;
19031 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
19032 nameXMMReg( gregOfRexRM(pfx, modrm) ),
19033 nameIReg64( eregOfRexRM(pfx, modrm) ) );
19034 } else {
19035 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
19036 delta += alen+1;
19037 DIP( "%spextrb $%d,%s,%s\n", mbV,
19038 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
19041 return delta;
19045 static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19047 vassert(imm8 < 256);
19048 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
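   /* Bits 5:4 of imm8 choose which of the two products contribute to
      the sum, and bits 1:0 choose which result lanes receive it; e.g.
      imm8 == 0x31 sums both products and writes only the low lane. */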
19049 IRTemp and_vec = newTemp(Ity_V128);
19050 IRTemp sum_vec = newTemp(Ity_V128);
19051 IRTemp rm = newTemp(Ity_I32);
19052 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19053 assign( and_vec, binop( Iop_AndV128,
19054 triop( Iop_Mul64Fx2,
19055 mkexpr(rm),
19056 mkexpr(dst_vec), mkexpr(src_vec) ),
19057 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
19059 assign( sum_vec, binop( Iop_Add64F0x2,
19060 binop( Iop_InterleaveHI64x2,
19061 mkexpr(and_vec), mkexpr(and_vec) ),
19062 binop( Iop_InterleaveLO64x2,
19063 mkexpr(and_vec), mkexpr(and_vec) ) ) );
19064 IRTemp res = newTemp(Ity_V128);
19065 assign(res, binop( Iop_AndV128,
19066 binop( Iop_InterleaveLO64x2,
19067 mkexpr(sum_vec), mkexpr(sum_vec) ),
19068 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
19069 return res;
19073 static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
19075 vassert(imm8 < 256);
19076 IRTemp tmp_prod_vec = newTemp(Ity_V128);
19077 IRTemp prod_vec = newTemp(Ity_V128);
19078 IRTemp sum_vec = newTemp(Ity_V128);
19079 IRTemp rm = newTemp(Ity_I32);
19080 IRTemp v3, v2, v1, v0;
19081 v3 = v2 = v1 = v0 = IRTemp_INVALID;
19082 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
19083 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
19084 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
19085 0xFFFF };
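   /* As with DPPD: bits 7:4 of imm8 pick which of the four products
      enter the sum, and bits 3:0 pick which result lanes are written;
      e.g. imm8 == 0xF1 sums all four products and stores the result
      in lane 0 only. */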
19087 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
19088 assign( tmp_prod_vec,
19089 binop( Iop_AndV128,
19090 triop( Iop_Mul32Fx4,
19091 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
19092 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
19093 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
19094 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
19096 assign( sum_vec, triop( Iop_Add32Fx4,
19097 mkexpr(rm),
19098 binop( Iop_InterleaveHI32x4,
19099 mkexpr(prod_vec), mkexpr(prod_vec) ),
19100 binop( Iop_InterleaveLO32x4,
19101 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
19103 IRTemp res = newTemp(Ity_V128);
19104 assign( res, binop( Iop_AndV128,
19105 triop( Iop_Add32Fx4,
19106 mkexpr(rm),
19107 binop( Iop_InterleaveHI32x4,
19108 mkexpr(sum_vec), mkexpr(sum_vec) ),
19109 binop( Iop_InterleaveLO32x4,
19110 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
19111 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
19112 return res;
19116 static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
19118 /* Mask out bits of the operands we don't need. This isn't
19119 strictly necessary, but it does ensure Memcheck doesn't
19120 give us any false uninitialised value errors as a
19121 result. */
19122 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
19123 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
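   /* src_mask keeps the 4 source bytes starting at offset 4*(imm8&3);
      dst_mask keeps the 11 destination bytes starting at offset 0 or
      4 as selected by imm8 bit 2 -- which are the only bytes the
      computation below needs. */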
19125 IRTemp src_maskV = newTemp(Ity_V128);
19126 IRTemp dst_maskV = newTemp(Ity_V128);
19127 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
19128 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
19130 IRTemp src_masked = newTemp(Ity_V128);
19131 IRTemp dst_masked = newTemp(Ity_V128);
19132 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
19133 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
19135 /* Generate 4 64 bit values that we can hand to a clean helper */
19136 IRTemp sHi = newTemp(Ity_I64);
19137 IRTemp sLo = newTemp(Ity_I64);
19138 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
19139 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
19141 IRTemp dHi = newTemp(Ity_I64);
19142 IRTemp dLo = newTemp(Ity_I64);
19143 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
19144 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
19146 /* Compute halves of the result separately */
19147 IRTemp resHi = newTemp(Ity_I64);
19148 IRTemp resLo = newTemp(Ity_I64);
19150 IRExpr** argsHi
19151 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19152 mkU64( 0x80 | (imm8 & 7) ));
19153 IRExpr** argsLo
19154 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
19155 mkU64( 0x00 | (imm8 & 7) ));
19157 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19158 "amd64g_calc_mpsadbw",
19159 &amd64g_calc_mpsadbw, argsHi ));
19160 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
19161 "amd64g_calc_mpsadbw",
19162 &amd64g_calc_mpsadbw, argsLo ));
19164 IRTemp res = newTemp(Ity_V128);
19165 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
19166 return res;
19169 static Long dis_EXTRACTPS ( const VexAbiInfo* vbi, Prefix pfx,
19170 Long delta, Bool isAvx )
19172 IRTemp addr = IRTemp_INVALID;
19173 Int alen = 0;
19174 HChar dis_buf[50];
19175 UChar modrm = getUChar(delta);
19176 Int imm8_10;
19177 IRTemp xmm_vec = newTemp(Ity_V128);
19178 IRTemp src_dword = newTemp(Ity_I32);
19179 UInt rG = gregOfRexRM(pfx,modrm);
19180 IRTemp t3, t2, t1, t0;
19181 t3 = t2 = t1 = t0 = IRTemp_INVALID;
19183 assign( xmm_vec, getXMMReg( rG ) );
19184 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
19186 if ( epartIsReg( modrm ) ) {
19187 imm8_10 = (Int)(getUChar(delta+1) & 3);
19188 } else {
19189 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19190 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19193 switch ( imm8_10 ) {
19194 case 0: assign( src_dword, mkexpr(t0) ); break;
19195 case 1: assign( src_dword, mkexpr(t1) ); break;
19196 case 2: assign( src_dword, mkexpr(t2) ); break;
19197 case 3: assign( src_dword, mkexpr(t3) ); break;
19198 default: vassert(0);
19201 if ( epartIsReg( modrm ) ) {
19202 UInt rE = eregOfRexRM(pfx,modrm);
19203 putIReg32( rE, mkexpr(src_dword) );
19204 delta += 1+1;
19205 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19206 nameXMMReg( rG ), nameIReg32( rE ) );
19207 } else {
19208 storeLE( mkexpr(addr), mkexpr(src_dword) );
19209 delta += alen+1;
19210 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
19211 nameXMMReg( rG ), dis_buf );
19214 return delta;
19218 static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
19220 IRTemp t0 = newTemp(Ity_I64);
19221 IRTemp t1 = newTemp(Ity_I64);
19222 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
19223 mkexpr(dV)));
19224 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
19225 mkexpr(sV)));
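   /* Bit 0 of imm8 picks the low or high qword of the destination and
      bit 4 picks the low or high qword of the source; e.g. imm8 ==
      0x10 multiplies dst[63:0] by src[127:64]. */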
19227 IRTemp t2 = newTemp(Ity_I64);
19228 IRTemp t3 = newTemp(Ity_I64);
19230 IRExpr** args;
19232 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
19233 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19234 &amd64g_calculate_pclmul, args));
19235 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
19236 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
19237 &amd64g_calculate_pclmul, args));
19239 IRTemp res = newTemp(Ity_V128);
19240 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
19241 return res;
19245 __attribute__((noinline))
19246 static
19247 Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
19248 const VexAbiInfo* vbi,
19249 Prefix pfx, Int sz, Long deltaIN )
19251 IRTemp addr = IRTemp_INVALID;
19252 UChar modrm = 0;
19253 Int alen = 0;
19254 HChar dis_buf[50];
19256 *decode_OK = False;
19258 Long delta = deltaIN;
19259 UChar opc = getUChar(delta);
19260 delta++;
19261 switch (opc) {
19263 case 0x08:
19264 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
19265 if (have66noF2noF3(pfx) && sz == 2) {
19267 IRTemp src0 = newTemp(Ity_F32);
19268 IRTemp src1 = newTemp(Ity_F32);
19269 IRTemp src2 = newTemp(Ity_F32);
19270 IRTemp src3 = newTemp(Ity_F32);
19271 IRTemp res0 = newTemp(Ity_F32);
19272 IRTemp res1 = newTemp(Ity_F32);
19273 IRTemp res2 = newTemp(Ity_F32);
19274 IRTemp res3 = newTemp(Ity_F32);
19275 IRTemp rm = newTemp(Ity_I32);
19276 Int imm = 0;
19278 modrm = getUChar(delta);
19280 if (epartIsReg(modrm)) {
19281 assign( src0,
19282 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19283 assign( src1,
19284 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
19285 assign( src2,
19286 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
19287 assign( src3,
19288 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
19289 imm = getUChar(delta+1);
19290 if (imm & ~15) goto decode_failure;
19291 delta += 1+1;
19292 DIP( "roundps $%d,%s,%s\n",
19293 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19294 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19295 } else {
19296 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19297 gen_SIGNAL_if_not_16_aligned(vbi, addr);
19298 assign( src0, loadLE(Ity_F32,
19299 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19300 assign( src1, loadLE(Ity_F32,
19301 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
19302 assign( src2, loadLE(Ity_F32,
19303 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19304 assign( src3, loadLE(Ity_F32,
19305 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
19306 imm = getUChar(delta+alen);
19307 if (imm & ~15) goto decode_failure;
19308 delta += alen+1;
19309 DIP( "roundps $%d,%s,%s\n",
19310 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19313 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19314 that encoding is the same as the encoding for IRRoundingMode,
19315 we can use that value directly in the IR as a rounding
19316 mode. */
19317 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
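         /* Concretely: if imm bit 2 is set, the current MXCSR.RC is
            used (via get_sse_roundingmode); otherwise imm & 3 gives
            the mode directly -- 0 = nearest, 1 = toward -inf,
            2 = toward +inf, 3 = toward zero -- in both the Intel and
            the IRRoundingMode encodings. */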
19319 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
19320 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
19321 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
19322 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
19324 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19325 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19326 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
19327 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
19329 goto decode_success;
19331 break;
19333 case 0x09:
19334 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
19335 if (have66noF2noF3(pfx) && sz == 2) {
19337 IRTemp src0 = newTemp(Ity_F64);
19338 IRTemp src1 = newTemp(Ity_F64);
19339 IRTemp res0 = newTemp(Ity_F64);
19340 IRTemp res1 = newTemp(Ity_F64);
19341 IRTemp rm = newTemp(Ity_I32);
19342 Int imm = 0;
19344 modrm = getUChar(delta);
19346 if (epartIsReg(modrm)) {
19347 assign( src0,
19348 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
19349 assign( src1,
19350 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
19351 imm = getUChar(delta+1);
19352 if (imm & ~15) goto decode_failure;
19353 delta += 1+1;
19354 DIP( "roundpd $%d,%s,%s\n",
19355 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19356 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19357 } else {
19358 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19359 gen_SIGNAL_if_not_16_aligned(vbi, addr);
19360 assign( src0, loadLE(Ity_F64,
19361 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
19362 assign( src1, loadLE(Ity_F64,
19363 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
19364 imm = getUChar(delta+alen);
19365 if (imm & ~15) goto decode_failure;
19366 delta += alen+1;
19367 DIP( "roundpd $%d,%s,%s\n",
19368 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19371 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19372 that encoding is the same as the encoding for IRRoundingMode,
19373 we can use that value directly in the IR as a rounding
19374 mode. */
19375 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
19377 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
19378 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
19380 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
19381 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
19383 goto decode_success;
19385 break;
19387 case 0x0A:
19388 case 0x0B:
19389 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
19390 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
19392 if (have66noF2noF3(pfx) && sz == 2) {
19394 Bool isD = opc == 0x0B;
19395 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
19396 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
19397 Int imm = 0;
19399 modrm = getUChar(delta);
19401 if (epartIsReg(modrm)) {
19402 assign( src,
19403 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
19404 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
19405 imm = getUChar(delta+1);
19406 if (imm & ~15) goto decode_failure;
19407 delta += 1+1;
19408 DIP( "rounds%c $%d,%s,%s\n",
19409 isD ? 'd' : 's',
19410 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
19411 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19412 } else {
19413 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19414 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
19415 imm = getUChar(delta+alen);
19416 if (imm & ~15) goto decode_failure;
19417 delta += alen+1;
19418 DIP( "rounds%c $%d,%s,%s\n",
19419 isD ? 'd' : 's',
19420 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19423 /* (imm & 3) contains an Intel-encoded rounding mode. Because
19424 that encoding is the same as the encoding for IRRoundingMode,
19425 we can use that value directly in the IR as a rounding
19426 mode. */
19427 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
19428 (imm & 4) ? get_sse_roundingmode()
19429 : mkU32(imm & 3),
19430 mkexpr(src)) );
19432 if (isD)
19433 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19434 else
19435 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
19437 goto decode_success;
19439 break;
19441 case 0x0C:
19442 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
19443 Blend Packed Single Precision Floating-Point Values (XMM) */
19444 if (have66noF2noF3(pfx) && sz == 2) {
19446 Int imm8;
19447 IRTemp dst_vec = newTemp(Ity_V128);
19448 IRTemp src_vec = newTemp(Ity_V128);
19450 modrm = getUChar(delta);
19452 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19454 if ( epartIsReg( modrm ) ) {
19455 imm8 = (Int)getUChar(delta+1);
19456 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19457 delta += 1+1;
19458 DIP( "blendps $%d, %s,%s\n", imm8,
19459 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19460 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19461 } else {
19462 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19463 1/* imm8 is 1 byte after the amode */ );
19464 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19465 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19466 imm8 = (Int)getUChar(delta+alen);
19467 delta += alen+1;
19468 DIP( "blendps $%d, %s,%s\n",
19469 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19472 putXMMReg( gregOfRexRM(pfx, modrm),
19473 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
19474 goto decode_success;
19476 break;
19478 case 0x0D:
19479 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
19480 Blend Packed Double Precision Floating-Point Values (XMM) */
19481 if (have66noF2noF3(pfx) && sz == 2) {
19483 Int imm8;
19484 IRTemp dst_vec = newTemp(Ity_V128);
19485 IRTemp src_vec = newTemp(Ity_V128);
19487 modrm = getUChar(delta);
19488 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19490 if ( epartIsReg( modrm ) ) {
19491 imm8 = (Int)getUChar(delta+1);
19492 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19493 delta += 1+1;
19494 DIP( "blendpd $%d, %s,%s\n", imm8,
19495 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19496 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19497 } else {
19498 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19499 1/* imm8 is 1 byte after the amode */ );
19500 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19501 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19502 imm8 = (Int)getUChar(delta+alen);
19503 delta += alen+1;
19504 DIP( "blendpd $%d, %s,%s\n",
19505 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19508 putXMMReg( gregOfRexRM(pfx, modrm),
19509 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
19510 goto decode_success;
19512 break;
19514 case 0x0E:
19515 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
19516 Blend Packed Words (XMM) */
19517 if (have66noF2noF3(pfx) && sz == 2) {
19519 Int imm8;
19520 IRTemp dst_vec = newTemp(Ity_V128);
19521 IRTemp src_vec = newTemp(Ity_V128);
19523 modrm = getUChar(delta);
19525 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
19527 if ( epartIsReg( modrm ) ) {
19528 imm8 = (Int)getUChar(delta+1);
19529 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
19530 delta += 1+1;
19531 DIP( "pblendw $%d, %s,%s\n", imm8,
19532 nameXMMReg( eregOfRexRM(pfx, modrm) ),
19533 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19534 } else {
19535 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19536 1/* imm8 is 1 byte after the amode */ );
19537 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19538 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19539 imm8 = (Int)getUChar(delta+alen);
19540 delta += alen+1;
19541 DIP( "pblendw $%d, %s,%s\n",
19542 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
19545 putXMMReg( gregOfRexRM(pfx, modrm),
19546 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
19547 goto decode_success;
19549 break;
19551 case 0x14:
19552 /* 66 0F 3A 14 /r ib = PEXTRB reg/m8, xmm, imm8
19553 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
19554 (XMM) */
19555 if (have66noF2noF3(pfx) && sz == 2) {
19556 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
19557 goto decode_success;
19559 break;
19561 case 0x15:
19562 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
19563 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
19564 (XMM) */
19565 if (have66noF2noF3(pfx) && sz == 2) {
19566 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
19567 goto decode_success;
19569 break;
19571 case 0x16:
19572 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
19573 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
19574 Note that this insn has the same opcodes as PEXTRQ, but
19575 here the REX.W bit is _not_ present */
19576 if (have66noF2noF3(pfx)
19577 && sz == 2 /* REX.W is _not_ present */) {
19578 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
19579 goto decode_success;
19581 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
19582 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
19583 Note that this insn has the same opcodes as PEXTRD, but
19584 here the REX.W bit is present */
19585 if (have66noF2noF3(pfx)
19586 && sz == 8 /* REX.W is present */) {
19587 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
19588 goto decode_success;
19590 break;
19592 case 0x17:
19593 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
19594 float from xmm reg and store in gen.reg or mem. This is
19595 identical to PEXTRD, except that REX.W appears to be ignored.
19597 if (have66noF2noF3(pfx)
19598 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
19599 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
19600 goto decode_success;
19602 break;
19604 case 0x20:
19605 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19606 Extract byte from r32/m8 and insert into xmm1 */
19607 if (have66noF2noF3(pfx) && sz == 2) {
19608 Int imm8;
19609 IRTemp new8 = newTemp(Ity_I8);
19610 modrm = getUChar(delta);
19611 UInt rG = gregOfRexRM(pfx, modrm);
19612 if ( epartIsReg( modrm ) ) {
19613 UInt rE = eregOfRexRM(pfx,modrm);
19614 imm8 = (Int)(getUChar(delta+1) & 0xF);
19615 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
19616 delta += 1+1;
19617 DIP( "pinsrb $%d,%s,%s\n", imm8,
19618 nameIReg32(rE), nameXMMReg(rG) );
19619 } else {
19620 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19621 imm8 = (Int)(getUChar(delta+alen) & 0xF);
19622 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
19623 delta += alen+1;
19624 DIP( "pinsrb $%d,%s,%s\n",
19625 imm8, dis_buf, nameXMMReg(rG) );
19627 IRTemp src_vec = newTemp(Ity_V128);
19628 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19629 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19630 putXMMReg( rG, mkexpr(res) );
19631 goto decode_success;
19633 break;
19635 case 0x21:
19636 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
19637 Insert Packed Single Precision Floating-Point Value (XMM) */
19638 if (have66noF2noF3(pfx) && sz == 2) {
19639 UInt imm8;
19640 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19641 const IRTemp inval = IRTemp_INVALID;
19643 modrm = getUChar(delta);
19644 UInt rG = gregOfRexRM(pfx, modrm);
19646 if ( epartIsReg( modrm ) ) {
19647 UInt rE = eregOfRexRM(pfx, modrm);
19648 IRTemp vE = newTemp(Ity_V128);
19649 assign( vE, getXMMReg(rE) );
19650 IRTemp dsE[4] = { inval, inval, inval, inval };
19651 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
19652 imm8 = getUChar(delta+1);
19653 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
19654 delta += 1+1;
19655 DIP( "insertps $%u, %s,%s\n",
19656 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19657 } else {
19658 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19659 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19660 imm8 = getUChar(delta+alen);
19661 delta += alen+1;
19662 DIP( "insertps $%u, %s,%s\n",
19663 imm8, dis_buf, nameXMMReg(rG) );
19666 IRTemp vG = newTemp(Ity_V128);
19667 assign( vG, getXMMReg(rG) );
19669 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
19670 goto decode_success;
19672 break;
19674 case 0x22:
19675 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19676 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19677 if (have66noF2noF3(pfx)
19678 && sz == 2 /* REX.W is NOT present */) {
19679 Int imm8_10;
19680 IRTemp src_u32 = newTemp(Ity_I32);
19681 modrm = getUChar(delta);
19682 UInt rG = gregOfRexRM(pfx, modrm);
19684 if ( epartIsReg( modrm ) ) {
19685 UInt rE = eregOfRexRM(pfx,modrm);
19686 imm8_10 = (Int)(getUChar(delta+1) & 3);
19687 assign( src_u32, getIReg32( rE ) );
19688 delta += 1+1;
19689 DIP( "pinsrd $%d, %s,%s\n",
19690 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
19691 } else {
19692 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19693 imm8_10 = (Int)(getUChar(delta+alen) & 3);
19694 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
19695 delta += alen+1;
19696 DIP( "pinsrd $%d, %s,%s\n",
19697 imm8_10, dis_buf, nameXMMReg(rG) );
19700 IRTemp src_vec = newTemp(Ity_V128);
19701 assign(src_vec, getXMMReg( rG ));
19702 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19703 putXMMReg( rG, mkexpr(res_vec) );
19704 goto decode_success;
19706 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19707 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19708 if (have66noF2noF3(pfx)
19709 && sz == 8 /* REX.W is present */) {
19710 Int imm8_0;
19711 IRTemp src_u64 = newTemp(Ity_I64);
19712 modrm = getUChar(delta);
19713 UInt rG = gregOfRexRM(pfx, modrm);
19715 if ( epartIsReg( modrm ) ) {
19716 UInt rE = eregOfRexRM(pfx,modrm);
19717 imm8_0 = (Int)(getUChar(delta+1) & 1);
19718 assign( src_u64, getIReg64( rE ) );
19719 delta += 1+1;
19720 DIP( "pinsrq $%d, %s,%s\n",
19721 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
19722 } else {
19723 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19724 imm8_0 = (Int)(getUChar(delta+alen) & 1);
19725 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
19726 delta += alen+1;
19727 DIP( "pinsrq $%d, %s,%s\n",
19728 imm8_0, dis_buf, nameXMMReg(rG) );
19731 IRTemp src_vec = newTemp(Ity_V128);
19732 assign(src_vec, getXMMReg( rG ));
19733 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19734 putXMMReg( rG, mkexpr(res_vec) );
19735 goto decode_success;
19737 break;
19739 case 0x40:
19740 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19741 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19742 if (have66noF2noF3(pfx) && sz == 2) {
19743 modrm = getUChar(delta);
19744 Int imm8;
19745 IRTemp src_vec = newTemp(Ity_V128);
19746 IRTemp dst_vec = newTemp(Ity_V128);
19747 UInt rG = gregOfRexRM(pfx, modrm);
19748 assign( dst_vec, getXMMReg( rG ) );
19749 if ( epartIsReg( modrm ) ) {
19750 UInt rE = eregOfRexRM(pfx, modrm);
19751 imm8 = (Int)getUChar(delta+1);
19752 assign( src_vec, getXMMReg(rE) );
19753 delta += 1+1;
19754 DIP( "dpps $%d, %s,%s\n",
19755 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19756 } else {
19757 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19758 1/* imm8 is 1 byte after the amode */ );
19759 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19760 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19761 imm8 = (Int)getUChar(delta+alen);
19762 delta += alen+1;
19763 DIP( "dpps $%d, %s,%s\n",
19764 imm8, dis_buf, nameXMMReg(rG) );
19766 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19767 putXMMReg( rG, mkexpr(res) );
19768 goto decode_success;
19770 break;
19772 case 0x41:
19773 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19774 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19775 if (have66noF2noF3(pfx) && sz == 2) {
19776 modrm = getUChar(delta);
19777 Int imm8;
19778 IRTemp src_vec = newTemp(Ity_V128);
19779 IRTemp dst_vec = newTemp(Ity_V128);
19780 UInt rG = gregOfRexRM(pfx, modrm);
19781 assign( dst_vec, getXMMReg( rG ) );
19782 if ( epartIsReg( modrm ) ) {
19783 UInt rE = eregOfRexRM(pfx, modrm);
19784 imm8 = (Int)getUChar(delta+1);
19785 assign( src_vec, getXMMReg(rE) );
19786 delta += 1+1;
19787 DIP( "dppd $%d, %s,%s\n",
19788 imm8, nameXMMReg(rE), nameXMMReg(rG) );
19789 } else {
19790 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19791 1/* imm8 is 1 byte after the amode */ );
19792 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19793 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19794 imm8 = (Int)getUChar(delta+alen);
19795 delta += alen+1;
19796 DIP( "dppd $%d, %s,%s\n",
19797 imm8, dis_buf, nameXMMReg(rG) );
19799 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19800 putXMMReg( rG, mkexpr(res) );
19801 goto decode_success;
19803 break;
19805 case 0x42:
19806 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19807 Multiple Packed Sums of Absolute Difference (XMM) */
19808 if (have66noF2noF3(pfx) && sz == 2) {
19809 Int imm8;
19810 IRTemp src_vec = newTemp(Ity_V128);
19811 IRTemp dst_vec = newTemp(Ity_V128);
19812 modrm = getUChar(delta);
19813 UInt rG = gregOfRexRM(pfx, modrm);
19815 assign( dst_vec, getXMMReg(rG) );
19817 if ( epartIsReg( modrm ) ) {
19818 UInt rE = eregOfRexRM(pfx, modrm);
19820 imm8 = (Int)getUChar(delta+1);
19821 assign( src_vec, getXMMReg(rE) );
19822 delta += 1+1;
19823 DIP( "mpsadbw $%d, %s,%s\n", imm8,
19824 nameXMMReg(rE), nameXMMReg(rG) );
19825 } else {
19826 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19827 1/* imm8 is 1 byte after the amode */ );
19828 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19829 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19830 imm8 = (Int)getUChar(delta+alen);
19831 delta += alen+1;
19832 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
19835 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
19836 goto decode_success;
19838 break;
19840 case 0x44:
19841 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19842 * Carry-less multiplication of selected XMM quadwords into XMM
19843 * registers (a.k.a. multiplication of polynomials over GF(2))
19845 if (have66noF2noF3(pfx) && sz == 2) {
19847 Int imm8;
19848 IRTemp svec = newTemp(Ity_V128);
19849 IRTemp dvec = newTemp(Ity_V128);
19850 modrm = getUChar(delta);
19851 UInt rG = gregOfRexRM(pfx, modrm);
19853 assign( dvec, getXMMReg(rG) );
19855 if ( epartIsReg( modrm ) ) {
19856 UInt rE = eregOfRexRM(pfx, modrm);
19857 imm8 = (Int)getUChar(delta+1);
19858 assign( svec, getXMMReg(rE) );
19859 delta += 1+1;
19860 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
19861 nameXMMReg(rE), nameXMMReg(rG) );
19862 } else {
19863 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19864 1/* imm8 is 1 byte after the amode */ );
19865 gen_SIGNAL_if_not_16_aligned( vbi, addr );
19866 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19867 imm8 = (Int)getUChar(delta+alen);
19868 delta += alen+1;
19869 DIP( "pclmulqdq $%d, %s,%s\n",
19870 imm8, dis_buf, nameXMMReg(rG) );
19873 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
19874 goto decode_success;
19876 break;
19878 case 0x60:
19879 case 0x61:
19880 case 0x62:
19881 case 0x63:
19882 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19883 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19884 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19885 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19886 (selected special cases that actually occur in glibc,
19887 not by any means a complete implementation.)
19889 if (have66noF2noF3(pfx) && sz == 2) {
19890 Long delta0 = delta;
19891 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19892 if (delta > delta0) goto decode_success;
19893 /* else fall through; dis_PCMPxSTRx failed to decode it */
19895 break;
19897 case 0xDF:
19898 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19899 if (have66noF2noF3(pfx) && sz == 2) {
19900 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
19901 goto decode_success;
19903 break;
19905 default:
19906 break;
19910 decode_failure:
19911 *decode_OK = False;
19912 return deltaIN;
19914 decode_success:
19915 *decode_OK = True;
19916 return delta;
19920 /*------------------------------------------------------------*/
19921 /*--- ---*/
19922 /*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19923 /*--- ---*/
19924 /*------------------------------------------------------------*/
19926 __attribute__((noinline))
19927 static
19928 Long dis_ESC_NONE (
19929 /*MB_OUT*/DisResult* dres,
19930 /*MB_OUT*/Bool* expect_CAS,
19931 const VexArchInfo* archinfo,
19932 const VexAbiInfo* vbi,
19933 Prefix pfx, Int sz, Long deltaIN
19936 Long d64 = 0;
19937 UChar abyte = 0;
19938 IRTemp addr = IRTemp_INVALID;
19939 IRTemp t1 = IRTemp_INVALID;
19940 IRTemp t2 = IRTemp_INVALID;
19941 IRTemp t3 = IRTemp_INVALID;
19942 IRTemp t4 = IRTemp_INVALID;
19943 IRTemp t5 = IRTemp_INVALID;
19944 IRType ty = Ity_INVALID;
19945 UChar modrm = 0;
19946 Int am_sz = 0;
19947 Int d_sz = 0;
19948 Int alen = 0;
19949 HChar dis_buf[50];
19951 Long delta = deltaIN;
19952 UChar opc = getUChar(delta); delta++;
19954 /* delta now points at the modrm byte. In most of the cases that
19955 follow, neither the F2 nor F3 prefixes are allowed. However,
19956 for some basic arithmetic operations we have to allow F2/XACQ or
19957 F3/XREL in the case where the destination is memory and the LOCK
19958 prefix is also present. Do this check by looking at the modrm
19959 byte but not advancing delta over it. */
19960 /* By default, F2 and F3 are not allowed, so let's start off with
19961 that setting. */
19962 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19963 { UChar tmp_modrm = getUChar(delta);
19964 switch (opc) {
19965 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19966 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19967 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19968 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19969 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19970 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19971 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19972 if (!epartIsReg(tmp_modrm)
19973 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19974 /* dst is mem, and we have F2 or F3 but not both */
19975 validF2orF3 = True;
19977 break;
19978 default:
19979 break;
19983 /* Now, in the switch below, for the opc values examined by the
19984 switch above, use validF2orF3 rather than looking at pfx
19985 directly. */
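   /* For example "xacquire lock addl %eax,(%rbx)" carries an F2
      prefix together with LOCK and a memory destination, so it is
      accepted via validF2orF3 above, whereas a bare F2 on a
      register-destination ADD is still rejected. */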
19986 switch (opc) {
19988 case 0x00: /* ADD Gb,Eb */
19989 if (!validF2orF3) goto decode_failure;
19990 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
19991 return delta;
19992 case 0x01: /* ADD Gv,Ev */
19993 if (!validF2orF3) goto decode_failure;
19994 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
19995 return delta;
19997 case 0x02: /* ADD Eb,Gb */
19998 if (haveF2orF3(pfx)) goto decode_failure;
19999 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, 1, delta, "add" );
20000 return delta;
20001 case 0x03: /* ADD Ev,Gv */
20002 if (haveF2orF3(pfx)) goto decode_failure;
20003 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagNone, True, sz, delta, "add" );
20004 return delta;
20006 case 0x04: /* ADD Ib, AL */
20007 if (haveF2orF3(pfx)) goto decode_failure;
20008 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
20009 return delta;
20010 case 0x05: /* ADD Iv, eAX */
20011 if (haveF2orF3(pfx)) goto decode_failure;
20012 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
20013 return delta;
20015 case 0x08: /* OR Gb,Eb */
20016 if (!validF2orF3) goto decode_failure;
20017 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20018 return delta;
20019 case 0x09: /* OR Gv,Ev */
20020 if (!validF2orF3) goto decode_failure;
20021 delta = dis_op2_G_E ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20022 return delta;
20024 case 0x0A: /* OR Eb,Gb */
20025 if (haveF2orF3(pfx)) goto decode_failure;
20026 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, 1, delta, "or" );
20027 return delta;
20028 case 0x0B: /* OR Ev,Gv */
20029 if (haveF2orF3(pfx)) goto decode_failure;
20030 delta = dis_op2_E_G ( vbi, pfx, Iop_Or8, WithFlagNone, True, sz, delta, "or" );
20031 return delta;
20033 case 0x0C: /* OR Ib, AL */
20034 if (haveF2orF3(pfx)) goto decode_failure;
20035 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
20036 return delta;
20037 case 0x0D: /* OR Iv, eAX */
20038 if (haveF2orF3(pfx)) goto decode_failure;
20039 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
20040 return delta;
20042 case 0x10: /* ADC Gb,Eb */
20043 if (!validF2orF3) goto decode_failure;
20044 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20045 return delta;
20046 case 0x11: /* ADC Gv,Ev */
20047 if (!validF2orF3) goto decode_failure;
20048 delta = dis_op2_G_E ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20049 return delta;
20051 case 0x12: /* ADC Eb,Gb */
20052 if (haveF2orF3(pfx)) goto decode_failure;
20053 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, 1, delta, "adc" );
20054 return delta;
20055 case 0x13: /* ADC Ev,Gv */
20056 if (haveF2orF3(pfx)) goto decode_failure;
20057 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarry, True, sz, delta, "adc" );
20058 return delta;
20060 case 0x14: /* ADC Ib, AL */
20061 if (haveF2orF3(pfx)) goto decode_failure;
20062 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
20063 return delta;
20064 case 0x15: /* ADC Iv, eAX */
20065 if (haveF2orF3(pfx)) goto decode_failure;
20066 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
20067 return delta;
20069 case 0x18: /* SBB Gb,Eb */
20070 if (!validF2orF3) goto decode_failure;
20071 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20072 return delta;
20073 case 0x19: /* SBB Gv,Ev */
20074 if (!validF2orF3) goto decode_failure;
20075 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20076 return delta;
20078 case 0x1A: /* SBB Eb,Gb */
20079 if (haveF2orF3(pfx)) goto decode_failure;
20080 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, 1, delta, "sbb" );
20081 return delta;
20082 case 0x1B: /* SBB Ev,Gv */
20083 if (haveF2orF3(pfx)) goto decode_failure;
20084 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagCarry, True, sz, delta, "sbb" );
20085 return delta;
20087 case 0x1C: /* SBB Ib, AL */
20088 if (haveF2orF3(pfx)) goto decode_failure;
20089 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
20090 return delta;
20091 case 0x1D: /* SBB Iv, eAX */
20092 if (haveF2orF3(pfx)) goto decode_failure;
20093 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
20094 return delta;
20096 case 0x20: /* AND Gb,Eb */
20097 if (!validF2orF3) goto decode_failure;
20098 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20099 return delta;
20100 case 0x21: /* AND Gv,Ev */
20101 if (!validF2orF3) goto decode_failure;
20102 delta = dis_op2_G_E ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20103 return delta;
20105 case 0x22: /* AND Eb,Gb */
20106 if (haveF2orF3(pfx)) goto decode_failure;
20107 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, 1, delta, "and" );
20108 return delta;
20109 case 0x23: /* AND Ev,Gv */
20110 if (haveF2orF3(pfx)) goto decode_failure;
20111 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, True, sz, delta, "and" );
20112 return delta;
20114 case 0x24: /* AND Ib, AL */
20115 if (haveF2orF3(pfx)) goto decode_failure;
20116 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
20117 return delta;
20118 case 0x25: /* AND Iv, eAX */
20119 if (haveF2orF3(pfx)) goto decode_failure;
20120 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
20121 return delta;
20123 case 0x28: /* SUB Gb,Eb */
20124 if (!validF2orF3) goto decode_failure;
20125 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20126 return delta;
20127 case 0x29: /* SUB Gv,Ev */
20128 if (!validF2orF3) goto decode_failure;
20129 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20130 return delta;
20132 case 0x2A: /* SUB Eb,Gb */
20133 if (haveF2orF3(pfx)) goto decode_failure;
20134 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, 1, delta, "sub" );
20135 return delta;
20136 case 0x2B: /* SUB Ev,Gv */
20137 if (haveF2orF3(pfx)) goto decode_failure;
20138 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, True, sz, delta, "sub" );
20139 return delta;
20141 case 0x2C: /* SUB Ib, AL */
20142 if (haveF2orF3(pfx)) goto decode_failure;
20143 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
20144 return delta;
20145 case 0x2D: /* SUB Iv, eAX */
20146 if (haveF2orF3(pfx)) goto decode_failure;
20147 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
20148 return delta;
20150 case 0x30: /* XOR Gb,Eb */
20151 if (!validF2orF3) goto decode_failure;
20152 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20153 return delta;
20154 case 0x31: /* XOR Gv,Ev */
20155 if (!validF2orF3) goto decode_failure;
20156 delta = dis_op2_G_E ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20157 return delta;
20159 case 0x32: /* XOR Eb,Gb */
20160 if (haveF2orF3(pfx)) goto decode_failure;
20161 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, 1, delta, "xor" );
20162 return delta;
20163 case 0x33: /* XOR Ev,Gv */
20164 if (haveF2orF3(pfx)) goto decode_failure;
20165 delta = dis_op2_E_G ( vbi, pfx, Iop_Xor8, WithFlagNone, True, sz, delta, "xor" );
20166 return delta;
20168 case 0x34: /* XOR Ib, AL */
20169 if (haveF2orF3(pfx)) goto decode_failure;
20170 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
20171 return delta;
20172 case 0x35: /* XOR Iv, eAX */
20173 if (haveF2orF3(pfx)) goto decode_failure;
20174 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
20175 return delta;
20177 case 0x38: /* CMP Gb,Eb */
20178 if (haveF2orF3(pfx)) goto decode_failure;
20179 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20180 return delta;
20181 case 0x39: /* CMP Gv,Ev */
20182 if (haveF2orF3(pfx)) goto decode_failure;
20183 delta = dis_op2_G_E ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20184 return delta;
20186 case 0x3A: /* CMP Eb,Gb */
20187 if (haveF2orF3(pfx)) goto decode_failure;
20188 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, 1, delta, "cmp" );
20189 return delta;
20190 case 0x3B: /* CMP Ev,Gv */
20191 if (haveF2orF3(pfx)) goto decode_failure;
20192 delta = dis_op2_E_G ( vbi, pfx, Iop_Sub8, WithFlagNone, False, sz, delta, "cmp" );
20193 return delta;
20195 case 0x3C: /* CMP Ib, AL */
20196 if (haveF2orF3(pfx)) goto decode_failure;
20197 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
20198 return delta;
20199 case 0x3D: /* CMP Iv, eAX */
20200 if (haveF2orF3(pfx)) goto decode_failure;
20201 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
20202 return delta;
20204 case 0x50: /* PUSH eAX */
20205 case 0x51: /* PUSH eCX */
20206 case 0x52: /* PUSH eDX */
20207 case 0x53: /* PUSH eBX */
20208 case 0x55: /* PUSH eBP */
20209 case 0x56: /* PUSH eSI */
20210 case 0x57: /* PUSH eDI */
20211 case 0x54: /* PUSH eSP */
20212 /* This is the Right Way, in that the value to be pushed is
20213 established before %rsp is changed, so that pushq %rsp
20214 correctly pushes the old value. */
20215 if (haveF2orF3(pfx)) goto decode_failure;
20216 vassert(sz == 2 || sz == 4 || sz == 8);
20217 if (sz == 4)
20218 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
20219 ty = sz==2 ? Ity_I16 : Ity_I64;
20220 t1 = newTemp(ty);
20221 t2 = newTemp(Ity_I64);
20222 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
20223 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
20224 putIReg64(R_RSP, mkexpr(t2) );
20225 storeLE(mkexpr(t2),mkexpr(t1));
20226 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
20227 return delta;
20229 case 0x58: /* POP eAX */
20230 case 0x59: /* POP eCX */
20231 case 0x5A: /* POP eDX */
20232 case 0x5B: /* POP eBX */
20233 case 0x5D: /* POP eBP */
20234 case 0x5E: /* POP eSI */
20235 case 0x5F: /* POP eDI */
20236 case 0x5C: /* POP eSP */
20237 if (haveF2orF3(pfx)) goto decode_failure;
20238 vassert(sz == 2 || sz == 4 || sz == 8);
20239 if (sz == 4)
20240 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
20241 t1 = newTemp(szToITy(sz));
20242 t2 = newTemp(Ity_I64);
20243 assign(t2, getIReg64(R_RSP));
20244 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
20245 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20246 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
20247 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
20248 return delta;
20250 case 0x63: /* MOVSX */
20251 if (haveF2orF3(pfx)) goto decode_failure;
20252 if (haveREX(pfx) && 1==getRexW(pfx)) {
20253 vassert(sz == 8);
20254 /* movsx r/m32 to r64 */
20255 modrm = getUChar(delta);
20256 if (epartIsReg(modrm)) {
20257 delta++;
20258 putIRegG(8, pfx, modrm,
20259 unop(Iop_32Sto64,
20260 getIRegE(4, pfx, modrm)));
20261 DIP("movslq %s,%s\n",
20262 nameIRegE(4, pfx, modrm),
20263 nameIRegG(8, pfx, modrm));
20264 return delta;
20265 } else {
20266 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20267 delta += alen;
20268 putIRegG(8, pfx, modrm,
20269 unop(Iop_32Sto64,
20270 loadLE(Ity_I32, mkexpr(addr))));
20271 DIP("movslq %s,%s\n", dis_buf,
20272 nameIRegG(8, pfx, modrm));
20273 return delta;
20275 } else {
20276 goto decode_failure;
20279 case 0x68: /* PUSH Iv */
20280 if (haveF2orF3(pfx)) goto decode_failure;
20281 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20282 if (sz == 4) sz = 8;
20283 d64 = getSDisp(imin(4,sz),delta);
20284 delta += imin(4,sz);
20285 goto do_push_I;
20287 case 0x69: /* IMUL Iv, Ev, Gv */
20288 if (haveF2orF3(pfx)) goto decode_failure;
20289 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
20290 return delta;
20292 case 0x6A: /* PUSH Ib, sign-extended to sz */
20293 if (haveF2orF3(pfx)) goto decode_failure;
20294 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
20295 if (sz == 4) sz = 8;
20296 d64 = getSDisp8(delta); delta += 1;
20297 goto do_push_I;
20298 do_push_I:
20299 ty = szToITy(sz);
20300 t1 = newTemp(Ity_I64);
20301 t2 = newTemp(ty);
20302 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20303 putIReg64(R_RSP, mkexpr(t1) );
20304 /* stop mkU16 asserting if d64 is a negative 16-bit number
20305 (bug #132813) */
20306 if (ty == Ity_I16)
20307 d64 &= 0xFFFF;
20308 storeLE( mkexpr(t1), mkU(ty,d64) );
20309 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
20310 return delta;
20312 case 0x6B: /* IMUL Ib, Ev, Gv */
20313 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
20314 return delta;
20316 case 0x70: /* JOb (jump overflow) */
20317 case 0x71: /* JNOb (jump no overflow) */
20318 case 0x72: /* JBb/JNAEb (jump below) */
20319 case 0x73: /* JNBb/JAEb (jump not below) */
20320 case 0x74: /* JZb/JEb (jump zero) */
20321 case 0x75: /* JNZb/JNEb (jump not zero) */
20322 case 0x76: /* JBEb/JNAb (jump below or equal) */
20323 case 0x77: /* JNBEb/JAb (jump not below or equal) */
20324 case 0x78: /* JSb (jump negative) */
20325 case 0x79: /* JNSb (jump not negative) */
20326 case 0x7A: /* JP (jump parity even) */
20327 case 0x7B: /* JNP/JPO (jump parity odd) */
20328 case 0x7C: /* JLb/JNGEb (jump less) */
20329 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
20330 case 0x7E: /* JLEb/JNGb (jump less or equal) */
20331 case 0x7F: { /* JGb/JNLEb (jump greater) */
20332 Long jmpDelta;
20333 const HChar* comment = "";
20334 if (haveF3(pfx)) goto decode_failure;
20335 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20336 jmpDelta = getSDisp8(delta);
20337 vassert(-128 <= jmpDelta && jmpDelta < 128);
20338 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
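/* Worked example, with made-up numbers: if guest_RIP_bbstart+delta+1
   (the RIP of the next instruction) were 0x401005 and jmpDelta were -2,
   then d64 would be 0x401003, i.e. next-insn RIP plus the sign-extended
   8-bit displacement. */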
20339 delta++;
20340 /* End the block at this point. */
20341 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
20342 guest_RIP_bbstart+delta, d64 );
20343 vassert(dres->whatNext == Dis_StopHere);
20344 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), (ULong)d64,
20345 comment);
20346 return delta;
20349 case 0x80: /* Grp1 Ib,Eb */
20350 modrm = getUChar(delta);
20351 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
20352 just one for the mem case and also require LOCK in this case.
20353 Note that this erroneously allows XACQ/XREL on CMP since we
20354 don't check the subopcode here. No big deal. */
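/* Informal summary of the three rejections below:
      register form : any F2/F3                    -> decode_failure
      memory form   : both F2 and F3 together      -> decode_failure
      memory form   : F2 or F3 but no LOCK prefix  -> decode_failure
   i.e. XACQUIRE/XRELEASE are only tolerated on locked memory forms. */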
20355 if (epartIsReg(modrm) && haveF2orF3(pfx))
20356 goto decode_failure;
20357 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20358 goto decode_failure;
20359 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20360 goto decode_failure;
20361 am_sz = lengthAMode(pfx,delta);
20362 sz = 1;
20363 d_sz = 1;
20364 d64 = getSDisp8(delta + am_sz);
20365 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20366 return delta;
20368 case 0x81: /* Grp1 Iv,Ev */
20369 modrm = getUChar(delta);
20370 /* Same comment as for case 0x80 just above. */
20371 if (epartIsReg(modrm) && haveF2orF3(pfx))
20372 goto decode_failure;
20373 if (!epartIsReg(modrm) && haveF2andF3(pfx))
20374 goto decode_failure;
20375 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
20376 goto decode_failure;
20377 am_sz = lengthAMode(pfx,delta);
20378 d_sz = imin(sz,4);
20379 d64 = getSDisp(d_sz, delta + am_sz);
20380 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20381 return delta;
20383 case 0x83: /* Grp1 Ib,Ev */
20384 if (haveF2orF3(pfx)) goto decode_failure;
20385 modrm = getUChar(delta);
20386 am_sz = lengthAMode(pfx,delta);
20387 d_sz = 1;
20388 d64 = getSDisp8(delta + am_sz);
20389 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
20390 return delta;
20392 case 0x84: /* TEST Eb,Gb */
20393 if (haveF2orF3(pfx)) goto decode_failure;
20394 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20395 1, delta, "test" );
20396 return delta;
20398 case 0x85: /* TEST Ev,Gv */
20399 if (haveF2orF3(pfx)) goto decode_failure;
20400 delta = dis_op2_E_G ( vbi, pfx, Iop_And8, WithFlagNone, False,
20401 sz, delta, "test" );
20402 return delta;
20404 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
20405 prefix. Therefore, generate CAS regardless of the presence or
20406 otherwise of a LOCK prefix. */
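/* Sketch of the memory form generated below (informal):
      t1 = LOAD(addr)                    -- old memory value
      CAS(addr, expd = t1, new = Greg)   -- models the implicit LOCK#
      Greg = t1
   The usual CAS machinery restarts the instruction if the location
   changed between the load and the store. */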
20407 case 0x86: /* XCHG Gb,Eb */
20408 sz = 1;
20409 /* Fall through ... */
20410 case 0x87: /* XCHG Gv,Ev */
20411 modrm = getUChar(delta);
20412 /* Check whether F2 or F3 are allowable. For the mem case, one
20413 or the other, but not both, is allowed. We don't care about the
20414 presence of LOCK in this case -- XCHG is unusual in this
20415 respect. */
20416 if (haveF2orF3(pfx)) {
20417 if (epartIsReg(modrm)) {
20418 goto decode_failure;
20419 } else {
20420 if (haveF2andF3(pfx))
20421 goto decode_failure;
20424 ty = szToITy(sz);
20425 t1 = newTemp(ty); t2 = newTemp(ty);
20426 if (epartIsReg(modrm)) {
20427 assign(t1, getIRegE(sz, pfx, modrm));
20428 assign(t2, getIRegG(sz, pfx, modrm));
20429 putIRegG(sz, pfx, modrm, mkexpr(t1));
20430 putIRegE(sz, pfx, modrm, mkexpr(t2));
20431 delta++;
20432 DIP("xchg%c %s, %s\n",
20433 nameISize(sz), nameIRegG(sz, pfx, modrm),
20434 nameIRegE(sz, pfx, modrm));
20435 } else {
20436 *expect_CAS = True;
20437 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
20438 assign( t1, loadLE(ty, mkexpr(addr)) );
20439 assign( t2, getIRegG(sz, pfx, modrm) );
20440 casLE( mkexpr(addr),
20441 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
20442 putIRegG( sz, pfx, modrm, mkexpr(t1) );
20443 delta += alen;
20444 DIP("xchg%c %s, %s\n", nameISize(sz),
20445 nameIRegG(sz, pfx, modrm), dis_buf);
20447 return delta;
20449 case 0x88: { /* MOV Gb,Eb */
20450 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20451 Bool ok = True;
20452 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
20453 if (!ok) goto decode_failure;
20454 return delta;
20457 case 0x89: { /* MOV Gv,Ev */
20458 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
20459 Bool ok = True;
20460 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
20461 if (!ok) goto decode_failure;
20462 return delta;
20465 case 0x8A: /* MOV Eb,Gb */
20466 if (haveF2orF3(pfx)) goto decode_failure;
20467 delta = dis_mov_E_G(vbi, pfx, 1, delta);
20468 return delta;
20470 case 0x8B: /* MOV Ev,Gv */
20471 if (haveF2orF3(pfx)) goto decode_failure;
20472 delta = dis_mov_E_G(vbi, pfx, sz, delta);
20473 return delta;
20475 case 0x8C: /* MOV S,E -- MOV from a SEGMENT REGISTER */
20476 if (haveF2orF3(pfx)) goto decode_failure;
20477 delta = dis_mov_S_E(vbi, pfx, sz, delta);
20478 return delta;
20480 case 0x8D: /* LEA M,Gv */
20481 if (haveF2orF3(pfx)) goto decode_failure;
20482 if (sz != 4 && sz != 8)
20483 goto decode_failure;
20484 modrm = getUChar(delta);
20485 if (epartIsReg(modrm))
20486 goto decode_failure;
20487 /* NOTE! this is the one place where a segment override prefix
20488 has no effect on the address calculation. Therefore we clear
20489 any segment override bits in pfx. */
20490 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
20491 delta += alen;
20492 /* This is a hack. But it isn't clear that doing the
20493 calculation at 32 bits is really worth it. Hence for leal,
20494 do the full 64-bit calculation and then truncate it. */
20495 putIRegG( sz, pfx, modrm,
20496 sz == 4
20497 ? unop(Iop_64to32, mkexpr(addr))
20498 : mkexpr(addr)
20500 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
20501 nameIRegG(sz,pfx,modrm));
20502 return delta;
20504 case 0x8F: { /* POPQ m64 / POPW m16 */
20505 Int len;
20506 UChar rm;
20507 /* There is no encoding for 32-bit pop in 64-bit mode.
20508 So sz==4 actually means sz==8. */
20509 if (haveF2orF3(pfx)) goto decode_failure;
20510 vassert(sz == 2 || sz == 4
20511 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
20512 if (sz == 4) sz = 8;
20513 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20515 rm = getUChar(delta);
20517 /* make sure this instruction is a correct POP */
20518 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
20519 goto decode_failure;
20520 /* and has correct size */
20521 vassert(sz == 8);
20523 t1 = newTemp(Ity_I64);
20524 t3 = newTemp(Ity_I64);
20525 assign( t1, getIReg64(R_RSP) );
20526 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
20528 /* Increase RSP; must be done before the STORE. Intel manual
20529 says: If the RSP register is used as a base register for
20530 addressing a destination operand in memory, the POP
20531 instruction computes the effective address of the operand
20532 after it increments the RSP register. */
20533 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
20535 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
20536 storeLE( mkexpr(addr), mkexpr(t3) );
20538 DIP("popl %s\n", dis_buf);
20540 delta += len;
20541 return delta;
20544 case 0x90: /* XCHG eAX,eAX */
20545 /* detect and handle F3 90 (rep nop) specially */
20546 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
20547 DIP("rep nop (P4 pause)\n");
20548 /* "observe" the hint. The Vex client needs to be careful not
20549 to cause very long delays as a result, though. */
20550 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
20551 vassert(dres->whatNext == Dis_StopHere);
20552 return delta;
20554 /* detect and handle NOPs specially */
20555 if (/* F2/F3 probably change meaning completely */
20556 !haveF2orF3(pfx)
20557 /* If REX.B is 1, we're not exchanging rAX with itself */
20558 && getRexB(pfx)==0 ) {
20559 DIP("nop\n");
20560 return delta;
20562 /* else fall through to normal case. */
20563 case 0x91: /* XCHG rAX,rCX */
20564 case 0x92: /* XCHG rAX,rDX */
20565 case 0x93: /* XCHG rAX,rBX */
20566 case 0x94: /* XCHG rAX,rSP */
20567 case 0x95: /* XCHG rAX,rBP */
20568 case 0x96: /* XCHG rAX,rSI */
20569 case 0x97: /* XCHG rAX,rDI */
20570 /* guard against mutancy */
20571 if (haveF2orF3(pfx)) goto decode_failure;
20572 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20573 return delta;
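/* An informal note on 0x98: it sign-extends within rAX, selected by the
   effective operand size: cbw (AL->AX), cwtl/cwde (AX->EAX),
   cltq/cdqe (EAX->RAX). */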
20575 case 0x98: /* CBW */
20576 if (haveF2orF3(pfx)) goto decode_failure;
20577 if (sz == 8) {
20578 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20579 DIP(/*"cdqe\n"*/"cltq\n");
20580 return delta;
20582 if (sz == 4) {
20583 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20584 DIP("cwtl\n");
20585 return delta;
20587 if (sz == 2) {
20588 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20589 DIP("cbw\n");
20590 return delta;
20592 goto decode_failure;
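/* Informally, 0x99 sign-fills rDX from rAX:
      rDX = (signed)rAX >> (operand-size-in-bits - 1)
   giving cwd (DX:AX), cltd/cdq (EDX:EAX) or cqo (RDX:RAX). */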
20594 case 0x99: /* CWD/CDQ/CQO */
20595 if (haveF2orF3(pfx)) goto decode_failure;
20596 vassert(sz == 2 || sz == 4 || sz == 8);
20597 ty = szToITy(sz);
20598 putIRegRDX( sz,
20599 binop(mkSizedOp(ty,Iop_Sar8),
20600 getIRegRAX(sz),
20601 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20602 DIP(sz == 2 ? "cwd\n"
20603 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20604 : "cqo\n"));
20605 return delta;
20607 case 0x9B: /* FWAIT (X87 insn) */
20608 /* ignore? */
20609 DIP("fwait\n");
20610 return delta;
20612 case 0x9C: /* PUSHF */ {
20613 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20614 mode. So sz==4 actually means sz==8. */
20615 /* 24 July 06: has also been seen with a redundant REX prefix,
20616 so must also allow sz==8. */
20617 if (haveF2orF3(pfx)) goto decode_failure;
20618 vassert(sz == 2 || sz == 4 || sz == 8);
20619 if (sz == 4) sz = 8;
20620 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20622 t1 = newTemp(Ity_I64);
20623 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20624 putIReg64(R_RSP, mkexpr(t1) );
20626 t2 = newTemp(Ity_I64);
20627 assign( t2, mk_amd64g_calculate_rflags_all() );
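/* The flag word assembled below is, informally,
      t5 = OSZACP-from-thunk
           | (DFLAG          & (1 << 10))   -- D  lives in bit 10
           | ((IDFLAG << 21) & (1 << 21))   -- ID lives in bit 21
           | ((ACFLAG << 18) & (1 << 18))   -- AC lives in bit 18 */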
20629 /* Patch in the D flag. This can simply be a copy of bit 10 of
20630 baseBlock[OFFB_DFLAG]. */
20631 t3 = newTemp(Ity_I64);
20632 assign( t3, binop(Iop_Or64,
20633 mkexpr(t2),
20634 binop(Iop_And64,
20635 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20636 mkU64(1<<10)))
20639 /* And patch in the ID flag. */
20640 t4 = newTemp(Ity_I64);
20641 assign( t4, binop(Iop_Or64,
20642 mkexpr(t3),
20643 binop(Iop_And64,
20644 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20645 mkU8(21)),
20646 mkU64(1<<21)))
20649 /* And patch in the AC flag too. */
20650 t5 = newTemp(Ity_I64);
20651 assign( t5, binop(Iop_Or64,
20652 mkexpr(t4),
20653 binop(Iop_And64,
20654 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20655 mkU8(18)),
20656 mkU64(1<<18)))
20659 /* if sz==2, the stored value needs to be narrowed. */
20660 if (sz == 2)
20661 storeLE( mkexpr(t1), unop(Iop_32to16,
20662 unop(Iop_64to32,mkexpr(t5))) );
20663 else
20664 storeLE( mkexpr(t1), mkexpr(t5) );
20666 DIP("pushf%c\n", nameISize(sz));
20667 return delta;
20670 case 0x9D: /* POPF */
20671 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20672 So sz==4 actually means sz==8. */
20673 if (haveF2orF3(pfx)) goto decode_failure;
20674 vassert(sz == 2 || sz == 4 || sz == 8);
20675 if (sz == 4) sz = 8;
20676 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20677 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20678 assign(t2, getIReg64(R_RSP));
20679 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20680 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20681 /* t1 is the flag word. Mask out everything except OSZACP and
20682 set the flags thunk to AMD64G_CC_OP_COPY. */
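/* Informally, the decomposition performed below is:
      CC_DEP1 = t1 & (C|P|A|Z|S|O)         -- thunk in COPY mode
      DFLAG   = ((t1 >> 10) & 1) ? -1 : 1
      IDFLAG  = (t1 >> 21) & 1
      ACFLAG  = (t1 >> 18) & 1 */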
20683 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20684 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20685 stmt( IRStmt_Put( OFFB_CC_DEP1,
20686 binop(Iop_And64,
20687 mkexpr(t1),
20688 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20689 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20690 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20694 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
20696 /* Also need to set the D flag, which is held in bit 10 of t1.
20697 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20698 stmt( IRStmt_Put(
20699 OFFB_DFLAG,
20700 IRExpr_ITE(
20701 unop(Iop_64to1,
20702 binop(Iop_And64,
20703 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
20704 mkU64(1))),
20705 mkU64(0xFFFFFFFFFFFFFFFFULL),
20706 mkU64(1)))
20709 /* And set the ID flag */
20710 stmt( IRStmt_Put(
20711 OFFB_IDFLAG,
20712 IRExpr_ITE(
20713 unop(Iop_64to1,
20714 binop(Iop_And64,
20715 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
20716 mkU64(1))),
20717 mkU64(1),
20718 mkU64(0)))
20721 /* And set the AC flag too */
20722 stmt( IRStmt_Put(
20723 OFFB_ACFLAG,
20724 IRExpr_ITE(
20725 unop(Iop_64to1,
20726 binop(Iop_And64,
20727 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
20728 mkU64(1))),
20729 mkU64(1),
20730 mkU64(0)))
20733 DIP("popf%c\n", nameISize(sz));
20734 return delta;
20736 case 0x9E: /* SAHF */
20737 codegen_SAHF();
20738 DIP("sahf\n");
20739 return delta;
20741 case 0x9F: /* LAHF */
20742 codegen_LAHF();
20743 DIP("lahf\n");
20744 return delta;
20746 case 0xA0: /* MOV Ob,AL */
20747 if (have66orF2orF3(pfx)) goto decode_failure;
20748 sz = 1;
20749 /* Fall through ... */
20750 case 0xA1: /* MOV Ov,eAX */
20751 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20752 goto decode_failure;
20753 d64 = getDisp64(delta);
20754 delta += 8;
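/* The A0..A3 moffs forms carry a full 8-byte absolute address in the
   instruction stream (hence getDisp64 / delta += 8), optionally rebased
   by a segment override in handleAddrOverrides.  Illustrative encoding:
      48 a1 88 77 66 55 44 33 22 11   movabs 0x1122334455667788,%rax */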
20755 ty = szToITy(sz);
20756 addr = newTemp(Ity_I64);
20757 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20758 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20759 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20760 segRegTxt(pfx), (ULong)d64,
20761 nameIRegRAX(sz));
20762 return delta;
20764 case 0xA2: /* MOV AL,Ob */
20765 if (have66orF2orF3(pfx)) goto decode_failure;
20766 sz = 1;
20767 /* Fall through ... */
20768 case 0xA3: /* MOV eAX,Ov */
20769 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20770 goto decode_failure;
20771 d64 = getDisp64(delta);
20772 delta += 8;
20773 ty = szToITy(sz);
20774 addr = newTemp(Ity_I64);
20775 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20776 storeLE( mkexpr(addr), getIRegRAX(sz) );
20777 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20778 segRegTxt(pfx), (ULong)d64);
20779 return delta;
20781 case 0xA4:
20782 case 0xA5:
20783 /* F3 A4/A5: rep movsb/rep movs{w,l,q} */
20784 if (haveF3(pfx) && !haveF2(pfx)) {
20785 if (opc == 0xA4)
20786 sz = 1;
20787 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
20788 guest_RIP_curr_instr,
20789 guest_RIP_bbstart+delta, "rep movs", pfx );
20790 dres->whatNext = Dis_StopHere;
20791 return delta;
20793 /* A4/A5: movsb/movs{w,l,q} */
20794 if (!haveF3(pfx) && !haveF2(pfx)) {
20795 if (opc == 0xA4)
20796 sz = 1;
20797 dis_string_op( dis_MOVS, sz, "movs", pfx );
20798 return delta;
20800 goto decode_failure;
20802 case 0xA6:
20803 case 0xA7:
20804 /* F3 A6/A7: repe cmpsb/repe cmps{w,l,q} */
20805 if (haveF3(pfx) && !haveF2(pfx)) {
20806 if (opc == 0xA6)
20807 sz = 1;
20808 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
20809 guest_RIP_curr_instr,
20810 guest_RIP_bbstart+delta, "repe cmps", pfx );
20811 dres->whatNext = Dis_StopHere;
20812 return delta;
20814 goto decode_failure;
20816 case 0xAA:
20817 case 0xAB:
20818 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20819 if (haveF3(pfx) && !haveF2(pfx)) {
20820 if (opc == 0xAA)
20821 sz = 1;
20822 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
20823 guest_RIP_curr_instr,
20824 guest_RIP_bbstart+delta, "rep stos", pfx );
20825 vassert(dres->whatNext == Dis_StopHere);
20826 return delta;
20828 /* AA/AB: stosb/stos{w,l,q} */
20829 if (!haveF3(pfx) && !haveF2(pfx)) {
20830 if (opc == 0xAA)
20831 sz = 1;
20832 dis_string_op( dis_STOS, sz, "stos", pfx );
20833 return delta;
20835 goto decode_failure;
20837 case 0xA8: /* TEST Ib, AL */
20838 if (haveF2orF3(pfx)) goto decode_failure;
20839 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20840 return delta;
20841 case 0xA9: /* TEST Iv, eAX */
20842 if (haveF2orF3(pfx)) goto decode_failure;
20843 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20844 return delta;
20846 case 0xAC: /* LODS, no REP prefix */
20847 case 0xAD:
20848 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20849 return delta;
20851 case 0xAE:
20852 case 0xAF:
20853 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20854 if (haveF2(pfx) && !haveF3(pfx)) {
20855 if (opc == 0xAE)
20856 sz = 1;
20857 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
20858 guest_RIP_curr_instr,
20859 guest_RIP_bbstart+delta, "repne scas", pfx );
20860 vassert(dres->whatNext == Dis_StopHere);
20861 return delta;
20863 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20864 if (!haveF2(pfx) && haveF3(pfx)) {
20865 if (opc == 0xAE)
20866 sz = 1;
20867 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
20868 guest_RIP_curr_instr,
20869 guest_RIP_bbstart+delta, "repe scas", pfx );
20870 vassert(dres->whatNext == Dis_StopHere);
20871 return delta;
20873 /* AE/AF: scasb/scas{w,l,q} */
20874 if (!haveF2(pfx) && !haveF3(pfx)) {
20875 if (opc == 0xAE)
20876 sz = 1;
20877 dis_string_op( dis_SCAS, sz, "scas", pfx );
20878 return delta;
20880 goto decode_failure;
20882 /* XXXX be careful here with moves to AH/BH/CH/DH */
20883 case 0xB0: /* MOV imm,AL */
20884 case 0xB1: /* MOV imm,CL */
20885 case 0xB2: /* MOV imm,DL */
20886 case 0xB3: /* MOV imm,BL */
20887 case 0xB4: /* MOV imm,AH */
20888 case 0xB5: /* MOV imm,CH */
20889 case 0xB6: /* MOV imm,DH */
20890 case 0xB7: /* MOV imm,BH */
20891 if (haveF2orF3(pfx)) goto decode_failure;
20892 d64 = getUChar(delta);
20893 delta += 1;
20894 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20895 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20896 return delta;
20898 case 0xB8: /* MOV imm,eAX */
20899 case 0xB9: /* MOV imm,eCX */
20900 case 0xBA: /* MOV imm,eDX */
20901 case 0xBB: /* MOV imm,eBX */
20902 case 0xBC: /* MOV imm,eSP */
20903 case 0xBD: /* MOV imm,eBP */
20904 case 0xBE: /* MOV imm,eSI */
20905 case 0xBF: /* MOV imm,eDI */
20906 /* This is the one-and-only place where 64-bit literals are
20907 allowed in the instruction stream. */
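/* Illustrative encoding (not taken from the sources): 48 b8 <8-byte imm>
   is movabsq $imm, %rax.  Without REX.W the immediate below is only
   imin(4,sz) bytes and is masked to the operand size. */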
20908 if (haveF2orF3(pfx)) goto decode_failure;
20909 if (sz == 8) {
20910 d64 = getDisp64(delta);
20911 delta += 8;
20912 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20913 DIP("movabsq $%lld,%s\n", (Long)d64,
20914 nameIRegRexB(8,pfx,opc-0xB8));
20915 } else {
20916 d64 = getSDisp(imin(4,sz),delta);
20917 delta += imin(4,sz);
20918 putIRegRexB(sz, pfx, opc-0xB8,
20919 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20920 DIP("mov%c $%lld,%s\n", nameISize(sz),
20921 (Long)d64,
20922 nameIRegRexB(sz,pfx,opc-0xB8));
20924 return delta;
20926 case 0xC0: { /* Grp2 Ib,Eb */
20927 Bool decode_OK = True;
20928 if (haveF2orF3(pfx)) goto decode_failure;
20929 modrm = getUChar(delta);
20930 am_sz = lengthAMode(pfx,delta);
20931 d_sz = 1;
20932 d64 = getUChar(delta + am_sz);
20933 sz = 1;
20934 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20935 mkU8(d64 & 0xFF), NULL, &decode_OK );
20936 if (!decode_OK) goto decode_failure;
20937 return delta;
20940 case 0xC1: { /* Grp2 Ib,Ev */
20941 Bool decode_OK = True;
20942 if (haveF2orF3(pfx)) goto decode_failure;
20943 modrm = getUChar(delta);
20944 am_sz = lengthAMode(pfx,delta);
20945 d_sz = 1;
20946 d64 = getUChar(delta + am_sz);
20947 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20948 mkU8(d64 & 0xFF), NULL, &decode_OK );
20949 if (!decode_OK) goto decode_failure;
20950 return delta;
20953 case 0xC2: /* RET imm16 */
20954 if (have66orF3(pfx)) goto decode_failure;
20955 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20956 d64 = getUDisp16(delta);
20957 delta += 2;
20958 dis_ret(dres, vbi, d64);
20959 DIP("ret $%lld\n", d64);
20960 return delta;
20962 case 0xC3: /* RET */
20963 if (have66(pfx)) goto decode_failure;
20964 /* F3 is acceptable on AMD. */
20965 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
20966 dis_ret(dres, vbi, 0);
20967 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20968 return delta;
20970 case 0xC6: /* C6 /0 = MOV Ib,Eb */
20971 sz = 1;
20972 goto maybe_do_Mov_I_E;
20973 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20974 goto maybe_do_Mov_I_E;
20975 maybe_do_Mov_I_E:
20976 modrm = getUChar(delta);
20977 if (gregLO3ofRM(modrm) == 0) {
20978 if (epartIsReg(modrm)) {
20979 /* Neither F2 nor F3 are allowable. */
20980 if (haveF2orF3(pfx)) goto decode_failure;
20981 delta++; /* mod/rm byte */
20982 d64 = getSDisp(imin(4,sz),delta);
20983 delta += imin(4,sz);
20984 putIRegE(sz, pfx, modrm,
20985 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20986 DIP("mov%c $%lld, %s\n", nameISize(sz),
20987 (Long)d64,
20988 nameIRegE(sz,pfx,modrm));
20989 } else {
20990 if (haveF2(pfx)) goto decode_failure;
20991 /* F3(XRELEASE) is allowable here */
20992 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20993 /*xtra*/imin(4,sz) );
20994 delta += alen;
20995 d64 = getSDisp(imin(4,sz),delta);
20996 delta += imin(4,sz);
20997 storeLE(mkexpr(addr),
20998 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20999 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
21001 return delta;
21003 /* BEGIN HACKY SUPPORT FOR xbegin */
21004 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
21005 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21006 delta++; /* mod/rm byte */
21007 d64 = getSDisp(4,delta);
21008 delta += 4;
21009 guest_RIP_next_mustcheck = True;
21010 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21011 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
21012 /* EAX contains the failure status code. Bit 3 is "Set if an
21013 internal buffer overflowed", which seems like the
21014 least-bogus choice we can make here. */
21015 putIRegRAX(4, mkU32(1<<3));
21016 /* And jump to the fail address. */
21017 jmp_lit(dres, Ijk_Boring, failAddr);
21018 vassert(dres->whatNext == Dis_StopHere);
21019 DIP("xbeginq 0x%llx\n", failAddr);
21020 return delta;
21022 /* END HACKY SUPPORT FOR xbegin */
21023 /* BEGIN HACKY SUPPORT FOR xabort */
21024 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
21025 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21026 delta++; /* mod/rm byte */
21027 abyte = getUChar(delta); delta++;
21028 /* There is never a real transaction in progress, so do nothing. */
21029 DIP("xabort $%d", (Int)abyte);
21030 return delta;
21032 /* END HACKY SUPPORT FOR xabort */
21033 goto decode_failure;
21035 case 0xC8: /* ENTER */
21036 /* Same comments re operand size as for LEAVE below apply.
21037 Also, only handles the case "enter $imm16, $0"; other cases
21038 for the second operand (nesting depth) are not handled. */
21039 if (sz != 4)
21040 goto decode_failure;
21041 d64 = getUDisp16(delta);
21042 delta += 2;
21043 vassert(d64 >= 0 && d64 <= 0xFFFF);
21044 if (getUChar(delta) != 0)
21045 goto decode_failure;
21046 delta++;
21047 /* Intel docs seem to suggest:
21048 push rbp
21049 temp = rsp
21050 rbp = temp
21051 rsp = rsp - imm16
21053 t1 = newTemp(Ity_I64);
21054 assign(t1, getIReg64(R_RBP));
21055 t2 = newTemp(Ity_I64);
21056 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21057 putIReg64(R_RSP, mkexpr(t2));
21058 storeLE(mkexpr(t2), mkexpr(t1));
21059 putIReg64(R_RBP, mkexpr(t2));
21060 if (d64 > 0) {
21061 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
21063 DIP("enter $%u, $0\n", (UInt)d64);
21064 return delta;
21066 case 0xC9: /* LEAVE */
21067 /* In 64-bit mode this defaults to a 64-bit operand size. There
21068 is no way to encode a 32-bit variant. Hence sz==4 but we do
21069 it as if sz=8. */
21070 if (sz != 4)
21071 goto decode_failure;
21072 t1 = newTemp(Ity_I64);
21073 t2 = newTemp(Ity_I64);
21074 assign(t1, getIReg64(R_RBP));
21075 /* First PUT RSP looks redundant, but need it because RSP must
21076 always be up-to-date for Memcheck to work... */
21077 putIReg64(R_RSP, mkexpr(t1));
21078 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
21079 putIReg64(R_RBP, mkexpr(t2));
21080 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
21081 DIP("leave\n");
21082 return delta;
21084 case 0xCC: /* INT 3 */
21085 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
21086 vassert(dres->whatNext == Dis_StopHere);
21087 DIP("int $0x3\n");
21088 return delta;
21090 case 0xCD: /* INT imm8 */
21091 d64 = getUChar(delta); delta++;
21093 /* Handle int $0xD2 (Solaris fasttrap syscalls). */
21094 if (d64 == 0xD2) {
21095 jmp_lit(dres, Ijk_Sys_int210, guest_RIP_bbstart + delta);
21096 vassert(dres->whatNext == Dis_StopHere);
21097 DIP("int $0xD2\n");
21098 return delta;
21100 goto decode_failure;
21102 case 0xCF: /* IRET */
21103 /* Note, this is an extremely kludgey and limited implementation of iret
21104 based on the extremely kludgey and limited implementation of iret for x86
21105 popq %RIP; popl %CS; popq %RFLAGS; popq %RSP; popl %SS
21106 %CS and %SS are ignored */
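/* Informal picture of the stack frame consumed below (offsets from the
   incoming RSP):
      +0  new RIP      +8  CS (ignored)
      +16 new RFLAGS   +24 new RSP      +32 SS (ignored) */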
21107 if (sz != 8 || have66orF2orF3(pfx)) goto decode_failure;
21109 t1 = newTemp(Ity_I64); /* RSP */
21110 t2 = newTemp(Ity_I64); /* new RIP */
21111 /* t3 = newTemp(Ity_I32); new CS */
21112 t4 = newTemp(Ity_I64); /* new RFLAGS */
21113 t5 = newTemp(Ity_I64); /* new RSP */
21114 /* t6 = newTemp(Ity_I32); new SS */
21116 assign(t1, getIReg64(R_RSP));
21117 assign(t2, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(0))));
21118 /* assign(t3, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(8)))); */
21119 assign(t4, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(16))));
21120 assign(t5, loadLE(Ity_I64, binop(Iop_Add64,mkexpr(t1),mkU64(24))));
21121 /* assign(t6, loadLE(Ity_I32, binop(Iop_Add64,mkexpr(t1),mkU64(32)))); */
21123 /* set %RFLAGS */
21124 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21125 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21126 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21127 stmt( IRStmt_Put( OFFB_CC_DEP1,
21128 binop(Iop_And64,
21129 mkexpr(t4),
21130 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
21131 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
21132 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
21137 /* Also need to set the D flag, which is held in bit 10 of t4.
21138 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
21139 stmt( IRStmt_Put(
21140 OFFB_DFLAG,
21141 IRExpr_ITE(
21142 unop(Iop_64to1,
21143 binop(Iop_And64,
21144 binop(Iop_Shr64, mkexpr(t4), mkU8(10)),
21145 mkU64(1))),
21146 mkU64(0xFFFFFFFFFFFFFFFFULL),
21147 mkU64(1)))
21150 /* And set the ID flag */
21151 stmt( IRStmt_Put(
21152 OFFB_IDFLAG,
21153 IRExpr_ITE(
21154 unop(Iop_64to1,
21155 binop(Iop_And64,
21156 binop(Iop_Shr64, mkexpr(t4), mkU8(21)),
21157 mkU64(1))),
21158 mkU64(1),
21159 mkU64(0)))
21162 /* And set the AC flag too */
21163 stmt( IRStmt_Put(
21164 OFFB_ACFLAG,
21165 IRExpr_ITE(
21166 unop(Iop_64to1,
21167 binop(Iop_And64,
21168 binop(Iop_Shr64, mkexpr(t4), mkU8(18)),
21169 mkU64(1))),
21170 mkU64(1),
21171 mkU64(0)))
21175 /* set new stack */
21176 putIReg64(R_RSP, mkexpr(t5));
21178 /* goto new RIP value */
21179 jmp_treg(dres, Ijk_Ret, t2);
21180 DIP("iret (very kludgey)\n");
21181 return delta;
21183 case 0xD0: { /* Grp2 1,Eb */
21184 Bool decode_OK = True;
21185 if (haveF2orF3(pfx)) goto decode_failure;
21186 modrm = getUChar(delta);
21187 am_sz = lengthAMode(pfx,delta);
21188 d_sz = 0;
21189 d64 = 1;
21190 sz = 1;
21191 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21192 mkU8(d64), NULL, &decode_OK );
21193 if (!decode_OK) goto decode_failure;
21194 return delta;
21197 case 0xD1: { /* Grp2 1,Ev */
21198 Bool decode_OK = True;
21199 if (haveF2orF3(pfx)) goto decode_failure;
21200 modrm = getUChar(delta);
21201 am_sz = lengthAMode(pfx,delta);
21202 d_sz = 0;
21203 d64 = 1;
21204 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21205 mkU8(d64), NULL, &decode_OK );
21206 if (!decode_OK) goto decode_failure;
21207 return delta;
21210 case 0xD2: { /* Grp2 CL,Eb */
21211 Bool decode_OK = True;
21212 if (haveF2orF3(pfx)) goto decode_failure;
21213 modrm = getUChar(delta);
21214 am_sz = lengthAMode(pfx,delta);
21215 d_sz = 0;
21216 sz = 1;
21217 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21218 getIRegCL(), "%cl", &decode_OK );
21219 if (!decode_OK) goto decode_failure;
21220 return delta;
21223 case 0xD3: { /* Grp2 CL,Ev */
21224 Bool decode_OK = True;
21225 if (haveF2orF3(pfx)) goto decode_failure;
21226 modrm = getUChar(delta);
21227 am_sz = lengthAMode(pfx,delta);
21228 d_sz = 0;
21229 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
21230 getIRegCL(), "%cl", &decode_OK );
21231 if (!decode_OK) goto decode_failure;
21232 return delta;
21235 case 0xD8: /* X87 instructions */
21236 case 0xD9:
21237 case 0xDA:
21238 case 0xDB:
21239 case 0xDC:
21240 case 0xDD:
21241 case 0xDE:
21242 case 0xDF: {
21243 Bool redundantREXWok = False;
21245 if (haveF2orF3(pfx))
21246 goto decode_failure;
21248 /* kludge to tolerate redundant rex.w prefixes (should do this
21249 properly one day) */
21250 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
21251 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
21252 redundantREXWok = True;
21254 Bool size_OK = False;
21255 if ( sz == 4 )
21256 size_OK = True;
21257 else if ( sz == 8 )
21258 size_OK = redundantREXWok;
21259 else if ( sz == 2 ) {
21260 int mod_rm = getUChar(delta+0);
21261 int reg = gregLO3ofRM(mod_rm);
21262 /* The HotSpot JVM uses these */
21263 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
21264 reg == 4 /* FNSAVE */ ||
21265 reg == 6 /* FRSTOR */ ) )
21266 size_OK = True;
21268 /* AMD manual says 0x66 size override is ignored, except where
21269 it is meaningful */
21270 if (!size_OK)
21271 goto decode_failure;
21273 Bool decode_OK = False;
21274 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
21275 if (!decode_OK)
21276 goto decode_failure;
21278 return delta;
21281 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
21282 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
21283 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
21284 { /* The docs say this uses rCX as a count depending on the
21285 address size override, not the operand one. */
21286 IRExpr* zbit = NULL;
21287 IRExpr* count = NULL;
21288 IRExpr* cond = NULL;
21289 const HChar* xtra = NULL;
21291 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
21292 /* So at this point we've rejected any variants which appear to
21293 be governed by the usual operand-size modifiers. Hence only
21294 the address size prefix can have an effect. It changes the
21295 size from 64 (default) to 32. */
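/* Informally, for LOOPcc disp8:
      rCX = rCX - 1
      if (rCX != 0 [ && ZF==1 for LOOPE | && ZF==0 for LOOPNE ])
         goto next-insn-RIP + sign-extended disp8
   where rCX means ECX when an address-size override (0x67) is present. */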
21296 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
21297 delta++;
21298 if (haveASO(pfx)) {
21299 /* 64to32 of 64-bit get is merely a get-put improvement
21300 trick. */
21301 putIReg32(R_RCX, binop(Iop_Sub32,
21302 unop(Iop_64to32, getIReg64(R_RCX)),
21303 mkU32(1)));
21304 } else {
21305 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
21308 /* This is correct, both for 32- and 64-bit versions. If we're
21309 doing a 32-bit dec and the result is zero then the default
21310 zero extension rule will cause the upper 32 bits to be zero
21311 too. Hence a 64-bit check against zero is OK. */
21312 count = getIReg64(R_RCX);
21313 cond = binop(Iop_CmpNE64, count, mkU64(0));
21314 switch (opc) {
21315 case 0xE2:
21316 xtra = "";
21317 break;
21318 case 0xE1:
21319 xtra = "e";
21320 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
21321 cond = mkAnd1(cond, zbit);
21322 break;
21323 case 0xE0:
21324 xtra = "ne";
21325 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
21326 cond = mkAnd1(cond, zbit);
21327 break;
21328 default:
21329 vassert(0);
21331 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
21333 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", (ULong)d64);
21334 return delta;
21337 case 0xE3:
21338 /* JRCXZ or JECXZ, depending on the address size override. */
21339 if (have66orF2orF3(pfx)) goto decode_failure;
21340 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21341 delta++;
21342 if (haveASO(pfx)) {
21343 /* 32-bit */
21344 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21345 unop(Iop_32Uto64, getIReg32(R_RCX)),
21346 mkU64(0)),
21347 Ijk_Boring,
21348 IRConst_U64(d64),
21349 OFFB_RIP
21351 DIP("jecxz 0x%llx\n", (ULong)d64);
21352 } else {
21353 /* 64-bit */
21354 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
21355 getIReg64(R_RCX),
21356 mkU64(0)),
21357 Ijk_Boring,
21358 IRConst_U64(d64),
21359 OFFB_RIP
21361 DIP("jrcxz 0x%llx\n", (ULong)d64);
21363 return delta;
21365 case 0xE4: /* IN imm8, AL */
21366 sz = 1;
21367 t1 = newTemp(Ity_I64);
21368 abyte = getUChar(delta); delta++;
21369 assign(t1, mkU64( abyte & 0xFF ));
21370 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21371 goto do_IN;
21372 case 0xE5: /* IN imm8, eAX */
21373 if (!(sz == 2 || sz == 4)) goto decode_failure;
21374 t1 = newTemp(Ity_I64);
21375 abyte = getUChar(delta); delta++;
21376 assign(t1, mkU64( abyte & 0xFF ));
21377 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
21378 goto do_IN;
21379 case 0xEC: /* IN %DX, AL */
21380 sz = 1;
21381 t1 = newTemp(Ity_I64);
21382 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21383 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21384 nameIRegRAX(sz));
21385 goto do_IN;
21386 case 0xED: /* IN %DX, eAX */
21387 if (!(sz == 2 || sz == 4)) goto decode_failure;
21388 t1 = newTemp(Ity_I64);
21389 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
21390 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
21391 nameIRegRAX(sz));
21392 goto do_IN;
21393 do_IN: {
21394 /* At this point, sz indicates the width, and t1 is a 64-bit
21395 value giving port number. */
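/* Sketch: the port read is modelled as a dirty helper call,
      t2 = amd64g_dirtyhelper_IN(portno = t1, nbytes = sz)
   whose 64-bit result is narrowed to sz bytes and written to rAX. */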
21396 IRDirty* d;
21397 if (haveF2orF3(pfx)) goto decode_failure;
21398 vassert(sz == 1 || sz == 2 || sz == 4);
21399 ty = szToITy(sz);
21400 t2 = newTemp(Ity_I64);
21401 d = unsafeIRDirty_1_N(
21403 0/*regparms*/,
21404 "amd64g_dirtyhelper_IN",
21405 &amd64g_dirtyhelper_IN,
21406 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
21408 /* do the call, dumping the result in t2. */
21409 stmt( IRStmt_Dirty(d) );
21410 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
21411 return delta;
21414 case 0xE6: /* OUT AL, imm8 */
21415 sz = 1;
21416 t1 = newTemp(Ity_I64);
21417 abyte = getUChar(delta); delta++;
21418 assign( t1, mkU64( abyte & 0xFF ) );
21419 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21420 goto do_OUT;
21421 case 0xE7: /* OUT eAX, imm8 */
21422 if (!(sz == 2 || sz == 4)) goto decode_failure;
21423 t1 = newTemp(Ity_I64);
21424 abyte = getUChar(delta); delta++;
21425 assign( t1, mkU64( abyte & 0xFF ) );
21426 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
21427 goto do_OUT;
21428 case 0xEE: /* OUT AL, %DX */
21429 sz = 1;
21430 t1 = newTemp(Ity_I64);
21431 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21432 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21433 nameIRegRDX(2));
21434 goto do_OUT;
21435 case 0xEF: /* OUT eAX, %DX */
21436 if (!(sz == 2 || sz == 4)) goto decode_failure;
21437 t1 = newTemp(Ity_I64);
21438 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
21439 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
21440 nameIRegRDX(2));
21441 goto do_OUT;
21442 do_OUT: {
21443 /* At this point, sz indicates the width, and t1 is a 64-bit
21444 value giving port number. */
21445 IRDirty* d;
21446 if (haveF2orF3(pfx)) goto decode_failure;
21447 vassert(sz == 1 || sz == 2 || sz == 4);
21448 ty = szToITy(sz);
21449 d = unsafeIRDirty_0_N(
21450 0/*regparms*/,
21451 "amd64g_dirtyhelper_OUT",
21452 &amd64g_dirtyhelper_OUT,
21453 mkIRExprVec_3( mkexpr(t1),
21454 widenUto64( getIRegRAX(sz) ),
21455 mkU64(sz) )
21457 stmt( IRStmt_Dirty(d) );
21458 return delta;
21461 case 0xE8: /* CALL J4 */
21462 if (haveF3(pfx)) goto decode_failure;
21463 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21464 d64 = getSDisp32(delta); delta += 4;
21465 d64 += (guest_RIP_bbstart+delta);
21466 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
21467 t1 = newTemp(Ity_I64);
21468 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
21469 putIReg64(R_RSP, mkexpr(t1));
21470 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
21471 t2 = newTemp(Ity_I64);
21472 assign(t2, mkU64((Addr64)d64));
21473 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
21474 jmp_lit(dres, Ijk_Call, d64);
21475 vassert(dres->whatNext == Dis_StopHere);
21476 DIP("call 0x%llx\n", (ULong)d64);
21477 return delta;
21479 case 0xE9: /* Jv (jump, 16/32 offset) */
21480 if (haveF3(pfx)) goto decode_failure;
21481 sz = 4; /* Prefixes that change operand size are ignored for this
21482 instruction. Operand size is forced to 32bit. */
21483 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21484 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
21485 delta += sz;
21486 jmp_lit(dres, Ijk_Boring, d64);
21487 vassert(dres->whatNext == Dis_StopHere);
21488 DIP("jmp 0x%llx\n", (ULong)d64);
21489 return delta;
21491 case 0xEB: /* Jb (jump, byte offset) */
21492 if (haveF3(pfx)) goto decode_failure;
21493 /* Prefixes that change operand size are ignored for this instruction. */
21494 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21495 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
21496 delta++;
21497 jmp_lit(dres, Ijk_Boring, d64);
21498 vassert(dres->whatNext == Dis_StopHere);
21499 DIP("jmp-8 0x%llx\n", (ULong)d64);
21500 return delta;
21502 case 0xF5: /* CMC */
21503 case 0xF8: /* CLC */
21504 case 0xF9: /* STC */
21505 t1 = newTemp(Ity_I64);
21506 t2 = newTemp(Ity_I64);
21507 assign( t1, mk_amd64g_calculate_rflags_all() );
21508 switch (opc) {
21509 case 0xF5:
21510 assign( t2, binop(Iop_Xor64, mkexpr(t1),
21511 mkU64(AMD64G_CC_MASK_C)));
21512 DIP("cmc\n");
21513 break;
21514 case 0xF8:
21515 assign( t2, binop(Iop_And64, mkexpr(t1),
21516 mkU64(~AMD64G_CC_MASK_C)));
21517 DIP("clc\n");
21518 break;
21519 case 0xF9:
21520 assign( t2, binop(Iop_Or64, mkexpr(t1),
21521 mkU64(AMD64G_CC_MASK_C)));
21522 DIP("stc\n");
21523 break;
21524 default:
21525 vpanic("disInstr(x64)(cmc/clc/stc)");
21527 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21528 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21529 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
21530 /* Set NDEP even though it isn't used. This makes redundant-PUT
21531 elimination of previous stores to this field work better. */
21532 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21533 return delta;
21535 case 0xF6: { /* Grp3 Eb */
21536 Bool decode_OK = True;
21537 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21538 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21539 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
21540 if (!decode_OK) goto decode_failure;
21541 return delta;
21544 case 0xF7: { /* Grp3 Ev */
21545 Bool decode_OK = True;
21546 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21547 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
21548 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
21549 if (!decode_OK) goto decode_failure;
21550 return delta;
21553 case 0xFC: /* CLD */
21554 if (haveF2orF3(pfx)) goto decode_failure;
21555 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
21556 DIP("cld\n");
21557 return delta;
21559 case 0xFD: /* STD */
21560 if (haveF2orF3(pfx)) goto decode_failure;
21561 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
21562 DIP("std\n");
21563 return delta;
21565 case 0xFE: { /* Grp4 Eb */
21566 Bool decode_OK = True;
21567 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21568 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
21569 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
21570 if (!decode_OK) goto decode_failure;
21571 return delta;
21574 case 0xFF: { /* Grp5 Ev */
21575 Bool decode_OK = True;
21576 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
21577 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
21578 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
21579 if (!decode_OK) goto decode_failure;
21580 return delta;
21583 default:
21584 break;
21588 decode_failure:
21589 return deltaIN; /* fail */
21593 /*------------------------------------------------------------*/
21594 /*--- ---*/
21595 /*--- Top-level post-escape decoders: dis_ESC_0F ---*/
21596 /*--- ---*/
21597 /*------------------------------------------------------------*/
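/* math_BSWAP below reverses byte order with a shift-and-mask network.
   For the 64-bit case the steps are, informally,
      x = ((x & 0xFF00FF00FF00FF00) >> 8)  | ((x << 8)  & 0xFF00FF00FF00FF00);
      x = ((x & 0xFFFF0000FFFF0000) >> 16) | ((x << 16) & 0xFFFF0000FFFF0000);
      x = ((x & 0xFFFFFFFF00000000) >> 32) | ((x << 32) & 0xFFFFFFFF00000000);
   i.e. swap adjacent bytes, then 16-bit halves, then 32-bit halves. */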
21599 static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
21601 IRTemp t2 = newTemp(ty);
21602 if (ty == Ity_I64) {
21603 IRTemp m8 = newTemp(Ity_I64);
21604 IRTemp s8 = newTemp(Ity_I64);
21605 IRTemp m16 = newTemp(Ity_I64);
21606 IRTemp s16 = newTemp(Ity_I64);
21607 IRTemp m32 = newTemp(Ity_I64);
21608 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
21609 assign( s8,
21610 binop(Iop_Or64,
21611 binop(Iop_Shr64,
21612 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
21613 mkU8(8)),
21614 binop(Iop_And64,
21615 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
21616 mkexpr(m8))
21620 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
21621 assign( s16,
21622 binop(Iop_Or64,
21623 binop(Iop_Shr64,
21624 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
21625 mkU8(16)),
21626 binop(Iop_And64,
21627 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
21628 mkexpr(m16))
21632 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
21633 assign( t2,
21634 binop(Iop_Or64,
21635 binop(Iop_Shr64,
21636 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
21637 mkU8(32)),
21638 binop(Iop_And64,
21639 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21640 mkexpr(m32))
21643 return t2;
21645 if (ty == Ity_I32) {
21646 assign( t2,
21647 binop(
21648 Iop_Or32,
21649 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21650 binop(
21651 Iop_Or32,
21652 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21653 mkU32(0x00FF0000)),
21654 binop(Iop_Or32,
21655 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21656 mkU32(0x0000FF00)),
21657 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21658 mkU32(0x000000FF) )
21661 return t2;
21663 if (ty == Ity_I16) {
21664 assign(t2,
21665 binop(Iop_Or16,
21666 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21667 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21668 return t2;
21670 vassert(0);
21671 /*NOTREACHED*/
21672 return IRTemp_INVALID;
21676 __attribute__((noinline))
21677 static
21678 Long dis_ESC_0F (
21679 /*MB_OUT*/DisResult* dres,
21680 /*MB_OUT*/Bool* expect_CAS,
21681 const VexArchInfo* archinfo,
21682 const VexAbiInfo* vbi,
21683 Prefix pfx, Int sz, Long deltaIN
21686 Long d64 = 0;
21687 IRTemp addr = IRTemp_INVALID;
21688 IRTemp t1 = IRTemp_INVALID;
21689 IRTemp t2 = IRTemp_INVALID;
21690 UChar modrm = 0;
21691 Int am_sz = 0;
21692 Int alen = 0;
21693 HChar dis_buf[50];
21695 /* In the first switch, look for ordinary integer insns. */
21696 Long delta = deltaIN;
21697 UChar opc = getUChar(delta);
21698 delta++;
21699 switch (opc) { /* first switch */
21701 case 0x01:
21703 modrm = getUChar(delta);
21704 /* 0F 01 /0 -- SGDT */
21705 /* 0F 01 /1 -- SIDT */
21706 if (!epartIsReg(modrm)
21707 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21708 /* This is really revolting, but ... since each processor
21709 (core) only has one IDT and one GDT, just let the guest
21710 see it (pass-through semantics). I can't see any way to
21711 construct a faked-up value, so don't bother to try. */
21712 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21713 delta += alen;
21714 switch (gregLO3ofRM(modrm)) {
21715 case 0: DIP("sgdt %s\n", dis_buf); break;
21716 case 1: DIP("sidt %s\n", dis_buf); break;
21717 default: vassert(0); /*NOTREACHED*/
21719 IRDirty* d = unsafeIRDirty_0_N (
21720 0/*regparms*/,
21721 "amd64g_dirtyhelper_SxDT",
21722 &amd64g_dirtyhelper_SxDT,
21723 mkIRExprVec_2( mkexpr(addr),
21724 mkU64(gregLO3ofRM(modrm)) )
21726 /* declare we're writing memory */
21727 d->mFx = Ifx_Write;
21728 d->mAddr = mkexpr(addr);
21729 d->mSize = 6;
21730 stmt( IRStmt_Dirty(d) );
21731 return delta;
21733 /* 0F 01 D0 = XGETBV */
21734 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21735 delta += 1;
21736 DIP("xgetbv\n");
21737 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21738 am not sure if that translates into SEGV or to something
21739 else, in user space. */
21740 t1 = newTemp(Ity_I32);
21741 assign( t1, getIReg32(R_RCX) );
21742 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21743 Ijk_SigSEGV,
21744 IRConst_U64(guest_RIP_curr_instr),
21745 OFFB_RIP
21747 putIRegRAX(4, mkU32(7));
21748 putIRegRDX(4, mkU32(0));
21749 return delta;
21751 /* BEGIN HACKY SUPPORT FOR xend */
21752 /* 0F 01 D5 = XEND */
21753 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21754 /* We are never in a transaction (xbegin immediately aborts).
21755 So this just always generates a General Protection Fault. */
21756 delta += 1;
21757 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21758 vassert(dres->whatNext == Dis_StopHere);
21759 DIP("xend\n");
21760 return delta;
21762 /* END HACKY SUPPORT FOR xend */
21763 /* BEGIN HACKY SUPPORT FOR xtest */
21764 /* 0F 01 D6 = XTEST */
21765 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21766 /* Sets ZF because there never is a transaction, and all
21767 CF, OF, SF, PF and AF are always cleared by xtest. */
21768 delta += 1;
21769 DIP("xtest\n");
21770 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21771 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21772 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21773 /* Set NDEP even though it isn't used. This makes redundant-PUT
21774 elimination of previous stores to this field work better. */
21775 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21776 return delta;
21778 /* END HACKY SUPPORT FOR xtest */
21779 /* 0F 01 F9 = RDTSCP */
21780 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21781 delta += 1;
21782 /* Uses dirty helper:
21783 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21784 declared to wr rax, rcx, rdx
21786 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21787 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21788 IRDirty* d
21789 = unsafeIRDirty_0_N ( 0/*regparms*/,
21790 fName, fAddr, mkIRExprVec_1(IRExpr_GSPTR()) );
21791 /* declare guest state effects */
21792 d->nFxState = 3;
21793 vex_bzero(&d->fxState, sizeof(d->fxState));
21794 d->fxState[0].fx = Ifx_Write;
21795 d->fxState[0].offset = OFFB_RAX;
21796 d->fxState[0].size = 8;
21797 d->fxState[1].fx = Ifx_Write;
21798 d->fxState[1].offset = OFFB_RCX;
21799 d->fxState[1].size = 8;
21800 d->fxState[2].fx = Ifx_Write;
21801 d->fxState[2].offset = OFFB_RDX;
21802 d->fxState[2].size = 8;
21803 /* execute the dirty call, side-effecting guest state */
21804 stmt( IRStmt_Dirty(d) );
21805 /* RDTSCP is a serialising insn. So, just in case someone is
21806 using it as a memory fence ... */
21807 stmt( IRStmt_MBE(Imbe_Fence) );
21808 DIP("rdtscp\n");
21809 return delta;
21811 /* else decode failed */
21812 break;
21815 case 0x05: /* SYSCALL */
21816 guest_RIP_next_mustcheck = True;
21817 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21818 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21819 /* It's important that all guest state is up-to-date
21820 at this point. So we declare an end-of-block here, which
21821 forces any cached guest state to be flushed. */
21822 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21823 vassert(dres->whatNext == Dis_StopHere);
21824 DIP("syscall\n");
21825 return delta;
21827 case 0x0B: /* UD2 */
21828 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21829 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21830 vassert(dres->whatNext == Dis_StopHere);
21831 DIP("ud2\n");
21832 return delta;
21834 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21835 /* 0F 0D /1 -- prefetchw mem8 */
21836 if (have66orF2orF3(pfx)) goto decode_failure;
21837 modrm = getUChar(delta);
21838 if (epartIsReg(modrm)) goto decode_failure;
21839 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21840 goto decode_failure;
21841 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21842 delta += alen;
21843 switch (gregLO3ofRM(modrm)) {
21844 case 0: DIP("prefetch %s\n", dis_buf); break;
21845 case 1: DIP("prefetchw %s\n", dis_buf); break;
21846 default: vassert(0); /*NOTREACHED*/
21848 return delta;
21850 case 0x19:
21851 case 0x1C:
21852 case 0x1D:
21853 case 0x1E:
21854 case 0x1F:
21855 // Intel CET instructions can have any prefixes before NOPs
21856 // and can use any ModRM, SIB and disp
21857 modrm = getUChar(delta);
21858 if (epartIsReg(modrm)) {
21859 delta += 1;
21860 DIP("nop%c\n", nameISize(sz));
21861 } else {
21862 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21863 delta += alen;
21864 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21866 return delta;
21868 case 0x31: { /* RDTSC */
21869 IRTemp val = newTemp(Ity_I64);
21870 IRExpr** args = mkIRExprVec_0();
21871 IRDirty* d = unsafeIRDirty_1_N (
21872 val,
21873 0/*regparms*/,
21874 "amd64g_dirtyhelper_RDTSC",
21875 &amd64g_dirtyhelper_RDTSC,
21876 args
21878 if (have66orF2orF3(pfx)) goto decode_failure;
21879 /* execute the dirty call, dumping the result in val. */
21880 stmt( IRStmt_Dirty(d) );
21881 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21882 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21883 DIP("rdtsc\n");
21884 return delta;
21887 case 0x40: /* CMOVOb (cmov overflow) */
21888 case 0x41: /* CMOVNOb (cmov no overflow) */
21889 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21890 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21891 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21892 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21893 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21894 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21895 case 0x48: /* CMOVSb (cmov negative) */
21896 case 0x49: /* CMOVNSb (cmov not negative) */
21897 case 0x4A: /* CMOVP (cmov parity even) */
21898 case 0x4B: /* CMOVNP (cmov parity odd) */
21899 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21900 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21901 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21902 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21903 if (haveF2orF3(pfx)) goto decode_failure;
21904 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21905 return delta;
21907 case 0x80: /* JOb (jump overflow) */
21908 case 0x81: /* JNOb (jump no overflow) */
21909 case 0x82: /* JBb/JNAEb (jump below) */
21910 case 0x83: /* JNBb/JAEb (jump not below) */
21911 case 0x84: /* JZb/JEb (jump zero) */
21912 case 0x85: /* JNZb/JNEb (jump not zero) */
21913 case 0x86: /* JBEb/JNAb (jump below or equal) */
21914 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21915 case 0x88: /* JSb (jump negative) */
21916 case 0x89: /* JNSb (jump not negative) */
21917 case 0x8A: /* JP (jump parity even) */
21918 case 0x8B: /* JNP/JPO (jump parity odd) */
21919 case 0x8C: /* JLb/JNGEb (jump less) */
21920 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21921 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21922 case 0x8F: { /* JGb/JNLEb (jump greater) */
21923 Long jmpDelta;
21924 const HChar* comment = "";
21925 if (haveF3(pfx)) goto decode_failure;
21926 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
21927 jmpDelta = getSDisp32(delta);
21928 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21929 delta += 4;
21930 /* End the block at this point. */
21931 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21932 guest_RIP_bbstart+delta, d64 );
21933 vassert(dres->whatNext == Dis_StopHere);
21934 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), (ULong)d64,
21935 comment);
21936 return delta;
21939 case 0x90: /* set-Ob (set if overflow) */
21940 case 0x91: /* set-NOb (set if no overflow) */
21941 case 0x92: /* set-Bb/set-NAEb (set if below) */
21942 case 0x93: /* set-NBb/set-AEb (set if not below) */
21943 case 0x94: /* set-Zb/set-Eb (set if zero) */
21944 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21945 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21946 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21947 case 0x98: /* set-Sb (set if negative) */
21948 case 0x99: /* set-NSb (set if not negative) */
21949 case 0x9A: /* set-P (set if parity even) */
21950 case 0x9B: /* set-NP (set if parity odd) */
21951 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21952 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21953 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21954 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21955 if (haveF2orF3(pfx)) goto decode_failure;
21956 t1 = newTemp(Ity_I8);
21957 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21958 modrm = getUChar(delta);
21959 if (epartIsReg(modrm)) {
21960 delta++;
21961 putIRegE(1, pfx, modrm, mkexpr(t1));
21962 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21963 nameIRegE(1,pfx,modrm));
21964 } else {
21965 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21966 delta += alen;
21967 storeLE( mkexpr(addr), mkexpr(t1) );
21968 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21970 return delta;
21972 case 0x1A:
21973 case 0x1B: { /* Future MPX instructions, currently NOPs.
21974 BNDMK b, m F3 0F 1B
21975 BNDCL b, r/m F3 0F 1A
21976 BNDCU b, r/m F2 0F 1A
21977 BNDCN b, r/m F2 0F 1B
21978 BNDMOV b, b/m 66 0F 1A
21979 BNDMOV b/m, b 66 0F 1B
21980 BNDLDX b, mib 0F 1A
21981 BNDSTX mib, b 0F 1B */
21983 /* All instructions have two operands. One operand is always the
21984 bnd register number (bnd0-bnd3, other register numbers are
21985 ignored when MPX isn't enabled, but should generate an
21986 exception if MPX is enabled) given by gregOfRexRM. The other
21987 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21988 address, all of which can be decoded by using either
21989 eregOfRexRM or disAMode. */
21991 modrm = getUChar(delta);
21992 int bnd = gregOfRexRM(pfx,modrm);
21993 const HChar *oper;
21994 if (epartIsReg(modrm)) {
21995 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
21996 delta += 1;
21997 } else {
21998 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21999 delta += alen;
22000 oper = dis_buf;
22001 }
22003 if (haveF3no66noF2 (pfx)) {
22004 if (opc == 0x1B) {
22005 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
22006 } else /* opc == 0x1A */ {
22007 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
22008 }
22009 } else if (haveF2no66noF3 (pfx)) {
22010 if (opc == 0x1A) {
22011 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
22012 } else /* opc == 0x1B */ {
22013 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
22014 }
22015 } else if (have66noF2noF3 (pfx)) {
22016 if (opc == 0x1A) {
22017 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
22018 } else /* opc == 0x1B */ {
22019 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
22020 }
22021 } else if (haveNo66noF2noF3 (pfx)) {
22022 if (opc == 0x1A) {
22023 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
22024 } else /* opc == 0x1B */ {
22025 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
22026 }
22027 } else goto decode_failure;
22029 return delta;
22030 }
22032 case 0xA2: { /* CPUID */
22033 /* Uses dirty helper:
22034 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
22035 declared to mod rax, wr rbx, rcx, rdx
22036 */
22037 IRDirty* d = NULL;
22038 const HChar* fName = NULL;
22039 void* fAddr = NULL;
22041 if (haveF2orF3(pfx)) goto decode_failure;
22043 /* This isn't entirely correct, CPUID should depend on the VEX
22044 capabilities, not on the underlying CPU. See bug #324882. */
22045 if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22046 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22047 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX2)) {
22048 fName = "amd64g_dirtyhelper_CPUID_avx2";
22049 fAddr = &amd64g_dirtyhelper_CPUID_avx2;
22050 /* This is a Core-i7-4910-like machine */
22051 }
22052 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22053 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22054 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22055 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
22056 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
22057 /* This is a Core-i5-2300-like machine */
22058 }
22059 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22060 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
22061 (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
22062 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
22063 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
22064 }
22065 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSSE3) &&
22066 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
22067 fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
22068 fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
22069 /* This is a Core-i5-670-like machine */
22070 }
22071 else {
22072 /* Give a CPUID for at least a baseline machine, SSE2
22073 only, and no CX16 */
22074 fName = "amd64g_dirtyhelper_CPUID_baseline";
22075 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
22076 }
22078 vassert(fName); vassert(fAddr);
22079 IRExpr** args = NULL;
22080 if (fAddr == &amd64g_dirtyhelper_CPUID_avx2
22081 || fAddr == &amd64g_dirtyhelper_CPUID_avx_and_cx16) {
22082 Bool hasF16C = (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C) != 0;
22083 Bool hasRDRAND = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND) != 0;
22084 Bool hasRDSEED = (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDSEED) != 0;
22085 args = mkIRExprVec_4(IRExpr_GSPTR(),
22086 mkIRExpr_HWord(hasF16C ? 1 : 0),
22087 mkIRExpr_HWord(hasRDRAND ? 1 : 0),
22088 mkIRExpr_HWord(hasRDSEED ? 1 : 0));
22089 } else {
22090 args = mkIRExprVec_1(IRExpr_GSPTR());
22092 d = unsafeIRDirty_0_N ( 0/*regparms*/, fName, fAddr, args );
22094 /* Declare guest state effects. EAX, EBX, ECX and EDX are written. EAX
22095 is also read, hence is marked as Modified. ECX is sometimes also
22096 read, depending on the value in EAX; that much is obvious from
22097 inspection of the helper function.
22099 This is a bit of a problem: if we mark ECX as Modified -- hence, by
22100 implication, Read -- then we may get false positives from Memcheck in
22101 the case where ECX contains undefined bits, but the EAX value is such
22102 that the instruction wouldn't read ECX anyway. The obvious way out
22103 of this is to mark it as written only, but that means Memcheck will
22104 effectively ignore undefinedness in the incoming ECX value. That
22105 seems like a small loss to take to avoid false positives here,
22106 though. Fundamentally the problem exists because CPUID itself has
22107 conditional dataflow -- whether ECX is read depends on the value in
22108 EAX -- but the annotation mechanism for dirty helpers can't represent
22109 that conditionality.
22111 A fully-accurate solution might be to change the helpers so that the
22112 EAX and ECX values are passed as parameters. Then, for the ECX
22113 value, we can pass, effectively "if EAX is some value for which ECX
22114 is ignored { 0 } else { ECX }", and Memcheck will see and understand
22115 this conditionality. */
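/* For illustration only (this is not the current implementation):
   such a parameterised call might be built roughly as follows, where
   the Ity_I1 guard |leafIgnoresECX| is hypothetical:

      IRExpr* eaxV = getIReg64(R_RAX);
      IRExpr* ecxV = IRExpr_ITE(leafIgnoresECX, mkU64(0),
                                getIReg64(R_RCX));
      args = mkIRExprVec_3(IRExpr_GSPTR(), eaxV, ecxV);
      d = unsafeIRDirty_0_N(0/*regparms*/, fName, fAddr, args);

   Memcheck would then see the definedness of RAX and RCX flowing
   through |eaxV| and |ecxV| directly, rather than having to rely on
   the fxState annotations below. */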
22116 d->nFxState = 4;
22117 vex_bzero(&d->fxState, sizeof(d->fxState));
22118 d->fxState[0].fx = Ifx_Modify;
22119 d->fxState[0].offset = OFFB_RAX;
22120 d->fxState[0].size = 8;
22121 d->fxState[1].fx = Ifx_Write;
22122 d->fxState[1].offset = OFFB_RBX;
22123 d->fxState[1].size = 8;
22124 d->fxState[2].fx = Ifx_Write; /* was: Ifx_Modify; */
22125 d->fxState[2].offset = OFFB_RCX;
22126 d->fxState[2].size = 8;
22127 d->fxState[3].fx = Ifx_Write;
22128 d->fxState[3].offset = OFFB_RDX;
22129 d->fxState[3].size = 8;
22130 /* Execute the dirty call, side-effecting guest state. */
22131 stmt( IRStmt_Dirty(d) );
22132 /* CPUID is a serialising insn. So, just in case someone is
22133 using it as a memory fence ... */
22134 stmt( IRStmt_MBE(Imbe_Fence) );
22135 DIP("cpuid\n");
22136 return delta;
22137 }
22139 case 0xA3: { /* BT Gv,Ev */
22140 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22141 Bool ok = True;
22142 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22143 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
22144 if (!ok) goto decode_failure;
22145 return delta;
22148 case 0xA4: /* SHLDv imm8,Gv,Ev */
22149 modrm = getUChar(delta);
22150 d64 = delta + lengthAMode(pfx, delta);
22151 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22152 delta = dis_SHLRD_Gv_Ev (
22153 vbi, pfx, delta, modrm, sz,
22154 mkU8(getUChar(d64)), True, /* literal */
22155 dis_buf, True /* left */ );
22156 return delta;
22158 case 0xA5: /* SHLDv %cl,Gv,Ev */
22159 modrm = getUChar(delta);
22160 delta = dis_SHLRD_Gv_Ev (
22161 vbi, pfx, delta, modrm, sz,
22162 getIRegCL(), False, /* not literal */
22163 "%cl", True /* left */ );
22164 return delta;
22166 case 0xAB: { /* BTS Gv,Ev */
22167 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22168 Bool ok = True;
22169 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22170 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
22171 if (!ok) goto decode_failure;
22172 return delta;
22175 case 0xAC: /* SHRDv imm8,Gv,Ev */
22176 modrm = getUChar(delta);
22177 d64 = delta + lengthAMode(pfx, delta);
22178 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
22179 delta = dis_SHLRD_Gv_Ev (
22180 vbi, pfx, delta, modrm, sz,
22181 mkU8(getUChar(d64)), True, /* literal */
22182 dis_buf, False /* right */ );
22183 return delta;
22185 case 0xAD: /* SHRDv %cl,Gv,Ev */
22186 modrm = getUChar(delta);
22187 delta = dis_SHLRD_Gv_Ev (
22188 vbi, pfx, delta, modrm, sz,
22189 getIRegCL(), False, /* not literal */
22190 "%cl", False /* right */);
22191 return delta;
22193 case 0xAF: /* IMUL Ev, Gv */
22194 if (haveF2orF3(pfx)) goto decode_failure;
22195 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
22196 return delta;
22198 case 0xB0: { /* CMPXCHG Gb,Eb */
22199 Bool ok = True;
22200 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22201 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
22202 if (!ok) goto decode_failure;
22203 return delta;
22206 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
22207 Bool ok = True;
22208 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
22209 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
22210 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
22211 if (!ok) goto decode_failure;
22212 return delta;
22215 case 0xB3: { /* BTR Gv,Ev */
22216 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22217 Bool ok = True;
22218 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22219 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
22220 if (!ok) goto decode_failure;
22221 return delta;
22224 case 0xB6: /* MOVZXb Eb,Gv */
22225 if (haveF2orF3(pfx)) goto decode_failure;
22226 if (sz != 2 && sz != 4 && sz != 8)
22227 goto decode_failure;
22228 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
22229 return delta;
22231 case 0xB7: /* MOVZXw Ew,Gv */
22232 if (haveF2orF3(pfx)) goto decode_failure;
22233 if (sz != 4 && sz != 8)
22234 goto decode_failure;
22235 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
22236 return delta;
22238 case 0xBA: { /* Grp8 Ib,Ev */
22239 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
22240 Bool decode_OK = False;
22241 modrm = getUChar(delta);
22242 am_sz = lengthAMode(pfx,delta);
22243 d64 = getSDisp8(delta + am_sz);
22244 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
22245 &decode_OK );
22246 if (!decode_OK)
22247 goto decode_failure;
22248 return delta;
22251 case 0xBB: { /* BTC Gv,Ev */
22252 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
22253 Bool ok = False;
22254 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
22255 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
22256 if (!ok) goto decode_failure;
22257 return delta;
22260 case 0xBC: /* BSF Gv,Ev */
22261 if (!haveF2orF3(pfx)
22262 || (haveF3noF2(pfx)
22263 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
22264 /* no-F2 no-F3 0F BC = BSF
22265 or F3 0F BC = REP; BSF on older CPUs. */
22266 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
22267 return delta;
22269 /* Fall through, since F3 0F BC is TZCNT, and needs to
22270 be handled by dis_ESC_0F__SSE4. */
22271 break;
22273 case 0xBD: /* BSR Gv,Ev */
22274 if (!haveF2orF3(pfx)
22275 || (haveF3noF2(pfx)
22276 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
22277 /* no-F2 no-F3 0F BD = BSR
22278 or F3 0F BD = REP; BSR on older CPUs. */
22279 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
22280 return delta;
22282 /* Fall through, since F3 0F BD is LZCNT, and needs to
22283 be handled by dis_ESC_0F__SSE4. */
22284 break;
22286 case 0xBE: /* MOVSXb Eb,Gv */
22287 if (haveF2orF3(pfx)) goto decode_failure;
22288 if (sz != 2 && sz != 4 && sz != 8)
22289 goto decode_failure;
22290 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
22291 return delta;
22293 case 0xBF: /* MOVSXw Ew,Gv */
22294 if (haveF2orF3(pfx)) goto decode_failure;
22295 if (sz != 4 && sz != 8)
22296 goto decode_failure;
22297 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
22298 return delta;
22300 case 0xC0: { /* XADD Gb,Eb */
22301 Bool decode_OK = False;
22302 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
22303 if (!decode_OK)
22304 goto decode_failure;
22305 return delta;
22308 case 0xC1: { /* XADD Gv,Ev */
22309 Bool decode_OK = False;
22310 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
22311 if (!decode_OK)
22312 goto decode_failure;
22313 return delta;
22316 case 0xC7: {
22317 modrm = getUChar(delta);
22319 // Detecting valid CMPXCHG combinations is pretty complex.
22320 Bool isValidCMPXCHG = gregLO3ofRM(modrm) == 1;
22321 if (isValidCMPXCHG) {
22322 if (have66(pfx)) isValidCMPXCHG = False;
22323 if (sz != 4 && sz != 8) isValidCMPXCHG = False;
22324 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
22325 isValidCMPXCHG = False;
22326 if (epartIsReg(modrm)) isValidCMPXCHG = False;
22327 if (haveF2orF3(pfx)) {
22328 /* Since the e-part is memory only, F2 or F3 (one or the
22329 other) is acceptable if LOCK is also present. But only
22330 for cmpxchg8b. */
22331 if (sz == 8) isValidCMPXCHG = False;
22332 if (haveF2andF3(pfx) || !haveLOCK(pfx)) isValidCMPXCHG = False;
22333 }
22334 }
22336 /* 0F C7 /1 (with qualifications) = CMPXCHG */
22337 if (isValidCMPXCHG) {
22338 // Note that we've already read the modrm byte by this point, but we
22339 // haven't moved delta past it.
22340 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
22341 IRTemp expdHi = newTemp(elemTy);
22342 IRTemp expdLo = newTemp(elemTy);
22343 IRTemp dataHi = newTemp(elemTy);
22344 IRTemp dataLo = newTemp(elemTy);
22345 IRTemp oldHi = newTemp(elemTy);
22346 IRTemp oldLo = newTemp(elemTy);
22347 IRTemp flags_old = newTemp(Ity_I64);
22348 IRTemp flags_new = newTemp(Ity_I64);
22349 IRTemp success = newTemp(Ity_I1);
22350 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
22351 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
22352 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
22353 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
22354 IRTemp expdHi64 = newTemp(Ity_I64);
22355 IRTemp expdLo64 = newTemp(Ity_I64);
22357 /* Translate this using a DCAS, even if there is no LOCK
22358 prefix. Life is too short to bother with generating two
22359 different translations for the with/without-LOCK-prefix
22360 cases. */
22361 *expect_CAS = True;
22363 /* Generate address */
22364 vassert(!epartIsReg(modrm));
22365 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22366 delta += alen;
22368 /* cmpxchg16b requires an alignment check. */
22369 if (sz == 8)
22370 gen_SIGNAL_if_not_16_aligned( vbi, addr );
22372 /* Get the expected and new values. */
22373 assign( expdHi64, getIReg64(R_RDX) );
22374 assign( expdLo64, getIReg64(R_RAX) );
22376 /* These are the correctly-sized expected and new values.
22377 However, we also get expdHi64/expdLo64 above as 64-bits
22378 regardless, because we will need them later in the 32-bit
22379 case (paradoxically). */
22380 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
22381 : mkexpr(expdHi64) );
22382 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
22383 : mkexpr(expdLo64) );
22384 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
22385 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
22387 /* Do the DCAS */
22388 stmt( IRStmt_CAS(
22389 mkIRCAS( oldHi, oldLo,
22390 Iend_LE, mkexpr(addr),
22391 mkexpr(expdHi), mkexpr(expdLo),
22392 mkexpr(dataHi), mkexpr(dataLo)
22393 )));
22395 /* success when oldHi:oldLo == expdHi:expdLo */
22396 assign( success,
22397 binop(opCasCmpEQ,
22398 binop(opOR,
22399 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
22400 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
22401 ),
22402 zero
22403 ));
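/* The expression just above is a branch-free equality test:
   (oldHi ^ expdHi) | (oldLo ^ expdLo) is zero exactly when both
   halves of the old value match the expected value, so comparing
   the OR against zero produces the DCAS success bit. */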
22405 /* If the DCAS is successful, that is to say oldHi:oldLo ==
22406 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
22407 which is where they came from originally. Both the actual
22408 contents of these two regs, and any shadow values, are
22409 unchanged. If the DCAS fails then we're putting into
22410 RDX:RAX the value seen in memory. */
22411 /* Now of course there's a complication in the 32-bit case
22412 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
22413 unchanged; but if we use the same scheme as in the 64-bit
22414 case, we get hit by the standard rule that a write to the
22415 bottom 32 bits of an integer register zeros the upper 32
22416 bits. And so the upper halves of RDX and RAX mysteriously
22417 become zero. So we have to stuff back in the original
22418 64-bit values which we previously stashed in
22419 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
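/* A concrete instance of that rule: in 64-bit mode a 32-bit write
   such as "movl %ecx, %eax" zeroes RAX[63:32], so rebuilding RDX:RAX
   with 32-bit puts would wipe the upper halves; hence the stashed
   64-bit copies are written back instead. */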
22420 /* It's just _so_ much fun ... */
22421 putIRegRDX( 8,
22422 IRExpr_ITE( mkexpr(success),
22423 mkexpr(expdHi64),
22424 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
22425 : mkexpr(oldHi)
22426 ));
22427 putIRegRAX( 8,
22428 IRExpr_ITE( mkexpr(success),
22429 mkexpr(expdLo64),
22430 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
22431 : mkexpr(oldLo)
22432 ));
22434 /* Copy the success bit into the Z flag and leave the others
22435 unchanged */
22436 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
22437 assign(
22438 flags_new,
22439 binop(Iop_Or64,
22440 binop(Iop_And64, mkexpr(flags_old),
22441 mkU64(~AMD64G_CC_MASK_Z)),
22442 binop(Iop_Shl64,
22443 binop(Iop_And64,
22444 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
22445 mkU8(AMD64G_CC_SHIFT_Z)) ));
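/* Reminder: with AMD64G_CC_OP_COPY the flag thunk takes CC_DEP1 to
   be the literal rflags value, so the PUTs below publish flags_old
   with only the Z bit replaced by the success bit computed above;
   all other flag bits pass through unchanged. */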
22447 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22448 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
22449 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22450 /* Set NDEP even though it isn't used. This makes
22451 redundant-PUT elimination of previous stores to this field
22452 work better. */
22453 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22455 /* Sheesh. Aren't you glad it was me and not you that had to
22456 write and validate all this grunge? */
22458 DIP("cmpxchg%s %s\n", sz == 4 ? "8b" : "16b", dis_buf);
22459 return delta;
22460 } // if (isValidCMPXCHG)
22462 /* 0F C7 /6 no-F2-or-F3 = RDRAND, 0F C7 /7 = RDSEED */
22463 int insn = gregLO3ofRM(modrm);
22464 if (((insn == 6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDRAND))
22465 || (insn == 7 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDSEED)))
22466 && epartIsReg(modrm) && haveNoF2noF3(pfx)
22467 && (sz == 8 || sz == 4 || sz == 2)) {
22469 delta++; // move past modrm
22470 IRType ty = szToITy(sz);
22472 // Pull a first 32 bits of randomness, plus C flag, out of the host.
22473 IRTemp pairLO = newTemp(Ity_I64);
22474 IRDirty* dLO;
22475 if (insn == 6) /* RDRAND */
22476 dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22477 "amd64g_dirtyhelper_RDRAND",
22478 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22479 else /* RDSEED */
22480 dLO = unsafeIRDirty_1_N(pairLO, 0/*regparms*/,
22481 "amd64g_dirtyhelper_RDSEED",
22482 &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());
22484 // There are no guest state or memory effects to declare for |dLO|.
22485 stmt( IRStmt_Dirty(dLO) );
22487 IRTemp randsLO = newTemp(Ity_I32);
22488 assign(randsLO, unop(Iop_64to32, mkexpr(pairLO)));
22489 IRTemp cLO = newTemp(Ity_I64);
22490 assign(cLO, binop(Iop_Shr64, mkexpr(pairLO), mkU8(32)));
22492 // We'll assemble the final pairing in (cFinal, randsNearlyFinal).
22493 IRTemp randsNearlyFinal = newTemp(Ity_I64);
22494 IRTemp cFinal = newTemp(Ity_I64);
22496 if (ty == Ity_I64) {
22497 // Pull another 32 bits of randomness out of the host.
22498 IRTemp pairHI = newTemp(Ity_I64);
22499 IRDirty* dHI;
22500 if (insn == 6) /* RDRAND */
22501 dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22502 "amd64g_dirtyhelper_RDRAND",
22503 &amd64g_dirtyhelper_RDRAND, mkIRExprVec_0());
22504 else /* RDSEED */
22505 dHI = unsafeIRDirty_1_N(pairHI, 0/*regparms*/,
22506 "amd64g_dirtyhelper_RDSEED",
22507 &amd64g_dirtyhelper_RDSEED, mkIRExprVec_0());
22509 // There are no guest state or memory effects to declare for |dHI|.
22510 stmt( IRStmt_Dirty(dHI) );
22512 IRTemp randsHI = newTemp(Ity_I32);
22513 assign(randsHI, unop(Iop_64to32, mkexpr(pairHI)));
22514 IRTemp cHI = newTemp(Ity_I64);
22515 assign(cHI, binop(Iop_Shr64, mkexpr(pairHI), mkU8(32)));
22516 assign(randsNearlyFinal, binop(Iop_32HLto64,
22517 mkexpr(randsHI), mkexpr(randsLO)));
22518 assign(cFinal, binop(Iop_And64,
22519 binop(Iop_And64, mkexpr(cHI), mkexpr(cLO)),
22520 mkU64(1)));
22521 } else {
22522 assign(randsNearlyFinal, unop(Iop_32Uto64, mkexpr(randsLO)));
22523 assign(cFinal, binop(Iop_And64, mkexpr(cLO), mkU64(1)));
22526 /* Now cFinal[0] is the final success/failure flag (cFinal[0] == 1
22527 means success). But there's another twist. If we failed then the
22528 returned value must be forced to zero. Otherwise we could have the
22529 situation, when sz==8, where one of the host calls failed but the
22530 other didn't. This would give cFinal[0] == 0 (correctly) but
22531 randsNearlyFinal not being zero, because it contains the 32 bit
22532 result of the non-failing call. */
22533 IRTemp randsFinal = newTemp(Ity_I64);
22534 assign(randsFinal,
22535 binop(Iop_And64,
22536 mkexpr(randsNearlyFinal),
22537 binop(Iop_Sar64,
22538 binop(Iop_Shl64, mkexpr(cFinal), mkU8(63)),
22539 mkU8(63))
22540 ));
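/* The Shl64/Sar64 pair above is a sign-replication trick: shifting
   cFinal's bit 0 up to bit 63 and then arithmetically shifting back
   down by 63 yields 0 on failure and 0xFFFFFFFFFFFFFFFF on success,
   so the And64 forces the returned randomness to zero exactly when
   the success bit is clear. */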
22542 // So, finally, update the guest state.
22543 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(randsFinal)));
22545 // Set C=<success indication>, O,S,Z,A,P = 0. cFinal has already been
22546 // masked so only the lowest bit remains.
22547 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
22548 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(cFinal) ));
22549 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
22550 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
22552 if (insn == 6) {
22553 DIP("rdrand %s", nameIRegE(sz, pfx, modrm));
22554 } else {
22555 DIP("rdseed %s", nameIRegE(sz, pfx, modrm));
22556 }
22558 return delta;
22559 }
22561 goto decode_failure;
22562 }
22564 case 0xC8: /* BSWAP %eax */
22565 case 0xC9:
22566 case 0xCA:
22567 case 0xCB:
22568 case 0xCC:
22569 case 0xCD:
22570 case 0xCE:
22571 case 0xCF: /* BSWAP %edi */
22572 if (haveF2orF3(pfx)) goto decode_failure;
22573 /* According to the AMD64 docs, this insn can have size 4 or
22574 8. */
22575 if (sz == 4) {
22576 t1 = newTemp(Ity_I32);
22577 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
22578 t2 = math_BSWAP( t1, Ity_I32 );
22579 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
22580 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
22581 return delta;
22583 if (sz == 8) {
22584 t1 = newTemp(Ity_I64);
22585 t2 = newTemp(Ity_I64);
22586 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
22587 t2 = math_BSWAP( t1, Ity_I64 );
22588 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
22589 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
22590 return delta;
22592 goto decode_failure;
22594 default:
22595 break;
22597 } /* first switch */
22600 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
22601 /* In the second switch, pick off MMX insns. */
22603 if (!have66orF2orF3(pfx)) {
22604 /* So there's no SIMD prefix. */
22606 vassert(sz == 4 || sz == 8);
22608 switch (opc) { /* second switch */
22610 case 0x71:
22611 case 0x72:
22612 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
22614 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
22615 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
22616 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
22617 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
22619 case 0xFC:
22620 case 0xFD:
22621 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
22623 case 0xEC:
22624 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
22626 case 0xDC:
22627 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22629 case 0xF8:
22630 case 0xF9:
22631 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
22633 case 0xE8:
22634 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
22636 case 0xD8:
22637 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
22639 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
22640 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
22642 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
22644 case 0x74:
22645 case 0x75:
22646 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
22648 case 0x64:
22649 case 0x65:
22650 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
22652 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
22653 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
22654 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
22656 case 0x68:
22657 case 0x69:
22658 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
22660 case 0x60:
22661 case 0x61:
22662 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
22664 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
22665 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
22666 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
22667 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
22669 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
22670 case 0xF2:
22671 case 0xF3:
22673 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
22674 case 0xD2:
22675 case 0xD3:
22677 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
22678 case 0xE2: {
22679 Bool decode_OK = False;
22680 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
22681 if (decode_OK)
22682 return delta;
22683 goto decode_failure;
22686 default:
22687 break;
22688 } /* second switch */
22692 /* A couple of MMX corner cases */
22693 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
22694 if (sz != 4)
22695 goto decode_failure;
22696 do_EMMS_preamble();
22697 DIP("{f}emms\n");
22698 return delta;
22701 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
22702 /* Perhaps it's an SSE or SSE2 instruction. We can try this
22703 without checking the guest hwcaps because SSE2 is a baseline
22704 facility in 64 bit mode. */
22706 Bool decode_OK = False;
22707 delta = dis_ESC_0F__SSE2 ( &decode_OK,
22708 archinfo, vbi, pfx, sz, deltaIN, dres );
22709 if (decode_OK)
22710 return delta;
22713 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
22714 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
22715 first. */
22717 Bool decode_OK = False;
22718 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22719 if (decode_OK)
22720 return delta;
22723 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22724 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
22725 first. */
22727 Bool decode_OK = False;
22728 delta = dis_ESC_0F__SSE4 ( &decode_OK,
22729 archinfo, vbi, pfx, sz, deltaIN );
22730 if (decode_OK)
22731 return delta;
22734 decode_failure:
22735 return deltaIN; /* fail */
22736 }
22739 /*------------------------------------------------------------*/
22740 /*--- ---*/
22741 /*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22742 /*--- ---*/
22743 /*------------------------------------------------------------*/
22745 __attribute__((noinline))
22746 static
22747 Long dis_ESC_0F38 (
22748 /*MB_OUT*/DisResult* dres,
22749 const VexArchInfo* archinfo,
22750 const VexAbiInfo* vbi,
22751 Prefix pfx, Int sz, Long deltaIN
22752 )
22753 {
22754 Long delta = deltaIN;
22755 UChar opc = getUChar(delta);
22756 delta++;
22757 switch (opc) {
22759 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22760 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22761 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22762 && (sz == 2 || sz == 4 || sz == 8)) {
22763 IRTemp addr = IRTemp_INVALID;
22764 UChar modrm = 0;
22765 Int alen = 0;
22766 HChar dis_buf[50];
22767 modrm = getUChar(delta);
22768 if (epartIsReg(modrm)) break;
22769 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22770 delta += alen;
22771 IRType ty = szToITy(sz);
22772 IRTemp src = newTemp(ty);
22773 if (opc == 0xF0) { /* LOAD */
22774 assign(src, loadLE(ty, mkexpr(addr)));
22775 IRTemp dst = math_BSWAP(src, ty);
22776 putIRegG(sz, pfx, modrm, mkexpr(dst));
22777 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22778 } else { /* STORE */
22779 assign(src, getIRegG(sz, pfx, modrm));
22780 IRTemp dst = math_BSWAP(src, ty);
22781 storeLE(mkexpr(addr), mkexpr(dst));
22782 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22784 return delta;
22786 /* else fall through; maybe one of the decoders below knows what
22787 it is. */
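/* For reference, movbe is a byte-swapping load/store: with memory
   bytes 01 02 03 04 at increasing addresses, "movbe (%rdi),%eax"
   loads 0x01020304, whereas a plain mov would load 0x04030201. */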
22788 break;
22791 default:
22792 break;
22795 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22796 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22797 rather than proceeding indiscriminately. */
22799 Bool decode_OK = False;
22800 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22801 if (decode_OK)
22802 return delta;
22805 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22806 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22807 rather than proceeding indiscriminately. */
22809 Bool decode_OK = False;
22810 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22811 if (decode_OK)
22812 return delta;
22815 /* Ignore previous decode attempts and restart from the beginning of
22816 the instruction. */
22817 delta = deltaIN;
22818 opc = getUChar(delta);
22819 delta++;
22821 switch (opc) {
22823 case 0xF6: {
22824 /* 66 0F 38 F6 = ADCX r32/64(G), m32/64(E) */
22825 /* F3 0F 38 F6 = ADOX r32/64(G), m32/64(E) */
22826 /* These were introduced in Broadwell. Gate them on AVX so as to at
22827 least reject them on earlier guests. Has no host requirements. */
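/* For reference: adcx is an add-with-carry that reads and writes
   only CF, and adox is the same but for OF; all other arithmetic
   flags are preserved.  That is why both forms are routed through
   dis_op2_E_G below with the dedicated WithFlagCarryX and
   WithFlagOverX variants rather than the ordinary add path. */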
22828 if (have66noF2noF3(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22829 if (sz == 2) {
22830 sz = 4; /* 66 prefix but operand size is 4/8 */
22832 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagCarryX, True,
22833 sz, delta, "adcx" );
22834 return delta;
22836 if (haveF3no66noF2(pfx) && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
22837 delta = dis_op2_E_G ( vbi, pfx, Iop_Add8, WithFlagOverX, True,
22838 sz, delta, "adox" );
22839 return delta;
22841 /* else fall through */
22842 break;
22845 default:
22846 break;
22849 /*decode_failure:*/
22850 return deltaIN; /* fail */
22851 }
22854 /*------------------------------------------------------------*/
22855 /*--- ---*/
22856 /*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22857 /*--- ---*/
22858 /*------------------------------------------------------------*/
22860 __attribute__((noinline))
22861 static
22862 Long dis_ESC_0F3A (
22863 /*MB_OUT*/DisResult* dres,
22864 const VexArchInfo* archinfo,
22865 const VexAbiInfo* vbi,
22866 Prefix pfx, Int sz, Long deltaIN
22867 )
22868 {
22869 Long delta = deltaIN;
22870 UChar opc = getUChar(delta);
22871 delta++;
22872 switch (opc) {
22874 default:
22875 break;
22879 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22880 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22881 rather than proceeding indiscriminately. */
22883 Bool decode_OK = False;
22884 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22885 if (decode_OK)
22886 return delta;
22889 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22890 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22891 rather than proceeding indiscriminately. */
22893 Bool decode_OK = False;
22894 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22895 if (decode_OK)
22896 return delta;
22899 return deltaIN; /* fail */
22900 }
22903 /*------------------------------------------------------------*/
22904 /*--- ---*/
22905 /*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22906 /*--- ---*/
22907 /*------------------------------------------------------------*/
22909 /* FIXME: common up with the _256_ version below? */
22910 static
22911 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22912 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22913 Prefix pfx, Long delta, const HChar* name,
22914 /* The actual operation. Use either 'op' or 'opfn',
22915 but not both. */
22916 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
22917 Bool invertLeftArg,
22918 Bool swapArgs
22919 )
22920 {
22921 UChar modrm = getUChar(delta);
22922 UInt rD = gregOfRexRM(pfx, modrm);
22923 UInt rSL = getVexNvvvv(pfx);
22924 IRTemp tSL = newTemp(Ity_V128);
22925 IRTemp tSR = newTemp(Ity_V128);
22926 IRTemp addr = IRTemp_INVALID;
22927 HChar dis_buf[50];
22928 Int alen = 0;
22929 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22931 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22932 : getXMMReg(rSL));
22934 if (epartIsReg(modrm)) {
22935 UInt rSR = eregOfRexRM(pfx, modrm);
22936 delta += 1;
22937 assign(tSR, getXMMReg(rSR));
22938 DIP("%s %s,%s,%s\n",
22939 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22940 } else {
22941 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22942 delta += alen;
22943 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22944 DIP("%s %s,%s,%s\n",
22945 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22948 IRTemp res = IRTemp_INVALID;
22949 if (op != Iop_INVALID) {
22950 vassert(opFn == NULL);
22951 res = newTemp(Ity_V128);
22952 if (requiresRMode(op)) {
22953 IRTemp rm = newTemp(Ity_I32);
22954 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22955 assign(res, swapArgs
22956 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22957 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22958 } else {
22959 assign(res, swapArgs
22960 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22961 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22963 } else {
22964 vassert(opFn != NULL);
22965 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
22968 putYMMRegLoAndZU(rD, mkexpr(res));
22970 *uses_vvvv = True;
22971 return delta;
22972 }
22975 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
22976 for the operation, no inversion of the left arg, and no swapping of
22977 args. */
22978 static
22979 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22980 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22981 Prefix pfx, Long delta, const HChar* name,
22982 IROp op
22985 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22986 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
22990 /* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
22991 generator to compute the result, no inversion of the left
22992 arg, and no swapping of args. */
22993 static
22994 Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22995 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
22996 Prefix pfx, Long delta, const HChar* name,
22997 IRTemp(*opFn)(IRTemp,IRTemp)
23000 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23001 uses_vvvv, vbi, pfx, delta, name,
23002 Iop_INVALID, opFn, False, False );
23006 /* Vector by scalar shift of V by the amount specified at the bottom
23007 of E. */
23008 static ULong dis_AVX128_shiftV_byE ( const VexAbiInfo* vbi,
23009 Prefix pfx, Long delta,
23010 const HChar* opname, IROp op )
23011 {
23012 HChar dis_buf[50];
23013 Int alen, size;
23014 IRTemp addr;
23015 Bool shl, shr, sar;
23016 UChar modrm = getUChar(delta);
23017 UInt rG = gregOfRexRM(pfx,modrm);
23018 UInt rV = getVexNvvvv(pfx);
23019 IRTemp g0 = newTemp(Ity_V128);
23020 IRTemp g1 = newTemp(Ity_V128);
23021 IRTemp amt = newTemp(Ity_I64);
23022 IRTemp amt8 = newTemp(Ity_I8);
23023 if (epartIsReg(modrm)) {
23024 UInt rE = eregOfRexRM(pfx,modrm);
23025 assign( amt, getXMMRegLane64(rE, 0) );
23026 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23027 nameXMMReg(rV), nameXMMReg(rG) );
23028 delta++;
23029 } else {
23030 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23031 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23032 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
23033 delta += alen;
23035 assign( g0, getXMMReg(rV) );
23036 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23038 shl = shr = sar = False;
23039 size = 0;
23040 switch (op) {
23041 case Iop_ShlN16x8: shl = True; size = 32; break;
23042 case Iop_ShlN32x4: shl = True; size = 32; break;
23043 case Iop_ShlN64x2: shl = True; size = 64; break;
23044 case Iop_SarN16x8: sar = True; size = 16; break;
23045 case Iop_SarN32x4: sar = True; size = 32; break;
23046 case Iop_ShrN16x8: shr = True; size = 16; break;
23047 case Iop_ShrN32x4: shr = True; size = 32; break;
23048 case Iop_ShrN64x2: shr = True; size = 64; break;
23049 default: vassert(0);
23052 if (shl || shr) {
23053 assign(
23054 g1,
23055 IRExpr_ITE(
23056 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23057 binop(op, mkexpr(g0), mkexpr(amt8)),
23058 mkV128(0x0000)
23059 )
23060 );
23061 } else
23062 if (sar) {
23063 assign(
23064 g1,
23065 IRExpr_ITE(
23066 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23067 binop(op, mkexpr(g0), mkexpr(amt8)),
23068 binop(op, mkexpr(g0), mkU8(size-1))
23069 )
23070 );
23071 } else {
23072 vassert(0);
23073 }
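/* The ITEs above follow the architected XMM shift-by-scalar rules:
   for logical shifts (e.g. vpsrld) a count >= the lane size gives
   all-zero lanes, while for the arithmetic shifts (vpsraw/vpsrad)
   the count saturates, behaving like a shift by lane-size minus one
   so every lane fills with its sign bit. */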
23075 putYMMRegLoAndZU( rG, mkexpr(g1) );
23076 return delta;
23077 }
23080 /* Vector by scalar shift of V by the amount specified at the bottom
23081 of E. */
23082 static ULong dis_AVX256_shiftV_byE ( const VexAbiInfo* vbi,
23083 Prefix pfx, Long delta,
23084 const HChar* opname, IROp op )
23085 {
23086 HChar dis_buf[50];
23087 Int alen, size;
23088 IRTemp addr;
23089 Bool shl, shr, sar;
23090 UChar modrm = getUChar(delta);
23091 UInt rG = gregOfRexRM(pfx,modrm);
23092 UInt rV = getVexNvvvv(pfx);
23093 IRTemp g0 = newTemp(Ity_V256);
23094 IRTemp g1 = newTemp(Ity_V256);
23095 IRTemp amt = newTemp(Ity_I64);
23096 IRTemp amt8 = newTemp(Ity_I8);
23097 if (epartIsReg(modrm)) {
23098 UInt rE = eregOfRexRM(pfx,modrm);
23099 assign( amt, getXMMRegLane64(rE, 0) );
23100 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23101 nameYMMReg(rV), nameYMMReg(rG) );
23102 delta++;
23103 } else {
23104 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23105 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
23106 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
23107 delta += alen;
23109 assign( g0, getYMMReg(rV) );
23110 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
23112 shl = shr = sar = False;
23113 size = 0;
23114 switch (op) {
23115 case Iop_ShlN16x16: shl = True; size = 32; break;
23116 case Iop_ShlN32x8: shl = True; size = 32; break;
23117 case Iop_ShlN64x4: shl = True; size = 64; break;
23118 case Iop_SarN16x16: sar = True; size = 16; break;
23119 case Iop_SarN32x8: sar = True; size = 32; break;
23120 case Iop_ShrN16x16: shr = True; size = 16; break;
23121 case Iop_ShrN32x8: shr = True; size = 32; break;
23122 case Iop_ShrN64x4: shr = True; size = 64; break;
23123 default: vassert(0);
23126 if (shl || shr) {
23127 assign(
23128 g1,
23129 IRExpr_ITE(
23130 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23131 binop(op, mkexpr(g0), mkexpr(amt8)),
23132 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23133 )
23134 );
23135 } else
23136 if (sar) {
23137 assign(
23138 g1,
23139 IRExpr_ITE(
23140 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
23141 binop(op, mkexpr(g0), mkexpr(amt8)),
23142 binop(op, mkexpr(g0), mkU8(size-1))
23143 )
23144 );
23145 } else {
23146 vassert(0);
23147 }
23149 putYMMReg( rG, mkexpr(g1) );
23150 return delta;
23151 }
23154 /* Vector by vector shift of V by the amount specified at the bottom
23155 of E. Vector by vector shifts are defined for all shift amounts,
23156 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
23157 anyway). */
23158 static ULong dis_AVX_var_shiftV_byE ( const VexAbiInfo* vbi,
23159 Prefix pfx, Long delta,
23160 const HChar* opname, IROp op, Bool isYMM )
23162 HChar dis_buf[50];
23163 Int alen, size, i;
23164 IRTemp addr;
23165 UChar modrm = getUChar(delta);
23166 UInt rG = gregOfRexRM(pfx,modrm);
23167 UInt rV = getVexNvvvv(pfx);
23168 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23169 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
23170 IRTemp amts[8], sVs[8], res[8];
23171 if (epartIsReg(modrm)) {
23172 UInt rE = eregOfRexRM(pfx,modrm);
23173 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
23174 if (isYMM) {
23175 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
23176 nameYMMReg(rV), nameYMMReg(rG) );
23177 } else {
23178 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
23179 nameXMMReg(rV), nameXMMReg(rG) );
23181 delta++;
23182 } else {
23183 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23184 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
23185 if (isYMM) {
23186 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
23187 nameYMMReg(rG) );
23188 } else {
23189 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
23190 nameXMMReg(rG) );
23192 delta += alen;
23194 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
23196 size = 0;
23197 switch (op) {
23198 case Iop_Shl32: size = 32; break;
23199 case Iop_Shl64: size = 64; break;
23200 case Iop_Sar32: size = 32; break;
23201 case Iop_Shr32: size = 32; break;
23202 case Iop_Shr64: size = 64; break;
23203 default: vassert(0);
23206 for (i = 0; i < 8; i++) {
23207 sVs[i] = IRTemp_INVALID;
23208 amts[i] = IRTemp_INVALID;
23210 switch (size) {
23211 case 32:
23212 if (isYMM) {
23213 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
23214 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23215 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
23216 &amts[3], &amts[2], &amts[1], &amts[0] );
23217 } else {
23218 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23219 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23221 break;
23222 case 64:
23223 if (isYMM) {
23224 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
23225 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
23226 } else {
23227 breakupV128to64s( sV, &sVs[1], &sVs[0] );
23228 breakupV128to64s( amt, &amts[1], &amts[0] );
23230 break;
23231 default: vassert(0);
23233 for (i = 0; i < 8; i++)
23234 if (sVs[i] != IRTemp_INVALID) {
23235 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
23236 assign( res[i],
23237 IRExpr_ITE(
23238 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
23239 mkexpr(amts[i]),
23240 size == 32 ? mkU32(size) : mkU64(size)),
23241 binop(op, mkexpr(sVs[i]),
23242 unop(size == 32 ? Iop_32to8 : Iop_64to8,
23243 mkexpr(amts[i]))),
23244 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
23245 : size == 32 ? mkU32(0) : mkU64(0)
23247 } else {
23248 res[i] = IRTemp_INVALID;
23250 switch (size) {
23251 case 32:
23252 for (i = 0; i < 8; i++)
23253 putYMMRegLane32( rG, i, (i < 4 || isYMM)
23254 ? mkexpr(res[i]) : mkU32(0) );
23255 break;
23256 case 64:
23257 for (i = 0; i < 4; i++)
23258 putYMMRegLane64( rG, i, (i < 2 || isYMM)
23259 ? mkexpr(res[i]) : mkU64(0) );
23260 break;
23261 default: vassert(0);
23264 return delta;
23268 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23269 version of dis_SSE_shiftE_imm. */
23270 static
23271 Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
23272 Long delta, const HChar* opname, IROp op )
23274 Bool shl, shr, sar;
23275 UChar rm = getUChar(delta);
23276 IRTemp e0 = newTemp(Ity_V128);
23277 IRTemp e1 = newTemp(Ity_V128);
23278 UInt rD = getVexNvvvv(pfx);
23279 UChar amt, size;
23280 vassert(epartIsReg(rm));
23281 vassert(gregLO3ofRM(rm) == 2
23282 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23283 amt = getUChar(delta+1);
23284 delta += 2;
23285 DIP("%s $%d,%s,%s\n", opname,
23286 (Int)amt,
23287 nameXMMReg(eregOfRexRM(pfx,rm)),
23288 nameXMMReg(rD));
23289 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
23291 shl = shr = sar = False;
23292 size = 0;
23293 switch (op) {
23294 case Iop_ShlN16x8: shl = True; size = 16; break;
23295 case Iop_ShlN32x4: shl = True; size = 32; break;
23296 case Iop_ShlN64x2: shl = True; size = 64; break;
23297 case Iop_SarN16x8: sar = True; size = 16; break;
23298 case Iop_SarN32x4: sar = True; size = 32; break;
23299 case Iop_ShrN16x8: shr = True; size = 16; break;
23300 case Iop_ShrN32x4: shr = True; size = 32; break;
23301 case Iop_ShrN64x2: shr = True; size = 64; break;
23302 default: vassert(0);
23305 if (shl || shr) {
23306 assign( e1, amt >= size
23307 ? mkV128(0x0000)
23308 : binop(op, mkexpr(e0), mkU8(amt))
23310 } else
23311 if (sar) {
23312 assign( e1, amt >= size
23313 ? binop(op, mkexpr(e0), mkU8(size-1))
23314 : binop(op, mkexpr(e0), mkU8(amt))
23316 } else {
23317 vassert(0);
23320 putYMMRegLoAndZU( rD, mkexpr(e1) );
23321 return delta;
23325 /* Vector by scalar shift of E into V, by an immediate byte. Modified
23326 version of dis_AVX128_shiftE_to_V_imm. */
23327 static
23328 Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
23329 Long delta, const HChar* opname, IROp op )
23331 Bool shl, shr, sar;
23332 UChar rm = getUChar(delta);
23333 IRTemp e0 = newTemp(Ity_V256);
23334 IRTemp e1 = newTemp(Ity_V256);
23335 UInt rD = getVexNvvvv(pfx);
23336 UChar amt, size;
23337 vassert(epartIsReg(rm));
23338 vassert(gregLO3ofRM(rm) == 2
23339 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
23340 amt = getUChar(delta+1);
23341 delta += 2;
23342 DIP("%s $%d,%s,%s\n", opname,
23343 (Int)amt,
23344 nameYMMReg(eregOfRexRM(pfx,rm)),
23345 nameYMMReg(rD));
23346 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
23348 shl = shr = sar = False;
23349 size = 0;
23350 switch (op) {
23351 case Iop_ShlN16x16: shl = True; size = 16; break;
23352 case Iop_ShlN32x8: shl = True; size = 32; break;
23353 case Iop_ShlN64x4: shl = True; size = 64; break;
23354 case Iop_SarN16x16: sar = True; size = 16; break;
23355 case Iop_SarN32x8: sar = True; size = 32; break;
23356 case Iop_ShrN16x16: shr = True; size = 16; break;
23357 case Iop_ShrN32x8: shr = True; size = 32; break;
23358 case Iop_ShrN64x4: shr = True; size = 64; break;
23359 default: vassert(0);
23363 if (shl || shr) {
23364 assign( e1, amt >= size
23365 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
23366 : binop(op, mkexpr(e0), mkU8(amt))
23368 } else
23369 if (sar) {
23370 assign( e1, amt >= size
23371 ? binop(op, mkexpr(e0), mkU8(size-1))
23372 : binop(op, mkexpr(e0), mkU8(amt))
23374 } else {
23375 vassert(0);
23378 putYMMReg( rD, mkexpr(e1) );
23379 return delta;
23383 /* Lower 64-bit lane only AVX128 binary operation:
23384 G[63:0] = V[63:0] `op` E[63:0]
23385 G[127:64] = V[127:64]
23386 G[255:128] = 0.
23387 The specified op must be of the 64F0x2 kind, so that it
23388 copies the upper half of the left operand to the result.
23390 static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
23391 const VexAbiInfo* vbi,
23392 Prefix pfx, Long delta,
23393 const HChar* opname, IROp op )
23395 HChar dis_buf[50];
23396 Int alen;
23397 IRTemp addr;
23398 UChar rm = getUChar(delta);
23399 UInt rG = gregOfRexRM(pfx,rm);
23400 UInt rV = getVexNvvvv(pfx);
23401 IRExpr* vpart = getXMMReg(rV);
23402 if (epartIsReg(rm)) {
23403 UInt rE = eregOfRexRM(pfx,rm);
23404 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23405 DIP("%s %s,%s,%s\n", opname,
23406 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23407 delta = delta+1;
23408 } else {
23409 /* We can only do a 64-bit memory read, so the upper half of the
23410 E operand needs to be made simply of zeroes. */
23411 IRTemp epart = newTemp(Ity_V128);
23412 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23413 assign( epart, unop( Iop_64UtoV128,
23414 loadLE(Ity_I64, mkexpr(addr))) );
23415 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23416 DIP("%s %s,%s,%s\n", opname,
23417 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23418 delta = delta+alen;
23420 putYMMRegLane128( rG, 1, mkV128(0) );
23421 *uses_vvvv = True;
23422 return delta;
23426 /* Lower 64-bit lane only AVX128 unary operation:
23427 G[63:0] = op(E[63:0])
23428 G[127:64] = V[127:64]
23429 G[255:128] = 0
23430 The specified op must be of the 64F0x2 kind, so that it
23431 copies the upper half of the operand to the result.
23433 static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
23434 const VexAbiInfo* vbi,
23435 Prefix pfx, Long delta,
23436 const HChar* opname, IROp op )
23438 HChar dis_buf[50];
23439 Int alen;
23440 IRTemp addr;
23441 UChar rm = getUChar(delta);
23442 UInt rG = gregOfRexRM(pfx,rm);
23443 UInt rV = getVexNvvvv(pfx);
23444 IRTemp e64 = newTemp(Ity_I64);
23446 /* Fetch E[63:0] */
23447 if (epartIsReg(rm)) {
23448 UInt rE = eregOfRexRM(pfx,rm);
23449 assign(e64, getXMMRegLane64(rE, 0));
23450 DIP("%s %s,%s,%s\n", opname,
23451 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23452 delta += 1;
23453 } else {
23454 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23455 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
23456 DIP("%s %s,%s,%s\n", opname,
23457 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23458 delta += alen;
23461 /* Create a value 'arg' as V[127:64]++E[63:0] */
23462 IRTemp arg = newTemp(Ity_V128);
23463 assign(arg,
23464 binop(Iop_SetV128lo64,
23465 getXMMReg(rV), mkexpr(e64)));
23466 /* and apply op to it */
23467 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23468 *uses_vvvv = True;
23469 return delta;
23473 /* Lower 32-bit lane only AVX128 unary operation:
23474 G[31:0] = op(E[31:0])
23475 G[127:32] = V[127:32]
23476 G[255:128] = 0
23477 The specified op must be of the 32F0x4 kind, so that it
23478 copies the upper 3/4 of the operand to the result.
23480 static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
23481 const VexAbiInfo* vbi,
23482 Prefix pfx, Long delta,
23483 const HChar* opname, IROp op )
23485 HChar dis_buf[50];
23486 Int alen;
23487 IRTemp addr;
23488 UChar rm = getUChar(delta);
23489 UInt rG = gregOfRexRM(pfx,rm);
23490 UInt rV = getVexNvvvv(pfx);
23491 IRTemp e32 = newTemp(Ity_I32);
23493 /* Fetch E[31:0] */
23494 if (epartIsReg(rm)) {
23495 UInt rE = eregOfRexRM(pfx,rm);
23496 assign(e32, getXMMRegLane32(rE, 0));
23497 DIP("%s %s,%s,%s\n", opname,
23498 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23499 delta += 1;
23500 } else {
23501 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23502 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
23503 DIP("%s %s,%s,%s\n", opname,
23504 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23505 delta += alen;
23508 /* Create a value 'arg' as V[127:32]++E[31:0] */
23509 IRTemp arg = newTemp(Ity_V128);
23510 assign(arg,
23511 binop(Iop_SetV128lo32,
23512 getXMMReg(rV), mkexpr(e32)));
23513 /* and apply op to it */
23514 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23515 *uses_vvvv = True;
23516 return delta;
23520 /* Lower 32-bit lane only AVX128 binary operation:
23521 G[31:0] = V[31:0] `op` E[31:0]
23522 G[127:32] = V[127:32]
23523 G[255:128] = 0.
23524 The specified op must be of the 32F0x4 kind, so that it
23525 copies the upper 3/4 of the left operand to the result.
23527 static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
23528 const VexAbiInfo* vbi,
23529 Prefix pfx, Long delta,
23530 const HChar* opname, IROp op )
23532 HChar dis_buf[50];
23533 Int alen;
23534 IRTemp addr;
23535 UChar rm = getUChar(delta);
23536 UInt rG = gregOfRexRM(pfx,rm);
23537 UInt rV = getVexNvvvv(pfx);
23538 IRExpr* vpart = getXMMReg(rV);
23539 if (epartIsReg(rm)) {
23540 UInt rE = eregOfRexRM(pfx,rm);
23541 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
23542 DIP("%s %s,%s,%s\n", opname,
23543 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23544 delta = delta+1;
23545 } else {
23546 /* We can only do a 32-bit memory read, so the upper 3/4 of the
23547 E operand needs to be made simply of zeroes. */
23548 IRTemp epart = newTemp(Ity_V128);
23549 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23550 assign( epart, unop( Iop_32UtoV128,
23551 loadLE(Ity_I32, mkexpr(addr))) );
23552 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
23553 DIP("%s %s,%s,%s\n", opname,
23554 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23555 delta = delta+alen;
23557 putYMMRegLane128( rG, 1, mkV128(0) );
23558 *uses_vvvv = True;
23559 return delta;
23563 /* All-lanes AVX128 binary operation:
23564 G[127:0] = V[127:0] `op` E[127:0]
23565 G[255:128] = 0.
23567 static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23568 const VexAbiInfo* vbi,
23569 Prefix pfx, Long delta,
23570 const HChar* opname, IROp op )
23572 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
23573 uses_vvvv, vbi, pfx, delta, opname, op,
23574 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23579 /* Handles AVX128 32F/64F comparisons. A derivative of
23580 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23581 original delta to indicate failure. */
23582 static
23583 Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23584 const VexAbiInfo* vbi,
23585 Prefix pfx, Long delta,
23586 const HChar* opname, Bool all_lanes, Int sz )
23588 vassert(sz == 4 || sz == 8);
23589 Long deltaIN = delta;
23590 HChar dis_buf[50];
23591 Int alen;
23592 UInt imm8;
23593 IRTemp addr;
23594 Bool preZero = False;
23595 Bool preSwap = False;
23596 IROp op = Iop_INVALID;
23597 Bool postNot = False;
23598 IRTemp plain = newTemp(Ity_V128);
23599 UChar rm = getUChar(delta);
23600 UInt rG = gregOfRexRM(pfx, rm);
23601 UInt rV = getVexNvvvv(pfx);
23602 IRTemp argL = newTemp(Ity_V128);
23603 IRTemp argR = newTemp(Ity_V128);
23605 assign(argL, getXMMReg(rV));
23606 if (epartIsReg(rm)) {
23607 imm8 = getUChar(delta+1);
23608 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23609 imm8, all_lanes, sz);
23610 if (!ok) return deltaIN; /* FAIL */
23611 UInt rE = eregOfRexRM(pfx,rm);
23612 assign(argR, getXMMReg(rE));
23613 delta += 1+1;
23614 DIP("%s $%u,%s,%s,%s\n",
23615 opname, imm8,
23616 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23617 } else {
23618 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23619 imm8 = getUChar(delta+alen);
23620 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot,
23621 imm8, all_lanes, sz);
23622 if (!ok) return deltaIN; /* FAIL */
23623 assign(argR,
23624 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
23625 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
23626 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
23627 delta += alen+1;
23628 DIP("%s $%u,%s,%s,%s\n",
23629 opname, imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23632 IRTemp argMask = newTemp(Ity_V128);
23633 if (preZero) {
23634 // In this case, preSwap is irrelevant, but it's harmless to honour it
23635 // anyway.
23636 assign(argMask, mkV128(all_lanes ? 0x0000 : (sz==4 ? 0xFFF0 : 0xFF00)));
23637 } else {
23638 assign(argMask, mkV128(0xFFFF));
23641 assign(
23642 plain,
23643 preSwap ? binop(op, binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)),
23644 binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)))
23645 : binop(op, binop(Iop_AndV128, mkexpr(argL), mkexpr(argMask)),
23646 binop(Iop_AndV128, mkexpr(argR), mkexpr(argMask)))
23649 if (all_lanes) {
23650 /* This is simple: just invert the result, if necessary, and
23651 have done. */
23652 if (postNot) {
23653 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
23654 } else {
23655 putYMMRegLoAndZU( rG, mkexpr(plain) );
23658 else
23659 if (!preSwap) {
23660 /* More complex. It's a one-lane-only, hence need to possibly
23661 invert only that one lane. But at least the other lanes are
23662 correctly "in" the result, having been copied from the left
23663 operand (argL). */
23664 if (postNot) {
23665 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
23666 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
23667 mask) );
23668 } else {
23669 putYMMRegLoAndZU( rG, mkexpr(plain) );
23672 else {
23673 /* This is the most complex case. One-lane-only, but the args
23674 were swapped. So we have to possibly invert the bottom lane,
23675 and (definitely) we have to copy the upper lane(s) from argL
23676 since, due to the swapping, what's currently there is from
23677 argR, which is not correct. */
23678 IRTemp res = newTemp(Ity_V128);
23679 IRTemp mask = newTemp(Ity_V128);
23680 IRTemp notMask = newTemp(Ity_V128);
23681 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
23682 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
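/* Note on the constants: mkV128 expands each bit of its 16-bit
   argument to one byte of the 128-bit value, so mkV128(0x000F) has
   0xFF in the low 4 bytes (the low 32-bit lane) and zero elsewhere,
   and mkV128(0x00FF) covers the low 64-bit lane; the notMask values
   select everything except that lane. */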
23683 if (postNot) {
23684 assign(res,
23685 binop(Iop_OrV128,
23686 binop(Iop_AndV128,
23687 unop(Iop_NotV128, mkexpr(plain)),
23688 mkexpr(mask)),
23689 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23690 } else {
23691 assign(res,
23692 binop(Iop_OrV128,
23693 binop(Iop_AndV128,
23694 mkexpr(plain),
23695 mkexpr(mask)),
23696 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
23698 putYMMRegLoAndZU( rG, mkexpr(res) );
23701 *uses_vvvv = True;
23702 return delta;
23706 /* Handles AVX256 32F/64F comparisons. A derivative of
23707 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
23708 original delta to indicate failure. */
23709 static
23710 Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
23711 const VexAbiInfo* vbi,
23712 Prefix pfx, Long delta,
23713 const HChar* opname, Int sz )
23715 vassert(sz == 4 || sz == 8);
23716 Long deltaIN = delta;
23717 HChar dis_buf[50];
23718 Int alen;
23719 UInt imm8;
23720 IRTemp addr;
23721 Bool preZero = False;
23722 Bool preSwap = False;
23723 IROp op = Iop_INVALID;
23724 Bool postNot = False;
23725 IRTemp plain = newTemp(Ity_V256);
23726 UChar rm = getUChar(delta);
23727 UInt rG = gregOfRexRM(pfx, rm);
23728 UInt rV = getVexNvvvv(pfx);
23729 IRTemp argL = newTemp(Ity_V256);
23730 IRTemp argR = newTemp(Ity_V256);
23731 IRTemp argLhi = IRTemp_INVALID;
23732 IRTemp argLlo = IRTemp_INVALID;
23733 IRTemp argRhi = IRTemp_INVALID;
23734 IRTemp argRlo = IRTemp_INVALID;
23736 assign(argL, getYMMReg(rV));
23737 if (epartIsReg(rm)) {
23738 imm8 = getUChar(delta+1);
23739 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23740 True/*all_lanes*/, sz);
23741 if (!ok) return deltaIN; /* FAIL */
23742 UInt rE = eregOfRexRM(pfx,rm);
23743 assign(argR, getYMMReg(rE));
23744 delta += 1+1;
23745 DIP("%s $%u,%s,%s,%s\n",
23746 opname, imm8,
23747 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
23748 } else {
23749 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
23750 imm8 = getUChar(delta+alen);
23751 Bool ok = findSSECmpOp(&preZero, &preSwap, &op, &postNot, imm8,
23752 True/*all_lanes*/, sz);
23753 if (!ok) return deltaIN; /* FAIL */
23754 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
23755 delta += alen+1;
23756 DIP("%s $%u,%s,%s,%s\n",
23757 opname, imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
23760 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
23761 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
23763 IRTemp argMask = newTemp(Ity_V128);
23764 if (preZero) {
23765 // In this case, preSwap is irrelevant, but it's harmless to honour it
23766 // anyway.
23767 assign(argMask, mkV128(0x0000));
23768 } else {
23769 assign(argMask, mkV128(0xFFFF));
23772 assign(
23773 plain,
23774 binop( Iop_V128HLtoV256,
23775 binop(op, binop(Iop_AndV128, mkexpr(argLhi), mkexpr(argMask)),
23776 binop(Iop_AndV128, mkexpr(argRhi), mkexpr(argMask))),
23777 binop(op, binop(Iop_AndV128, mkexpr(argLlo), mkexpr(argMask)),
23778 binop(Iop_AndV128, mkexpr(argRlo), mkexpr(argMask))))
23781 /* This is simple: just invert the result, if necessary, and
23782 have done. */
23783 if (postNot) {
23784 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
23785 } else {
23786 putYMMReg( rG, mkexpr(plain) );
23789 *uses_vvvv = True;
23790 return delta;
23794 /* Handles AVX128 unary E-to-G all-lanes operations, using the given IR generator to compute the result. */
23795 static
23796 Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23797 const VexAbiInfo* vbi,
23798 Prefix pfx, Long delta,
23799 const HChar* opname,
23800 IRTemp (*opFn)(IRTemp) )
23802 HChar dis_buf[50];
23803 Int alen;
23804 IRTemp addr;
23805 IRTemp res = newTemp(Ity_V128);
23806 IRTemp arg = newTemp(Ity_V128);
23807 UChar rm = getUChar(delta);
23808 UInt rG = gregOfRexRM(pfx, rm);
23809 if (epartIsReg(rm)) {
23810 UInt rE = eregOfRexRM(pfx,rm);
23811 assign(arg, getXMMReg(rE));
23812 delta += 1;
23813 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23814 } else {
23815 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23816 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23817 delta += alen;
23818 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23820 res = opFn(arg);
23821 putYMMRegLoAndZU( rG, mkexpr(res) );
23822 *uses_vvvv = False;
23823 return delta;
23827 /* Handles AVX128 unary E-to-G all-lanes operations. */
23828 static
23829 Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23830 const VexAbiInfo* vbi,
23831 Prefix pfx, Long delta,
23832 const HChar* opname, IROp op )
23834 HChar dis_buf[50];
23835 Int alen;
23836 IRTemp addr;
23837 IRTemp arg = newTemp(Ity_V128);
23838 UChar rm = getUChar(delta);
23839 UInt rG = gregOfRexRM(pfx, rm);
23840 if (epartIsReg(rm)) {
23841 UInt rE = eregOfRexRM(pfx,rm);
23842 assign(arg, getXMMReg(rE));
23843 delta += 1;
23844 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23845 } else {
23846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23847 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23848 delta += alen;
23849 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23851 // Sqrt32Fx4 and Sqrt64Fx2 take a rounding mode, which is faked
23852 // up in the usual way.
23853 Bool needsIRRM = op == Iop_Sqrt32Fx4 || op == Iop_Sqrt64Fx2;
23854 /* XXXROUNDINGFIXME */
23855 IRExpr* res = needsIRRM ? binop(op, get_FAKE_roundingmode(), mkexpr(arg))
23856 : unop(op, mkexpr(arg));
23857 putYMMRegLoAndZU( rG, res );
23858 *uses_vvvv = False;
23859 return delta;
23863 /* FIXME: common up with the _128_ version above? */
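/* A sketch of the two ways this can be called -- compare the thin
   wrappers dis_AVX256_E_V_to_G and
   dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex further down.  Exactly one
   of 'op' and 'opFn' is meaningful per call, as the vasserts in the
   body enforce.  The mnemonics here are illustrative only:

      // plain binary IROp applied to the full 256 bits
      delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256,
                 NULL, False, False );

      // result computed by a helper such as math_VPUNPCKLBW_YMM
      delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
                 uses_vvvv, vbi, pfx, delta, "vpunpcklbw", Iop_INVALID,
                 math_VPUNPCKLBW_YMM, False, False );
*/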
23864 static
23865 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23866 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23867 Prefix pfx, Long delta, const HChar* name,
23868 /* The actual operation. Use either 'op' or 'opfn',
23869 but not both. */
23870 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23871 Bool invertLeftArg,
23872 Bool swapArgs
23875 UChar modrm = getUChar(delta);
23876 UInt rD = gregOfRexRM(pfx, modrm);
23877 UInt rSL = getVexNvvvv(pfx);
23878 IRTemp tSL = newTemp(Ity_V256);
23879 IRTemp tSR = newTemp(Ity_V256);
23880 IRTemp addr = IRTemp_INVALID;
23881 HChar dis_buf[50];
23882 Int alen = 0;
23883 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23885 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23886 : getYMMReg(rSL));
23888 if (epartIsReg(modrm)) {
23889 UInt rSR = eregOfRexRM(pfx, modrm);
23890 delta += 1;
23891 assign(tSR, getYMMReg(rSR));
23892 DIP("%s %s,%s,%s\n",
23893 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23894 } else {
23895 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23896 delta += alen;
23897 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23898 DIP("%s %s,%s,%s\n",
23899 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23902 IRTemp res = IRTemp_INVALID;
23903 if (op != Iop_INVALID) {
23904 vassert(opFn == NULL);
23905 res = newTemp(Ity_V256);
23906 if (requiresRMode(op)) {
23907 IRTemp rm = newTemp(Ity_I32);
23908 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23909 assign(res, swapArgs
23910 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23911 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23912 } else {
23913 assign(res, swapArgs
23914 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23915 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23917 } else {
23918 vassert(opFn != NULL);
23919 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23922 putYMMReg(rD, mkexpr(res));
23924 *uses_vvvv = True;
23925 return delta;
23929 /* All-lanes AVX256 binary operation:
23930 G[255:0] = V[255:0] `op` E[255:0]
23932 static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23933 const VexAbiInfo* vbi,
23934 Prefix pfx, Long delta,
23935 const HChar* opname, IROp op )
23937 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23938 uses_vvvv, vbi, pfx, delta, opname, op,
23939 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23944 /* Handle a VEX_NDS_256_AnySimdPfx_0F_WIG (3-addr) insn, with a simple IROp
23945 for the operation, no inversion of the left arg, and no swapping of
23946 args. */
23947 static
23948 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23949 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23950 Prefix pfx, Long delta, const HChar* name,
23951 IROp op
23954 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23955 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23959 /* Handle a VEX_NDS_256_AnySimdPfx_0F_WIG (3-addr) insn, using the given IR
23960 generator to compute the result, no inversion of the left
23961 arg, and no swapping of args. */
23962 static
23963 Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23964 /*OUT*/Bool* uses_vvvv, const VexAbiInfo* vbi,
23965 Prefix pfx, Long delta, const HChar* name,
23966 IRTemp(*opFn)(IRTemp,IRTemp)
23969 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23970 uses_vvvv, vbi, pfx, delta, name,
23971 Iop_INVALID, opFn, False, False );
23975 /* Handles AVX256 unary E-to-G all-lanes operations, using the given IR generator to compute the result. */
23976 static
23977 Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23978 const VexAbiInfo* vbi,
23979 Prefix pfx, Long delta,
23980 const HChar* opname,
23981 IRTemp (*opFn)(IRTemp) )
23983 HChar dis_buf[50];
23984 Int alen;
23985 IRTemp addr;
23986 IRTemp res = newTemp(Ity_V256);
23987 IRTemp arg = newTemp(Ity_V256);
23988 UChar rm = getUChar(delta);
23989 UInt rG = gregOfRexRM(pfx, rm);
23990 if (epartIsReg(rm)) {
23991 UInt rE = eregOfRexRM(pfx,rm);
23992 assign(arg, getYMMReg(rE));
23993 delta += 1;
23994 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23995 } else {
23996 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23997 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23998 delta += alen;
23999 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
24001 res = opFn(arg);
24002 putYMMReg( rG, mkexpr(res) );
24003 *uses_vvvv = False;
24004 return delta;
24008 /* Handles AVX256 unary E-to-G all-lanes operations. */
24009 static
24010 Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
24011 const VexAbiInfo* vbi,
24012 Prefix pfx, Long delta,
24013 const HChar* opname, IROp op )
24015 HChar dis_buf[50];
24016 Int alen;
24017 IRTemp addr;
24018 IRTemp arg = newTemp(Ity_V256);
24019 UChar rm = getUChar(delta);
24020 UInt rG = gregOfRexRM(pfx, rm);
24021 if (epartIsReg(rm)) {
24022 UInt rE = eregOfRexRM(pfx,rm);
24023 assign(arg, getYMMReg(rE));
24024 delta += 1;
24025 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
24026 } else {
24027 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24028 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
24029 delta += alen;
24030 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
24032 putYMMReg( rG, unop(op, mkexpr(arg)) );
24033 *uses_vvvv = False;
24034 return delta;
24038 /* The use of ReinterpF64asI64 is ugly. Surely could do better if we
24039 had a variant of Iop_64x4toV256 that took F64s as args instead. */
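/* No rounding mode is needed for the element conversions themselves:
   every I32 value is exactly representable as an F64, so Iop_I32StoF64
   is exact, and the ReinterpF64asI64 wrappers merely repackage the
   bits so that Iop_64x4toV256 can glue them together. */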
24040 static Long dis_CVTDQ2PD_256 ( const VexAbiInfo* vbi, Prefix pfx,
24041 Long delta )
24043 IRTemp addr = IRTemp_INVALID;
24044 Int alen = 0;
24045 HChar dis_buf[50];
24046 UChar modrm = getUChar(delta);
24047 IRTemp sV = newTemp(Ity_V128);
24048 UInt rG = gregOfRexRM(pfx,modrm);
24049 if (epartIsReg(modrm)) {
24050 UInt rE = eregOfRexRM(pfx,modrm);
24051 assign( sV, getXMMReg(rE) );
24052 delta += 1;
24053 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
24054 } else {
24055 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24056 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
24057 delta += alen;
24058 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
24060 IRTemp s3, s2, s1, s0;
24061 s3 = s2 = s1 = s0 = IRTemp_INVALID;
24062 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
24063 IRExpr* res
24064 = IRExpr_Qop(
24065 Iop_64x4toV256,
24066 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
24067 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
24068 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
24069 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
24071 putYMMReg(rG, res);
24072 return delta;
24076 static Long dis_CVTPD2PS_256 ( const VexAbiInfo* vbi, Prefix pfx,
24077 Long delta )
24079 IRTemp addr = IRTemp_INVALID;
24080 Int alen = 0;
24081 HChar dis_buf[50];
24082 UChar modrm = getUChar(delta);
24083 UInt rG = gregOfRexRM(pfx,modrm);
24084 IRTemp argV = newTemp(Ity_V256);
24085 IRTemp rmode = newTemp(Ity_I32);
24086 if (epartIsReg(modrm)) {
24087 UInt rE = eregOfRexRM(pfx,modrm);
24088 assign( argV, getYMMReg(rE) );
24089 delta += 1;
24090 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
24091 } else {
24092 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24093 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
24094 delta += alen;
24095 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
24098 assign( rmode, get_sse_roundingmode() );
24099 IRTemp t3, t2, t1, t0;
24100 t3 = t2 = t1 = t0 = IRTemp_INVALID;
24101 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
24102 # define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
24103 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
24104 putXMMRegLane32F( rG, 3, CVT(t3) );
24105 putXMMRegLane32F( rG, 2, CVT(t2) );
24106 putXMMRegLane32F( rG, 1, CVT(t1) );
24107 putXMMRegLane32F( rG, 0, CVT(t0) );
24108 # undef CVT
24109 putYMMRegLane128( rG, 1, mkV128(0) );
24110 return delta;
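/* Apply a 128-bit binary op independently to the low and high halves
   of two V256 values and reassemble the result.  The 256-bit AVX2
   interleave and pack instructions operate within each 128-bit lane,
   which is why the VPUNPCK* and VPACK* helpers below can all share
   this.  Note the op is applied as op(tR, tL), right operand first. */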
24114 static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRTemp tR, IROp op )
24116 IRTemp tLhi, tLlo, tRhi, tRlo;
24117 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
24118 IRTemp res = newTemp(Ity_V256);
24119 breakupV256toV128s( tL, &tLhi, &tLlo );
24120 breakupV256toV128s( tR, &tRhi, &tRlo );
24121 assign( res, binop( Iop_V128HLtoV256,
24122 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
24123 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
24124 return res;
24128 static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
24130 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
24134 static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
24136 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
24140 static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
24142 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
24146 static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
24148 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
24152 static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
24154 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
24158 static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
24160 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
24164 static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
24166 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
24170 static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
24172 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
24176 static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
24178 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
24182 static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
24184 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
24188 static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
24190 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
24194 static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
24196 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
24200 __attribute__((noinline))
24201 static
24202 Long dis_ESC_0F__VEX (
24203 /*MB_OUT*/DisResult* dres,
24204 /*OUT*/ Bool* uses_vvvv,
24205 const VexArchInfo* archinfo,
24206 const VexAbiInfo* vbi,
24207 Prefix pfx, Int sz, Long deltaIN
24210 IRTemp addr = IRTemp_INVALID;
24211 Int alen = 0;
24212 HChar dis_buf[50];
24213 Long delta = deltaIN;
24214 UChar opc = getUChar(delta);
24215 delta++;
24216 *uses_vvvv = False;
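/* Each case below tests the mandatory-prefix byte (none/66/F2/F3),
   VEX.L and, where relevant, REX.W; the first guard that matches
   emits IR and jumps to decode_success, whereas a 'break' out of the
   switch leaves the instruction undecoded.  VEX.128-encoded writes to
   an XMM register go through putYMMRegLoAndZU, which also zeroes bits
   255:128 of the containing YMM register, as the AVX semantics
   require. */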
24218 switch (opc) {
24220 case 0x10:
24221 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24222 /* Move 64 bits from E (mem only) to G (lo half xmm).
24223 Bits 255-64 of the dest are zeroed out. */
24224 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24225 UChar modrm = getUChar(delta);
24226 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24227 UInt rG = gregOfRexRM(pfx,modrm);
24228 IRTemp z128 = newTemp(Ity_V128);
24229 assign(z128, mkV128(0));
24230 putXMMReg( rG, mkexpr(z128) );
24231 /* FIXME: ALIGNMENT CHECK? */
24232 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
24233 putYMMRegLane128( rG, 1, mkexpr(z128) );
24234 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
24235 delta += alen;
24236 goto decode_success;
24238 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
24239 /* Reg form. */
24240 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24241 UChar modrm = getUChar(delta);
24242 UInt rG = gregOfRexRM(pfx, modrm);
24243 UInt rE = eregOfRexRM(pfx, modrm);
24244 UInt rV = getVexNvvvv(pfx);
24245 delta++;
24246 DIP("vmovsd %s,%s,%s\n",
24247 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24248 IRTemp res = newTemp(Ity_V128);
24249 assign(res, binop(Iop_64HLtoV128,
24250 getXMMRegLane64(rV, 1),
24251 getXMMRegLane64(rE, 0)));
24252 putYMMRegLoAndZU(rG, mkexpr(res));
24253 *uses_vvvv = True;
24254 goto decode_success;
24256 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24257 /* Move 32 bits from E (mem only) to G (lo half xmm).
24258 Bits 255-32 of the dest are zeroed out. */
24259 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24260 UChar modrm = getUChar(delta);
24261 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24262 UInt rG = gregOfRexRM(pfx,modrm);
24263 IRTemp z128 = newTemp(Ity_V128);
24264 assign(z128, mkV128(0));
24265 putXMMReg( rG, mkexpr(z128) );
24266 /* FIXME: ALIGNMENT CHECK? */
24267 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
24268 putYMMRegLane128( rG, 1, mkexpr(z128) );
24269 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
24270 delta += alen;
24271 goto decode_success;
24273 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
24274 /* Reg form. */
24275 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24276 UChar modrm = getUChar(delta);
24277 UInt rG = gregOfRexRM(pfx, modrm);
24278 UInt rE = eregOfRexRM(pfx, modrm);
24279 UInt rV = getVexNvvvv(pfx);
24280 delta++;
24281 DIP("vmovss %s,%s,%s\n",
24282 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24283 IRTemp res = newTemp(Ity_V128);
24284 assign( res, binop( Iop_64HLtoV128,
24285 getXMMRegLane64(rV, 1),
24286 binop(Iop_32HLto64,
24287 getXMMRegLane32(rV, 1),
24288 getXMMRegLane32(rE, 0)) ) );
24289 putYMMRegLoAndZU(rG, mkexpr(res));
24290 *uses_vvvv = True;
24291 goto decode_success;
24293 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
24294 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24295 UChar modrm = getUChar(delta);
24296 UInt rG = gregOfRexRM(pfx, modrm);
24297 if (epartIsReg(modrm)) {
24298 UInt rE = eregOfRexRM(pfx,modrm);
24299 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24300 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24301 delta += 1;
24302 } else {
24303 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24304 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24305 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
24306 delta += alen;
24308 goto decode_success;
24310 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
24311 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24312 UChar modrm = getUChar(delta);
24313 UInt rG = gregOfRexRM(pfx, modrm);
24314 if (epartIsReg(modrm)) {
24315 UInt rE = eregOfRexRM(pfx,modrm);
24316 putYMMReg( rG, getYMMReg( rE ));
24317 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24318 delta += 1;
24319 } else {
24320 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24321 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24322 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
24323 delta += alen;
24325 goto decode_success;
24327 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
24328 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24329 UChar modrm = getUChar(delta);
24330 UInt rG = gregOfRexRM(pfx, modrm);
24331 if (epartIsReg(modrm)) {
24332 UInt rE = eregOfRexRM(pfx,modrm);
24333 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24334 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24335 delta += 1;
24336 } else {
24337 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24338 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24339 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
24340 delta += alen;
24342 goto decode_success;
24344 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
24345 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24346 UChar modrm = getUChar(delta);
24347 UInt rG = gregOfRexRM(pfx, modrm);
24348 if (epartIsReg(modrm)) {
24349 UInt rE = eregOfRexRM(pfx,modrm);
24350 putYMMReg( rG, getYMMReg( rE ));
24351 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24352 delta += 1;
24353 } else {
24354 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24355 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24356 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
24357 delta += alen;
24359 goto decode_success;
24361 break;
24363 case 0x11:
24364 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
24365 /* Move 64 bits from G (low half xmm) to mem only. */
24366 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
24367 UChar modrm = getUChar(delta);
24368 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24369 UInt rG = gregOfRexRM(pfx,modrm);
24370 /* FIXME: ALIGNMENT CHECK? */
24371 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
24372 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
24373 delta += alen;
24374 goto decode_success;
24376 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
24377 /* Reg form. */
24378 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
24379 UChar modrm = getUChar(delta);
24380 UInt rG = gregOfRexRM(pfx, modrm);
24381 UInt rE = eregOfRexRM(pfx, modrm);
24382 UInt rV = getVexNvvvv(pfx);
24383 delta++;
24384 DIP("vmovsd %s,%s,%s\n",
24385 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24386 IRTemp res = newTemp(Ity_V128);
24387 assign(res, binop(Iop_64HLtoV128,
24388 getXMMRegLane64(rV, 1),
24389 getXMMRegLane64(rG, 0)));
24390 putYMMRegLoAndZU(rE, mkexpr(res));
24391 *uses_vvvv = True;
24392 goto decode_success;
24394 /* VMOVSS xmm1, m32 = VEX.LIG.F3.0F.WIG 11 /r */
24395 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
24396 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
24397 UChar modrm = getUChar(delta);
24398 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24399 UInt rG = gregOfRexRM(pfx,modrm);
24400 /* FIXME: ALIGNMENT CHECK? */
24401 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
24402 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
24403 delta += alen;
24404 goto decode_success;
24406 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
24407 /* Reg form. */
24408 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
24409 UChar modrm = getUChar(delta);
24410 UInt rG = gregOfRexRM(pfx, modrm);
24411 UInt rE = eregOfRexRM(pfx, modrm);
24412 UInt rV = getVexNvvvv(pfx);
24413 delta++;
24414 DIP("vmovss %s,%s,%s\n",
24415 nameXMMReg(rG), nameXMMReg(rV), nameXMMReg(rE));
24416 IRTemp res = newTemp(Ity_V128);
24417 assign( res, binop( Iop_64HLtoV128,
24418 getXMMRegLane64(rV, 1),
24419 binop(Iop_32HLto64,
24420 getXMMRegLane32(rV, 1),
24421 getXMMRegLane32(rG, 0)) ) );
24422 putYMMRegLoAndZU(rE, mkexpr(res));
24423 *uses_vvvv = True;
24424 goto decode_success;
24426 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
24427 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24428 UChar modrm = getUChar(delta);
24429 UInt rG = gregOfRexRM(pfx,modrm);
24430 if (epartIsReg(modrm)) {
24431 UInt rE = eregOfRexRM(pfx,modrm);
24432 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24433 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24434 delta += 1;
24435 } else {
24436 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24437 storeLE( mkexpr(addr), getXMMReg(rG) );
24438 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
24439 delta += alen;
24441 goto decode_success;
24443 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
24444 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24445 UChar modrm = getUChar(delta);
24446 UInt rG = gregOfRexRM(pfx,modrm);
24447 if (epartIsReg(modrm)) {
24448 UInt rE = eregOfRexRM(pfx,modrm);
24449 putYMMReg( rE, getYMMReg(rG) );
24450 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24451 delta += 1;
24452 } else {
24453 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24454 storeLE( mkexpr(addr), getYMMReg(rG) );
24455 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
24456 delta += alen;
24458 goto decode_success;
24460 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
24461 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24462 UChar modrm = getUChar(delta);
24463 UInt rG = gregOfRexRM(pfx,modrm);
24464 if (epartIsReg(modrm)) {
24465 UInt rE = eregOfRexRM(pfx,modrm);
24466 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24467 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24468 delta += 1;
24469 } else {
24470 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24471 storeLE( mkexpr(addr), getXMMReg(rG) );
24472 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
24473 delta += alen;
24475 goto decode_success;
24477 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
24478 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24479 UChar modrm = getUChar(delta);
24480 UInt rG = gregOfRexRM(pfx,modrm);
24481 if (epartIsReg(modrm)) {
24482 UInt rE = eregOfRexRM(pfx,modrm);
24483 putYMMReg( rE, getYMMReg(rG) );
24484 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24485 delta += 1;
24486 } else {
24487 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24488 storeLE( mkexpr(addr), getYMMReg(rG) );
24489 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
24490 delta += alen;
24492 goto decode_success;
24494 break;
24496 case 0x12:
24497 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG 12 /r */
24498 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24499 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
24500 goto decode_success;
24502 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 12 /r */
24503 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24504 delta = dis_MOVDDUP_256( vbi, pfx, delta );
24505 goto decode_success;
24507 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
24508 /* Insn only exists in reg form */
24509 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24510 && epartIsReg(getUChar(delta))) {
24511 UChar modrm = getUChar(delta);
24512 UInt rG = gregOfRexRM(pfx, modrm);
24513 UInt rE = eregOfRexRM(pfx, modrm);
24514 UInt rV = getVexNvvvv(pfx);
24515 delta++;
24516 DIP("vmovhlps %s,%s,%s\n",
24517 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24518 IRTemp res = newTemp(Ity_V128);
24519 assign(res, binop(Iop_64HLtoV128,
24520 getXMMRegLane64(rV, 1),
24521 getXMMRegLane64(rE, 1)));
24522 putYMMRegLoAndZU(rG, mkexpr(res));
24523 *uses_vvvv = True;
24524 goto decode_success;
24526 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
24527 /* Insn exists only in mem form, it appears. */
24528 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
24529 /* Insn exists only in mem form, it appears. */
24530 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24531 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24532 UChar modrm = getUChar(delta);
24533 UInt rG = gregOfRexRM(pfx, modrm);
24534 UInt rV = getVexNvvvv(pfx);
24535 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24536 delta += alen;
24537 DIP("vmovlpd %s,%s,%s\n",
24538 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24539 IRTemp res = newTemp(Ity_V128);
24540 assign(res, binop(Iop_64HLtoV128,
24541 getXMMRegLane64(rV, 1),
24542 loadLE(Ity_I64, mkexpr(addr))));
24543 putYMMRegLoAndZU(rG, mkexpr(res));
24544 *uses_vvvv = True;
24545 goto decode_success;
24547 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 12 /r */
24548 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24549 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24550 True/*isL*/ );
24551 goto decode_success;
24553 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 12 /r */
24554 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24555 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
24556 goto decode_success;
24558 break;
24560 case 0x13:
24561 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
24562 /* Insn exists only in mem form, it appears. */
24563 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
24564 /* Insn exists only in mem form, it appears. */
24565 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24566 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24567 UChar modrm = getUChar(delta);
24568 UInt rG = gregOfRexRM(pfx, modrm);
24569 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24570 delta += alen;
24571 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
24572 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
24573 goto decode_success;
24575 break;
24577 case 0x14:
24578 case 0x15:
24579 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
24580 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
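/* Roughly: vunpcklps interleaves the low two F32 lanes of V and E,
   giving G = { E[1], V[1], E[0], V[0] } (high lane listed first), and
   vunpckhps does the same with the high two lanes; see
   math_UNPCKxPS_128.  The PD variants further down interleave F64
   lanes in the same way. */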
24581 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24582 Bool hi = opc == 0x15;
24583 UChar modrm = getUChar(delta);
24584 UInt rG = gregOfRexRM(pfx,modrm);
24585 UInt rV = getVexNvvvv(pfx);
24586 IRTemp eV = newTemp(Ity_V128);
24587 IRTemp vV = newTemp(Ity_V128);
24588 assign( vV, getXMMReg(rV) );
24589 if (epartIsReg(modrm)) {
24590 UInt rE = eregOfRexRM(pfx,modrm);
24591 assign( eV, getXMMReg(rE) );
24592 delta += 1;
24593 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24594 nameXMMReg(rE), nameXMMReg(rG));
24595 } else {
24596 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24597 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24598 delta += alen;
24599 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24600 dis_buf, nameXMMReg(rG));
24602 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
24603 putYMMRegLoAndZU( rG, mkexpr(res) );
24604 *uses_vvvv = True;
24605 goto decode_success;
24607 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
24608 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
24609 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24610 Bool hi = opc == 0x15;
24611 UChar modrm = getUChar(delta);
24612 UInt rG = gregOfRexRM(pfx,modrm);
24613 UInt rV = getVexNvvvv(pfx);
24614 IRTemp eV = newTemp(Ity_V256);
24615 IRTemp vV = newTemp(Ity_V256);
24616 assign( vV, getYMMReg(rV) );
24617 if (epartIsReg(modrm)) {
24618 UInt rE = eregOfRexRM(pfx,modrm);
24619 assign( eV, getYMMReg(rE) );
24620 delta += 1;
24621 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24622 nameYMMReg(rE), nameYMMReg(rG));
24623 } else {
24624 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24625 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24626 delta += alen;
24627 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
24628 dis_buf, nameYMMReg(rG));
24630 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
24631 putYMMReg( rG, mkexpr(res) );
24632 *uses_vvvv = True;
24633 goto decode_success;
24635 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
24636 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
24637 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24638 Bool hi = opc == 0x15;
24639 UChar modrm = getUChar(delta);
24640 UInt rG = gregOfRexRM(pfx,modrm);
24641 UInt rV = getVexNvvvv(pfx);
24642 IRTemp eV = newTemp(Ity_V128);
24643 IRTemp vV = newTemp(Ity_V128);
24644 assign( vV, getXMMReg(rV) );
24645 if (epartIsReg(modrm)) {
24646 UInt rE = eregOfRexRM(pfx,modrm);
24647 assign( eV, getXMMReg(rE) );
24648 delta += 1;
24649 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24650 nameXMMReg(rE), nameXMMReg(rG));
24651 } else {
24652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24653 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
24654 delta += alen;
24655 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24656 dis_buf, nameXMMReg(rG));
24658 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
24659 putYMMRegLoAndZU( rG, mkexpr(res) );
24660 *uses_vvvv = True;
24661 goto decode_success;
24663 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
24664 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
24665 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24666 Bool hi = opc == 0x15;
24667 UChar modrm = getUChar(delta);
24668 UInt rG = gregOfRexRM(pfx,modrm);
24669 UInt rV = getVexNvvvv(pfx);
24670 IRTemp eV = newTemp(Ity_V256);
24671 IRTemp vV = newTemp(Ity_V256);
24672 assign( vV, getYMMReg(rV) );
24673 if (epartIsReg(modrm)) {
24674 UInt rE = eregOfRexRM(pfx,modrm);
24675 assign( eV, getYMMReg(rE) );
24676 delta += 1;
24677 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24678 nameYMMReg(rE), nameYMMReg(rG));
24679 } else {
24680 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24681 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
24682 delta += alen;
24683 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
24684 dis_buf, nameYMMReg(rG));
24686 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
24687 putYMMReg( rG, mkexpr(res) );
24688 *uses_vvvv = True;
24689 goto decode_success;
24691 break;
24693 case 0x16:
24694 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
24695 /* Insn only exists in reg form */
24696 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
24697 && epartIsReg(getUChar(delta))) {
24698 UChar modrm = getUChar(delta);
24699 UInt rG = gregOfRexRM(pfx, modrm);
24700 UInt rE = eregOfRexRM(pfx, modrm);
24701 UInt rV = getVexNvvvv(pfx);
24702 delta++;
24703 DIP("vmovlhps %s,%s,%s\n",
24704 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
24705 IRTemp res = newTemp(Ity_V128);
24706 assign(res, binop(Iop_64HLtoV128,
24707 getXMMRegLane64(rE, 0),
24708 getXMMRegLane64(rV, 0)));
24709 putYMMRegLoAndZU(rG, mkexpr(res));
24710 *uses_vvvv = True;
24711 goto decode_success;
24713 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
24714 /* Insn exists only in mem form, it appears. */
24715 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
24716 /* Insn exists only in mem form, it appears. */
24717 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24718 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24719 UChar modrm = getUChar(delta);
24720 UInt rG = gregOfRexRM(pfx, modrm);
24721 UInt rV = getVexNvvvv(pfx);
24722 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24723 delta += alen;
24724 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
24725 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
24726 IRTemp res = newTemp(Ity_V128);
24727 assign(res, binop(Iop_64HLtoV128,
24728 loadLE(Ity_I64, mkexpr(addr)),
24729 getXMMRegLane64(rV, 0)));
24730 putYMMRegLoAndZU(rG, mkexpr(res));
24731 *uses_vvvv = True;
24732 goto decode_success;
24734 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 16 /r */
24735 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24736 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
24737 False/*!isL*/ );
24738 goto decode_success;
24740 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 16 /r */
24741 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24742 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
24743 goto decode_success;
24745 break;
24747 case 0x17:
24748 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
24749 /* Insn exists only in mem form, it appears. */
24750 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
24751 /* Insn exists only in mem form, it appears. */
24752 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24753 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24754 UChar modrm = getUChar(delta);
24755 UInt rG = gregOfRexRM(pfx, modrm);
24756 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24757 delta += alen;
24758 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
24759 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24760 nameXMMReg(rG), dis_buf);
24761 goto decode_success;
24763 break;
24765 case 0x28:
24766 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
24767 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24768 UChar modrm = getUChar(delta);
24769 UInt rG = gregOfRexRM(pfx, modrm);
24770 if (epartIsReg(modrm)) {
24771 UInt rE = eregOfRexRM(pfx,modrm);
24772 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24773 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24774 delta += 1;
24775 } else {
24776 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24777 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24778 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24779 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
24780 delta += alen;
24782 goto decode_success;
24784 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
24785 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24786 UChar modrm = getUChar(delta);
24787 UInt rG = gregOfRexRM(pfx, modrm);
24788 if (epartIsReg(modrm)) {
24789 UInt rE = eregOfRexRM(pfx,modrm);
24790 putYMMReg( rG, getYMMReg( rE ));
24791 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24792 delta += 1;
24793 } else {
24794 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24795 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24796 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24797 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
24798 delta += alen;
24800 goto decode_success;
24802 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
24803 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24804 UChar modrm = getUChar(delta);
24805 UInt rG = gregOfRexRM(pfx, modrm);
24806 if (epartIsReg(modrm)) {
24807 UInt rE = eregOfRexRM(pfx,modrm);
24808 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24809 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24810 delta += 1;
24811 } else {
24812 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24813 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24814 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24815 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24816 delta += alen;
24818 goto decode_success;
24820 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24821 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24822 UChar modrm = getUChar(delta);
24823 UInt rG = gregOfRexRM(pfx, modrm);
24824 if (epartIsReg(modrm)) {
24825 UInt rE = eregOfRexRM(pfx,modrm);
24826 putYMMReg( rG, getYMMReg( rE ));
24827 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24828 delta += 1;
24829 } else {
24830 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24831 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24832 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24833 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24834 delta += alen;
24836 goto decode_success;
24838 break;
24840 case 0x29:
24841 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24842 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24843 UChar modrm = getUChar(delta);
24844 UInt rG = gregOfRexRM(pfx,modrm);
24845 if (epartIsReg(modrm)) {
24846 UInt rE = eregOfRexRM(pfx,modrm);
24847 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24848 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24849 delta += 1;
24850 } else {
24851 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24852 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24853 storeLE( mkexpr(addr), getXMMReg(rG) );
24854 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24855 delta += alen;
24857 goto decode_success;
24859 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24860 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24861 UChar modrm = getUChar(delta);
24862 UInt rG = gregOfRexRM(pfx,modrm);
24863 if (epartIsReg(modrm)) {
24864 UInt rE = eregOfRexRM(pfx,modrm);
24865 putYMMReg( rE, getYMMReg(rG) );
24866 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24867 delta += 1;
24868 } else {
24869 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24870 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24871 storeLE( mkexpr(addr), getYMMReg(rG) );
24872 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24873 delta += alen;
24875 goto decode_success;
24877 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24878 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24879 UChar modrm = getUChar(delta);
24880 UInt rG = gregOfRexRM(pfx,modrm);
24881 if (epartIsReg(modrm)) {
24882 UInt rE = eregOfRexRM(pfx,modrm);
24883 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24884 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24885 delta += 1;
24886 goto decode_success;
24887 } else {
24888 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24889 gen_SIGNAL_if_not_16_aligned( vbi, addr );
24890 storeLE( mkexpr(addr), getXMMReg(rG) );
24891 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24892 delta += alen;
24893 goto decode_success;
24896 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24897 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24898 UChar modrm = getUChar(delta);
24899 UInt rG = gregOfRexRM(pfx,modrm);
24900 if (epartIsReg(modrm)) {
24901 UInt rE = eregOfRexRM(pfx,modrm);
24902 putYMMReg( rE, getYMMReg(rG) );
24903 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24904 delta += 1;
24905 goto decode_success;
24906 } else {
24907 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24908 gen_SIGNAL_if_not_32_aligned( vbi, addr );
24909 storeLE( mkexpr(addr), getYMMReg(rG) );
24910 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
24911 delta += alen;
24912 goto decode_success;
24915 break;
24917 case 0x2A: {
24918 IRTemp rmode = newTemp(Ity_I32);
24919 assign( rmode, get_sse_roundingmode() );
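/* A rounding mode is only needed where the conversion can be inexact:
   I32->F64 (the W0 SD case) is always exact and uses a plain unop,
   whereas I64->F64 and every conversion to F32 take a rounding-mode
   argument explicitly. */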
24920 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24921 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24922 UChar modrm = getUChar(delta);
24923 UInt rV = getVexNvvvv(pfx);
24924 UInt rD = gregOfRexRM(pfx, modrm);
24925 IRTemp arg32 = newTemp(Ity_I32);
24926 if (epartIsReg(modrm)) {
24927 UInt rS = eregOfRexRM(pfx,modrm);
24928 assign( arg32, getIReg32(rS) );
24929 delta += 1;
24930 DIP("vcvtsi2sdl %s,%s,%s\n",
24931 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24932 } else {
24933 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24934 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24935 delta += alen;
24936 DIP("vcvtsi2sdl %s,%s,%s\n",
24937 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24939 putXMMRegLane64F( rD, 0,
24940 unop(Iop_I32StoF64, mkexpr(arg32)));
24941 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24942 putYMMRegLane128( rD, 1, mkV128(0) );
24943 *uses_vvvv = True;
24944 goto decode_success;
24946 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24947 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24948 UChar modrm = getUChar(delta);
24949 UInt rV = getVexNvvvv(pfx);
24950 UInt rD = gregOfRexRM(pfx, modrm);
24951 IRTemp arg64 = newTemp(Ity_I64);
24952 if (epartIsReg(modrm)) {
24953 UInt rS = eregOfRexRM(pfx,modrm);
24954 assign( arg64, getIReg64(rS) );
24955 delta += 1;
24956 DIP("vcvtsi2sdq %s,%s,%s\n",
24957 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24958 } else {
24959 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24960 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24961 delta += alen;
24962 DIP("vcvtsi2sdq %s,%s,%s\n",
24963 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24965 putXMMRegLane64F( rD, 0,
24966 binop( Iop_I64StoF64,
24967 get_sse_roundingmode(),
24968 mkexpr(arg64)) );
24969 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24970 putYMMRegLane128( rD, 1, mkV128(0) );
24971 *uses_vvvv = True;
24972 goto decode_success;
24974 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24975 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24976 UChar modrm = getUChar(delta);
24977 UInt rV = getVexNvvvv(pfx);
24978 UInt rD = gregOfRexRM(pfx, modrm);
24979 IRTemp arg64 = newTemp(Ity_I64);
24980 if (epartIsReg(modrm)) {
24981 UInt rS = eregOfRexRM(pfx,modrm);
24982 assign( arg64, getIReg64(rS) );
24983 delta += 1;
24984 DIP("vcvtsi2ssq %s,%s,%s\n",
24985 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24986 } else {
24987 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24988 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24989 delta += alen;
24990 DIP("vcvtsi2ssq %s,%s,%s\n",
24991 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24993 putXMMRegLane32F( rD, 0,
24994 binop(Iop_F64toF32,
24995 mkexpr(rmode),
24996 binop(Iop_I64StoF64, mkexpr(rmode),
24997 mkexpr(arg64)) ) );
24998 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24999 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25000 putYMMRegLane128( rD, 1, mkV128(0) );
25001 *uses_vvvv = True;
25002 goto decode_success;
25004 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
25005 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25006 UChar modrm = getUChar(delta);
25007 UInt rV = getVexNvvvv(pfx);
25008 UInt rD = gregOfRexRM(pfx, modrm);
25009 IRTemp arg32 = newTemp(Ity_I32);
25010 if (epartIsReg(modrm)) {
25011 UInt rS = eregOfRexRM(pfx,modrm);
25012 assign( arg32, getIReg32(rS) );
25013 delta += 1;
25014 DIP("vcvtsi2ssl %s,%s,%s\n",
25015 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
25016 } else {
25017 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25018 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
25019 delta += alen;
25020 DIP("vcvtsi2ssl %s,%s,%s\n",
25021 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25023 putXMMRegLane32F( rD, 0,
25024 binop(Iop_F64toF32,
25025 mkexpr(rmode),
25026 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
25027 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25028 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25029 putYMMRegLane128( rD, 1, mkV128(0) );
25030 *uses_vvvv = True;
25031 goto decode_success;
25033 break;
25036 case 0x2B:
25037 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
25038 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
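/* The non-temporal hint is simply ignored; these are translated as
   ordinary stores, with the usual 16- or 32-byte alignment check. */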
25039 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25040 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
25041 UChar modrm = getUChar(delta);
25042 UInt rS = gregOfRexRM(pfx, modrm);
25043 IRTemp tS = newTemp(Ity_V128);
25044 assign(tS, getXMMReg(rS));
25045 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25046 delta += alen;
25047 gen_SIGNAL_if_not_16_aligned(vbi, addr);
25048 storeLE(mkexpr(addr), mkexpr(tS));
25049 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25050 nameXMMReg(rS), dis_buf);
25051 goto decode_success;
25053 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
25054 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
25055 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
25056 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
25057 UChar modrm = getUChar(delta);
25058 UInt rS = gregOfRexRM(pfx, modrm);
25059 IRTemp tS = newTemp(Ity_V256);
25060 assign(tS, getYMMReg(rS));
25061 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25062 delta += alen;
25063 gen_SIGNAL_if_not_32_aligned(vbi, addr);
25064 storeLE(mkexpr(addr), mkexpr(tS));
25065 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
25066 nameYMMReg(rS), dis_buf);
25067 goto decode_success;
25069 break;
25071 case 0x2C:
25072 /* VCVTTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2C /r */
25073 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25074 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25075 goto decode_success;
25077 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
25078 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25079 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25080 goto decode_success;
25082 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
25083 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25084 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25085 goto decode_success;
25087 /* VCVTTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2C /r */
25088 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25089 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25090 goto decode_success;
25092 break;
25094 case 0x2D:
25095 /* VCVTSD2SI xmm1/m32, r32 = VEX.LIG.F2.0F.W0 2D /r */
25096 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
25097 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25098 goto decode_success;
25100 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
25101 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
25102 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25103 goto decode_success;
25105 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
25106 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
25107 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
25108 goto decode_success;
25110 /* VCVTSS2SI xmm1/m64, r64 = VEX.LIG.F3.0F.W1 2D /r */
25111 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
25112 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
25113 goto decode_success;
25115 break;
25117 case 0x2E:
25118 case 0x2F:
25119 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
25120 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
25121 if (have66noF2noF3(pfx)) {
25122 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
25123 goto decode_success;
25125 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
25126 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
25127 if (haveNo66noF2noF3(pfx)) {
25128 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
25129 goto decode_success;
25131 break;
25133 case 0x50:
25134 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
25135 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25136 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
25137 goto decode_success;
25139 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
25140 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25141 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
25142 goto decode_success;
25144 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
25145 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25146 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
25147 goto decode_success;
25149 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
25150 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25151 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
25152 goto decode_success;
25154 break;
25156 case 0x51:
25157 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
25158 if (haveF3no66noF2(pfx)) {
25159 delta = dis_AVX128_E_V_to_G_lo32_unary(
25160 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
25161 goto decode_success;
25163 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.128.0F.WIG 51 /r */
25164 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25165 delta = dis_AVX128_E_to_G_unary_all(
25166 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
25167 goto decode_success;
25169 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.256.0F.WIG 51 /r */
25170 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25171 delta = dis_AVX256_E_to_G_unary_all(
25172 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
25173 goto decode_success;
25175 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
25176 if (haveF2no66noF3(pfx)) {
25177 delta = dis_AVX128_E_V_to_G_lo64_unary(
25178 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
25179 goto decode_success;
25181 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.128.66.0F.WIG 51 /r */
25182 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25183 delta = dis_AVX128_E_to_G_unary_all(
25184 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
25185 goto decode_success;
25187 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.256.66.0F.WIG 51 /r */
25188 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25189 delta = dis_AVX256_E_to_G_unary_all(
25190 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
25191 goto decode_success;
25193 break;
25195 case 0x52:
25196 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
25197 if (haveF3no66noF2(pfx)) {
25198 delta = dis_AVX128_E_V_to_G_lo32_unary(
25199 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
25200 Iop_RSqrtEst32F0x4 );
25201 goto decode_success;
25203 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.128.0F.WIG 52 /r */
25204 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25205 delta = dis_AVX128_E_to_G_unary_all(
25206 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
25207 goto decode_success;
25209 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.256.0F.WIG 52 /r */
25210 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25211 delta = dis_AVX256_E_to_G_unary_all(
25212 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
25213 goto decode_success;
25215 break;
25217 case 0x53:
25218 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
25219 if (haveF3no66noF2(pfx)) {
25220 delta = dis_AVX128_E_V_to_G_lo32_unary(
25221 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
25222 goto decode_success;
25224 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.128.0F.WIG 53 /r */
25225 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25226 delta = dis_AVX128_E_to_G_unary_all(
25227 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
25228 goto decode_success;
25230 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.256.0F.WIG 53 /r */
25231 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25232 delta = dis_AVX256_E_to_G_unary_all(
25233 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
25234 goto decode_success;
25236 break;
25238 case 0x54:
25239 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25240 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
25241 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25242 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25243 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
25244 goto decode_success;
25246 /* VANDPD r/m, rV, r ::: r = rV & r/m */
25247 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
25248 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25249 delta = dis_AVX256_E_V_to_G(
25250 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
25251 goto decode_success;
25253 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
25254 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25255 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25256 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
25257 goto decode_success;
25259 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
25260 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25261 delta = dis_AVX256_E_V_to_G(
25262 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
25263 goto decode_success;
25265 break;
25267 case 0x55:
25268 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
25269 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
25270 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25271 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25272 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
25273 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25274 goto decode_success;
25276 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
25277 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25278 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25279 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
25280 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25281 goto decode_success;
25283 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
25284 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25285 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25286 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
25287 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25288 goto decode_success;
25290 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
25291 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25292 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
25293 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
25294 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
25295 goto decode_success;
25297 break;
25299 case 0x56:
25300 /* VORPD r/m, rV, r ::: r = rV | r/m */
25301 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
25302 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25303 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25304 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
25305 goto decode_success;
25307 /* VORPD r/m, rV, r ::: r = rV | r/m */
25308 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
25309 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25310 delta = dis_AVX256_E_V_to_G(
25311 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
25312 goto decode_success;
25314 /* VORPS r/m, rV, r ::: r = rV | r/m */
25315 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
25316 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25317 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25318 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
25319 goto decode_success;
25321 /* VORPS r/m, rV, r ::: r = rV | r/m */
25322 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
25323 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25324 delta = dis_AVX256_E_V_to_G(
25325 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
25326 goto decode_success;
25328 break;
25330 case 0x57:
25331 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25332 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
25333 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25334 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25335 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
25336 goto decode_success;
25338 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
25339 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
25340 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25341 delta = dis_AVX256_E_V_to_G(
25342 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
25343 goto decode_success;
25345 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25346 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
25347 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25348 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25349 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
25350 goto decode_success;
25352 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
25353 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
25354 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25355 delta = dis_AVX256_E_V_to_G(
25356 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
25357 goto decode_success;
25359 break;
25361 case 0x58:
25362 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
25363 if (haveF2no66noF3(pfx)) {
25364 delta = dis_AVX128_E_V_to_G_lo64(
25365 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
25366 goto decode_success;
25368 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
25369 if (haveF3no66noF2(pfx)) {
25370 delta = dis_AVX128_E_V_to_G_lo32(
25371 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
25372 goto decode_success;
25374 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
25375 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25376 delta = dis_AVX128_E_V_to_G(
25377 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
25378 goto decode_success;
25380 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
25381 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25382 delta = dis_AVX256_E_V_to_G(
25383 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
25384 goto decode_success;
25386 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
25387 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25388 delta = dis_AVX128_E_V_to_G(
25389 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
25390 goto decode_success;
25392 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
25393 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25394 delta = dis_AVX256_E_V_to_G(
25395 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
25396 goto decode_success;
25398 break;
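      /* Note on the helpers used above (sketch): the _lo64/_lo32 variants
         model the scalar forms, roughly
            dst[63:0]    = op(vV[63:0], E[63:0])     -- e.g. vaddsd
            dst[127:64]  = vV[127:64]
            dst[255:128] = 0
         whereas dis_AVX128_E_V_to_G / dis_AVX256_E_V_to_G apply the op to
         every lane, the 128-bit form additionally zeroing bits 255:128. */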
25400 case 0x59:
25401 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
25402 if (haveF2no66noF3(pfx)) {
25403 delta = dis_AVX128_E_V_to_G_lo64(
25404 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
25405 goto decode_success;
25407 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
25408 if (haveF3no66noF2(pfx)) {
25409 delta = dis_AVX128_E_V_to_G_lo32(
25410 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
25411 goto decode_success;
25413 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
25414 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25415 delta = dis_AVX128_E_V_to_G(
25416 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
25417 goto decode_success;
25419 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
25420 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25421 delta = dis_AVX256_E_V_to_G(
25422 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
25423 goto decode_success;
25425 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
25426 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25427 delta = dis_AVX128_E_V_to_G(
25428 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
25429 goto decode_success;
25431 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
25432 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25433 delta = dis_AVX256_E_V_to_G(
25434 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
25435 goto decode_success;
25437 break;
25439 case 0x5A:
25440 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
25441 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25442 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
25443 goto decode_success;
25445 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
25446 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25447 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
25448 goto decode_success;
25450 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
25451 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25452 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
25453 goto decode_success;
25455 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
25456 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25457 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
25458 goto decode_success;
25460 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
25461 if (haveF2no66noF3(pfx)) {
25462 UChar modrm = getUChar(delta);
25463 UInt rV = getVexNvvvv(pfx);
25464 UInt rD = gregOfRexRM(pfx, modrm);
25465 IRTemp f64lo = newTemp(Ity_F64);
25466 IRTemp rmode = newTemp(Ity_I32);
25467 assign( rmode, get_sse_roundingmode() );
25468 if (epartIsReg(modrm)) {
25469 UInt rS = eregOfRexRM(pfx,modrm);
25470 assign(f64lo, getXMMRegLane64F(rS, 0));
25471 delta += 1;
25472 DIP("vcvtsd2ss %s,%s,%s\n",
25473 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25474 } else {
25475 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25476 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
25477 delta += alen;
25478 DIP("vcvtsd2ss %s,%s,%s\n",
25479 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25481 putXMMRegLane32F( rD, 0,
25482 binop( Iop_F64toF32, mkexpr(rmode),
25483 mkexpr(f64lo)) );
25484 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
25485 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25486 putYMMRegLane128( rD, 1, mkV128(0) );
25487 *uses_vvvv = True;
25488 goto decode_success;
25490 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
25491 if (haveF3no66noF2(pfx)) {
25492 UChar modrm = getUChar(delta);
25493 UInt rV = getVexNvvvv(pfx);
25494 UInt rD = gregOfRexRM(pfx, modrm);
25495 IRTemp f32lo = newTemp(Ity_F32);
25496 if (epartIsReg(modrm)) {
25497 UInt rS = eregOfRexRM(pfx,modrm);
25498 assign(f32lo, getXMMRegLane32F(rS, 0));
25499 delta += 1;
25500 DIP("vcvtss2sd %s,%s,%s\n",
25501 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
25502 } else {
25503 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25504 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
25505 delta += alen;
25506 DIP("vcvtss2sd %s,%s,%s\n",
25507 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
25509 putXMMRegLane64F( rD, 0,
25510 unop( Iop_F32toF64, mkexpr(f32lo)) );
25511 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
25512 putYMMRegLane128( rD, 1, mkV128(0) );
25513 *uses_vvvv = True;
25514 goto decode_success;
25516 break;
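      /* Merge-semantics sketch for the scalar converts above: vcvtsd2ss
         computes F64toF32(rmode, E.lo64) with rmode taken from MXCSR via
         get_sse_roundingmode(), places it in lane 0, copies 32-bit lanes
         1..3 from the vvvv register and zeroes bits 255:128.  vcvtss2sd
         needs no rounding mode, since every F32 is exactly representable
         as an F64. */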
25518 case 0x5B:
25519 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
25520 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25521 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25522 True/*isAvx*/, False/*!r2zero*/ );
25523 goto decode_success;
25525 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
25526 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25527 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25528 False/*!r2zero*/ );
25529 goto decode_success;
25531 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
25532 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25533 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
25534 True/*isAvx*/, True/*r2zero*/ );
25535 goto decode_success;
25537 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
25538 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25539 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
25540 True/*r2zero*/ );
25541 goto decode_success;
25543 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
25544 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25545 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
25546 goto decode_success;
25548 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
25549 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25550 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
25551 goto decode_success;
25553 break;
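      /* The r2zero flag selects truncation: VCVTTPS2DQ (F3 prefix) always
         rounds towards zero, e.g. 1.7 -> 1 and -1.7 -> -1, whereas
         VCVTPS2DQ (66 prefix) honours the current MXCSR rounding mode,
         so 1.7 -> 2 under round-to-nearest. */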
25555 case 0x5C:
25556 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
25557 if (haveF2no66noF3(pfx)) {
25558 delta = dis_AVX128_E_V_to_G_lo64(
25559 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
25560 goto decode_success;
25562 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
25563 if (haveF3no66noF2(pfx)) {
25564 delta = dis_AVX128_E_V_to_G_lo32(
25565 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
25566 goto decode_success;
25568 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
25569 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25570 delta = dis_AVX128_E_V_to_G(
25571 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
25572 goto decode_success;
25574 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
25575 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25576 delta = dis_AVX256_E_V_to_G(
25577 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
25578 goto decode_success;
25580 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
25581 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25582 delta = dis_AVX128_E_V_to_G(
25583 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
25584 goto decode_success;
25586 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
25587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25588 delta = dis_AVX256_E_V_to_G(
25589 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
25590 goto decode_success;
25592 break;
25594 case 0x5D:
25595 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
25596 if (haveF2no66noF3(pfx)) {
25597 delta = dis_AVX128_E_V_to_G_lo64(
25598 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
25599 goto decode_success;
25601 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
25602 if (haveF3no66noF2(pfx)) {
25603 delta = dis_AVX128_E_V_to_G_lo32(
25604 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
25605 goto decode_success;
25607 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
25608 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25609 delta = dis_AVX128_E_V_to_G(
25610 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
25611 goto decode_success;
25613 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
25614 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25615 delta = dis_AVX256_E_V_to_G(
25616 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
25617 goto decode_success;
25619 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
25620 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25621 delta = dis_AVX128_E_V_to_G(
25622 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
25623 goto decode_success;
25625 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
25626 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25627 delta = dis_AVX256_E_V_to_G(
25628 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
25629 goto decode_success;
25631 break;
25633 case 0x5E:
25634 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
25635 if (haveF2no66noF3(pfx)) {
25636 delta = dis_AVX128_E_V_to_G_lo64(
25637 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
25638 goto decode_success;
25640 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
25641 if (haveF3no66noF2(pfx)) {
25642 delta = dis_AVX128_E_V_to_G_lo32(
25643 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
25644 goto decode_success;
25646 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
25647 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25648 delta = dis_AVX128_E_V_to_G(
25649 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
25650 goto decode_success;
25652 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
25653 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25654 delta = dis_AVX256_E_V_to_G(
25655 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
25656 goto decode_success;
25658 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
25659 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25660 delta = dis_AVX128_E_V_to_G(
25661 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
25662 goto decode_success;
25664 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
25665 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25666 delta = dis_AVX256_E_V_to_G(
25667 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
25668 goto decode_success;
25670 break;
25672 case 0x5F:
25673 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
25674 if (haveF2no66noF3(pfx)) {
25675 delta = dis_AVX128_E_V_to_G_lo64(
25676 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
25677 goto decode_success;
25679 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
25680 if (haveF3no66noF2(pfx)) {
25681 delta = dis_AVX128_E_V_to_G_lo32(
25682 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
25683 goto decode_success;
25685 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
25686 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25687 delta = dis_AVX128_E_V_to_G(
25688 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
25689 goto decode_success;
25691 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
25692 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25693 delta = dis_AVX256_E_V_to_G(
25694 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
25695 goto decode_success;
25697 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
25698 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25699 delta = dis_AVX128_E_V_to_G(
25700 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
25701 goto decode_success;
25703 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
25704 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25705 delta = dis_AVX256_E_V_to_G(
25706 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
25707 goto decode_success;
25709 break;
25711 case 0x60:
25712 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25713 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
25714 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25715 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25716 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25717 Iop_InterleaveLO8x16, NULL,
25718 False/*!invertLeftArg*/, True/*swapArgs*/ );
25719 goto decode_success;
25721 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
25722 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
25723 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25724 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25725 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
25726 math_VPUNPCKLBW_YMM );
25727 goto decode_success;
25729 break;
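      /* Worked example (sketch): with rV low bytes a0..a7 and r/m low
         bytes b0..b7, vpunpcklbw produces, from least to most significant
         byte, a0 b0 a1 b1 ... a7 b7; the high halves of both sources are
         ignored.  The 256-bit form does the same independently within
         each 128-bit lane, as do the other VPUNPCK* variants below. */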
25731 case 0x61:
25732 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25733 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
25734 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25735 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25736 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25737 Iop_InterleaveLO16x8, NULL,
25738 False/*!invertLeftArg*/, True/*swapArgs*/ );
25739 goto decode_success;
25741 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
25742 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
25743 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25744 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25745 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
25746 math_VPUNPCKLWD_YMM );
25747 goto decode_success;
25749 break;
25751 case 0x62:
25752 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25753 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
25754 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25755 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25756 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25757 Iop_InterleaveLO32x4, NULL,
25758 False/*!invertLeftArg*/, True/*swapArgs*/ );
25759 goto decode_success;
25761 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
25762 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
25763 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25764 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25765 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
25766 math_VPUNPCKLDQ_YMM );
25767 goto decode_success;
25769 break;
25771 case 0x63:
25772 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25773 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
25774 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25775 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25776 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25777 Iop_QNarrowBin16Sto8Sx16, NULL,
25778 False/*!invertLeftArg*/, True/*swapArgs*/ );
25779 goto decode_success;
25781 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
25782 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
25783 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25784 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25785 uses_vvvv, vbi, pfx, delta, "vpacksswb",
25786 math_VPACKSSWB_YMM );
25787 goto decode_success;
25789 break;
25791 case 0x64:
25792 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25793 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
25794 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25795 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25796 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
25797 goto decode_success;
25799 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
25800 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
25801 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25802 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25803 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
25804 goto decode_success;
25806 break;
25808 case 0x65:
25809 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25810 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25811 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25812 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25813 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25814 goto decode_success;
25816 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25817 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25818 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25819 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25820 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25821 goto decode_success;
25823 break;
25825 case 0x66:
25826 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25827 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25828 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25829 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25830 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25831 goto decode_success;
25833 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25834 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25835 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25836 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25837 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25838 goto decode_success;
25840 break;
25842 case 0x67:
25843 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25844 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
25845 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25846 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25847 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25848 Iop_QNarrowBin16Sto8Ux16, NULL,
25849 False/*!invertLeftArg*/, True/*swapArgs*/ );
25850 goto decode_success;
25852 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25853 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25854 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25855 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25856 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25857 math_VPACKUSWB_YMM );
25858 goto decode_success;
25860 break;
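      /* Saturation example (sketch): vpackuswb narrows each signed 16-bit
         lane to an unsigned byte with saturation, so e.g. -5 -> 0x00,
         100 -> 0x64, 300 -> 0xFF.  The vvvv lanes land in the low half of
         each 128-bit result lane and the r/m lanes in the high half. */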
25862 case 0x68:
25863 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25864 /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25865 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25866 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25867 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25868 Iop_InterleaveHI8x16, NULL,
25869 False/*!invertLeftArg*/, True/*swapArgs*/ );
25870 goto decode_success;
25872 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
25873 /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25874 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25875 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25876 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25877 math_VPUNPCKHBW_YMM );
25878 goto decode_success;
25880 break;
25882 case 0x69:
25883 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25884 /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25885 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25886 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25887 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25888 Iop_InterleaveHI16x8, NULL,
25889 False/*!invertLeftArg*/, True/*swapArgs*/ );
25890 goto decode_success;
25892 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
25893 /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25894 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25895 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25896 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25897 math_VPUNPCKHWD_YMM );
25898 goto decode_success;
25900 break;
25902 case 0x6A:
25903 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25904 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25905 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25906 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25907 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25908 Iop_InterleaveHI32x4, NULL,
25909 False/*!invertLeftArg*/, True/*swapArgs*/ );
25910 goto decode_success;
25912 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25913 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25914 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25915 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25916 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25917 math_VPUNPCKHDQ_YMM );
25918 goto decode_success;
25920 break;
25922 case 0x6B:
25923 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25924 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25925 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25926 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25927 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25928 Iop_QNarrowBin32Sto16Sx8, NULL,
25929 False/*!invertLeftArg*/, True/*swapArgs*/ );
25930 goto decode_success;
25932 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25933 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25934 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25935 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25936 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25937 math_VPACKSSDW_YMM );
25938 goto decode_success;
25940 break;
25942 case 0x6C:
25943 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25944 /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25945 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25946 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25947 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25948 Iop_InterleaveLO64x2, NULL,
25949 False/*!invertLeftArg*/, True/*swapArgs*/ );
25950 goto decode_success;
25952 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
25953 /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25954 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25955 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25956 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25957 math_VPUNPCKLQDQ_YMM );
25958 goto decode_success;
25960 break;
25962 case 0x6D:
25963 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25964 /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25965 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25966 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25967 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25968 Iop_InterleaveHI64x2, NULL,
25969 False/*!invertLeftArg*/, True/*swapArgs*/ );
25970 goto decode_success;
25972 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
25973 /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25974 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25975 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25976 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25977 math_VPUNPCKHQDQ_YMM );
25978 goto decode_success;
25980 break;
25982 case 0x6E:
25983 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25984 if (have66noF2noF3(pfx)
25985 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25986 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25987 UChar modrm = getUChar(delta);
25988 if (epartIsReg(modrm)) {
25989 delta += 1;
25990 putYMMRegLoAndZU(
25991 gregOfRexRM(pfx,modrm),
25992 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25994 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25995 nameXMMReg(gregOfRexRM(pfx,modrm)));
25996 } else {
25997 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25998 delta += alen;
25999 putYMMRegLoAndZU(
26000 gregOfRexRM(pfx,modrm),
26001 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
26003 DIP("vmovd %s, %s\n", dis_buf,
26004 nameXMMReg(gregOfRexRM(pfx,modrm)));
26006 goto decode_success;
26008 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
26009 if (have66noF2noF3(pfx)
26010 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26011 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
26012 UChar modrm = getUChar(delta);
26013 if (epartIsReg(modrm)) {
26014 delta += 1;
26015 putYMMRegLoAndZU(
26016 gregOfRexRM(pfx,modrm),
26017 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
26019 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
26020 nameXMMReg(gregOfRexRM(pfx,modrm)));
26021 } else {
26022 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26023 delta += alen;
26024 putYMMRegLoAndZU(
26025 gregOfRexRM(pfx,modrm),
26026 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
26028 DIP("vmovq %s, %s\n", dis_buf,
26029 nameXMMReg(gregOfRexRM(pfx,modrm)));
26031 goto decode_success;
26033 break;
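      /* putYMMRegLoAndZU writes the given V128 to the low half of the YMM
         register and zeroes bits 255:128, so e.g. vmovd %eax,%xmm3 leaves
         xmm3 = zero-extend-to-128(eax) and also clears ymm3[255:128], as
         required of all VEX.128-encoded destination writes. */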
26035 case 0x6F:
26036 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
26037 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
26038 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26039 && 1==getVexL(pfx)/*256*/) {
26040 UChar modrm = getUChar(delta);
26041 UInt rD = gregOfRexRM(pfx, modrm);
26042 IRTemp tD = newTemp(Ity_V256);
26043 Bool isA = have66noF2noF3(pfx);
26044 HChar ch = isA ? 'a' : 'u';
26045 if (epartIsReg(modrm)) {
26046 UInt rS = eregOfRexRM(pfx, modrm);
26047 delta += 1;
26048 assign(tD, getYMMReg(rS));
26049 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26050 } else {
26051 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26052 delta += alen;
26053 if (isA)
26054 gen_SIGNAL_if_not_32_aligned(vbi, addr);
26055 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
26056 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
26058 putYMMReg(rD, mkexpr(tD));
26059 goto decode_success;
26061 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
26062 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
26063 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26064 && 0==getVexL(pfx)/*128*/) {
26065 UChar modrm = getUChar(delta);
26066 UInt rD = gregOfRexRM(pfx, modrm);
26067 IRTemp tD = newTemp(Ity_V128);
26068 Bool isA = have66noF2noF3(pfx);
26069 HChar ch = isA ? 'a' : 'u';
26070 if (epartIsReg(modrm)) {
26071 UInt rS = eregOfRexRM(pfx, modrm);
26072 delta += 1;
26073 assign(tD, getXMMReg(rS));
26074 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26075 } else {
26076 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26077 delta += alen;
26078 if (isA)
26079 gen_SIGNAL_if_not_16_aligned(vbi, addr);
26080 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
26081 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
26083 putYMMRegLoAndZU(rD, mkexpr(tD));
26084 goto decode_success;
26086 break;
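      /* The only difference between the A and U forms above is the
         alignment check: a misaligned memory operand makes VMOVDQA fault,
         modelled here by gen_SIGNAL_if_not_{16,32}_aligned, whereas
         VMOVDQU accepts any alignment. */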
26088 case 0x70:
26089 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
26090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26091 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
26092 goto decode_success;
26094 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
26095 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26096 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
26097 goto decode_success;
26099 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
26100 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26101 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26102 True/*isAvx*/, False/*!xIsH*/ );
26103 goto decode_success;
26105 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
26106 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26107 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
26108 goto decode_success;
26110 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
26111 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
26112 delta = dis_PSHUFxW_128( vbi, pfx, delta,
26113 True/*isAvx*/, True/*xIsH*/ );
26114 goto decode_success;
26116 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
26117 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
26118 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
26119 goto decode_success;
26121 break;
26123 case 0x71:
26124 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
26125 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
26126 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
26127 if (have66noF2noF3(pfx)
26128 && 0==getVexL(pfx)/*128*/
26129 && epartIsReg(getUChar(delta))) {
26130 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26131 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26132 "vpsrlw", Iop_ShrN16x8 );
26133 *uses_vvvv = True;
26134 goto decode_success;
26136 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26137 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26138 "vpsraw", Iop_SarN16x8 );
26139 *uses_vvvv = True;
26140 goto decode_success;
26142 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26143 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26144 "vpsllw", Iop_ShlN16x8 );
26145 *uses_vvvv = True;
26146 goto decode_success;
26148 /* else fall through */
26150 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
26151 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
26152 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
26153 if (have66noF2noF3(pfx)
26154 && 1==getVexL(pfx)/*256*/
26155 && epartIsReg(getUChar(delta))) {
26156 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26157 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26158 "vpsrlw", Iop_ShrN16x16 );
26159 *uses_vvvv = True;
26160 goto decode_success;
26162 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26163 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26164 "vpsraw", Iop_SarN16x16 );
26165 *uses_vvvv = True;
26166 goto decode_success;
26168 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26169 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26170 "vpsllw", Iop_ShlN16x16 );
26171 *uses_vvvv = True;
26172 goto decode_success;
26174 /* else fall through */
26176 break;
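      /* These NDD immediate-shift forms encode the operation in the reg
         field of the modrm byte rather than in vvvv: /2 = shift right
         logical, /4 = shift right arithmetic, /6 = shift left.  So e.g.
         "vpsraw $3,%xmm2,%xmm1" is 66-prefixed opcode 71 with
         modrm.reg == 4, the source xmm2 in the r/m field and the
         destination xmm1 in vvvv. */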
26178 case 0x72:
26179 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
26180 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
26181 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
26182 if (have66noF2noF3(pfx)
26183 && 0==getVexL(pfx)/*128*/
26184 && epartIsReg(getUChar(delta))) {
26185 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26186 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26187 "vpsrld", Iop_ShrN32x4 );
26188 *uses_vvvv = True;
26189 goto decode_success;
26191 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26192 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26193 "vpsrad", Iop_SarN32x4 );
26194 *uses_vvvv = True;
26195 goto decode_success;
26197 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26198 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26199 "vpslld", Iop_ShlN32x4 );
26200 *uses_vvvv = True;
26201 goto decode_success;
26203 /* else fall through */
26205 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
26206 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
26207 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
26208 if (have66noF2noF3(pfx)
26209 && 1==getVexL(pfx)/*256*/
26210 && epartIsReg(getUChar(delta))) {
26211 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
26212 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26213 "vpsrld", Iop_ShrN32x8 );
26214 *uses_vvvv = True;
26215 goto decode_success;
26217 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
26218 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26219 "vpsrad", Iop_SarN32x8 );
26220 *uses_vvvv = True;
26221 goto decode_success;
26223 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
26224 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26225 "vpslld", Iop_ShlN32x8 );
26226 *uses_vvvv = True;
26227 goto decode_success;
26229 /* else fall through */
26231 break;
26233 case 0x73:
26234 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
26235 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
26236 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
26237 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
26238 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26239 && epartIsReg(getUChar(delta))) {
26240 Int rS = eregOfRexRM(pfx,getUChar(delta));
26241 Int rD = getVexNvvvv(pfx);
26242 IRTemp vecS = newTemp(Ity_V128);
26243 if (gregLO3ofRM(getUChar(delta)) == 3) {
26244 Int imm = (Int)getUChar(delta+1);
26245 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26246 delta += 2;
26247 assign( vecS, getXMMReg(rS) );
26248 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
26249 *uses_vvvv = True;
26250 goto decode_success;
26252 if (gregLO3ofRM(getUChar(delta)) == 7) {
26253 Int imm = (Int)getUChar(delta+1);
26254 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
26255 delta += 2;
26256 assign( vecS, getXMMReg(rS) );
26257 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
26258 *uses_vvvv = True;
26259 goto decode_success;
26261 if (gregLO3ofRM(getUChar(delta)) == 2) {
26262 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26263 "vpsrlq", Iop_ShrN64x2 );
26264 *uses_vvvv = True;
26265 goto decode_success;
26267 if (gregLO3ofRM(getUChar(delta)) == 6) {
26268 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
26269 "vpsllq", Iop_ShlN64x2 );
26270 *uses_vvvv = True;
26271 goto decode_success;
26273 /* else fall through */
26275 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
26276 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
26277 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
26278 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
26279 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
26280 && epartIsReg(getUChar(delta))) {
26281 Int rS = eregOfRexRM(pfx,getUChar(delta));
26282 Int rD = getVexNvvvv(pfx);
26283 if (gregLO3ofRM(getUChar(delta)) == 3) {
26284 IRTemp vecS0 = newTemp(Ity_V128);
26285 IRTemp vecS1 = newTemp(Ity_V128);
26286 Int imm = (Int)getUChar(delta+1);
26287 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26288 delta += 2;
26289 assign( vecS0, getYMMRegLane128(rS, 0));
26290 assign( vecS1, getYMMRegLane128(rS, 1));
26291 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
26292 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
26293 *uses_vvvv = True;
26294 goto decode_success;
26296 if (gregLO3ofRM(getUChar(delta)) == 7) {
26297 IRTemp vecS0 = newTemp(Ity_V128);
26298 IRTemp vecS1 = newTemp(Ity_V128);
26299 Int imm = (Int)getUChar(delta+1);
26300 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
26301 delta += 2;
26302 assign( vecS0, getYMMRegLane128(rS, 0));
26303 assign( vecS1, getYMMRegLane128(rS, 1));
26304 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
26305 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
26306 *uses_vvvv = True;
26307 goto decode_success;
26309 if (gregLO3ofRM(getUChar(delta)) == 2) {
26310 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26311 "vpsrlq", Iop_ShrN64x4 );
26312 *uses_vvvv = True;
26313 goto decode_success;
26315 if (gregLO3ofRM(getUChar(delta)) == 6) {
26316 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
26317 "vpsllq", Iop_ShlN64x4 );
26318 *uses_vvvv = True;
26319 goto decode_success;
26321 /* else fall through */
26323 break;
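      /* Worked example (sketch): vpsrldq shifts the whole 128-bit value
         right by imm bytes, zero-filling at the top.  With
         xmm2 = 0x0F0E0D0C0B0A09080706050403020100, "vpsrldq $3,%xmm2,%xmm1"
         gives xmm1 = 0x0000000F0E0D0C0B0A09080706050403.  The 256-bit
         forms apply the same byte shift to each 128-bit lane separately. */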
26325 case 0x74:
26326 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26327 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
26328 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26329 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26330 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
26331 goto decode_success;
26333 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
26334 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
26335 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26336 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26337 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
26338 goto decode_success;
26340 break;
26342 case 0x75:
26343 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26344 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
26345 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26346 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26347 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
26348 goto decode_success;
26350 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
26351 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
26352 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26353 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26354 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
26355 goto decode_success;
26357 break;
26359 case 0x76:
26360 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26361 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
26362 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26363 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26364 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
26365 goto decode_success;
26367 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
26368 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
26369 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26370 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26371 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
26372 goto decode_success;
26374 break;
26376 case 0x77:
26377 /* VZEROUPPER = VEX.128.0F.WIG 77 */
26378 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26379 Int i;
26380 IRTemp zero128 = newTemp(Ity_V128);
26381 assign(zero128, mkV128(0));
26382 for (i = 0; i < 16; i++) {
26383 putYMMRegLane128(i, 1, mkexpr(zero128));
26385 DIP("vzeroupper\n");
26386 goto decode_success;
26388 /* VZEROALL = VEX.256.0F.WIG 77 */
26389 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26390 Int i;
26391 IRTemp zero128 = newTemp(Ity_V128);
26392 assign(zero128, mkV128(0));
26393 for (i = 0; i < 16; i++) {
26394 putYMMRegLoAndZU(i, mkexpr(zero128));
26396 DIP("vzeroall\n");
26397 goto decode_success;
26399 break;
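      /* Semantics sketch: VZEROUPPER clears bits 255:128 of ymm0..ymm15
         while preserving the low 128 bits (the first loop above), and
         VZEROALL clears all 256 bits of every ymm register; neither form
         touches MXCSR or any other guest state. */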
26401 case 0x7C:
26402 case 0x7D:
26403 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
26404 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
26405 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26406 IRTemp sV = newTemp(Ity_V128);
26407 IRTemp dV = newTemp(Ity_V128);
26408 Bool isAdd = opc == 0x7C;
26409 const HChar* str = isAdd ? "add" : "sub";
26410 UChar modrm = getUChar(delta);
26411 UInt rG = gregOfRexRM(pfx,modrm);
26412 UInt rV = getVexNvvvv(pfx);
26413 if (epartIsReg(modrm)) {
26414 UInt rE = eregOfRexRM(pfx,modrm);
26415 assign( sV, getXMMReg(rE) );
26416 DIP("vh%sps %s,%s,%s\n", str, nameXMMReg(rE),
26417 nameXMMReg(rV), nameXMMReg(rG));
26418 delta += 1;
26419 } else {
26420 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26421 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26422 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26423 nameXMMReg(rV), nameXMMReg(rG));
26424 delta += alen;
26426 assign( dV, getXMMReg(rV) );
26427 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
26428 *uses_vvvv = True;
26429 goto decode_success;
26431 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
26432 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
26433 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26434 IRTemp sV = newTemp(Ity_V256);
26435 IRTemp dV = newTemp(Ity_V256);
26436 IRTemp s1, s0, d1, d0;
26437 Bool isAdd = opc == 0x7C;
26438 const HChar* str = isAdd ? "add" : "sub";
26439 UChar modrm = getUChar(delta);
26440 UInt rG = gregOfRexRM(pfx,modrm);
26441 UInt rV = getVexNvvvv(pfx);
26442 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26443 if (epartIsReg(modrm)) {
26444 UInt rE = eregOfRexRM(pfx,modrm);
26445 assign( sV, getYMMReg(rE) );
26446 DIP("vh%sps %s,%s,%s\n", str, nameYMMReg(rE),
26447 nameYMMReg(rV), nameYMMReg(rG));
26448 delta += 1;
26449 } else {
26450 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26451 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26452 DIP("vh%sps %s,%s,%s\n", str, dis_buf,
26453 nameYMMReg(rV), nameYMMReg(rG));
26454 delta += alen;
26456 assign( dV, getYMMReg(rV) );
26457 breakupV256toV128s( dV, &d1, &d0 );
26458 breakupV256toV128s( sV, &s1, &s0 );
26459 putYMMReg( rG, binop(Iop_V128HLtoV256,
26460 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
26461 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
26462 *uses_vvvv = True;
26463 goto decode_success;
26465 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
26466 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
26467 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26468 IRTemp sV = newTemp(Ity_V128);
26469 IRTemp dV = newTemp(Ity_V128);
26470 Bool isAdd = opc == 0x7C;
26471 const HChar* str = isAdd ? "add" : "sub";
26472 UChar modrm = getUChar(delta);
26473 UInt rG = gregOfRexRM(pfx,modrm);
26474 UInt rV = getVexNvvvv(pfx);
26475 if (epartIsReg(modrm)) {
26476 UInt rE = eregOfRexRM(pfx,modrm);
26477 assign( sV, getXMMReg(rE) );
26478 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
26479 nameXMMReg(rV), nameXMMReg(rG));
26480 delta += 1;
26481 } else {
26482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26483 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
26484 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26485 nameXMMReg(rV), nameXMMReg(rG));
26486 delta += alen;
26488 assign( dV, getXMMReg(rV) );
26489 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
26490 *uses_vvvv = True;
26491 goto decode_success;
26493 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
26494 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
26495 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26496 IRTemp sV = newTemp(Ity_V256);
26497 IRTemp dV = newTemp(Ity_V256);
26498 IRTemp s1, s0, d1, d0;
26499 Bool isAdd = opc == 0x7C;
26500 const HChar* str = isAdd ? "add" : "sub";
26501 UChar modrm = getUChar(delta);
26502 UInt rG = gregOfRexRM(pfx,modrm);
26503 UInt rV = getVexNvvvv(pfx);
26504 s1 = s0 = d1 = d0 = IRTemp_INVALID;
26505 if (epartIsReg(modrm)) {
26506 UInt rE = eregOfRexRM(pfx,modrm);
26507 assign( sV, getYMMReg(rE) );
26508 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
26509 nameYMMReg(rV), nameYMMReg(rG));
26510 delta += 1;
26511 } else {
26512 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26513 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
26514 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
26515 nameYMMReg(rV), nameYMMReg(rG));
26516 delta += alen;
26518 assign( dV, getYMMReg(rV) );
26519 breakupV256toV128s( dV, &d1, &d0 );
26520 breakupV256toV128s( sV, &s1, &s0 );
26521 putYMMReg( rG, binop(Iop_V128HLtoV256,
26522 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
26523 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
26524 *uses_vvvv = True;
26525 goto decode_success;
26527 break;
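      /* Worked example (sketch): for "vhaddps %xmm3,%xmm2,%xmm1" the
         result lanes of xmm1, from low to high, are
            xmm2[0]+xmm2[1], xmm2[2]+xmm2[3], xmm3[0]+xmm3[1], xmm3[2]+xmm3[3];
         vhsubps computes xmm2[0]-xmm2[1] etc. instead, and the PD forms
         do the same on 64-bit lanes. */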
26529 case 0x7E:
26530 /* Note the Intel docs don't make sense for this. I think they
26531 are wrong. They seem to imply it is a store when in fact I
26532 think it is a load. Also it's unclear whether this is W0, W1
26533 or WIG. */
26534 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
26535 if (haveF3no66noF2(pfx)
26536 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26537 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
26538 UChar modrm = getUChar(delta);
26539 UInt rG = gregOfRexRM(pfx,modrm);
26540 if (epartIsReg(modrm)) {
26541 UInt rE = eregOfRexRM(pfx,modrm);
26542 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
26543 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
26544 delta += 1;
26545 } else {
26546 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26547 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
26548 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26549 delta += alen;
26551 /* zero bits 255:64 */
26552 putXMMRegLane64( rG, 1, mkU64(0) );
26553 putYMMRegLane128( rG, 1, mkV128(0) );
26554 goto decode_success;
26556 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
26557 /* Moves from G to E, so is a store-form insn */
26558 /* Intel docs list this in the VMOVD entry for some reason. */
26559 if (have66noF2noF3(pfx)
26560 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
26561 UChar modrm = getUChar(delta);
26562 UInt rG = gregOfRexRM(pfx,modrm);
26563 if (epartIsReg(modrm)) {
26564 UInt rE = eregOfRexRM(pfx,modrm);
26565 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
26566 putIReg64(rE, getXMMRegLane64(rG, 0));
26567 delta += 1;
26568 } else {
26569 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26570 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
26571 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
26572 delta += alen;
26574 goto decode_success;
26576 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
26577 /* Moves from G to E, so is a store-form insn */
26578 if (have66noF2noF3(pfx)
26579 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26580 UChar modrm = getUChar(delta);
26581 UInt rG = gregOfRexRM(pfx,modrm);
26582 if (epartIsReg(modrm)) {
26583 UInt rE = eregOfRexRM(pfx,modrm);
26584 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
26585 putIReg32(rE, getXMMRegLane32(rG, 0));
26586 delta += 1;
26587 } else {
26588 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26589 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
26590 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
26591 delta += alen;
26593 goto decode_success;
26595 break;
26597 case 0x7F:
26598 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
26599 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
26600 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26601 && 1==getVexL(pfx)/*256*/) {
26602 UChar modrm = getUChar(delta);
26603 UInt rS = gregOfRexRM(pfx, modrm);
26604 IRTemp tS = newTemp(Ity_V256);
26605 Bool isA = have66noF2noF3(pfx);
26606 HChar ch = isA ? 'a' : 'u';
26607 assign(tS, getYMMReg(rS));
26608 if (epartIsReg(modrm)) {
26609 UInt rD = eregOfRexRM(pfx, modrm);
26610 delta += 1;
26611 putYMMReg(rD, mkexpr(tS));
26612 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
26613 } else {
26614 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26615 delta += alen;
26616 if (isA)
26617 gen_SIGNAL_if_not_32_aligned(vbi, addr);
26618 storeLE(mkexpr(addr), mkexpr(tS));
26619 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
26621 goto decode_success;
26623 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
26624 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
26625 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
26626 && 0==getVexL(pfx)/*128*/) {
26627 UChar modrm = getUChar(delta);
26628 UInt rS = gregOfRexRM(pfx, modrm);
26629 IRTemp tS = newTemp(Ity_V128);
26630 Bool isA = have66noF2noF3(pfx);
26631 HChar ch = isA ? 'a' : 'u';
26632 assign(tS, getXMMReg(rS));
26633 if (epartIsReg(modrm)) {
26634 UInt rD = eregOfRexRM(pfx, modrm);
26635 delta += 1;
26636 putYMMRegLoAndZU(rD, mkexpr(tS));
26637 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
26638 } else {
26639 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26640 delta += alen;
26641 if (isA)
26642 gen_SIGNAL_if_not_16_aligned(vbi, addr);
26643 storeLE(mkexpr(addr), mkexpr(tS));
26644 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
26646 goto decode_success;
26648 break;
26650 case 0xAE:
26651 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
26652 if (haveNo66noF2noF3(pfx)
26653 && 0==getVexL(pfx)/*LZ*/
26654 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26655 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
26656 && sz == 4) {
26657 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
26658 goto decode_success;
26660 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
26661 if (haveNo66noF2noF3(pfx)
26662 && 0==getVexL(pfx)/*LZ*/
26663 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
26664 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
26665 && sz == 4) {
26666 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
26667 goto decode_success;
26669 break;
26671 case 0xC2:
26672 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
26673 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
26674 if (haveF2no66noF3(pfx)) {
26675 Long delta0 = delta;
26676 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26677 "vcmpsd", False/*!all_lanes*/,
26678 8/*sz*/);
26679 if (delta > delta0) goto decode_success;
26680 /* else fall through -- decoding has failed */
26682 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
26683 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
26684 if (haveF3no66noF2(pfx)) {
26685 Long delta0 = delta;
26686 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26687 "vcmpss", False/*!all_lanes*/,
26688 4/*sz*/);
26689 if (delta > delta0) goto decode_success;
26690 /* else fall through -- decoding has failed */
26692 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26693 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
26694 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26695 Long delta0 = delta;
26696 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26697 "vcmppd", True/*all_lanes*/,
26698 8/*sz*/);
26699 if (delta > delta0) goto decode_success;
26700 /* else fall through -- decoding has failed */
26702 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26703 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
26704 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26705 Long delta0 = delta;
26706 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26707 "vcmppd", 8/*sz*/);
26708 if (delta > delta0) goto decode_success;
26709 /* else fall through -- decoding has failed */
26711 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
26712 /* = VEX.NDS.128.0F.WIG C2 /r ib */
26713 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26714 Long delta0 = delta;
26715 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26716 "vcmpps", True/*all_lanes*/,
26717 4/*sz*/);
26718 if (delta > delta0) goto decode_success;
26719 /* else fall through -- decoding has failed */
26721 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
26722 /* = VEX.NDS.256.0F.WIG C2 /r ib */
26723 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26724 Long delta0 = delta;
26725 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
26726 "vcmpps", 4/*sz*/);
26727 if (delta > delta0) goto decode_success;
26728 /* else fall through -- decoding has failed */
26730 break;
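      /* The trailing imm8 selects the comparison predicate (0 = EQ_OQ,
         1 = LT_OS, 2 = LE_OS, 3 = UNORD_Q, ...).  The cmp helpers leave
         delta unchanged when they meet a predicate they do not handle,
         which is why every arm above re-checks delta > delta0 before
         claiming success. */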
26732 case 0xC4:
26733 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
26734 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26735 UChar modrm = getUChar(delta);
26736 UInt rG = gregOfRexRM(pfx, modrm);
26737 UInt rV = getVexNvvvv(pfx);
26738 Int imm8;
26739 IRTemp new16 = newTemp(Ity_I16);
26741 if ( epartIsReg( modrm ) ) {
26742 imm8 = (Int)(getUChar(delta+1) & 7);
26743 assign( new16, unop(Iop_32to16,
26744 getIReg32(eregOfRexRM(pfx,modrm))) );
26745 delta += 1+1;
26746 DIP( "vpinsrw $%d,%s,%s\n", imm8,
26747 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
26748 } else {
26749 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
26750 imm8 = (Int)(getUChar(delta+alen) & 7);
26751 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
26752 delta += alen+1;
26753 DIP( "vpinsrw $%d,%s,%s\n",
26754 imm8, dis_buf, nameXMMReg(rG) );
26757 IRTemp src_vec = newTemp(Ity_V128);
26758 assign(src_vec, getXMMReg( rV ));
26759 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
26760 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
26761 *uses_vvvv = True;
26762 goto decode_success;
26764 break;
26766 case 0xC5:
26767 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
26768 if (have66noF2noF3(pfx)
26769 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
26770 Long delta0 = delta;
26771 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
26772 True/*isAvx*/ );
26773 if (delta > delta0) goto decode_success;
26774 /* else fall through -- decoding has failed */
26776 break;
26778 case 0xC6:
26779 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
26780 /* = VEX.NDS.128.0F.WIG C6 /r ib */
26781 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26782 Int imm8 = 0;
26783 IRTemp eV = newTemp(Ity_V128);
26784 IRTemp vV = newTemp(Ity_V128);
26785 UInt modrm = getUChar(delta);
26786 UInt rG = gregOfRexRM(pfx,modrm);
26787 UInt rV = getVexNvvvv(pfx);
26788 assign( vV, getXMMReg(rV) );
26789 if (epartIsReg(modrm)) {
26790 UInt rE = eregOfRexRM(pfx,modrm);
26791 assign( eV, getXMMReg(rE) );
26792 imm8 = (Int)getUChar(delta+1);
26793 delta += 1+1;
26794 DIP("vshufps $%d,%s,%s,%s\n",
26795 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26796 } else {
26797 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26798 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26799 imm8 = (Int)getUChar(delta+alen);
26800 delta += 1+alen;
26801 DIP("vshufps $%d,%s,%s,%s\n",
26802 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26804 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
26805 putYMMRegLoAndZU( rG, mkexpr(res) );
26806 *uses_vvvv = True;
26807 goto decode_success;
26809 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26810 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26811 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26812 Int imm8 = 0;
26813 IRTemp eV = newTemp(Ity_V256);
26814 IRTemp vV = newTemp(Ity_V256);
26815 UInt modrm = getUChar(delta);
26816 UInt rG = gregOfRexRM(pfx,modrm);
26817 UInt rV = getVexNvvvv(pfx);
26818 assign( vV, getYMMReg(rV) );
26819 if (epartIsReg(modrm)) {
26820 UInt rE = eregOfRexRM(pfx,modrm);
26821 assign( eV, getYMMReg(rE) );
26822 imm8 = (Int)getUChar(delta+1);
26823 delta += 1+1;
26824 DIP("vshufps $%d,%s,%s,%s\n",
26825 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26826 } else {
26827 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26828 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26829 imm8 = (Int)getUChar(delta+alen);
26830 delta += 1+alen;
26831 DIP("vshufps $%d,%s,%s,%s\n",
26832 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26834 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26835 putYMMReg( rG, mkexpr(res) );
26836 *uses_vvvv = True;
26837 goto decode_success;
26839 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26840 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26841 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26842 Int imm8 = 0;
26843 IRTemp eV = newTemp(Ity_V128);
26844 IRTemp vV = newTemp(Ity_V128);
26845 UInt modrm = getUChar(delta);
26846 UInt rG = gregOfRexRM(pfx,modrm);
26847 UInt rV = getVexNvvvv(pfx);
26848 assign( vV, getXMMReg(rV) );
26849 if (epartIsReg(modrm)) {
26850 UInt rE = eregOfRexRM(pfx,modrm);
26851 assign( eV, getXMMReg(rE) );
26852 imm8 = (Int)getUChar(delta+1);
26853 delta += 1+1;
26854 DIP("vshufpd $%d,%s,%s,%s\n",
26855 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26856 } else {
26857 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26858 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
26859 imm8 = (Int)getUChar(delta+alen);
26860 delta += 1+alen;
26861 DIP("vshufpd $%d,%s,%s,%s\n",
26862 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26864 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26865 putYMMRegLoAndZU( rG, mkexpr(res) );
26866 *uses_vvvv = True;
26867 goto decode_success;
26869 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26870 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26871 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26872 Int imm8 = 0;
26873 IRTemp eV = newTemp(Ity_V256);
26874 IRTemp vV = newTemp(Ity_V256);
26875 UInt modrm = getUChar(delta);
26876 UInt rG = gregOfRexRM(pfx,modrm);
26877 UInt rV = getVexNvvvv(pfx);
26878 assign( vV, getYMMReg(rV) );
26879 if (epartIsReg(modrm)) {
26880 UInt rE = eregOfRexRM(pfx,modrm);
26881 assign( eV, getYMMReg(rE) );
26882 imm8 = (Int)getUChar(delta+1);
26883 delta += 1+1;
26884 DIP("vshufpd $%d,%s,%s,%s\n",
26885 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26886 } else {
26887 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26888 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26889 imm8 = (Int)getUChar(delta+alen);
26890 delta += 1+alen;
26891 DIP("vshufpd $%d,%s,%s,%s\n",
26892 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26894 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26895 putYMMReg( rG, mkexpr(res) );
26896 *uses_vvvv = True;
26897 goto decode_success;
26899 break;
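      /* Selection sketch for the 128-bit vshufps: the two low result
         lanes are picked from the vvvv register and the two high lanes
         from the r/m operand, roughly
            dst[0] = vV[imm8[1:0]];  dst[1] = vV[imm8[3:2]];
            dst[2] = eV[imm8[5:4]];  dst[3] = eV[imm8[7:6]];
         vshufpd uses one selector bit per 64-bit lane instead, and the
         256-bit vshufps repeats the same selection in each 128-bit lane. */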
26901 case 0xD0:
26902 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26903 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26904 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26905 uses_vvvv, vbi, pfx, delta,
26906 "vaddsubpd", math_ADDSUBPD_128 );
26907 goto decode_success;
26909 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26910 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26911 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26912 uses_vvvv, vbi, pfx, delta,
26913 "vaddsubpd", math_ADDSUBPD_256 );
26914 goto decode_success;
26916 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26917 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26918 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26919 uses_vvvv, vbi, pfx, delta,
26920 "vaddsubps", math_ADDSUBPS_128 );
26921 goto decode_success;
26923 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26924 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26925 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26926 uses_vvvv, vbi, pfx, delta,
26927 "vaddsubps", math_ADDSUBPS_256 );
26928 goto decode_success;
26930 break;
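      /* Lane pattern (sketch): addsubps alternates the two operations,
         subtracting in even lanes and adding in odd ones; with a = the
         vvvv register and b = the r/m operand,
            dst[0] = a[0]-b[0];  dst[1] = a[1]+b[1];
            dst[2] = a[2]-b[2];  dst[3] = a[3]+b[3];
         addsubpd does the same with one subtract/add pair per 128 bits. */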
26932 case 0xD1:
26933 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26934 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26935 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26936 "vpsrlw", Iop_ShrN16x8 );
26937 *uses_vvvv = True;
26938 goto decode_success;
26941 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26942 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26943 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26944 "vpsrlw", Iop_ShrN16x16 );
26945 *uses_vvvv = True;
26946 goto decode_success;
26949 break;
26951 case 0xD2:
26952 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26953 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26954 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26955 "vpsrld", Iop_ShrN32x4 );
26956 *uses_vvvv = True;
26957 goto decode_success;
26959 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26960 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26961 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26962 "vpsrld", Iop_ShrN32x8 );
26963 *uses_vvvv = True;
26964 goto decode_success;
26966 break;
26968 case 0xD3:
26969 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26970 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26971 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26972 "vpsrlq", Iop_ShrN64x2 );
26973 *uses_vvvv = True;
26974 goto decode_success;
26976 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26977 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26978 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26979 "vpsrlq", Iop_ShrN64x4 );
26980 *uses_vvvv = True;
26981 goto decode_success;
26983 break;
26985 case 0xD4:
26986 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26987 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26988 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26989 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26990 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26991 goto decode_success;
26993 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26994 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26995 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26996 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26997 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26998 goto decode_success;
27000 break;
27002 case 0xD5:
27003 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
27004 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27005 delta = dis_AVX128_E_V_to_G(
27006 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
27007 goto decode_success;
27009 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
27010 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27011 delta = dis_AVX256_E_V_to_G(
27012 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
27013 goto decode_success;
27015 break;
27017 case 0xD6:
27018 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
27019 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
27020 (WIG, maybe?) */
27021 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27022 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
27023 UChar modrm = getUChar(delta);
27024 UInt rG = gregOfRexRM(pfx,modrm);
27025 if (epartIsReg(modrm)) {
27026 /* dst: lo half copied, hi half zeroed */
27027 UInt rE = eregOfRexRM(pfx,modrm);
27028 putXMMRegLane64( rE, 0, getXMMRegLane64( rG, 0 ));
27029 /* zero bits 255:64 */
27030 putXMMRegLane64( rE, 1, mkU64(0) );
27031 putYMMRegLane128( rE, 1, mkV128(0) );
27032 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
27033 delta += 1;
27034 goto decode_success;
27035 } else {
27036 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27037 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
27038 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
27039 delta += alen;
27040 goto decode_success;
27043 break;
27045 case 0xD7:
27046 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
27047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27048 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
27049 goto decode_success;
27051 /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
27052 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27053 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
27054 goto decode_success;
27056 break;
27058 case 0xD8:
27059 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
27060 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27061 delta = dis_AVX128_E_V_to_G(
27062 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
27063 goto decode_success;
27065 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
27066 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27067 delta = dis_AVX256_E_V_to_G(
27068 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
27069 goto decode_success;
27071 break;
27073 case 0xD9:
27074 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
27075 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27076 delta = dis_AVX128_E_V_to_G(
27077 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
27078 goto decode_success;
27080 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
27081 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27082 delta = dis_AVX256_E_V_to_G(
27083 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
27084 goto decode_success;
27086 break;
27088 case 0xDA:
27089 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
27090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27091 delta = dis_AVX128_E_V_to_G(
27092 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
27093 goto decode_success;
27095 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
27096 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27097 delta = dis_AVX256_E_V_to_G(
27098 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
27099 goto decode_success;
27101 break;
27103 case 0xDB:
27104 /* VPAND r/m, rV, r ::: r = rV & r/m */
27105 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
27106 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27107 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27108 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
27109 goto decode_success;
27111 /* VPAND r/m, rV, r ::: r = rV & r/m */
27112 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
27113 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27114 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27115 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
27116 goto decode_success;
27118 break;
27120 case 0xDC:
27121 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
27122 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27123 delta = dis_AVX128_E_V_to_G(
27124 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
27125 goto decode_success;
27127 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
27128 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27129 delta = dis_AVX256_E_V_to_G(
27130 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
27131 goto decode_success;
27133 break;
27135 case 0xDD:
27136 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
27137 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27138 delta = dis_AVX128_E_V_to_G(
27139 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
27140 goto decode_success;
27142 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
27143 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27144 delta = dis_AVX256_E_V_to_G(
27145 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
27146 goto decode_success;
27148 break;
27150 case 0xDE:
27151 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
27152 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27153 delta = dis_AVX128_E_V_to_G(
27154 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
27155 goto decode_success;
27157 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
27158 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27159 delta = dis_AVX256_E_V_to_G(
27160 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
27161 goto decode_success;
27163 break;
27165 case 0xDF:
27166 /* VPANDN r/m, rV, r ::: r = ~rV & r/m */
27167 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
27168 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27169 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
27170 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
27171 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27172 goto decode_success;
27174 /* VPANDN r/m, rV, r ::: r = ~rV & r/m */
27175 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
27176 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27177 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
27178 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
27179 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
27180 goto decode_success;
27182 break;
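/* A minimal sketch of the VPANDN semantics implemented above (the
   complement applies to the vvvv operand, per the architecture manuals);
   the helper name is hypothetical and the operation is lane-independent,
   so one 64-bit chunk is representative:

      static ULong ref_vpandn_64 ( ULong rV, ULong rM )
      {
         // first (vvvv) source is complemented before the AND
         return (~rV) & rM;
      }
*/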
27184 case 0xE0:
27185 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
27186 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27187 delta = dis_AVX128_E_V_to_G(
27188 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
27189 goto decode_success;
27191 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
27192 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27193 delta = dis_AVX256_E_V_to_G(
27194 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
27195 goto decode_success;
27197 break;
27199 case 0xE1:
27200 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
27201 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27202 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27203 "vpsraw", Iop_SarN16x8 );
27204 *uses_vvvv = True;
27205 goto decode_success;
27207 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
27208 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27209 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27210 "vpsraw", Iop_SarN16x16 );
27211 *uses_vvvv = True;
27212 goto decode_success;
27214 break;
27216 case 0xE2:
27217 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
27218 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27219 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27220 "vpsrad", Iop_SarN32x4 );
27221 *uses_vvvv = True;
27222 goto decode_success;
27224 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
27225 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27226 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27227 "vpsrad", Iop_SarN32x8 );
27228 *uses_vvvv = True;
27229 goto decode_success;
27231 break;
27233 case 0xE3:
27234 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
27235 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27236 delta = dis_AVX128_E_V_to_G(
27237 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
27238 goto decode_success;
27240 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
27241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27242 delta = dis_AVX256_E_V_to_G(
27243 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
27244 goto decode_success;
27246 break;
27248 case 0xE4:
27249 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
27250 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27251 delta = dis_AVX128_E_V_to_G(
27252 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
27253 goto decode_success;
27255 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
27256 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27257 delta = dis_AVX256_E_V_to_G(
27258 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
27259 goto decode_success;
27261 break;
27263 case 0xE5:
27264 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
27265 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27266 delta = dis_AVX128_E_V_to_G(
27267 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
27268 goto decode_success;
27270 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
27271 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27272 delta = dis_AVX256_E_V_to_G(
27273 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
27274 goto decode_success;
27276 break;
27278 case 0xE6:
27279 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
27280 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
27281 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
27282 goto decode_success;
27284 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
27285 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
27286 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
27287 goto decode_success;
27289 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
27290 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27291 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27292 True/*r2zero*/);
27293 goto decode_success;
27295 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
27296 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27297 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
27298 goto decode_success;
27300 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
27301 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27302 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
27303 False/*!r2zero*/);
27304 goto decode_success;
27306 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
27307 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27308 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
27309 goto decode_success;
27311 break;
27313 case 0xE7:
27314 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
27315 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27316 UChar modrm = getUChar(delta);
27317 UInt rG = gregOfRexRM(pfx,modrm);
27318 if (!epartIsReg(modrm)) {
27319 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27320 gen_SIGNAL_if_not_16_aligned( vbi, addr );
27321 storeLE( mkexpr(addr), getXMMReg(rG) );
27322 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
27323 delta += alen;
27324 goto decode_success;
27326 /* else fall through */
27328 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
27329 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27330 UChar modrm = getUChar(delta);
27331 UInt rG = gregOfRexRM(pfx,modrm);
27332 if (!epartIsReg(modrm)) {
27333 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27334 gen_SIGNAL_if_not_32_aligned( vbi, addr );
27335 storeLE( mkexpr(addr), getYMMReg(rG) );
27336 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
27337 delta += alen;
27338 goto decode_success;
27340 /* else fall through */
27342 break;
27344 case 0xE8:
27345 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
27346 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27347 delta = dis_AVX128_E_V_to_G(
27348 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
27349 goto decode_success;
27351 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
27352 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27353 delta = dis_AVX256_E_V_to_G(
27354 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
27355 goto decode_success;
27357 break;
27359 case 0xE9:
27360 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
27361 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27362 delta = dis_AVX128_E_V_to_G(
27363 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
27364 goto decode_success;
27366 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
27367 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27368 delta = dis_AVX256_E_V_to_G(
27369 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
27370 goto decode_success;
27372 break;
27374 case 0xEA:
27375 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27376 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
27377 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27378 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27379 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
27380 goto decode_success;
27382 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
27383 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
27384 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27385 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27386 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
27387 goto decode_success;
27389 break;
27391 case 0xEB:
27392 /* VPOR r/m, rV, r ::: r = rV | r/m */
27393 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
27394 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27395 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27396 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
27397 goto decode_success;
27399 /* VPOR r/m, rV, r ::: r = rV | r/m */
27400 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
27401 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27402 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27403 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
27404 goto decode_success;
27406 break;
27408 case 0xEC:
27409 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
27410 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27411 delta = dis_AVX128_E_V_to_G(
27412 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
27413 goto decode_success;
27415 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
27416 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27417 delta = dis_AVX256_E_V_to_G(
27418 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
27419 goto decode_success;
27421 break;
27423 case 0xED:
27424 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
27425 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27426 delta = dis_AVX128_E_V_to_G(
27427 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
27428 goto decode_success;
27430 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
27431 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27432 delta = dis_AVX256_E_V_to_G(
27433 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
27434 goto decode_success;
27436 break;
27438 case 0xEE:
27439 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27440 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
27441 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27442 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27443 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
27444 goto decode_success;
27446 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
27447 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
27448 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27449 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27450 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
27451 goto decode_success;
27453 break;
27455 case 0xEF:
27456 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27457 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
27458 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27459 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27460 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
27461 goto decode_success;
27463 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
27464 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
27465 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27466 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27467 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
27468 goto decode_success;
27470 break;
27472 case 0xF0:
27473 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
27474 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27475 UChar modrm = getUChar(delta);
27476 UInt rD = gregOfRexRM(pfx, modrm);
27477 IRTemp tD = newTemp(Ity_V256);
27478 if (epartIsReg(modrm)) break;
27479 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27480 delta += alen;
27481 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
27482 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
27483 putYMMReg(rD, mkexpr(tD));
27484 goto decode_success;
27486 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
27487 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27488 UChar modrm = getUChar(delta);
27489 UInt rD = gregOfRexRM(pfx, modrm);
27490 IRTemp tD = newTemp(Ity_V128);
27491 if (epartIsReg(modrm)) break;
27492 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27493 delta += alen;
27494 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
27495 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
27496 putYMMRegLoAndZU(rD, mkexpr(tD));
27497 goto decode_success;
27499 break;
27501 case 0xF1:
27502 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
27503 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27504 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27505 "vpsllw", Iop_ShlN16x8 );
27506 *uses_vvvv = True;
27507 goto decode_success;
27510 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
27511 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27512 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27513 "vpsllw", Iop_ShlN16x16 );
27514 *uses_vvvv = True;
27515 goto decode_success;
27518 break;
27520 case 0xF2:
27521 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
27522 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27523 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27524 "vpslld", Iop_ShlN32x4 );
27525 *uses_vvvv = True;
27526 goto decode_success;
27528 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
27529 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27530 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27531 "vpslld", Iop_ShlN32x8 );
27532 *uses_vvvv = True;
27533 goto decode_success;
27535 break;
27537 case 0xF3:
27538 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
27539 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27540 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
27541 "vpsllq", Iop_ShlN64x2 );
27542 *uses_vvvv = True;
27543 goto decode_success;
27545 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
27546 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27547 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
27548 "vpsllq", Iop_ShlN64x4 );
27549 *uses_vvvv = True;
27550 goto decode_success;
27552 break;
27554 case 0xF4:
27555 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
27556 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27557 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27558 uses_vvvv, vbi, pfx, delta,
27559 "vpmuludq", math_PMULUDQ_128 );
27560 goto decode_success;
27562 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
27563 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27564 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27565 uses_vvvv, vbi, pfx, delta,
27566 "vpmuludq", math_PMULUDQ_256 );
27567 goto decode_success;
27569 break;
27571 case 0xF5:
27572 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
27573 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27574 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27575 uses_vvvv, vbi, pfx, delta,
27576 "vpmaddwd", math_PMADDWD_128 );
27577 goto decode_success;
27579 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
27580 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27581 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27582 uses_vvvv, vbi, pfx, delta,
27583 "vpmaddwd", math_PMADDWD_256 );
27584 goto decode_success;
27586 break;
27588 case 0xF6:
27589 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
27590 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27591 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27592 uses_vvvv, vbi, pfx, delta,
27593 "vpsadbw", math_PSADBW_128 );
27594 goto decode_success;
27596 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
27597 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27598 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27599 uses_vvvv, vbi, pfx, delta,
27600 "vpsadbw", math_PSADBW_256 );
27601 goto decode_success;
27603 break;
27605 case 0xF7:
27606 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
27607 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
27608 && epartIsReg(getUChar(delta))) {
27609 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
27610 goto decode_success;
27612 break;
27614 case 0xF8:
27615 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27616 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
27617 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27618 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27619 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
27620 goto decode_success;
27622 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
27623 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
27624 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27625 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27626 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
27627 goto decode_success;
27629 break;
27631 case 0xF9:
27632 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27633 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
27634 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27635 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27636 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
27637 goto decode_success;
27639 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
27640 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
27641 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27642 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27643 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
27644 goto decode_success;
27646 break;
27648 case 0xFA:
27649 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27650 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
27651 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27652 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27653 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
27654 goto decode_success;
27656 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
27657 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
27658 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27659 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27660 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
27661 goto decode_success;
27663 break;
27665 case 0xFB:
27666 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27667 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
27668 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27669 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27670 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
27671 goto decode_success;
27673 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
27674 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
27675 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27676 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27677 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
27678 goto decode_success;
27680 break;
27682 case 0xFC:
27683 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27684 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
27685 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27686 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27687 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
27688 goto decode_success;
27690 /* VPADDB r/m, rV, r ::: r = rV + r/m */
27691 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
27692 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27693 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27694 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
27695 goto decode_success;
27697 break;
27699 case 0xFD:
27700 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27701 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
27702 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27703 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27704 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
27705 goto decode_success;
27707 /* VPADDW r/m, rV, r ::: r = rV + r/m */
27708 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
27709 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27710 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27711 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
27712 goto decode_success;
27714 break;
27716 case 0xFE:
27717 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27718 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
27719 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27720 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
27721 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
27722 goto decode_success;
27724 /* VPADDD r/m, rV, r ::: r = rV + r/m */
27725 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
27726 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27727 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
27728 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
27729 goto decode_success;
27731 break;
27733 default:
27734 break;
27738 //decode_failure:
27739 return deltaIN;
27741 decode_success:
27742 return delta;
27746 /*------------------------------------------------------------*/
27747 /*--- ---*/
27748 /*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
27749 /*--- ---*/
27750 /*------------------------------------------------------------*/
27752 static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27754 /* In the control vector, zero out all but the bottom two bits of
27755 each 32-bit lane. */
27756 IRExpr* cv1 = binop(Iop_ShrN32x4,
27757 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
27758 mkU8(30));
27759 /* And use the resulting cleaned-up control vector as steering
27760 in a Perm operation. */
27761 IRTemp res = newTemp(Ity_V128);
27762 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
27763 return res;
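/* Sketch of what the masking plus Perm32x4 above computes, as a scalar
   reference model (hypothetical helper, assuming the usual lane order):

      static void ref_permilps_var_128 ( UInt res[4], const UInt data[4],
                                         const UInt ctrl[4] )
      {
         // each result lane is the data lane named by the low two bits
         // of the corresponding control lane
         for (Int i = 0; i < 4; i++)
            res[i] = data[ctrl[i] & 3];
      }
*/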
27766 static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27768 IRTemp dHi, dLo, cHi, cLo;
27769 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27770 breakupV256toV128s( dataV, &dHi, &dLo );
27771 breakupV256toV128s( ctrlV, &cHi, &cLo );
27772 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
27773 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
27774 IRTemp res = newTemp(Ity_V256);
27775 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27776 return res;
27779 static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
27781 /* No cleverness here .. */
27782 IRTemp dHi, dLo, cHi, cLo;
27783 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27784 breakupV128to64s( dataV, &dHi, &dLo );
27785 breakupV128to64s( ctrlV, &cHi, &cLo );
27786 IRExpr* rHi
27787 = IRExpr_ITE( unop(Iop_64to1,
27788 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
27789 mkexpr(dHi), mkexpr(dLo) );
27790 IRExpr* rLo
27791 = IRExpr_ITE( unop(Iop_64to1,
27792 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
27793 mkexpr(dHi), mkexpr(dLo) );
27794 IRTemp res = newTemp(Ity_V128);
27795 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
27796 return res;
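/* Equivalent scalar view of the two ITEs above (hypothetical helper):
   bit 1 of each 64-bit control lane selects the data lane that feeds
   the corresponding result lane.

      static void ref_permilpd_var_128 ( ULong res[2], const ULong data[2],
                                         const ULong ctrl[2] )
      {
         for (Int i = 0; i < 2; i++)
            res[i] = data[(ctrl[i] >> 1) & 1];
      }
*/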
27799 static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
27801 IRTemp dHi, dLo, cHi, cLo;
27802 dHi = dLo = cHi = cLo = IRTemp_INVALID;
27803 breakupV256toV128s( dataV, &dHi, &dLo );
27804 breakupV256toV128s( ctrlV, &cHi, &cLo );
27805 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
27806 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
27807 IRTemp res = newTemp(Ity_V256);
27808 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
27809 return res;
27812 static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27814 /* In the control vector, zero out all but the bottom three bits of
27815 each 32-bit lane. */
27816 IRExpr* cv1 = binop(Iop_ShrN32x8,
27817 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27818 mkU8(29));
27819 /* And use the resulting cleaned-up control vector as steering
27820 in a Perm operation. */
27821 IRTemp res = newTemp(Ity_V256);
27822 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27823 return res;
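/* Scalar reference model for the VPERMD/VPERMPS steering built above
   (hypothetical helper): the low three bits of each control lane name
   the data lane copied into the corresponding result lane.

      static void ref_vpermd ( UInt res[8], const UInt data[8],
                               const UInt ctrl[8] )
      {
         for (Int i = 0; i < 8; i++)
            res[i] = data[ctrl[i] & 7];
      }
*/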
27826 static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27827 const VexAbiInfo* vbi, Prefix pfx, Long delta,
27828 const HChar* opname, IROp op8 )
27830 HChar dis_buf[50];
27831 Int alen;
27832 Int size = getRexW(pfx) ? 8 : 4;
27833 IRType ty = szToITy(size);
27834 IRTemp src = newTemp(ty);
27835 IRTemp amt = newTemp(ty);
27836 UChar rm = getUChar(delta);
27838 assign( amt, getIRegV(size,pfx) );
27839 if (epartIsReg(rm)) {
27840 assign( src, getIRegE(size,pfx,rm) );
27841 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27842 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27843 delta++;
27844 } else {
27845 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27846 assign( src, loadLE(ty, mkexpr(addr)) );
27847 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27848 nameIRegG(size,pfx,rm));
27849 delta += alen;
27852 putIRegG( size, pfx, rm,
27853 binop(mkSizedOp(ty,op8), mkexpr(src),
27854 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27855 mkU(ty,8*size-1)))) );
27856 /* Flags aren't modified. */
27857 *uses_vvvv = True;
27858 return delta;
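/* A 64-bit reference model of the SARX/SHLX/SHRX behaviour generated
   above, as a sketch (hypothetical helper; the 32-bit case masks with
   31 instead of 63, and in all cases rflags are left untouched):

      static ULong ref_shiftx_64 ( ULong src, ULong amt,
                                   Bool isShl, Bool isSar )
      {
         UInt n = amt & 63;                      // amount from vvvv, masked
         if (isShl) return src << n;
         if (isSar) return (ULong)((Long)src >> n);
         return src >> n;
      }
*/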
27862 static Long dis_FMA ( const VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27864 UChar modrm = getUChar(delta);
27865 UInt rG = gregOfRexRM(pfx, modrm);
27866 UInt rV = getVexNvvvv(pfx);
27867 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27868 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27869 IRType vty = scalar ? ty : (getVexL(pfx) ? Ity_V256 : Ity_V128);
27870 IRTemp addr = IRTemp_INVALID;
27871 HChar dis_buf[50];
27872 Int alen = 0;
27873 const HChar *name;
27874 const HChar *suffix;
27875 const HChar *order;
27876 Bool negateRes = False;
27877 Bool negateZeven = False;
27878 Bool negateZodd = False;
27879 UInt count = 0;
27881 switch (opc & 0xF) {
27882 case 0x6: name = "addsub"; negateZeven = True; break;
27883 case 0x7: name = "subadd"; negateZodd = True; break;
27884 case 0x8:
27885 case 0x9: name = "add"; break;
27886 case 0xA:
27887 case 0xB: name = "sub"; negateZeven = True; negateZodd = True;
27888 break;
27889 case 0xC:
27890 case 0xD: name = "add"; negateRes = True; negateZeven = True;
27891 negateZodd = True; break;
27892 case 0xE:
27893 case 0xF: name = "sub"; negateRes = True; break;
27894 default: vpanic("dis_FMA(amd64)"); break;
27896 switch (opc & 0xF0) {
27897 case 0x90: order = "132"; break;
27898 case 0xA0: order = "213"; break;
27899 case 0xB0: order = "231"; break;
27900 default: vpanic("dis_FMA(amd64)"); break;
27902 if (scalar) {
27903 suffix = ty == Ity_F64 ? "sd" : "ss";
27904 } else {
27905 suffix = ty == Ity_F64 ? "pd" : "ps";
27908 // Figure out |count| (the number of elements) by considering |vty| and |ty|.
27909 count = sizeofIRType(vty) / sizeofIRType(ty);
27910 vassert(count == 1 || count == 2 || count == 4 || count == 8);
27912 // Fetch operands into the first |count| elements of |sX|, |sY| and |sZ|.
27913 UInt i;
27914 IRExpr *sX[8], *sY[8], *sZ[8], *res[8];
27915 for (i = 0; i < 8; i++) sX[i] = sY[i] = sZ[i] = res[i] = NULL;
27917 IRExpr* (*getYMMRegLane)(UInt,Int)
27918 = ty == Ity_F32 ? getYMMRegLane32F : getYMMRegLane64F;
27919 void (*putYMMRegLane)(UInt,Int,IRExpr*)
27920 = ty == Ity_F32 ? putYMMRegLane32F : putYMMRegLane64F;
27922 for (i = 0; i < count; i++) {
27923 sX[i] = getYMMRegLane(rG, i);
27924 sZ[i] = getYMMRegLane(rV, i);
27927 if (epartIsReg(modrm)) {
27928 UInt rE = eregOfRexRM(pfx, modrm);
27929 delta += 1;
27930 for (i = 0; i < count; i++) {
27931 sY[i] = getYMMRegLane(rE, i);
27933 if (vty == Ity_V256) {
27934 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27935 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27936 nameYMMReg(rG));
27937 } else {
27938 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27939 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27940 nameXMMReg(rG));
27942 } else {
27943 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27944 delta += alen;
27945 for (i = 0; i < count; i++) {
27946 sY[i] = loadLE(ty, binop(Iop_Add64, mkexpr(addr),
27947 mkU64(i * sizeofIRType(ty))));
27949 if (vty == Ity_V256) {
27950 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27951 name, order, suffix, dis_buf, nameYMMReg(rV),
27952 nameYMMReg(rG));
27953 } else {
27954 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27955 name, order, suffix, dis_buf, nameXMMReg(rV),
27956 nameXMMReg(rG));
27960 /* sX/sY/sZ are now in 132 order. If the instruction requires a different
27961 order, swap them around. */
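   /* Concretely (a sketch of the intended dataflow, using the operand
      numbering op1 = rG, op2 = rV (vvvv), op3 = r/m):
         132:  rG = rG * r/m + rV      (the order fetched above)
         213:  rG = rV * rG  + r/m
         231:  rG = rV * r/m + rG
      so the rotations below just rearrange sX/sY/sZ to feed the single
      MAddF expression used further down. */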
27963 # define COPY_ARR(_dst, _src) \
27964 do { for (int j = 0; j < 8; j++) { _dst[j] = _src[j]; } } while (0)
27966 if ((opc & 0xF0) != 0x90) {
27967 IRExpr* temp[8];
27968 COPY_ARR(temp, sX);
27969 if ((opc & 0xF0) == 0xA0) {
27970 COPY_ARR(sX, sZ);
27971 COPY_ARR(sZ, sY);
27972 COPY_ARR(sY, temp);
27973 } else {
27974 COPY_ARR(sX, sZ);
27975 COPY_ARR(sZ, temp);
27979 # undef COPY_ARR
27981 for (i = 0; i < count; i++) {
27982 IROp opNEG = ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32;
27983 if ((i & 1) ? negateZodd : negateZeven) {
27984 sZ[i] = unop(opNEG, sZ[i]);
27986 res[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27987 get_FAKE_roundingmode(), sX[i], sY[i], sZ[i]);
27988 if (negateRes) {
27989 res[i] = unop(opNEG, res[i]);
27993 for (i = 0; i < count; i++) {
27994 putYMMRegLane(rG, i, res[i]);
27997 switch (vty) {
27998 case Ity_F32:
27999 case Ity_F64:
28000 case Ity_V128: putYMMRegLane128(rG, 1, mkV128(0)); /*fallthru*/
28001 case Ity_V256: break;
28002 default: vassert(0);
28005 return delta;
28009 /* Masked load or masked store. */
28010 static ULong dis_VMASKMOV ( Bool *uses_vvvv, const VexAbiInfo* vbi,
28011 Prefix pfx, Long delta,
28012 const HChar* opname, Bool isYMM, IRType ty,
28013 Bool isLoad )
28015 HChar dis_buf[50];
28016 Int alen, i;
28017 IRTemp addr;
28018 UChar modrm = getUChar(delta);
28019 UInt rG = gregOfRexRM(pfx,modrm);
28020 UInt rV = getVexNvvvv(pfx);
28022 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28023 delta += alen;
28025 /**/ if (isLoad && isYMM) {
28026 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
28028 else if (isLoad && !isYMM) {
28029 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
28032 else if (!isLoad && isYMM) {
28033 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rG), nameYMMReg(rV), dis_buf );
28035 else {
28036 vassert(!isLoad && !isYMM);
28037 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rG), nameXMMReg(rV), dis_buf );
28040 vassert(ty == Ity_I32 || ty == Ity_I64);
28041 Bool laneIs32 = ty == Ity_I32;
28043 Int nLanes = (isYMM ? 2 : 1) * (laneIs32 ? 4 : 2);
28045 for (i = 0; i < nLanes; i++) {
28046 IRExpr* shAmt = laneIs32 ? mkU8(31) : mkU8(63);
28047 IRExpr* one = laneIs32 ? mkU32(1) : mkU64(1);
28048 IROp opSHR = laneIs32 ? Iop_Shr32 : Iop_Shr64;
28049 IROp opEQ = laneIs32 ? Iop_CmpEQ32 : Iop_CmpEQ64;
28050 IRExpr* lane = (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rV, i );
28052 IRTemp cond = newTemp(Ity_I1);
28053 assign(cond, binop(opEQ, binop(opSHR, lane, shAmt), one));
28055 IRTemp data = newTemp(ty);
28056 IRExpr* ea = binop(Iop_Add64, mkexpr(addr),
28057 mkU64(i * (laneIs32 ? 4 : 8)));
28058 if (isLoad) {
28059 stmt(
28060 IRStmt_LoadG(
28061 Iend_LE, laneIs32 ? ILGop_Ident32 : ILGop_Ident64,
28062 data, ea, laneIs32 ? mkU32(0) : mkU64(0), mkexpr(cond)
28064 (laneIs32 ? putYMMRegLane32 : putYMMRegLane64)( rG, i, mkexpr(data) );
28065 } else {
28066 assign(data, (laneIs32 ? getYMMRegLane32 : getYMMRegLane64)( rG, i ));
28067 stmt( IRStmt_StoreG(Iend_LE, ea, mkexpr(data), mkexpr(cond)) );
28071 if (isLoad && !isYMM)
28072 putYMMRegLane128( rG, 1, mkV128(0) );
28074 *uses_vvvv = True;
28075 return delta;
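/* Reference model for one concrete case of the masked move above, the
   xmm VPMASKMOVD/VMASKMOVPS load (hypothetical helper): lane i is loaded
   only when the sign bit of mask lane i is set, and unselected lanes of
   the destination are zeroed.  Stores are the mirror image: only the
   selected lanes are written to memory.

      static void ref_vmaskmov_load32 ( UInt dst[4], const UInt mask[4],
                                        const UInt* mem )
      {
         for (Int i = 0; i < 4; i++)
            dst[i] = (mask[i] >> 31) ? mem[i] : 0;
      }
*/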
28079 /* Gather. */
28080 static ULong dis_VGATHER ( Bool *uses_vvvv, const VexAbiInfo* vbi,
28081 Prefix pfx, Long delta,
28082 const HChar* opname, Bool isYMM,
28083 Bool isVM64x, IRType ty )
28085 HChar dis_buf[50];
28086 Int alen, i, vscale, count1, count2;
28087 IRTemp addr;
28088 UChar modrm = getUChar(delta);
28089 UInt rG = gregOfRexRM(pfx,modrm);
28090 UInt rV = getVexNvvvv(pfx);
28091 UInt rI;
28092 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
28093 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
28094 IRTemp cond;
28095 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
28096 idxTy, &vscale );
28097 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
28098 return delta;
28099 if (dstTy == Ity_V256) {
28100 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
28101 } else {
28102 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
28104 delta += alen;
28106 if (ty == Ity_I32) {
28107 count1 = isYMM ? 8 : 4;
28108 count2 = isVM64x ? count1 / 2 : count1;
28109 } else {
28110 count1 = count2 = isYMM ? 4 : 2;
28113 /* First update the mask register to copies of the sign bit. */
28114 if (ty == Ity_I32) {
28115 if (isYMM)
28116 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
28117 else
28118 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
28119 } else {
28120 for (i = 0; i < count1; i++) {
28121 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
28122 mkU8(63)) );
28126 /* Next gather the individual elements. If any fault occurs, the
28127 corresponding mask element will be set and the loop stops. */
28128 for (i = 0; i < count2; i++) {
28129 IRExpr *expr, *addr_expr;
28130 cond = newTemp(Ity_I1);
28131 assign( cond,
28132 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
28133 ty == Ity_I32 ? getYMMRegLane32( rV, i )
28134 : getYMMRegLane64( rV, i ),
28135 mkU(ty, 0)) );
28136 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
28137 : getYMMRegLane64( rG, i );
28138 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
28139 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
28140 switch (vscale) {
28141 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
28142 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
28143 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
28144 default: break;
28146 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
28147 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
28148 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
28149 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
28150 if (ty == Ity_I32) {
28151 putYMMRegLane32( rG, i, expr );
28152 putYMMRegLane32( rV, i, mkU32(0) );
28153 } else {
28154 putYMMRegLane64( rG, i, expr);
28155 putYMMRegLane64( rV, i, mkU64(0) );
28159 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
28160 if (ty == Ity_I64 || isYMM)
28161 putYMMRegLane128( rV, 1, mkV128(0) );
28162 else if (ty == Ity_I32 && count2 == 2) {
28163 putYMMRegLane64( rV, 1, mkU64(0) );
28164 putYMMRegLane64( rG, 1, mkU64(0) );
28166 putYMMRegLane128( rG, 1, mkV128(0) );
28169 *uses_vvvv = True;
28170 return delta;
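/* Reference model for one concrete case of the gather above, VPGATHERDD
   with an xmm destination (hypothetical helper): each mask lane is first
   replaced by copies of its sign bit; a lane is gathered only if selected,
   and its mask element is cleared as soon as its load completes, so a
   faulting gather can be rerun and resumes with the remaining lanes.

      static void ref_vpgatherdd_128 ( UInt dst[4], UInt mask[4],
                                       const UChar* base, const Int idx[4],
                                       Int vscale )
      {
         for (Int i = 0; i < 4; i++) {
            Bool selected = (Int)mask[i] < 0;
            mask[i] = selected ? ~0u : 0;
            if (selected) {
               dst[i]  = *(const UInt*)(base + (Long)idx[i] * vscale);
               mask[i] = 0;   // element done, clear its mask lane
            }
         }
      }
*/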
28174 __attribute__((noinline))
28175 static
28176 Long dis_ESC_0F38__VEX (
28177 /*MB_OUT*/DisResult* dres,
28178 /*OUT*/ Bool* uses_vvvv,
28179 const VexArchInfo* archinfo,
28180 const VexAbiInfo* vbi,
28181 Prefix pfx, Int sz, Long deltaIN
28184 IRTemp addr = IRTemp_INVALID;
28185 Int alen = 0;
28186 HChar dis_buf[50];
28187 Long delta = deltaIN;
28188 UChar opc = getUChar(delta);
28189 delta++;
28190 *uses_vvvv = False;
28192 switch (opc) {
28194 case 0x00:
28195 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28196 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
28197 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28198 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28199 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
28200 goto decode_success;
28202 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
28203 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
28204 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28205 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28206 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
28207 goto decode_success;
28209 break;
28211 case 0x01:
28212 case 0x02:
28213 case 0x03:
28214 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
28215 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
28216 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
28217 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28218 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28219 *uses_vvvv = True;
28220 goto decode_success;
28222 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
28223 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
28224 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
28225 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28226 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28227 *uses_vvvv = True;
28228 goto decode_success;
28230 break;
28232 case 0x04:
28233 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
28234 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28235 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28236 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28237 math_PMADDUBSW_128 );
28238 goto decode_success;
28240 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
28241 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28242 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28243 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
28244 math_PMADDUBSW_256 );
28245 goto decode_success;
28247 break;
28249 case 0x05:
28250 case 0x06:
28251 case 0x07:
28252 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
28253 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
28254 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
28255 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28256 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
28257 *uses_vvvv = True;
28258 goto decode_success;
28260 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
28261 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
28262 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
28263 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28264 delta = dis_PHADD_256( vbi, pfx, delta, opc );
28265 *uses_vvvv = True;
28266 goto decode_success;
28268 break;
28270 case 0x08:
28271 case 0x09:
28272 case 0x0A:
28273 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
28274 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
28275 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
28276 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28277 IRTemp sV = newTemp(Ity_V128);
28278 IRTemp dV = newTemp(Ity_V128);
28279 IRTemp sHi, sLo, dHi, dLo;
28280 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28281 HChar ch = '?';
28282 Int laneszB = 0;
28283 UChar modrm = getUChar(delta);
28284 UInt rG = gregOfRexRM(pfx,modrm);
28285 UInt rV = getVexNvvvv(pfx);
28287 switch (opc) {
28288 case 0x08: laneszB = 1; ch = 'b'; break;
28289 case 0x09: laneszB = 2; ch = 'w'; break;
28290 case 0x0A: laneszB = 4; ch = 'd'; break;
28291 default: vassert(0);
28294 assign( dV, getXMMReg(rV) );
28296 if (epartIsReg(modrm)) {
28297 UInt rE = eregOfRexRM(pfx,modrm);
28298 assign( sV, getXMMReg(rE) );
28299 delta += 1;
28300 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
28301 nameXMMReg(rV), nameXMMReg(rG));
28302 } else {
28303 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28304 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28305 delta += alen;
28306 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28307 nameXMMReg(rV), nameXMMReg(rG));
28310 breakupV128to64s( dV, &dHi, &dLo );
28311 breakupV128to64s( sV, &sHi, &sLo );
28313 putYMMRegLoAndZU(
28315 binop(Iop_64HLtoV128,
28316 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
28317 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
28320 *uses_vvvv = True;
28321 goto decode_success;
28323 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
28324 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
28325 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
28326 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28327 IRTemp sV = newTemp(Ity_V256);
28328 IRTemp dV = newTemp(Ity_V256);
28329 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28330 s3 = s2 = s1 = s0 = IRTemp_INVALID;
28331 d3 = d2 = d1 = d0 = IRTemp_INVALID;
28332 HChar ch = '?';
28333 Int laneszB = 0;
28334 UChar modrm = getUChar(delta);
28335 UInt rG = gregOfRexRM(pfx,modrm);
28336 UInt rV = getVexNvvvv(pfx);
28338 switch (opc) {
28339 case 0x08: laneszB = 1; ch = 'b'; break;
28340 case 0x09: laneszB = 2; ch = 'w'; break;
28341 case 0x0A: laneszB = 4; ch = 'd'; break;
28342 default: vassert(0);
28345 assign( dV, getYMMReg(rV) );
28347 if (epartIsReg(modrm)) {
28348 UInt rE = eregOfRexRM(pfx,modrm);
28349 assign( sV, getYMMReg(rE) );
28350 delta += 1;
28351 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
28352 nameYMMReg(rV), nameYMMReg(rG));
28353 } else {
28354 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28355 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28356 delta += alen;
28357 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
28358 nameYMMReg(rV), nameYMMReg(rG));
28361 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28362 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28364 putYMMReg(
28366 binop( Iop_V128HLtoV256,
28367 binop(Iop_64HLtoV128,
28368 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
28369 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
28371 binop(Iop_64HLtoV128,
28372 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
28373 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
28377 *uses_vvvv = True;
28378 goto decode_success;
28380 break;
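/* Per-lane sketch of the PSIGN operation applied by dis_PSIGN_helper in
   the two VPSIGN cases above (hypothetical helper; lane width is b/w/d
   as selected by laneszB): the destination element is negated if the
   source element is negative, zeroed if it is zero, and passed through
   unchanged if it is positive.

      static Int ref_vpsign_lane ( Int d, Int s )
      {
         if (s < 0)  return -d;
         if (s == 0) return 0;
         return d;
      }
*/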
28382 case 0x0B:
28383 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
28384 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28385 IRTemp sV = newTemp(Ity_V128);
28386 IRTemp dV = newTemp(Ity_V128);
28387 IRTemp sHi, sLo, dHi, dLo;
28388 sHi = sLo = dHi = dLo = IRTemp_INVALID;
28389 UChar modrm = getUChar(delta);
28390 UInt rG = gregOfRexRM(pfx,modrm);
28391 UInt rV = getVexNvvvv(pfx);
28393 assign( dV, getXMMReg(rV) );
28395 if (epartIsReg(modrm)) {
28396 UInt rE = eregOfRexRM(pfx,modrm);
28397 assign( sV, getXMMReg(rE) );
28398 delta += 1;
28399 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
28400 nameXMMReg(rV), nameXMMReg(rG));
28401 } else {
28402 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28403 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
28404 delta += alen;
28405 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28406 nameXMMReg(rV), nameXMMReg(rG));
28409 breakupV128to64s( dV, &dHi, &dLo );
28410 breakupV128to64s( sV, &sHi, &sLo );
28412 putYMMRegLoAndZU(
28414 binop(Iop_64HLtoV128,
28415 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
28416 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
28419 *uses_vvvv = True;
28420 goto decode_success;
28422 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
28423 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28424 IRTemp sV = newTemp(Ity_V256);
28425 IRTemp dV = newTemp(Ity_V256);
28426 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
28427 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
28428 UChar modrm = getUChar(delta);
28429 UInt rG = gregOfRexRM(pfx,modrm);
28430 UInt rV = getVexNvvvv(pfx);
28432 assign( dV, getYMMReg(rV) );
28434 if (epartIsReg(modrm)) {
28435 UInt rE = eregOfRexRM(pfx,modrm);
28436 assign( sV, getYMMReg(rE) );
28437 delta += 1;
28438 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
28439 nameYMMReg(rV), nameYMMReg(rG));
28440 } else {
28441 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
28442 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
28443 delta += alen;
28444 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
28445 nameYMMReg(rV), nameYMMReg(rG));
28448 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
28449 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
28451 putYMMReg(
28453 binop(Iop_V128HLtoV256,
28454 binop(Iop_64HLtoV128,
28455 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
28456 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
28457 binop(Iop_64HLtoV128,
28458 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
28459 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
28462 *uses_vvvv = True;
28464 goto decode_success;
28466 break;
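/* Per-lane sketch of the rounding multiply performed by
   dis_PMULHRSW_helper in the two cases above (hypothetical helper):
   a 16x16->32 signed multiply, dropped by 14 bits, rounded with +1,
   and then halved to leave bits 16:1.

      static Short ref_pmulhrsw_lane ( Short a, Short b )
      {
         Int t = ((Int)a * (Int)b) >> 14;
         t = (t + 1) >> 1;
         return (Short)t;
      }
*/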
28468 case 0x0C:
28469 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
28470 if (have66noF2noF3(pfx)
28471 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28472 UChar modrm = getUChar(delta);
28473 UInt rG = gregOfRexRM(pfx, modrm);
28474 UInt rV = getVexNvvvv(pfx);
28475 IRTemp ctrlV = newTemp(Ity_V128);
28476 if (epartIsReg(modrm)) {
28477 UInt rE = eregOfRexRM(pfx, modrm);
28478 delta += 1;
28479 DIP("vpermilps %s,%s,%s\n",
28480 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28481 assign(ctrlV, getXMMReg(rE));
28482 } else {
28483 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28484 delta += alen;
28485 DIP("vpermilps %s,%s,%s\n",
28486 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28487 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28489 IRTemp dataV = newTemp(Ity_V128);
28490 assign(dataV, getXMMReg(rV));
28491 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
28492 putYMMRegLoAndZU(rG, mkexpr(resV));
28493 *uses_vvvv = True;
28494 goto decode_success;
28496 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
28497 if (have66noF2noF3(pfx)
28498 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28499 UChar modrm = getUChar(delta);
28500 UInt rG = gregOfRexRM(pfx, modrm);
28501 UInt rV = getVexNvvvv(pfx);
28502 IRTemp ctrlV = newTemp(Ity_V256);
28503 if (epartIsReg(modrm)) {
28504 UInt rE = eregOfRexRM(pfx, modrm);
28505 delta += 1;
28506 DIP("vpermilps %s,%s,%s\n",
28507 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28508 assign(ctrlV, getYMMReg(rE));
28509 } else {
28510 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28511 delta += alen;
28512 DIP("vpermilps %s,%s,%s\n",
28513 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28514 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28516 IRTemp dataV = newTemp(Ity_V256);
28517 assign(dataV, getYMMReg(rV));
28518 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
28519 putYMMReg(rG, mkexpr(resV));
28520 *uses_vvvv = True;
28521 goto decode_success;
28523 break;
28525 case 0x0D:
28526 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
28527 if (have66noF2noF3(pfx)
28528 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
28529 UChar modrm = getUChar(delta);
28530 UInt rG = gregOfRexRM(pfx, modrm);
28531 UInt rV = getVexNvvvv(pfx);
28532 IRTemp ctrlV = newTemp(Ity_V128);
28533 if (epartIsReg(modrm)) {
28534 UInt rE = eregOfRexRM(pfx, modrm);
28535 delta += 1;
28536 DIP("vpermilpd %s,%s,%s\n",
28537 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
28538 assign(ctrlV, getXMMReg(rE));
28539 } else {
28540 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28541 delta += alen;
28542 DIP("vpermilpd %s,%s,%s\n",
28543 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
28544 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
28546 IRTemp dataV = newTemp(Ity_V128);
28547 assign(dataV, getXMMReg(rV));
28548 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
28549 putYMMRegLoAndZU(rG, mkexpr(resV));
28550 *uses_vvvv = True;
28551 goto decode_success;
28553 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
28554 if (have66noF2noF3(pfx)
28555 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28556 UChar modrm = getUChar(delta);
28557 UInt rG = gregOfRexRM(pfx, modrm);
28558 UInt rV = getVexNvvvv(pfx);
28559 IRTemp ctrlV = newTemp(Ity_V256);
28560 if (epartIsReg(modrm)) {
28561 UInt rE = eregOfRexRM(pfx, modrm);
28562 delta += 1;
28563 DIP("vpermilpd %s,%s,%s\n",
28564 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
28565 assign(ctrlV, getYMMReg(rE));
28566 } else {
28567 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28568 delta += alen;
28569 DIP("vpermilpd %s,%s,%s\n",
28570 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
28571 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
28573 IRTemp dataV = newTemp(Ity_V256);
28574 assign(dataV, getYMMReg(rV));
28575 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
28576 putYMMReg(rG, mkexpr(resV));
28577 *uses_vvvv = True;
28578 goto decode_success;
28580 break;
28582 case 0x0E:
28583 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
28584 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28585 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
28586 goto decode_success;
28588 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
28589 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28590 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
28591 goto decode_success;
28593 break;
28595 case 0x0F:
28596 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
28597 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28598 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
28599 goto decode_success;
28601 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
28602 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28603 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
28604 goto decode_success;
28606 break;
28608 case 0x13:
28609 /* VCVTPH2PS xmm2/m64, xmm1 = VEX.128.66.0F38.W0 13 /r */
28610 if (have66noF2noF3(pfx)
28611 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
28612 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28613 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/False );
28614 goto decode_success;
28617 /* VCVTPH2PS xmm2/m128, ymm1 = VEX.256.66.0F38.W0 13 /r */
28617 if (have66noF2noF3(pfx)
28618 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
28619 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
28620 delta = dis_VCVTPH2PS( vbi, pfx, delta, /*is256bit=*/True );
28621 goto decode_success;
28623 break;
28625 case 0x16:
28626 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
28627 if (have66noF2noF3(pfx)
28628 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28629 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28630 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
28631 goto decode_success;
28633 break;
28635 case 0x17:
28636 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
28637 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28638 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
28639 goto decode_success;
28641 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
28642 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28643 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
28644 goto decode_success;
28646 break;
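/* In the 0x0E, 0x0F and 0x17 cases above, the trailing argument to
   dis_xTESTy_{128,256} selects which bits feed the AND/ANDN tests
   that produce ZF and CF: 32 appears to mean "sign bits of 32-bit
   lanes" (VTESTPS), 64 "sign bits of 64-bit lanes" (VTESTPD), and
   0 "all bits of the vector" (VPTEST). */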
28648 case 0x18:
28649 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28650 if (have66noF2noF3(pfx)
28651 && 0==getVexL(pfx)/*128*/
28652 && !epartIsReg(getUChar(delta))) {
28653 UChar modrm = getUChar(delta);
28654 UInt rG = gregOfRexRM(pfx, modrm);
28655 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28656 delta += alen;
28657 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
28658 IRTemp t32 = newTemp(Ity_I32);
28659 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28660 IRTemp t64 = newTemp(Ity_I64);
28661 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28662 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28663 putYMMRegLoAndZU(rG, res);
28664 goto decode_success;
28666 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28667 if (have66noF2noF3(pfx)
28668 && 1==getVexL(pfx)/*256*/
28669 && !epartIsReg(getUChar(delta))) {
28670 UChar modrm = getUChar(delta);
28671 UInt rG = gregOfRexRM(pfx, modrm);
28672 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28673 delta += alen;
28674 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
28675 IRTemp t32 = newTemp(Ity_I32);
28676 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28677 IRTemp t64 = newTemp(Ity_I64);
28678 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28679 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28680 mkexpr(t64), mkexpr(t64));
28681 putYMMReg(rG, res);
28682 goto decode_success;
28684 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
28685 if (have66noF2noF3(pfx)
28686 && 0==getVexL(pfx)/*128*/
28687 && epartIsReg(getUChar(delta))) {
28688 UChar modrm = getUChar(delta);
28689 UInt rG = gregOfRexRM(pfx, modrm);
28690 UInt rE = eregOfRexRM(pfx, modrm);
28691 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28692 IRTemp t32 = newTemp(Ity_I32);
28693 assign(t32, getXMMRegLane32(rE, 0));
28694 IRTemp t64 = newTemp(Ity_I64);
28695 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28696 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28697 putYMMRegLoAndZU(rG, res);
28698 delta++;
28699 goto decode_success;
28701 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
28702 if (have66noF2noF3(pfx)
28703 && 1==getVexL(pfx)/*256*/
28704 && epartIsReg(getUChar(delta))) {
28705 UChar modrm = getUChar(delta);
28706 UInt rG = gregOfRexRM(pfx, modrm);
28707 UInt rE = eregOfRexRM(pfx, modrm);
28708 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28709 IRTemp t32 = newTemp(Ity_I32);
28710 assign(t32, getXMMRegLane32(rE, 0));
28711 IRTemp t64 = newTemp(Ity_I64);
28712 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28713 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28714 mkexpr(t64), mkexpr(t64));
28715 putYMMReg(rG, res);
28716 delta++;
28717 goto decode_success;
28719 break;
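/* The broadcast is built purely from widening concatenations: the
   32-bit scalar is doubled with Iop_32HLto64, then the 64-bit
   value is doubled with Iop_64HLtoV128 or fanned out four ways
   with Iop_64x4toV256, so e.g. 0x11223344 ends up replicated in
   every 32-bit lane of the destination.  The same idiom recurs in
   VBROADCASTSD (case 0x19) and VPBROADCAST{B,W,D,Q} (cases 0x78,
   0x79, 0x58, 0x59) below. */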
28721 case 0x19:
28722 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28723 if (have66noF2noF3(pfx)
28724 && 1==getVexL(pfx)/*256*/
28725 && !epartIsReg(getUChar(delta))) {
28726 UChar modrm = getUChar(delta);
28727 UInt rG = gregOfRexRM(pfx, modrm);
28728 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28729 delta += alen;
28730 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
28731 IRTemp t64 = newTemp(Ity_I64);
28732 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28733 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28734 mkexpr(t64), mkexpr(t64));
28735 putYMMReg(rG, res);
28736 goto decode_success;
28738 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
28739 if (have66noF2noF3(pfx)
28740 && 1==getVexL(pfx)/*256*/
28741 && epartIsReg(getUChar(delta))) {
28742 UChar modrm = getUChar(delta);
28743 UInt rG = gregOfRexRM(pfx, modrm);
28744 UInt rE = eregOfRexRM(pfx, modrm);
28745 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28746 IRTemp t64 = newTemp(Ity_I64);
28747 assign(t64, getXMMRegLane64(rE, 0));
28748 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28749 mkexpr(t64), mkexpr(t64));
28750 putYMMReg(rG, res);
28751 delta++;
28752 goto decode_success;
28754 break;
28756 case 0x1A:
28757 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
28758 if (have66noF2noF3(pfx)
28759 && 1==getVexL(pfx)/*256*/
28760 && !epartIsReg(getUChar(delta))) {
28761 UChar modrm = getUChar(delta);
28762 UInt rG = gregOfRexRM(pfx, modrm);
28763 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28764 delta += alen;
28765 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
28766 IRTemp t128 = newTemp(Ity_V128);
28767 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28768 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28769 goto decode_success;
28771 break;
28773 case 0x1C:
28774 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
28775 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28776 delta = dis_AVX128_E_to_G_unary(
28777 uses_vvvv, vbi, pfx, delta,
28778 "vpabsb", math_PABS_XMM_pap1 );
28779 goto decode_success;
28781 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
28782 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28783 delta = dis_AVX256_E_to_G_unary(
28784 uses_vvvv, vbi, pfx, delta,
28785 "vpabsb", math_PABS_YMM_pap1 );
28786 goto decode_success;
28788 break;
28790 case 0x1D:
28791 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
28792 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28793 delta = dis_AVX128_E_to_G_unary(
28794 uses_vvvv, vbi, pfx, delta,
28795 "vpabsw", math_PABS_XMM_pap2 );
28796 goto decode_success;
28798 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28799 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28800 delta = dis_AVX256_E_to_G_unary(
28801 uses_vvvv, vbi, pfx, delta,
28802 "vpabsw", math_PABS_YMM_pap2 );
28803 goto decode_success;
28805 break;
28807 case 0x1E:
28808 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28809 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28810 delta = dis_AVX128_E_to_G_unary(
28811 uses_vvvv, vbi, pfx, delta,
28812 "vpabsd", math_PABS_XMM_pap4 );
28813 goto decode_success;
28815 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28816 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28817 delta = dis_AVX256_E_to_G_unary(
28818 uses_vvvv, vbi, pfx, delta,
28819 "vpabsd", math_PABS_YMM_pap4 );
28820 goto decode_success;
28822 break;
28824 case 0x20:
28825 /* VPMOVSXBW xmm2/m64, xmm1 */
28826 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28827 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28828 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28829 True/*isAvx*/, False/*!xIsZ*/ );
28830 goto decode_success;
28832 /* VPMOVSXBW xmm2/m128, ymm1 */
28833 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28834 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28835 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28836 goto decode_success;
28838 break;
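/* The PMOVxX helpers are shared between the sign-extending forms
   here (cases 0x20..0x25) and the zero-extending forms at cases
   0x30..0x35 below; the xIsZ flag selects zero-extension, so the
   False value passed above gives the VPMOVSX behaviour. */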
28840 case 0x21:
28841 /* VPMOVSXBD xmm2/m32, xmm1 */
28842 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28843 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28844 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28845 True/*isAvx*/, False/*!xIsZ*/ );
28846 goto decode_success;
28848 /* VPMOVSXBD xmm2/m64, ymm1 */
28849 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28850 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28851 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28852 goto decode_success;
28854 break;
28856 case 0x22:
28857 /* VPMOVSXBQ xmm2/m16, xmm1 */
28858 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28859 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28860 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28861 goto decode_success;
28863 /* VPMOVSXBQ xmm2/m32, ymm1 */
28864 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28865 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28866 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28867 goto decode_success;
28869 break;
28871 case 0x23:
28872 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28873 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28874 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28875 True/*isAvx*/, False/*!xIsZ*/ );
28876 goto decode_success;
28878 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28879 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28880 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28881 goto decode_success;
28883 break;
28885 case 0x24:
28886 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28887 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28888 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28889 goto decode_success;
28891 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28892 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28893 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28894 goto decode_success;
28896 break;
28898 case 0x25:
28899 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28900 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28901 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28902 True/*isAvx*/, False/*!xIsZ*/ );
28903 goto decode_success;
28905 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28906 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28907 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28908 goto decode_success;
28910 break;
28912 case 0x28:
28913 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28914 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28915 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28916 uses_vvvv, vbi, pfx, delta,
28917 "vpmuldq", math_PMULDQ_128 );
28918 goto decode_success;
28920 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28921 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28922 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28923 uses_vvvv, vbi, pfx, delta,
28924 "vpmuldq", math_PMULDQ_256 );
28925 goto decode_success;
28927 break;
28929 case 0x29:
28930 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28931 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28932 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28933 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28934 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28935 goto decode_success;
28937 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28938 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28939 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28940 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28941 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28942 goto decode_success;
28944 break;
28946 case 0x2A:
28947 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28948 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28949 && !epartIsReg(getUChar(delta))) {
28950 UChar modrm = getUChar(delta);
28951 UInt rD = gregOfRexRM(pfx, modrm);
28952 IRTemp tD = newTemp(Ity_V128);
28953 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28954 delta += alen;
28955 gen_SIGNAL_if_not_16_aligned(vbi, addr);
28956 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28957 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28958 putYMMRegLoAndZU(rD, mkexpr(tD));
28959 goto decode_success;
28961 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28962 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28963 && !epartIsReg(getUChar(delta))) {
28964 UChar modrm = getUChar(delta);
28965 UInt rD = gregOfRexRM(pfx, modrm);
28966 IRTemp tD = newTemp(Ity_V256);
28967 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28968 delta += alen;
28969 gen_SIGNAL_if_not_32_aligned(vbi, addr);
28970 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28971 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28972 putYMMReg(rD, mkexpr(tD));
28973 goto decode_success;
28975 break;
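/* VMOVNTDQA is a non-temporal load; the streaming hint itself is
   not modelled, but the architectural alignment requirement on the
   source is, via gen_SIGNAL_if_not_16_aligned resp.
   gen_SIGNAL_if_not_32_aligned, which are presumed to raise the
   corresponding fault on a misaligned effective address. */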
28977 case 0x2B:
28978 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28979 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28980 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28981 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28982 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28983 Iop_QNarrowBin32Sto16Ux8, NULL,
28984 False/*!invertLeftArg*/, True/*swapArgs*/ );
28985 goto decode_success;
28987 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28988 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28989 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28990 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28991 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28992 math_VPACKUSDW_YMM );
28993 goto decode_success;
28995 break;
28997 case 0x2C:
28998 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2C /r */
28999 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29000 && 0==getRexW(pfx)/*W0*/
29001 && !epartIsReg(getUChar(delta))) {
29002 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29003 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29004 goto decode_success;
29006 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2C /r */
29007 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29008 && 0==getRexW(pfx)/*W0*/
29009 && !epartIsReg(getUChar(delta))) {
29010 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29011 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29012 goto decode_success;
29014 break;
29016 case 0x2D:
29017 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 2D /r */
29018 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29019 && 0==getRexW(pfx)/*W0*/
29020 && !epartIsReg(getUChar(delta))) {
29021 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29022 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29023 goto decode_success;
29025 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 2D /r */
29026 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29027 && 0==getRexW(pfx)/*W0*/
29028 && !epartIsReg(getUChar(delta))) {
29029 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29030 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29031 goto decode_success;
29033 break;
29035 case 0x2E:
29036 /* VMASKMOVPS xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2E /r */
29037 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29038 && 0==getRexW(pfx)/*W0*/
29039 && !epartIsReg(getUChar(delta))) {
29040 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29041 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29042 goto decode_success;
29044 /* VMASKMOVPS ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2E /r */
29045 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29046 && 0==getRexW(pfx)/*W0*/
29047 && !epartIsReg(getUChar(delta))) {
29048 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
29049 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29050 goto decode_success;
29052 break;
29054 case 0x2F:
29055 /* VMASKMOVPD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 2F /r */
29056 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29057 && 0==getRexW(pfx)/*W0*/
29058 && !epartIsReg(getUChar(delta))) {
29059 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29060 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29061 goto decode_success;
29063 /* VMASKMOVPD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 2F /r */
29064 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29065 && 0==getRexW(pfx)/*W0*/
29066 && !epartIsReg(getUChar(delta))) {
29067 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
29068 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29069 goto decode_success;
29071 break;
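/* Cases 0x2C..0x2F all funnel into dis_VMASKMOV: the first Bool
   picks 128- vs 256-bit width, the IRType (Ity_I32 vs Ity_I64)
   picks the lane size whose sign bit acts as the per-lane mask,
   and the last Bool distinguishes the load forms (2C/2D) from the
   store forms (2E/2F).  The integer variants VPMASKMOVD/Q at
   opcodes 8C and 8E below reuse the same helper. */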
29073 case 0x30:
29074 /* VPMOVZXBW xmm2/m64, xmm1 */
29075 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
29076 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29077 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
29078 True/*isAvx*/, True/*xIsZ*/ );
29079 goto decode_success;
29081 /* VPMOVZXBW xmm2/m128, ymm1 */
29082 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
29083 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29084 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
29085 goto decode_success;
29087 break;
29089 case 0x31:
29090 /* VPMOVZXBD xmm2/m32, xmm1 */
29091 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
29092 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29093 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
29094 True/*isAvx*/, True/*xIsZ*/ );
29095 goto decode_success;
29097 /* VPMOVZXBD xmm2/m64, ymm1 */
29098 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
29099 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29100 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
29101 goto decode_success;
29103 break;
29105 case 0x32:
29106 /* VPMOVZXBQ xmm2/m16, xmm1 */
29107 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
29108 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29109 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
29110 goto decode_success;
29112 /* VPMOVZXBQ xmm2/m32, ymm1 */
29113 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
29114 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29115 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
29116 goto decode_success;
29118 break;
29120 case 0x33:
29121 /* VPMOVZXWD xmm2/m64, xmm1 */
29122 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
29123 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29124 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
29125 True/*isAvx*/, True/*xIsZ*/ );
29126 goto decode_success;
29128 /* VPMOVZXWD xmm2/m128, ymm1 */
29129 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
29130 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29131 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
29132 goto decode_success;
29134 break;
29136 case 0x34:
29137 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
29138 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29139 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
29140 goto decode_success;
29142 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
29143 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29144 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
29145 goto decode_success;
29147 break;
29149 case 0x35:
29150 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
29151 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29152 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
29153 True/*isAvx*/, True/*xIsZ*/ );
29154 goto decode_success;
29156 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
29157 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29158 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
29159 goto decode_success;
29161 break;
29163 case 0x36:
29164 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
29165 if (have66noF2noF3(pfx)
29166 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29167 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
29168 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
29169 goto decode_success;
29171 break;
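/* VPERMPS (case 0x16 above) and VPERMD (this case) share
   math_VPERMD, which is consistent with both performing the same
   full-width permutation of eight 32-bit lanes keyed by the low
   bits of each control dword; only the mnemonic differs. */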
29173 case 0x37:
29174 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29175 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
29176 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29177 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29178 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
29179 goto decode_success;
29181 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
29182 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
29183 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29184 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29185 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
29186 goto decode_success;
29188 break;
29190 case 0x38:
29191 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29192 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
29193 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29194 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29195 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
29196 goto decode_success;
29198 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
29199 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
29200 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29201 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29202 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
29203 goto decode_success;
29205 break;
29207 case 0x39:
29208 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29209 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
29210 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29211 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29212 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
29213 goto decode_success;
29215 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
29216 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
29217 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29218 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29219 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
29220 goto decode_success;
29222 break;
29224 case 0x3A:
29225 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29226 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
29227 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29228 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29229 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
29230 goto decode_success;
29232 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
29233 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
29234 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29235 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29236 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
29237 goto decode_success;
29239 break;
29241 case 0x3B:
29242 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29243 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
29244 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29245 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29246 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
29247 goto decode_success;
29249 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
29250 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
29251 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29252 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29253 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
29254 goto decode_success;
29256 break;
29258 case 0x3C:
29259 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29260 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
29261 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29262 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29263 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
29264 goto decode_success;
29266 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
29267 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
29268 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29269 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29270 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
29271 goto decode_success;
29273 break;
29275 case 0x3D:
29276 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29277 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
29278 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29279 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29280 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
29281 goto decode_success;
29283 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
29284 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
29285 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29286 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29287 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
29288 goto decode_success;
29290 break;
29292 case 0x3E:
29293 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29294 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
29295 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29296 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29297 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
29298 goto decode_success;
29300 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
29301 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
29302 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29303 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29304 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
29305 goto decode_success;
29307 break;
29309 case 0x3F:
29310 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29311 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
29312 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29313 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29314 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
29315 goto decode_success;
29317 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
29318 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
29319 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29320 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29321 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
29322 goto decode_success;
29324 break;
29326 case 0x40:
29327 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29328 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
29329 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29330 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
29331 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
29332 goto decode_success;
29334 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
29335 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
29336 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29337 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
29338 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
29339 goto decode_success;
29341 break;
29343 case 0x41:
29344 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
29345 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29346 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
29347 goto decode_success;
29349 break;
29351 case 0x45:
29352 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
29353 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
29354 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29355 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
29356 Iop_Shr32, 1==getVexL(pfx) );
29357 *uses_vvvv = True;
29358 goto decode_success;
29360 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
29361 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
29362 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29363 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
29364 Iop_Shr64, 1==getVexL(pfx) );
29365 *uses_vvvv = True;
29366 goto decode_success;
29368 break;
29370 case 0x46:
29371 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
29372 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
29373 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29374 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
29375 Iop_Sar32, 1==getVexL(pfx) );
29376 *uses_vvvv = True;
29377 goto decode_success;
29379 break;
29381 case 0x47:
29382 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
29383 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
29384 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
29385 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
29386 Iop_Shl32, 1==getVexL(pfx) );
29387 *uses_vvvv = True;
29388 goto decode_success;
29390 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
29391 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
29392 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
29393 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
29394 Iop_Shl64, 1==getVexL(pfx) );
29395 *uses_vvvv = True;
29396 goto decode_success;
29398 break;
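/* The variable-shift cases 0x45, 0x46 and 0x47 pass the base IROp
   (Shr32/Shr64, Sar32, Shl32/Shl64) plus a 128-vs-256 flag to
   dis_AVX_var_shiftV_byE, which is expected to apply an
   independent per-element shift count taken from the matching lane
   of the E operand, with counts at or above the element width
   yielding zero (or a copy of the sign bit for the arithmetic
   VPSRAVD case). */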
29400 case 0x58:
29401 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
29402 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29403 && 0==getRexW(pfx)/*W0*/) {
29404 UChar modrm = getUChar(delta);
29405 UInt rG = gregOfRexRM(pfx, modrm);
29406 IRTemp t32 = newTemp(Ity_I32);
29407 if (epartIsReg(modrm)) {
29408 UInt rE = eregOfRexRM(pfx, modrm);
29409 delta++;
29410 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29411 assign(t32, getXMMRegLane32(rE, 0));
29412 } else {
29413 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29414 delta += alen;
29415 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
29416 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29418 IRTemp t64 = newTemp(Ity_I64);
29419 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29420 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29421 putYMMRegLoAndZU(rG, res);
29422 goto decode_success;
29424 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
29425 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29426 && 0==getRexW(pfx)/*W0*/) {
29427 UChar modrm = getUChar(delta);
29428 UInt rG = gregOfRexRM(pfx, modrm);
29429 IRTemp t32 = newTemp(Ity_I32);
29430 if (epartIsReg(modrm)) {
29431 UInt rE = eregOfRexRM(pfx, modrm);
29432 delta++;
29433 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29434 assign(t32, getXMMRegLane32(rE, 0));
29435 } else {
29436 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29437 delta += alen;
29438 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
29439 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
29441 IRTemp t64 = newTemp(Ity_I64);
29442 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29443 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29444 mkexpr(t64), mkexpr(t64));
29445 putYMMReg(rG, res);
29446 goto decode_success;
29448 break;
29450 case 0x59:
29451 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
29452 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29453 && 0==getRexW(pfx)/*W0*/) {
29454 UChar modrm = getUChar(delta);
29455 UInt rG = gregOfRexRM(pfx, modrm);
29456 IRTemp t64 = newTemp(Ity_I64);
29457 if (epartIsReg(modrm)) {
29458 UInt rE = eregOfRexRM(pfx, modrm);
29459 delta++;
29460 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29461 assign(t64, getXMMRegLane64(rE, 0));
29462 } else {
29463 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29464 delta += alen;
29465 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
29466 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29468 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29469 putYMMRegLoAndZU(rG, res);
29470 goto decode_success;
29472 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
29473 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29474 && 0==getRexW(pfx)/*W0*/) {
29475 UChar modrm = getUChar(delta);
29476 UInt rG = gregOfRexRM(pfx, modrm);
29477 IRTemp t64 = newTemp(Ity_I64);
29478 if (epartIsReg(modrm)) {
29479 UInt rE = eregOfRexRM(pfx, modrm);
29480 delta++;
29481 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29482 assign(t64, getXMMRegLane64(rE, 0));
29483 } else {
29484 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29485 delta += alen;
29486 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
29487 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
29489 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29490 mkexpr(t64), mkexpr(t64));
29491 putYMMReg(rG, res);
29492 goto decode_success;
29494 break;
29496 case 0x5A:
29497 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
29498 if (have66noF2noF3(pfx)
29499 && 1==getVexL(pfx)/*256*/
29500 && !epartIsReg(getUChar(delta))) {
29501 UChar modrm = getUChar(delta);
29502 UInt rG = gregOfRexRM(pfx, modrm);
29503 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29504 delta += alen;
29505 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
29506 IRTemp t128 = newTemp(Ity_V128);
29507 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
29508 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
29509 goto decode_success;
29511 break;
29513 case 0x78:
29514 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
29515 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29516 && 0==getRexW(pfx)/*W0*/) {
29517 UChar modrm = getUChar(delta);
29518 UInt rG = gregOfRexRM(pfx, modrm);
29519 IRTemp t8 = newTemp(Ity_I8);
29520 if (epartIsReg(modrm)) {
29521 UInt rE = eregOfRexRM(pfx, modrm);
29522 delta++;
29523 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29524 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29525 } else {
29526 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29527 delta += alen;
29528 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
29529 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29531 IRTemp t16 = newTemp(Ity_I16);
29532 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29533 IRTemp t32 = newTemp(Ity_I32);
29534 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29535 IRTemp t64 = newTemp(Ity_I64);
29536 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29537 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29538 putYMMRegLoAndZU(rG, res);
29539 goto decode_success;
29541 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
29542 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29543 && 0==getRexW(pfx)/*W0*/) {
29544 UChar modrm = getUChar(delta);
29545 UInt rG = gregOfRexRM(pfx, modrm);
29546 IRTemp t8 = newTemp(Ity_I8);
29547 if (epartIsReg(modrm)) {
29548 UInt rE = eregOfRexRM(pfx, modrm);
29549 delta++;
29550 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29551 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
29552 } else {
29553 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29554 delta += alen;
29555 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
29556 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
29558 IRTemp t16 = newTemp(Ity_I16);
29559 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
29560 IRTemp t32 = newTemp(Ity_I32);
29561 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29562 IRTemp t64 = newTemp(Ity_I64);
29563 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29564 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29565 mkexpr(t64), mkexpr(t64));
29566 putYMMReg(rG, res);
29567 goto decode_success;
29569 break;
29571 case 0x79:
29572 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
29573 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29574 && 0==getRexW(pfx)/*W0*/) {
29575 UChar modrm = getUChar(delta);
29576 UInt rG = gregOfRexRM(pfx, modrm);
29577 IRTemp t16 = newTemp(Ity_I16);
29578 if (epartIsReg(modrm)) {
29579 UInt rE = eregOfRexRM(pfx, modrm);
29580 delta++;
29581 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
29582 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29583 } else {
29584 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29585 delta += alen;
29586 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
29587 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29589 IRTemp t32 = newTemp(Ity_I32);
29590 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29591 IRTemp t64 = newTemp(Ity_I64);
29592 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29593 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
29594 putYMMRegLoAndZU(rG, res);
29595 goto decode_success;
29597 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
29598 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29599 && 0==getRexW(pfx)/*W0*/) {
29600 UChar modrm = getUChar(delta);
29601 UInt rG = gregOfRexRM(pfx, modrm);
29602 IRTemp t16 = newTemp(Ity_I16);
29603 if (epartIsReg(modrm)) {
29604 UInt rE = eregOfRexRM(pfx, modrm);
29605 delta++;
29606 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
29607 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
29608 } else {
29609 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
29610 delta += alen;
29611 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
29612 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
29614 IRTemp t32 = newTemp(Ity_I32);
29615 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
29616 IRTemp t64 = newTemp(Ity_I64);
29617 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
29618 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
29619 mkexpr(t64), mkexpr(t64));
29620 putYMMReg(rG, res);
29621 goto decode_success;
29623 break;
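/* VPBROADCASTB and VPBROADCASTW (cases 0x78 and 0x79) extend the
   replication idiom noted at case 0x18 down to byte and word
   granularity: the scalar is doubled step by step (8 -> 16 -> 32
   -> 64 bits) with Iop_8HLto16, Iop_16HLto32 and Iop_32HLto64
   before the final widening to 128 or 256 bits. */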
29625 case 0x8C:
29626 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
29627 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29628 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29629 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29630 /*!isYMM*/False, Ity_I32, /*isLoad*/True );
29631 goto decode_success;
29633 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
29634 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29635 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29636 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29637 /*isYMM*/True, Ity_I32, /*isLoad*/True );
29638 goto decode_success;
29640 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
29641 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29642 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29643 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29644 /*!isYMM*/False, Ity_I64, /*isLoad*/True );
29645 goto decode_success;
29647 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
29648 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29649 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29650 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29651 /*isYMM*/True, Ity_I64, /*isLoad*/True );
29652 goto decode_success;
29654 break;
29656 case 0x8E:
29657 /* VPMASKMOVD xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W0 8E /r */
29658 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29659 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29660 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29661 /*!isYMM*/False, Ity_I32, /*!isLoad*/False );
29662 goto decode_success;
29664 /* VPMASKMOVD ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W0 8E /r */
29665 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29666 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29667 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
29668 /*isYMM*/True, Ity_I32, /*!isLoad*/False );
29669 goto decode_success;
29671 /* VPMASKMOVQ xmm1, xmm2, m128 = VEX.NDS.128.66.0F38.W1 8E /r */
29672 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29673 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29674 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29675 /*!isYMM*/False, Ity_I64, /*!isLoad*/False );
29676 goto decode_success;
29678 /* VPMASKMOVQ ymm1, ymm2, m256 = VEX.NDS.256.66.0F38.W1 8E /r */
29679 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29680 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29681 delta = dis_VMASKMOV( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
29682 /*isYMM*/True, Ity_I64, /*!isLoad*/False );
29683 goto decode_success;
29685 break;
29687 case 0x90:
29688 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
29689 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29690 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29691 Long delta0 = delta;
29692 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29693 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29694 if (delta != delta0)
29695 goto decode_success;
29697 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
29698 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29699 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29700 Long delta0 = delta;
29701 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
29702 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29703 if (delta != delta0)
29704 goto decode_success;
29706 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
29707 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29708 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29709 Long delta0 = delta;
29710 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29711 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29712 if (delta != delta0)
29713 goto decode_success;
29715 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
29716 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29717 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29718 Long delta0 = delta;
29719 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
29720 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29721 if (delta != delta0)
29722 goto decode_success;
29724 break;
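/* Note the delta0 dance in the gather cases: dis_VGATHER appears
   to signal "could not decode" by returning delta unchanged, so
   only an advanced delta counts as a successful decode; otherwise
   control breaks out of the switch and presumably reaches the
   common decode-failure path. */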
29726 case 0x91:
29727 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
29728 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29729 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29730 Long delta0 = delta;
29731 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29732 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29733 if (delta != delta0)
29734 goto decode_success;
29736 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
29737 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29738 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29739 Long delta0 = delta;
29740 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
29741 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29742 if (delta != delta0)
29743 goto decode_success;
29745 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
29746 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29747 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29748 Long delta0 = delta;
29749 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29750 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29751 if (delta != delta0)
29752 goto decode_success;
29754 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
29755 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29756 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29757 Long delta0 = delta;
29758 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
29759 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29760 if (delta != delta0)
29761 goto decode_success;
29763 break;
29765 case 0x92:
29766 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
29767 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29768 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29769 Long delta0 = delta;
29770 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29771 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
29772 if (delta != delta0)
29773 goto decode_success;
29775 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
29776 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29777 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29778 Long delta0 = delta;
29779 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
29780 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
29781 if (delta != delta0)
29782 goto decode_success;
29784 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
29785 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29786 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29787 Long delta0 = delta;
29788 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29789 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
29790 if (delta != delta0)
29791 goto decode_success;
29793 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
29794 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29795 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29796 Long delta0 = delta;
29797 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
29798 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
29799 if (delta != delta0)
29800 goto decode_success;
29802 break;
29804 case 0x93:
29805 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
29806 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29807 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29808 Long delta0 = delta;
29809 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29810 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
29811 if (delta != delta0)
29812 goto decode_success;
29814 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
29815 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29816 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
29817 Long delta0 = delta;
29818 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
29819 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
29820 if (delta != delta0)
29821 goto decode_success;
29823 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
29824 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
29825 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29826 Long delta0 = delta;
29827 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29828 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
29829 if (delta != delta0)
29830 goto decode_success;
29832 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
29833 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29834 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
29835 Long delta0 = delta;
29836 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
29837 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
29838 if (delta != delta0)
29839 goto decode_success;
29841 break;
29843 case 0x96 ... 0x9F:
29844 case 0xA6 ... 0xAF:
29845 case 0xB6 ... 0xBF:
29846 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
29847 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
29848 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
29849 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
29850 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
29851 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
29852 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
29853 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
29854 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
29855 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
29856 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
29857 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
29858 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
29859 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
29860 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
29861 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
29862 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
29863 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
29864 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
29865 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
29866 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
29867 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
29868 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29869 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29870 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29871 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29872 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29873 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29874 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29875 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29876 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29877 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29878 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29879 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29880 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29881 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29882 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29883 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29884 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29885 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29886 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29887 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29888 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29889 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29890 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29891 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29892 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29893 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29894 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29895 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29896 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29897 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29898 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29899 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29900 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29901 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29902 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29903 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29904 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29905 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29906 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29907 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29908 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29909 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29910 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29911 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29912 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29913 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29914 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29915 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29916 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29917 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29918 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29919 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29920 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29921 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29922 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29923 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29924 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29925 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29926 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29927 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29928 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29929 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29930 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29931 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29932 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29933 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29934 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29935 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29936 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29937 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29938 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29939 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29940 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29941 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29942 if (have66noF2noF3(pfx)) {
29943 delta = dis_FMA( vbi, pfx, delta, opc );
29944 *uses_vvvv = True;
29945 dres->hint = Dis_HintVerbose;
29946 goto decode_success;
29948 break;
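/* All of the FMA variants listed above are handed to dis_FMA with
   just the opcode byte: the low nibble evidently encodes the
   operation (fmadd/fmsub/fnmadd/fnmsub, the addsub/subadd mixes,
   and packed vs scalar), while the high nibble (9, A or B) selects
   the 132/213/231 operand ordering; the VEX.W bit and VEX.L are
   left for dis_FMA to resolve into the PS/PD and 128/256/scalar
   forms. */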
29950 case 0xDB:
29951 case 0xDC:
29952 case 0xDD:
29953 case 0xDE:
29954 case 0xDF:
29955 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29956 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29957 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29958 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29959 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29960 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29961 delta = dis_AESx( vbi, pfx, delta, True/*isAvx*/, opc );
29962 if (opc != 0xDB) *uses_vvvv = True;
29963 goto decode_success;
29965 break;
29967 case 0xF2:
29968 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29969 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29970 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29971 Int size = getRexW(pfx) ? 8 : 4;
29972 IRType ty = szToITy(size);
29973 IRTemp dst = newTemp(ty);
29974 IRTemp src1 = newTemp(ty);
29975 IRTemp src2 = newTemp(ty);
29976 UChar rm = getUChar(delta);
29978 assign( src1, getIRegV(size,pfx) );
29979 if (epartIsReg(rm)) {
29980 assign( src2, getIRegE(size,pfx,rm) );
29981 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29982 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29983 delta++;
29984 } else {
29985 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29986 assign( src2, loadLE(ty, mkexpr(addr)) );
29987 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29988 nameIRegG(size,pfx,rm));
29989 delta += alen;
29992 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29993 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29994 mkexpr(src2) ) );
29995 putIRegG( size, pfx, rm, mkexpr(dst) );
29996 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29997 ? AMD64G_CC_OP_ANDN64
29998 : AMD64G_CC_OP_ANDN32)) );
29999 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30000 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30001 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30002 *uses_vvvv = True;
30003 goto decode_success;
30005 break;
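/* ANDN computes dst = ~src1 & src2, where src1 is the vvvv
   register operand, i.e. the operand that gets complemented.  For
   example, with src1 = 0b1100 and src2 = 0b1010 the result is
   0b0010.  The flag thunk is set to a dedicated ANDN32/ANDN64 op
   with the result in DEP1, from which SF and ZF are derived while
   OF and CF come out as zero. */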
30007 case 0xF3:
30008 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
30009 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
30010 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30011 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
30012 Int size = getRexW(pfx) ? 8 : 4;
30013 IRType ty = szToITy(size);
30014 IRTemp src = newTemp(ty);
30015 IRTemp dst = newTemp(ty);
30016 UChar rm = getUChar(delta);
30018 if (epartIsReg(rm)) {
30019 assign( src, getIRegE(size,pfx,rm) );
30020 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
30021 nameIRegV(size,pfx));
30022 delta++;
30023 } else {
30024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30025 assign( src, loadLE(ty, mkexpr(addr)) );
30026 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
30027 delta += alen;
30030 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30031 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
30032 mkexpr(src)), mkexpr(src)) );
30033 putIRegV( size, pfx, mkexpr(dst) );
30034 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30035 ? AMD64G_CC_OP_BLSI64
30036 : AMD64G_CC_OP_BLSI32)) );
30037 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30038 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30039 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30040 *uses_vvvv = True;
30041 goto decode_success;
30043 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
30044 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
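/* BLSMSK builds a mask up to and including the lowest set bit:
   dst = src ^ (src - 1), e.g. 0b10100 -> 0b00111 (and 0 -> all ones). */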
30045 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30046 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
30047 Int size = getRexW(pfx) ? 8 : 4;
30048 IRType ty = szToITy(size);
30049 IRTemp src = newTemp(ty);
30050 IRTemp dst = newTemp(ty);
30051 UChar rm = getUChar(delta);
30053 if (epartIsReg(rm)) {
30054 assign( src, getIRegE(size,pfx,rm) );
30055 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
30056 nameIRegV(size,pfx));
30057 delta++;
30058 } else {
30059 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30060 assign( src, loadLE(ty, mkexpr(addr)) );
30061 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
30062 delta += alen;
30065 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
30066 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30067 mkU(ty, 1)), mkexpr(src)) );
30068 putIRegV( size, pfx, mkexpr(dst) );
30069 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30070 ? AMD64G_CC_OP_BLSMSK64
30071 : AMD64G_CC_OP_BLSMSK32)) );
30072 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30073 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30074 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30075 *uses_vvvv = True;
30076 goto decode_success;
30078 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
30079 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
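/* BLSR clears the lowest set bit: dst = src & (src - 1),
   e.g. 0b10100 -> 0b10000 (and 0 -> 0). */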
30080 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
30081 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
30082 Int size = getRexW(pfx) ? 8 : 4;
30083 IRType ty = szToITy(size);
30084 IRTemp src = newTemp(ty);
30085 IRTemp dst = newTemp(ty);
30086 UChar rm = getUChar(delta);
30088 if (epartIsReg(rm)) {
30089 assign( src, getIRegE(size,pfx,rm) );
30090 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
30091 nameIRegV(size,pfx));
30092 delta++;
30093 } else {
30094 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30095 assign( src, loadLE(ty, mkexpr(addr)) );
30096 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
30097 delta += alen;
30100 assign( dst, binop(mkSizedOp(ty,Iop_And8),
30101 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
30102 mkU(ty, 1)), mkexpr(src)) );
30103 putIRegV( size, pfx, mkexpr(dst) );
30104 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30105 ? AMD64G_CC_OP_BLSR64
30106 : AMD64G_CC_OP_BLSR32)) );
30107 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30108 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
30109 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30110 *uses_vvvv = True;
30111 goto decode_success;
30113 break;
30115 case 0xF5:
30116 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
30117 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
30118 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30119 Int size = getRexW(pfx) ? 8 : 4;
30120 IRType ty = szToITy(size);
30121 IRTemp dst = newTemp(ty);
30122 IRTemp src1 = newTemp(ty);
30123 IRTemp src2 = newTemp(ty);
30124 IRTemp start = newTemp(Ity_I8);
30125 IRTemp cond = newTemp(Ity_I1);
30126 UChar rm = getUChar(delta);
30128 assign( src2, getIRegV(size,pfx) );
30129 if (epartIsReg(rm)) {
30130 assign( src1, getIRegE(size,pfx,rm) );
30131 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
30132 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30133 delta++;
30134 } else {
30135 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30136 assign( src1, loadLE(ty, mkexpr(addr)) );
30137 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30138 nameIRegG(size,pfx,rm));
30139 delta += alen;
30142 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
30143 assign( cond, binop(Iop_CmpLT32U,
30144 unop(Iop_8Uto32, mkexpr(start)),
30145 mkU32(8*size)) );
30146 /* if (start < opsize) {
30147 if (start == 0)
30148 dst = 0;
30149 else
30150 dst = (src1 << (opsize-start)) u>> (opsize-start);
30151 } else {
30152 dst = src1;
30153 } */
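/* E.g. with size == 4, src1 = 0xFFFFFFFF and start = 8:
   (src1 << 24) u>> 24 = 0x000000FF, i.e. bits 31..8 are zeroed;
   a start of 32 or more leaves src1 unchanged. */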
30154 assign( dst,
30155 IRExpr_ITE(
30156 mkexpr(cond),
30157 IRExpr_ITE(
30158 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
30159 mkU(ty, 0),
30160 binop(
30161 mkSizedOp(ty,Iop_Shr8),
30162 binop(
30163 mkSizedOp(ty,Iop_Shl8),
30164 mkexpr(src1),
30165 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30167 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
30170 mkexpr(src1)
30173 putIRegG( size, pfx, rm, mkexpr(dst) );
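/* Flags: BZHI sets SF/ZF from the result and CF when the start index
   is >= the operand size.  That is exactly what the BLSR thunk
   computes when DEP2 holds the 'start < opsize' condition, so the
   BLSR thunk is reused here rather than adding a new CC_OP. */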
30174 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30175 ? AMD64G_CC_OP_BLSR64
30176 : AMD64G_CC_OP_BLSR32)) );
30177 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30178 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
30179 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30180 *uses_vvvv = True;
30181 goto decode_success;
30183 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
30184 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
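/* PDEP scatters the low-order bits of the vvvv source into the bit
   positions selected by the r/m mask, lowest mask bit first; e.g.
   PDEP(src=0x57, mask=0xF0F0) = 0x5070.  A rough C model of the
   semantics (for intuition only; the real work is done by the
   amd64g_calculate_pdep helper called below):

      ULong pdep ( ULong src, ULong mask ) {
         ULong res = 0;
         for (ULong bit = 1; mask != 0; bit <<= 1) {
            ULong lowest = mask & (0 - mask);  // lowest set bit of mask
            if (src & bit) res |= lowest;
            mask &= mask - 1;                  // clear that mask bit
         }
         return res;
      }
*/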
30185 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30186 Int size = getRexW(pfx) ? 8 : 4;
30187 IRType ty = szToITy(size);
30188 IRTemp src = newTemp(ty);
30189 IRTemp mask = newTemp(ty);
30190 UChar rm = getUChar(delta);
30192 assign( src, getIRegV(size,pfx) );
30193 if (epartIsReg(rm)) {
30194 assign( mask, getIRegE(size,pfx,rm) );
30195 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
30196 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30197 delta++;
30198 } else {
30199 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30200 assign( mask, loadLE(ty, mkexpr(addr)) );
30201 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30202 nameIRegG(size,pfx,rm));
30203 delta += alen;
30206 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
30207 widenUto64(mkexpr(mask)) );
30208 putIRegG( size, pfx, rm,
30209 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30210 "amd64g_calculate_pdep",
30211 &amd64g_calculate_pdep, args)) );
30212 *uses_vvvv = True;
30213 /* Flags aren't modified. */
30214 goto decode_success;
30216 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
30217 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
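/* PEXT is the inverse operation: the bits of the vvvv source at the
   positions set in the r/m mask are gathered into the low-order bits
   of the destination; e.g. PEXT(src=0x12345678, mask=0xF0F0) = 0x57.
   Sketch of the semantics (the real work is amd64g_calculate_pext):

      ULong pext ( ULong src, ULong mask ) {
         ULong res = 0;
         for (ULong bit = 1; mask != 0; bit <<= 1) {
            ULong lowest = mask & (0 - mask);  // lowest set bit of mask
            if (src & lowest) res |= bit;
            mask &= mask - 1;                  // clear that mask bit
         }
         return res;
      }
*/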
30218 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30219 Int size = getRexW(pfx) ? 8 : 4;
30220 IRType ty = szToITy(size);
30221 IRTemp src = newTemp(ty);
30222 IRTemp mask = newTemp(ty);
30223 UChar rm = getUChar(delta);
30225 assign( src, getIRegV(size,pfx) );
30226 if (epartIsReg(rm)) {
30227 assign( mask, getIRegE(size,pfx,rm) );
30228 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
30229 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30230 delta++;
30231 } else {
30232 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30233 assign( mask, loadLE(ty, mkexpr(addr)) );
30234 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30235 nameIRegG(size,pfx,rm));
30236 delta += alen;
30239 /* First mask off bits not set in mask, they are ignored
30240 and it should be fine if they contain undefined values. */
30241 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
30242 mkexpr(src), mkexpr(mask));
30243 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
30244 widenUto64(mkexpr(mask)) );
30245 putIRegG( size, pfx, rm,
30246 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
30247 "amd64g_calculate_pext",
30248 &amd64g_calculate_pext, args)) );
30249 *uses_vvvv = True;
30250 /* Flags aren't modified. */
30251 goto decode_success;
30253 break;
30255 case 0xF6:
30256 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
30257 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
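/* MULX is an unsigned full-width multiply of RDX (or EDX) by the r/m
   operand: the low half of the product goes to the vvvv register and
   the high half to the ModRM.reg register.  Unlike MUL, no flags are
   changed.  E.g. 32-bit 0x80000000 * 0x10 gives hi = 0x8, lo = 0. */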
30258 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30259 Int size = getRexW(pfx) ? 8 : 4;
30260 IRType ty = szToITy(size);
30261 IRTemp src1 = newTemp(ty);
30262 IRTemp src2 = newTemp(ty);
30263 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
30264 UChar rm = getUChar(delta);
30266 assign( src1, getIRegRDX(size) );
30267 if (epartIsReg(rm)) {
30268 assign( src2, getIRegE(size,pfx,rm) );
30269 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
30270 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
30271 delta++;
30272 } else {
30273 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30274 assign( src2, loadLE(ty, mkexpr(addr)) );
30275 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
30276 nameIRegG(size,pfx,rm));
30277 delta += alen;
30280 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
30281 mkexpr(src1), mkexpr(src2)) );
30282 putIRegV( size, pfx,
30283 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
30284 putIRegG( size, pfx, rm,
30285 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
30286 mkexpr(res)) );
30287 *uses_vvvv = True;
30288 /* Flags aren't modified. */
30289 goto decode_success;
30291 break;
30293 case 0xF7:
30294 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
30295 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
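/* SARX, and SHLX/SHRX below, are flagless BMI2 shifts: the shift count
   comes from the vvvv register (only its low 5 or 6 bits are used,
   depending on operand size) and EFLAGS are left untouched;
   dis_SHIFTX emits the plain shift. */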
30296 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30297 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
30298 goto decode_success;
30300 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
30301 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
30302 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30303 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
30304 goto decode_success;
30306 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
30307 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
30308 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30309 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
30310 goto decode_success;
30312 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
30313 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
30314 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
30315 Int size = getRexW(pfx) ? 8 : 4;
30316 IRType ty = szToITy(size);
30317 IRTemp dst = newTemp(ty);
30318 IRTemp src1 = newTemp(ty);
30319 IRTemp src2 = newTemp(ty);
30320 IRTemp stle = newTemp(Ity_I16);
30321 IRTemp start = newTemp(Ity_I8);
30322 IRTemp len = newTemp(Ity_I8);
30323 UChar rm = getUChar(delta);
30325 assign( src2, getIRegV(size,pfx) );
30326 if (epartIsReg(rm)) {
30327 assign( src1, getIRegE(size,pfx,rm) );
30328 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
30329 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
30330 delta++;
30331 } else {
30332 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
30333 assign( src1, loadLE(ty, mkexpr(addr)) );
30334 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
30335 nameIRegG(size,pfx,rm));
30336 delta += alen;
30339 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
30340 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
30341 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
30342 /* if (start+len < opsize) {
30343 if (len != 0)
30344 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
30345 else
30346 dst = 0;
30347 } else {
30348 if (start < opsize)
30349 dst = src1 u>> start;
30350 else
30351 dst = 0;
30352 } */
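/* The start and length fields come from the low two bytes of the vvvv
   register (start = bits 7:0, len = bits 15:8).  E.g. with size == 4,
   src1 = 0x12345678, start = 8, len = 12:
   dst = (src1 << 12) u>> 20 = 0x456, i.e. bits 19..8 of src1. */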
30353 assign( dst,
30354 IRExpr_ITE(
30355 binop(Iop_CmpLT32U,
30356 binop(Iop_Add32,
30357 unop(Iop_8Uto32, mkexpr(start)),
30358 unop(Iop_8Uto32, mkexpr(len))),
30359 mkU32(8*size)),
30360 IRExpr_ITE(
30361 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
30362 mkU(ty, 0),
30363 binop(mkSizedOp(ty,Iop_Shr8),
30364 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
30365 binop(Iop_Sub8,
30366 binop(Iop_Sub8, mkU8(8*size),
30367 mkexpr(start)),
30368 mkexpr(len))),
30369 binop(Iop_Sub8, mkU8(8*size),
30370 mkexpr(len)))
30372 IRExpr_ITE(
30373 binop(Iop_CmpLT32U,
30374 unop(Iop_8Uto32, mkexpr(start)),
30375 mkU32(8*size)),
30376 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
30377 mkexpr(start)),
30378 mkU(ty, 0)
30382 putIRegG( size, pfx, rm, mkexpr(dst) );
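/* BEXTR clears CF/OF and sets ZF from the result (SF is architecturally
   undefined); the ANDN thunk computes compatible values, so it is
   reused here. */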
30383 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
30384 ? AMD64G_CC_OP_ANDN64
30385 : AMD64G_CC_OP_ANDN32)) );
30386 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
30387 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
30388 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
30389 *uses_vvvv = True;
30390 goto decode_success;
30392 break;
30394 default:
30395 break;
30399 //decode_failure:
30400 return deltaIN;
30402 decode_success:
30403 return delta;
30404 }
30406 /* operand format:
30407 * [0] = dst
30408 * [n] = srcn
30409 */
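/* For the 4-operand FMA4 forms the fourth XMM register is encoded in
   the top four bits of the trailing 'is4' immediate byte; that is the
   getUChar(delta + 1) >> 4 read in the count == 4 case below. */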
30410 static Long decode_vregW(Int count, Long delta, UChar modrm, Prefix pfx,
30411 const VexAbiInfo* vbi, IRTemp *v, UInt *dst, Int swap)
30412 {
30413 v[0] = newTemp(Ity_V128);
30414 v[1] = newTemp(Ity_V128);
30415 v[2] = newTemp(Ity_V128);
30416 v[3] = newTemp(Ity_V128);
30417 IRTemp addr = IRTemp_INVALID;
30418 Int alen = 0;
30419 HChar dis_buf[50];
30421 *dst = gregOfRexRM(pfx, modrm);
30422 assign( v[0], getXMMReg(*dst) );
30424 if ( epartIsReg( modrm ) ) {
30425 UInt ereg = eregOfRexRM(pfx, modrm);
30426 assign(swap ? v[count-1] : v[count-2], getXMMReg(ereg) );
30427 DIS(dis_buf, "%s", nameXMMReg(ereg));
30428 } else {
30429 Bool extra_byte = (getUChar(delta - 3) & 0xF) != 9;
30430 addr = disAMode(&alen, vbi, pfx, delta, dis_buf, extra_byte);
30431 assign(swap ? v[count-1] : v[count-2], loadLE(Ity_V128, mkexpr(addr)));
30432 delta += alen - 1;
30435 UInt vvvv = getVexNvvvv(pfx);
30436 switch(count) {
30437 case 2:
30438 DIP( "%s,%s", nameXMMReg(*dst), dis_buf );
30439 break;
30440 case 3:
30441 assign( swap ? v[1] : v[2], getXMMReg(vvvv) );
30442 DIP( "%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv), dis_buf );
30443 break;
30444 case 4:
30446 assign( v[1], getXMMReg(vvvv) );
30447 UInt src2 = getUChar(delta + 1) >> 4;
30448 assign( swap ? v[2] : v[3], getXMMReg(src2) );
30449 DIP( "%s,%s,%s,%s", nameXMMReg(*dst), nameXMMReg(vvvv),
30450 nameXMMReg(src2), dis_buf );
30452 break;
30454 return delta + 1;
30455 }
30457 static Long dis_FMA4 (Prefix pfx, Long delta, UChar opc,
30458 Bool* uses_vvvv, const VexAbiInfo* vbi )
30459 {
30460 UInt dst;
30461 *uses_vvvv = True;
30463 UChar modrm = getUChar(delta);
30465 Bool zero_64F = False;
30466 Bool zero_96F = False;
30467 UInt is_F32 = ((opc & 0x01) == 0x00) ? 1 : 0;
30468 Bool neg = (opc & 0xF0) == 0x70;
30469 Bool alt = (opc & 0xF0) == 0x50;
30470 Bool sub = alt ? (opc & 0x0E) != 0x0E : (opc & 0x0C) == 0x0C;
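/* FMA4 opcodes in the 0F3A map: 5C..5F are vfmaddsub/vfmsubadd ("alt"),
   68..6F vfmadd/vfmsub, 78..7F vfnmadd/vfnmsub ("neg"); within each
   group a low nibble of A/E selects the scalar SS form and B/F the
   scalar SD form, hence the zero_96F/zero_64F cases above. */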
30472 IRTemp operand[4];
30473 switch(opc & 0xF) {
30474 case 0x0A: zero_96F = (opc >> 4) != 0x05; break;
30475 case 0x0B: zero_64F = (opc >> 4) != 0x05; break;
30476 case 0x0E: zero_96F = (opc >> 4) != 0x05; break;
30477 case 0x0F: zero_64F = (opc >> 4) != 0x05; break;
30478 default: break;
30480 DIP("vfm%s", neg ? "n" : "");
30481 if(alt) DIP("%s", sub ? "add" : "sub");
30482 DIP("%s", sub ? "sub" : "add");
30483 DIP("%c ", (zero_64F || zero_96F) ? 's' : 'p');
30484 DIP("%c ", is_F32 ? 's' : 'd');
30485 delta = decode_vregW(4, delta, modrm, pfx, vbi, operand, &dst, getRexW(pfx));
30486 DIP("\n");
30487 IRExpr *src[3];
30489 void (*putXMM[2])(UInt,Int,IRExpr*) = {&putXMMRegLane64F, &putXMMRegLane32F};
30491 IROp size_op[] = {Iop_V128to64, Iop_V128HIto64, Iop_64to32, Iop_64HIto32};
30492 IROp neg_op[] = {Iop_NegF64, Iop_NegF32};
30493 int i, j;
30494 for(i = 0; i < is_F32 * 2 + 2; i++) {
30495 for(j = 0; j < 3; j++) {
30496 if(is_F32) {
30497 src[j] = unop(Iop_ReinterpI32asF32,
30498 unop(size_op[i%2+2],
30499 unop(size_op[i/2],
30500 mkexpr(operand[j + 1])
30503 } else {
30504 src[j] = unop(Iop_ReinterpI64asF64,
30505 unop(size_op[i%2],
30506 mkexpr(operand[j + 1])
30510 putXMM[is_F32](dst, i, IRExpr_Qop(is_F32 ? Iop_MAddF32 : Iop_MAddF64,
30511 get_FAKE_roundingmode(),
30512 neg ? unop(neg_op[is_F32], src[0])
30513 : src[0],
30514 src[1],
30515 sub ? unop(neg_op[is_F32], src[2])
30516 : src[2]
30518 if(alt) {
30519 sub = !sub;
30523 /* Zero out top bits of ymm/xmm register. */
30524 putYMMRegLane128( dst, 1, mkV128(0) );
30526 if(zero_64F || zero_96F) {
30527 putXMMRegLane64( dst, 1, IRExpr_Const(IRConst_U64(0)));
30530 if(zero_96F) {
30531 putXMMRegLane32( dst, 1, IRExpr_Const(IRConst_U32(0)));
30534 return delta+1;
30535 }
30537 /*------------------------------------------------------------*/
30538 /*--- ---*/
30539 /*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
30540 /*--- ---*/
30541 /*------------------------------------------------------------*/
30543 static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
30544 {
30545 vassert(imm8 < 256);
30546 IRTemp s3, s2, s1, s0;
30547 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30548 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
30549 # define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
30550 : ((_nn)==2) ? s2 : s3)
30551 IRTemp res = newTemp(Ity_V128);
30552 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
30553 SEL((imm8 >> 4) & 3),
30554 SEL((imm8 >> 2) & 3),
30555 SEL((imm8 >> 0) & 3) ));
30556 # undef SEL
30557 return res;
30558 }
30560 /* Handles 128 and 256 bit versions of VCVTPS2PH. */
30561 static Long dis_VCVTPS2PH ( const VexAbiInfo* vbi, Prefix pfx,
30562 Long delta, Bool is256bit )
30563 {
30564 /* This is a width-halving store or reg-reg move that does conversion on
30565 the transferred data. */
30566 UChar modrm = getUChar(delta);
30567 UInt rG = gregOfRexRM(pfx, modrm);
30568 IRTemp rm = newTemp(Ity_I32);
30569 IROp op = is256bit ? Iop_F32toF16x8 : Iop_F32toF16x4;
30570 IRExpr* srcG = (is256bit ? getYMMReg : getXMMReg)(rG);
30572 /* (imm & 3) contains an Intel-encoded rounding mode. Because that encoding
30573 is the same as the encoding for IRRoundingMode, we can use that value
30574 directly in the IR as a rounding mode. */
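/* Concretely: imm8 bit 2 set selects the dynamic MXCSR.RC mode;
   otherwise bits 1:0 give 0 = nearest-even, 1 = towards -inf,
   2 = towards +inf, 3 = towards zero, matching Irrm_NEAREST,
   Irrm_NegINF, Irrm_PosINF and Irrm_ZERO respectively. */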
30576 if (epartIsReg(modrm)) {
30577 UInt rE = eregOfRexRM(pfx, modrm);
30578 delta += 1;
30579 UInt imm = getUChar(delta);
30580 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30581 IRExpr* res = binop(op, mkexpr(rm), srcG);
30582 if (!is256bit)
30583 res = unop(Iop_64UtoV128, res);
30584 putYMMRegLoAndZU(rE, res);
30585 DIP("vcvtps2ph $%u,%s,%s\n",
30586 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), nameXMMReg(rE));
30587 } else {
30588 Int alen = 0;
30589 HChar dis_buf[50];
30590 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30591 delta += alen;
30592 UInt imm = getUChar(delta);
30593 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30594 IRExpr* res = binop(op, mkexpr(rm), srcG);
30595 storeLE(mkexpr(addr), res);
30596 DIP("vcvtps2ph $%u,%s,%s\n",
30597 imm, (is256bit ? nameYMMReg : nameXMMReg)(rG), dis_buf);
30599 delta++;
30600 /* doesn't use vvvv */
30601 return delta;
30602 }
30604 __attribute__((noinline))
30605 static
30606 Long dis_ESC_0F3A__VEX (
30607 /*MB_OUT*/DisResult* dres,
30608 /*OUT*/ Bool* uses_vvvv,
30609 const VexArchInfo* archinfo,
30610 const VexAbiInfo* vbi,
30611 Prefix pfx, Int sz, Long deltaIN
30612 )
30613 {
30614 IRTemp addr = IRTemp_INVALID;
30615 Int alen = 0;
30616 HChar dis_buf[50];
30617 Long delta = deltaIN;
30618 UChar opc = getUChar(delta);
30619 delta++;
30620 *uses_vvvv = False;
30622 switch (opc) {
30624 case 0x00:
30625 case 0x01:
30626 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
30627 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
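/* Destination quadword i is source quadword (imm8 >> (2*i)) & 3;
   e.g. imm8 = 0xE4 is the identity and imm8 = 0x1B reverses the four
   64-bit lanes. */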
30628 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
30629 && 1==getRexW(pfx)/*W1*/) {
30630 UChar modrm = getUChar(delta);
30631 UInt imm8 = 0;
30632 UInt rG = gregOfRexRM(pfx, modrm);
30633 IRTemp sV = newTemp(Ity_V256);
30634 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
30635 if (epartIsReg(modrm)) {
30636 UInt rE = eregOfRexRM(pfx, modrm);
30637 delta += 1;
30638 imm8 = getUChar(delta);
30639 DIP("%s $%u,%s,%s\n",
30640 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
30641 assign(sV, getYMMReg(rE));
30642 } else {
30643 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30644 delta += alen;
30645 imm8 = getUChar(delta);
30646 DIP("%s $%u,%s,%s\n",
30647 name, imm8, dis_buf, nameYMMReg(rG));
30648 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30650 delta++;
30651 IRTemp s[4];
30652 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
30653 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
30654 IRTemp dV = newTemp(Ity_V256);
30655 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30656 mkexpr(s[(imm8 >> 6) & 3]),
30657 mkexpr(s[(imm8 >> 4) & 3]),
30658 mkexpr(s[(imm8 >> 2) & 3]),
30659 mkexpr(s[(imm8 >> 0) & 3])));
30660 putYMMReg(rG, mkexpr(dV));
30661 goto decode_success;
30663 break;
30665 case 0x02:
30666 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
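/* imm8 bit i selects 32-bit lane i of the result: set = take it from
   xmm3/m128, clear = take it from the vvvv register.  The 256-bit form
   below uses all eight imm8 bits the same way. */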
30667 if (have66noF2noF3(pfx)
30668 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30669 UChar modrm = getUChar(delta);
30670 UInt imm8 = 0;
30671 UInt rG = gregOfRexRM(pfx, modrm);
30672 UInt rV = getVexNvvvv(pfx);
30673 IRTemp sV = newTemp(Ity_V128);
30674 IRTemp dV = newTemp(Ity_V128);
30675 UInt i;
30676 IRTemp s[4], d[4];
30677 assign(sV, getXMMReg(rV));
30678 if (epartIsReg(modrm)) {
30679 UInt rE = eregOfRexRM(pfx, modrm);
30680 delta += 1;
30681 imm8 = getUChar(delta);
30682 DIP("vpblendd $%u,%s,%s,%s\n",
30683 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30684 assign(dV, getXMMReg(rE));
30685 } else {
30686 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30687 delta += alen;
30688 imm8 = getUChar(delta);
30689 DIP("vpblendd $%u,%s,%s,%s\n",
30690 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30691 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
30693 delta++;
30694 for (i = 0; i < 4; i++) {
30695 s[i] = IRTemp_INVALID;
30696 d[i] = IRTemp_INVALID;
30698 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
30699 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
30700 for (i = 0; i < 4; i++)
30701 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30702 putYMMRegLane128(rG, 1, mkV128(0));
30703 *uses_vvvv = True;
30704 goto decode_success;
30706 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
30707 if (have66noF2noF3(pfx)
30708 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30709 UChar modrm = getUChar(delta);
30710 UInt imm8 = 0;
30711 UInt rG = gregOfRexRM(pfx, modrm);
30712 UInt rV = getVexNvvvv(pfx);
30713 IRTemp sV = newTemp(Ity_V256);
30714 IRTemp dV = newTemp(Ity_V256);
30715 UInt i;
30716 IRTemp s[8], d[8];
30717 assign(sV, getYMMReg(rV));
30718 if (epartIsReg(modrm)) {
30719 UInt rE = eregOfRexRM(pfx, modrm);
30720 delta += 1;
30721 imm8 = getUChar(delta);
30722 DIP("vpblendd $%u,%s,%s,%s\n",
30723 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30724 assign(dV, getYMMReg(rE));
30725 } else {
30726 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30727 delta += alen;
30728 imm8 = getUChar(delta);
30729 DIP("vpblendd $%u,%s,%s,%s\n",
30730 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30731 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
30733 delta++;
30734 for (i = 0; i < 8; i++) {
30735 s[i] = IRTemp_INVALID;
30736 d[i] = IRTemp_INVALID;
30738 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
30739 &s[3], &s[2], &s[1], &s[0] );
30740 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
30741 &d[3], &d[2], &d[1], &d[0] );
30742 for (i = 0; i < 8; i++)
30743 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
30744 *uses_vvvv = True;
30745 goto decode_success;
30747 break;
30749 case 0x04:
30750 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
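/* Each 2-bit field of imm8 picks a 32-bit source lane within its own
   128-bit half; the same imm8 is applied independently to both halves
   of the ymm register (see math_VPERMILPS_128 above). */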
30751 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30752 UChar modrm = getUChar(delta);
30753 UInt imm8 = 0;
30754 UInt rG = gregOfRexRM(pfx, modrm);
30755 IRTemp sV = newTemp(Ity_V256);
30756 if (epartIsReg(modrm)) {
30757 UInt rE = eregOfRexRM(pfx, modrm);
30758 delta += 1;
30759 imm8 = getUChar(delta);
30760 DIP("vpermilps $%u,%s,%s\n",
30761 imm8, nameYMMReg(rE), nameYMMReg(rG));
30762 assign(sV, getYMMReg(rE));
30763 } else {
30764 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30765 delta += alen;
30766 imm8 = getUChar(delta);
30767 DIP("vpermilps $%u,%s,%s\n",
30768 imm8, dis_buf, nameYMMReg(rG));
30769 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30771 delta++;
30772 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
30773 breakupV256toV128s( sV, &sVhi, &sVlo );
30774 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
30775 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
30776 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
30777 putYMMReg(rG, res);
30778 goto decode_success;
30780 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
30781 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30782 UChar modrm = getUChar(delta);
30783 UInt imm8 = 0;
30784 UInt rG = gregOfRexRM(pfx, modrm);
30785 IRTemp sV = newTemp(Ity_V128);
30786 if (epartIsReg(modrm)) {
30787 UInt rE = eregOfRexRM(pfx, modrm);
30788 delta += 1;
30789 imm8 = getUChar(delta);
30790 DIP("vpermilps $%u,%s,%s\n",
30791 imm8, nameXMMReg(rE), nameXMMReg(rG));
30792 assign(sV, getXMMReg(rE));
30793 } else {
30794 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30795 delta += alen;
30796 imm8 = getUChar(delta);
30797 DIP("vpermilps $%u,%s,%s\n",
30798 imm8, dis_buf, nameXMMReg(rG));
30799 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30801 delta++;
30802 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
30803 goto decode_success;
30805 break;
30807 case 0x05:
30808 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
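/* imm8 bit 0 selects the low or high source qword for destination
   lane 0 and bit 1 for lane 1; the 256-bit form below uses bits 2..3
   the same way within the upper 128-bit half. */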
30809 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30810 UChar modrm = getUChar(delta);
30811 UInt imm8 = 0;
30812 UInt rG = gregOfRexRM(pfx, modrm);
30813 IRTemp sV = newTemp(Ity_V128);
30814 if (epartIsReg(modrm)) {
30815 UInt rE = eregOfRexRM(pfx, modrm);
30816 delta += 1;
30817 imm8 = getUChar(delta);
30818 DIP("vpermilpd $%u,%s,%s\n",
30819 imm8, nameXMMReg(rE), nameXMMReg(rG));
30820 assign(sV, getXMMReg(rE));
30821 } else {
30822 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30823 delta += alen;
30824 imm8 = getUChar(delta);
30825 DIP("vpermilpd $%u,%s,%s\n",
30826 imm8, dis_buf, nameXMMReg(rG));
30827 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
30829 delta++;
30830 IRTemp s1 = newTemp(Ity_I64);
30831 IRTemp s0 = newTemp(Ity_I64);
30832 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
30833 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
30834 IRTemp dV = newTemp(Ity_V128);
30835 assign(dV, binop(Iop_64HLtoV128,
30836 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30837 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30838 putYMMRegLoAndZU(rG, mkexpr(dV));
30839 goto decode_success;
30841 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
30842 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30843 UChar modrm = getUChar(delta);
30844 UInt imm8 = 0;
30845 UInt rG = gregOfRexRM(pfx, modrm);
30846 IRTemp sV = newTemp(Ity_V256);
30847 if (epartIsReg(modrm)) {
30848 UInt rE = eregOfRexRM(pfx, modrm);
30849 delta += 1;
30850 imm8 = getUChar(delta);
30851 DIP("vpermilpd $%u,%s,%s\n",
30852 imm8, nameYMMReg(rE), nameYMMReg(rG));
30853 assign(sV, getYMMReg(rE));
30854 } else {
30855 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30856 delta += alen;
30857 imm8 = getUChar(delta);
30858 DIP("vpermilpd $%u,%s,%s\n",
30859 imm8, dis_buf, nameYMMReg(rG));
30860 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
30862 delta++;
30863 IRTemp s3, s2, s1, s0;
30864 s3 = s2 = s1 = s0 = IRTemp_INVALID;
30865 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
30866 IRTemp dV = newTemp(Ity_V256);
30867 assign(dV, IRExpr_Qop(Iop_64x4toV256,
30868 mkexpr((imm8 & (1<<3)) ? s3 : s2),
30869 mkexpr((imm8 & (1<<2)) ? s3 : s2),
30870 mkexpr((imm8 & (1<<1)) ? s1 : s0),
30871 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
30872 putYMMReg(rG, mkexpr(dV));
30873 goto decode_success;
30875 break;
30877 case 0x06:
30878 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
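/* Each destination 128-bit lane is chosen by a 2-bit imm8 field
   (bits 1:0 for the low lane, 5:4 for the high lane): 0/1 select the
   low/high lane of the vvvv register, 2/3 the low/high lane of
   ymm3/m256.  imm8 bits 3 and 7 instead force the corresponding
   destination lane to zero. */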
30879 if (have66noF2noF3(pfx)
30880 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30881 UChar modrm = getUChar(delta);
30882 UInt imm8 = 0;
30883 UInt rG = gregOfRexRM(pfx, modrm);
30884 UInt rV = getVexNvvvv(pfx);
30885 IRTemp s00 = newTemp(Ity_V128);
30886 IRTemp s01 = newTemp(Ity_V128);
30887 IRTemp s10 = newTemp(Ity_V128);
30888 IRTemp s11 = newTemp(Ity_V128);
30889 assign(s00, getYMMRegLane128(rV, 0));
30890 assign(s01, getYMMRegLane128(rV, 1));
30891 if (epartIsReg(modrm)) {
30892 UInt rE = eregOfRexRM(pfx, modrm);
30893 delta += 1;
30894 imm8 = getUChar(delta);
30895 DIP("vperm2f128 $%u,%s,%s,%s\n",
30896 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30897 assign(s10, getYMMRegLane128(rE, 0));
30898 assign(s11, getYMMRegLane128(rE, 1));
30899 } else {
30900 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30901 delta += alen;
30902 imm8 = getUChar(delta);
30903 DIP("vperm2f128 $%u,%s,%s,%s\n",
30904 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30905 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30906 mkexpr(addr), mkU64(0))));
30907 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30908 mkexpr(addr), mkU64(16))));
30910 delta++;
30911 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30912 : ((_nn)==2) ? s10 : s11)
30913 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30914 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30915 # undef SEL
30916 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30917 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30918 *uses_vvvv = True;
30919 goto decode_success;
30921 break;
30923 case 0x08:
30924 /* VROUNDPS imm8, xmm2/m128, xmm1 */
30925 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
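/* Only the low four imm8 bits are defined: bits 1:0 give a static
   rounding mode, bit 2 selects MXCSR.RC instead, and bit 3 is the
   suppress-precision-exception flag, which is ignored here since FP
   exceptions are not modelled. */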
30926 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30927 UChar modrm = getUChar(delta);
30928 UInt rG = gregOfRexRM(pfx, modrm);
30929 IRTemp src = newTemp(Ity_V128);
30930 IRTemp s0 = IRTemp_INVALID;
30931 IRTemp s1 = IRTemp_INVALID;
30932 IRTemp s2 = IRTemp_INVALID;
30933 IRTemp s3 = IRTemp_INVALID;
30934 IRTemp rm = newTemp(Ity_I32);
30935 Int imm = 0;
30937 modrm = getUChar(delta);
30939 if (epartIsReg(modrm)) {
30940 UInt rE = eregOfRexRM(pfx, modrm);
30941 assign( src, getXMMReg( rE ) );
30942 imm = getUChar(delta+1);
30943 if (imm & ~15) break;
30944 delta += 1+1;
30945 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30946 } else {
30947 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30948 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30949 imm = getUChar(delta+alen);
30950 if (imm & ~15) break;
30951 delta += alen+1;
30952 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30955 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30956 that encoding is the same as the encoding for IRRoundingMode,
30957 we can use that value directly in the IR as a rounding
30958 mode. */
30959 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30961 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
30962 putYMMRegLane128( rG, 1, mkV128(0) );
30963 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
30964 unop(Iop_ReinterpI32asF32, mkexpr(s)))
30965 putYMMRegLane32F( rG, 3, CVT(s3) );
30966 putYMMRegLane32F( rG, 2, CVT(s2) );
30967 putYMMRegLane32F( rG, 1, CVT(s1) );
30968 putYMMRegLane32F( rG, 0, CVT(s0) );
30969 # undef CVT
30970 goto decode_success;
30972 /* VROUNDPS imm8, ymm2/m256, ymm1 */
30973 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
30974 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30975 UChar modrm = getUChar(delta);
30976 UInt rG = gregOfRexRM(pfx, modrm);
30977 IRTemp src = newTemp(Ity_V256);
30978 IRTemp s0 = IRTemp_INVALID;
30979 IRTemp s1 = IRTemp_INVALID;
30980 IRTemp s2 = IRTemp_INVALID;
30981 IRTemp s3 = IRTemp_INVALID;
30982 IRTemp s4 = IRTemp_INVALID;
30983 IRTemp s5 = IRTemp_INVALID;
30984 IRTemp s6 = IRTemp_INVALID;
30985 IRTemp s7 = IRTemp_INVALID;
30986 IRTemp rm = newTemp(Ity_I32);
30987 Int imm = 0;
30989 modrm = getUChar(delta);
30991 if (epartIsReg(modrm)) {
30992 UInt rE = eregOfRexRM(pfx, modrm);
30993 assign( src, getYMMReg( rE ) );
30994 imm = getUChar(delta+1);
30995 if (imm & ~15) break;
30996 delta += 1+1;
30997 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30998 } else {
30999 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31000 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
31001 imm = getUChar(delta+alen);
31002 if (imm & ~15) break;
31003 delta += alen+1;
31004 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
31007 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31008 that encoding is the same as the encoding for IRRoundingMode,
31009 we can use that value directly in the IR as a rounding
31010 mode. */
31011 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31013 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
31014 # define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
31015 unop(Iop_ReinterpI32asF32, mkexpr(s)))
31016 putYMMRegLane32F( rG, 7, CVT(s7) );
31017 putYMMRegLane32F( rG, 6, CVT(s6) );
31018 putYMMRegLane32F( rG, 5, CVT(s5) );
31019 putYMMRegLane32F( rG, 4, CVT(s4) );
31020 putYMMRegLane32F( rG, 3, CVT(s3) );
31021 putYMMRegLane32F( rG, 2, CVT(s2) );
31022 putYMMRegLane32F( rG, 1, CVT(s1) );
31023 putYMMRegLane32F( rG, 0, CVT(s0) );
31024 # undef CVT
31025 goto decode_success;
31027 break;
31029 case 0x09:
31030 /* VROUNDPD imm8, xmm2/m128, xmm1 */
31031 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
31032 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31033 UChar modrm = getUChar(delta);
31034 UInt rG = gregOfRexRM(pfx, modrm);
31035 IRTemp src = newTemp(Ity_V128);
31036 IRTemp s0 = IRTemp_INVALID;
31037 IRTemp s1 = IRTemp_INVALID;
31038 IRTemp rm = newTemp(Ity_I32);
31039 Int imm = 0;
31041 modrm = getUChar(delta);
31043 if (epartIsReg(modrm)) {
31044 UInt rE = eregOfRexRM(pfx, modrm);
31045 assign( src, getXMMReg( rE ) );
31046 imm = getUChar(delta+1);
31047 if (imm & ~15) break;
31048 delta += 1+1;
31049 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
31050 } else {
31051 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31052 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
31053 imm = getUChar(delta+alen);
31054 if (imm & ~15) break;
31055 delta += alen+1;
31056 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
31059 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31060 that encoding is the same as the encoding for IRRoundingMode,
31061 we can use that value directly in the IR as a rounding
31062 mode. */
31063 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31065 breakupV128to64s( src, &s1, &s0 );
31066 putYMMRegLane128( rG, 1, mkV128(0) );
31067 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31068 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31069 putYMMRegLane64F( rG, 1, CVT(s1) );
31070 putYMMRegLane64F( rG, 0, CVT(s0) );
31071 # undef CVT
31072 goto decode_success;
31074 /* VROUNDPD imm8, ymm2/m256, ymm1 */
31075 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
31076 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31077 UChar modrm = getUChar(delta);
31078 UInt rG = gregOfRexRM(pfx, modrm);
31079 IRTemp src = newTemp(Ity_V256);
31080 IRTemp s0 = IRTemp_INVALID;
31081 IRTemp s1 = IRTemp_INVALID;
31082 IRTemp s2 = IRTemp_INVALID;
31083 IRTemp s3 = IRTemp_INVALID;
31084 IRTemp rm = newTemp(Ity_I32);
31085 Int imm = 0;
31087 modrm = getUChar(delta);
31089 if (epartIsReg(modrm)) {
31090 UInt rE = eregOfRexRM(pfx, modrm);
31091 assign( src, getYMMReg( rE ) );
31092 imm = getUChar(delta+1);
31093 if (imm & ~15) break;
31094 delta += 1+1;
31095 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
31096 } else {
31097 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31098 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
31099 imm = getUChar(delta+alen);
31100 if (imm & ~15) break;
31101 delta += alen+1;
31102 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
31105 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31106 that encoding is the same as the encoding for IRRoundingMode,
31107 we can use that value directly in the IR as a rounding
31108 mode. */
31109 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
31111 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
31112 # define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
31113 unop(Iop_ReinterpI64asF64, mkexpr(s)))
31114 putYMMRegLane64F( rG, 3, CVT(s3) );
31115 putYMMRegLane64F( rG, 2, CVT(s2) );
31116 putYMMRegLane64F( rG, 1, CVT(s1) );
31117 putYMMRegLane64F( rG, 0, CVT(s0) );
31118 # undef CVT
31119 goto decode_success;
31121 break;
31123 case 0x0A:
31124 case 0x0B:
31125 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
31126 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
31127 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
31128 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
31129 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31130 UChar modrm = getUChar(delta);
31131 UInt rG = gregOfRexRM(pfx, modrm);
31132 UInt rV = getVexNvvvv(pfx);
31133 Bool isD = opc == 0x0B;
31134 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
31135 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
31136 Int imm = 0;
31138 if (epartIsReg(modrm)) {
31139 UInt rE = eregOfRexRM(pfx, modrm);
31140 assign( src,
31141 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
31142 imm = getUChar(delta+1);
31143 if (imm & ~15) break;
31144 delta += 1+1;
31145 DIP( "vrounds%c $%d,%s,%s,%s\n",
31146 isD ? 'd' : 's',
31147 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
31148 } else {
31149 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31150 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
31151 imm = getUChar(delta+alen);
31152 if (imm & ~15) break;
31153 delta += alen+1;
31154 DIP( "vrounds%c $%d,%s,%s,%s\n",
31155 isD ? 'd' : 's',
31156 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
31159 /* (imm & 3) contains an Intel-encoded rounding mode. Because
31160 that encoding is the same as the encoding for IRRoundingMode,
31161 we can use that value directly in the IR as a rounding
31162 mode. */
31163 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
31164 (imm & 4) ? get_sse_roundingmode()
31165 : mkU32(imm & 3),
31166 mkexpr(src)) );
31168 if (isD)
31169 putXMMRegLane64F( rG, 0, mkexpr(res) );
31170 else {
31171 putXMMRegLane32F( rG, 0, mkexpr(res) );
31172 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
31174 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
31175 putYMMRegLane128( rG, 1, mkV128(0) );
31176 *uses_vvvv = True;
31177 goto decode_success;
31179 break;
31181 case 0x0C:
31182 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
31183 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
31184 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31185 UChar modrm = getUChar(delta);
31186 UInt imm8;
31187 UInt rG = gregOfRexRM(pfx, modrm);
31188 UInt rV = getVexNvvvv(pfx);
31189 IRTemp sV = newTemp(Ity_V256);
31190 IRTemp sE = newTemp(Ity_V256);
31191 assign ( sV, getYMMReg(rV) );
31192 if (epartIsReg(modrm)) {
31193 UInt rE = eregOfRexRM(pfx, modrm);
31194 delta += 1;
31195 imm8 = getUChar(delta);
31196 DIP("vblendps $%u,%s,%s,%s\n",
31197 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31198 assign(sE, getYMMReg(rE));
31199 } else {
31200 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31201 delta += alen;
31202 imm8 = getUChar(delta);
31203 DIP("vblendps $%u,%s,%s,%s\n",
31204 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31205 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31207 delta++;
31208 putYMMReg( rG,
31209 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
31210 *uses_vvvv = True;
31211 goto decode_success;
31213 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
31214 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
31215 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31216 UChar modrm = getUChar(delta);
31217 UInt imm8;
31218 UInt rG = gregOfRexRM(pfx, modrm);
31219 UInt rV = getVexNvvvv(pfx);
31220 IRTemp sV = newTemp(Ity_V128);
31221 IRTemp sE = newTemp(Ity_V128);
31222 assign ( sV, getXMMReg(rV) );
31223 if (epartIsReg(modrm)) {
31224 UInt rE = eregOfRexRM(pfx, modrm);
31225 delta += 1;
31226 imm8 = getUChar(delta);
31227 DIP("vblendps $%u,%s,%s,%s\n",
31228 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31229 assign(sE, getXMMReg(rE));
31230 } else {
31231 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31232 delta += alen;
31233 imm8 = getUChar(delta);
31234 DIP("vblendps $%u,%s,%s,%s\n",
31235 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31236 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31238 delta++;
31239 putYMMRegLoAndZU( rG,
31240 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
31241 *uses_vvvv = True;
31242 goto decode_success;
31244 break;
31246 case 0x0D:
31247 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
31248 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
31249 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31250 UChar modrm = getUChar(delta);
31251 UInt imm8;
31252 UInt rG = gregOfRexRM(pfx, modrm);
31253 UInt rV = getVexNvvvv(pfx);
31254 IRTemp sV = newTemp(Ity_V256);
31255 IRTemp sE = newTemp(Ity_V256);
31256 assign ( sV, getYMMReg(rV) );
31257 if (epartIsReg(modrm)) {
31258 UInt rE = eregOfRexRM(pfx, modrm);
31259 delta += 1;
31260 imm8 = getUChar(delta);
31261 DIP("vblendpd $%u,%s,%s,%s\n",
31262 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31263 assign(sE, getYMMReg(rE));
31264 } else {
31265 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31266 delta += alen;
31267 imm8 = getUChar(delta);
31268 DIP("vblendpd $%u,%s,%s,%s\n",
31269 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31270 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31272 delta++;
31273 putYMMReg( rG,
31274 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
31275 *uses_vvvv = True;
31276 goto decode_success;
31278 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
31279 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
31280 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31281 UChar modrm = getUChar(delta);
31282 UInt imm8;
31283 UInt rG = gregOfRexRM(pfx, modrm);
31284 UInt rV = getVexNvvvv(pfx);
31285 IRTemp sV = newTemp(Ity_V128);
31286 IRTemp sE = newTemp(Ity_V128);
31287 assign ( sV, getXMMReg(rV) );
31288 if (epartIsReg(modrm)) {
31289 UInt rE = eregOfRexRM(pfx, modrm);
31290 delta += 1;
31291 imm8 = getUChar(delta);
31292 DIP("vblendpd $%u,%s,%s,%s\n",
31293 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31294 assign(sE, getXMMReg(rE));
31295 } else {
31296 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31297 delta += alen;
31298 imm8 = getUChar(delta);
31299 DIP("vblendpd $%u,%s,%s,%s\n",
31300 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31301 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31303 delta++;
31304 putYMMRegLoAndZU( rG,
31305 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
31306 *uses_vvvv = True;
31307 goto decode_success;
31309 break;
31311 case 0x0E:
31312 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
31313 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
31314 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31315 UChar modrm = getUChar(delta);
31316 UInt imm8;
31317 UInt rG = gregOfRexRM(pfx, modrm);
31318 UInt rV = getVexNvvvv(pfx);
31319 IRTemp sV = newTemp(Ity_V128);
31320 IRTemp sE = newTemp(Ity_V128);
31321 assign ( sV, getXMMReg(rV) );
31322 if (epartIsReg(modrm)) {
31323 UInt rE = eregOfRexRM(pfx, modrm);
31324 delta += 1;
31325 imm8 = getUChar(delta);
31326 DIP("vpblendw $%u,%s,%s,%s\n",
31327 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
31328 assign(sE, getXMMReg(rE));
31329 } else {
31330 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31331 delta += alen;
31332 imm8 = getUChar(delta);
31333 DIP("vpblendw $%u,%s,%s,%s\n",
31334 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
31335 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
31337 delta++;
31338 putYMMRegLoAndZU( rG,
31339 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
31340 *uses_vvvv = True;
31341 goto decode_success;
31343 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
31344 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
31345 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31346 UChar modrm = getUChar(delta);
31347 UInt imm8;
31348 UInt rG = gregOfRexRM(pfx, modrm);
31349 UInt rV = getVexNvvvv(pfx);
31350 IRTemp sV = newTemp(Ity_V256);
31351 IRTemp sE = newTemp(Ity_V256);
31352 IRTemp sVhi, sVlo, sEhi, sElo;
31353 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
31354 assign ( sV, getYMMReg(rV) );
31355 if (epartIsReg(modrm)) {
31356 UInt rE = eregOfRexRM(pfx, modrm);
31357 delta += 1;
31358 imm8 = getUChar(delta);
31359 DIP("vpblendw $%u,%s,%s,%s\n",
31360 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31361 assign(sE, getYMMReg(rE));
31362 } else {
31363 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31364 delta += alen;
31365 imm8 = getUChar(delta);
31366 DIP("vpblendw $%u,%s,%s,%s\n",
31367 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31368 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
31370 delta++;
31371 breakupV256toV128s( sV, &sVhi, &sVlo );
31372 breakupV256toV128s( sE, &sEhi, &sElo );
31373 putYMMReg( rG, binop( Iop_V128HLtoV256,
31374 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
31375 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
31376 *uses_vvvv = True;
31377 goto decode_success;
31379 break;
31381 case 0x0F:
31382 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
31383 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
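/* Concatenates the two 128-bit sources and extracts a byte-shifted
   128-bit window (imm8 = shift amount in bytes); the 256-bit form
   below performs the same operation independently on each 128-bit
   half. */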
31384 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31385 UChar modrm = getUChar(delta);
31386 UInt rG = gregOfRexRM(pfx, modrm);
31387 UInt rV = getVexNvvvv(pfx);
31388 IRTemp sV = newTemp(Ity_V128);
31389 IRTemp dV = newTemp(Ity_V128);
31390 UInt imm8;
31392 assign( dV, getXMMReg(rV) );
31394 if ( epartIsReg( modrm ) ) {
31395 UInt rE = eregOfRexRM(pfx, modrm);
31396 assign( sV, getXMMReg(rE) );
31397 imm8 = getUChar(delta+1);
31398 delta += 1+1;
31399 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameXMMReg(rE),
31400 nameXMMReg(rV), nameXMMReg(rG));
31401 } else {
31402 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31403 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
31404 imm8 = getUChar(delta+alen);
31405 delta += alen+1;
31406 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31407 nameXMMReg(rV), nameXMMReg(rG));
31410 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
31411 putYMMRegLoAndZU( rG, mkexpr(res) );
31412 *uses_vvvv = True;
31413 goto decode_success;
31415 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
31416 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
31417 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31418 UChar modrm = getUChar(delta);
31419 UInt rG = gregOfRexRM(pfx, modrm);
31420 UInt rV = getVexNvvvv(pfx);
31421 IRTemp sV = newTemp(Ity_V256);
31422 IRTemp dV = newTemp(Ity_V256);
31423 IRTemp sHi, sLo, dHi, dLo;
31424 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31425 UInt imm8;
31427 assign( dV, getYMMReg(rV) );
31429 if ( epartIsReg( modrm ) ) {
31430 UInt rE = eregOfRexRM(pfx, modrm);
31431 assign( sV, getYMMReg(rE) );
31432 imm8 = getUChar(delta+1);
31433 delta += 1+1;
31434 DIP("vpalignr $%u,%s,%s,%s\n", imm8, nameYMMReg(rE),
31435 nameYMMReg(rV), nameYMMReg(rG));
31436 } else {
31437 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31438 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
31439 imm8 = getUChar(delta+alen);
31440 delta += alen+1;
31441 DIP("vpalignr $%u,%s,%s,%s\n", imm8, dis_buf,
31442 nameYMMReg(rV), nameYMMReg(rG));
31445 breakupV256toV128s( dV, &dHi, &dLo );
31446 breakupV256toV128s( sV, &sHi, &sLo );
31447 putYMMReg( rG, binop( Iop_V128HLtoV256,
31448 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
31449 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
31451 *uses_vvvv = True;
31452 goto decode_success;
31454 break;
31456 case 0x14:
31457 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
31458 if (have66noF2noF3(pfx)
31459 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31460 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
31461 goto decode_success;
31463 break;
31465 case 0x15:
31466 /* VPEXTRW imm8, reg/m16, xmm2 */
31467 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
31468 if (have66noF2noF3(pfx)
31469 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31470 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
31471 goto decode_success;
31473 break;
31475 case 0x16:
31476 /* VPEXTRD imm8, r32/m32, xmm2 */
31477 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
31478 if (have66noF2noF3(pfx)
31479 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31480 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
31481 goto decode_success;
31483 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
31484 if (have66noF2noF3(pfx)
31485 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31486 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
31487 goto decode_success;
31489 break;
31491 case 0x17:
31492 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
31493 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31494 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
31495 goto decode_success;
31497 break;
31499 case 0x18:
31500 /* VINSERTF128 r/m, rV, rD
31501 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31502 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
31503 if (have66noF2noF3(pfx)
31504 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31505 UChar modrm = getUChar(delta);
31506 UInt ib = 0;
31507 UInt rG = gregOfRexRM(pfx, modrm);
31508 UInt rV = getVexNvvvv(pfx);
31509 IRTemp t128 = newTemp(Ity_V128);
31510 if (epartIsReg(modrm)) {
31511 UInt rE = eregOfRexRM(pfx, modrm);
31512 delta += 1;
31513 assign(t128, getXMMReg(rE));
31514 ib = getUChar(delta);
31515 DIP("vinsertf128 $%u,%s,%s,%s\n",
31516 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31517 } else {
31518 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31519 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31520 delta += alen;
31521 ib = getUChar(delta);
31522 DIP("vinsertf128 $%u,%s,%s,%s\n",
31523 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31525 delta++;
31526 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31527 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31528 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31529 *uses_vvvv = True;
31530 goto decode_success;
31532 break;
31534 case 0x19:
31535 /* VEXTRACTF128 $lane_no, rS, r/m
31536 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31537 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
31538 if (have66noF2noF3(pfx)
31539 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31540 UChar modrm = getUChar(delta);
31541 UInt ib = 0;
31542 UInt rS = gregOfRexRM(pfx, modrm);
31543 IRTemp t128 = newTemp(Ity_V128);
31544 if (epartIsReg(modrm)) {
31545 UInt rD = eregOfRexRM(pfx, modrm);
31546 delta += 1;
31547 ib = getUChar(delta);
31548 assign(t128, getYMMRegLane128(rS, ib & 1));
31549 putYMMRegLoAndZU(rD, mkexpr(t128));
31550 DIP("vextractf128 $%u,%s,%s\n",
31551 ib, nameXMMReg(rS), nameYMMReg(rD));
31552 } else {
31553 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31554 delta += alen;
31555 ib = getUChar(delta);
31556 assign(t128, getYMMRegLane128(rS, ib & 1));
31557 storeLE(mkexpr(addr), mkexpr(t128));
31558 DIP("vextractf128 $%u,%s,%s\n",
31559 ib, nameYMMReg(rS), dis_buf);
31561 delta++;
31562 /* doesn't use vvvv */
31563 goto decode_success;
31565 break;
31567 case 0x1D:
31568 /* VCVTPS2PH imm8, xmm2, xmm1/m64 = VEX.128.66.0F3A.W0 1D /r ib */
31569 if (have66noF2noF3(pfx)
31570 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/
31571 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31572 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/False );
31573 goto decode_success;
31575 /* VCVTPS2PH imm8, ymm2, ymm1/m128 = VEX.256.66.0F3A.W0 1D /r ib */
31576 if (have66noF2noF3(pfx)
31577 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/
31578 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_F16C)) {
31579 delta = dis_VCVTPS2PH( vbi, pfx, delta, /*is256bit=*/True );
31580 goto decode_success;
31582 break;
31584 case 0x20:
31585 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
31586 if (have66noF2noF3(pfx)
31587 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31588 UChar modrm = getUChar(delta);
31589 UInt rG = gregOfRexRM(pfx, modrm);
31590 UInt rV = getVexNvvvv(pfx);
31591 Int imm8;
31592 IRTemp src_u8 = newTemp(Ity_I8);
31594 if ( epartIsReg( modrm ) ) {
31595 UInt rE = eregOfRexRM(pfx,modrm);
31596 imm8 = (Int)(getUChar(delta+1) & 15);
31597 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
31598 delta += 1+1;
31599 DIP( "vpinsrb $%d,%s,%s,%s\n",
31600 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31601 } else {
31602 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31603 imm8 = (Int)(getUChar(delta+alen) & 15);
31604 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
31605 delta += alen+1;
31606 DIP( "vpinsrb $%d,%s,%s,%s\n",
31607 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31610 IRTemp src_vec = newTemp(Ity_V128);
31611 assign(src_vec, getXMMReg( rV ));
31612 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
31613 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31614 *uses_vvvv = True;
31615 goto decode_success;
31617 break;
31619 case 0x21:
31620 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
31621 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
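/* imm8 layout: bits 7:6 pick the source lane when the source is a
   register (when it is memory a single 32-bit value is loaded),
   bits 5:4 pick the destination lane, and bits 3:0 are a zero mask;
   math_INSERTPS applies the last two. */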
31622 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31623 UChar modrm = getUChar(delta);
31624 UInt rG = gregOfRexRM(pfx, modrm);
31625 UInt rV = getVexNvvvv(pfx);
31626 UInt imm8;
31627 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
31628 const IRTemp inval = IRTemp_INVALID;
31630 if ( epartIsReg( modrm ) ) {
31631 UInt rE = eregOfRexRM(pfx, modrm);
31632 IRTemp vE = newTemp(Ity_V128);
31633 assign( vE, getXMMReg(rE) );
31634 IRTemp dsE[4] = { inval, inval, inval, inval };
31635 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
31636 imm8 = getUChar(delta+1);
31637 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
31638 delta += 1+1;
31639 DIP( "insertps $%u, %s,%s\n",
31640 imm8, nameXMMReg(rE), nameXMMReg(rG) );
31641 } else {
31642 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31643 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
31644 imm8 = getUChar(delta+alen);
31645 delta += alen+1;
31646 DIP( "insertps $%u, %s,%s\n",
31647 imm8, dis_buf, nameXMMReg(rG) );
31650 IRTemp vV = newTemp(Ity_V128);
31651 assign( vV, getXMMReg(rV) );
31653 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
31654 *uses_vvvv = True;
31655 goto decode_success;
31657 break;
31659 case 0x22:
31660 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
31661 if (have66noF2noF3(pfx)
31662 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
31663 UChar modrm = getUChar(delta);
31664 UInt rG = gregOfRexRM(pfx, modrm);
31665 UInt rV = getVexNvvvv(pfx);
31666 Int imm8_10;
31667 IRTemp src_u32 = newTemp(Ity_I32);
31669 if ( epartIsReg( modrm ) ) {
31670 UInt rE = eregOfRexRM(pfx,modrm);
31671 imm8_10 = (Int)(getUChar(delta+1) & 3);
31672 assign( src_u32, getIReg32( rE ) );
31673 delta += 1+1;
31674 DIP( "vpinsrd $%d,%s,%s,%s\n",
31675 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
31676 } else {
31677 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31678 imm8_10 = (Int)(getUChar(delta+alen) & 3);
31679 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
31680 delta += alen+1;
31681 DIP( "vpinsrd $%d,%s,%s,%s\n",
31682 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31685 IRTemp src_vec = newTemp(Ity_V128);
31686 assign(src_vec, getXMMReg( rV ));
31687 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
31688 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31689 *uses_vvvv = True;
31690 goto decode_success;
31692 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
31693 if (have66noF2noF3(pfx)
31694 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
31695 UChar modrm = getUChar(delta);
31696 UInt rG = gregOfRexRM(pfx, modrm);
31697 UInt rV = getVexNvvvv(pfx);
31698 Int imm8_0;
31699 IRTemp src_u64 = newTemp(Ity_I64);
31701 if ( epartIsReg( modrm ) ) {
31702 UInt rE = eregOfRexRM(pfx,modrm);
31703 imm8_0 = (Int)(getUChar(delta+1) & 1);
31704 assign( src_u64, getIReg64( rE ) );
31705 delta += 1+1;
31706 DIP( "vpinsrq $%d,%s,%s,%s\n",
31707 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
31708 } else {
31709 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31710 imm8_0 = (Int)(getUChar(delta+alen) & 1);
31711 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
31712 delta += alen+1;
31713 DIP( "vpinsrq $%d,%s,%s,%s\n",
31714 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31717 IRTemp src_vec = newTemp(Ity_V128);
31718 assign(src_vec, getXMMReg( rV ));
31719 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
31720 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31721 *uses_vvvv = True;
31722 goto decode_success;
31724 break;
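// Editor's sketch (illustrative only, not part of the decoder): the
// lane insertion that math_PINSRD_128 / math_PINSRQ_128 express in IR,
// modelled on plain C arrays.  Only the low bits of the immediate are
// meaningful, which is why the decoder masks with "& 3" (dword lanes)
// and "& 1" (qword lanes).  The helper names are hypothetical.
//
//    #include <stdint.h>
//
//    static void pinsrd_ref ( uint32_t res[4], const uint32_t src[4],
//                             uint32_t u32, uint8_t imm8 )
//    {
//       for (int i = 0; i < 4; i++) res[i] = src[i];
//       res[imm8 & 3] = u32;        /* overwrite the selected dword */
//    }
//
//    static void pinsrq_ref ( uint64_t res[2], const uint64_t src[2],
//                             uint64_t u64, uint8_t imm8 )
//    {
//       res[0] = src[0];  res[1] = src[1];
//       res[imm8 & 1] = u64;        /* overwrite the selected qword */
//    }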
31726 case 0x38:
31727 /* VINSERTI128 r/m, rV, rD
31728 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
31729 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
31730 if (have66noF2noF3(pfx)
31731 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31732 UChar modrm = getUChar(delta);
31733 UInt ib = 0;
31734 UInt rG = gregOfRexRM(pfx, modrm);
31735 UInt rV = getVexNvvvv(pfx);
31736 IRTemp t128 = newTemp(Ity_V128);
31737 if (epartIsReg(modrm)) {
31738 UInt rE = eregOfRexRM(pfx, modrm);
31739 delta += 1;
31740 assign(t128, getXMMReg(rE));
31741 ib = getUChar(delta);
31742 DIP("vinserti128 $%u,%s,%s,%s\n",
31743 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
31744 } else {
31745 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31746 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
31747 delta += alen;
31748 ib = getUChar(delta);
31749 DIP("vinserti128 $%u,%s,%s,%s\n",
31750 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
31752 delta++;
31753 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
31754 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
31755 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
31756 *uses_vvvv = True;
31757 goto decode_success;
31759 break;
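// Editor's sketch (illustrative only, not part of the decoder):
// VINSERTI128 copies rV to rG and then overwrites one 128-bit lane with
// the E operand, the lane being chosen by bit 0 of the immediate --
// exactly what the three putYMMRegLane128 calls above express.  The
// helper name vinserti128_ref is hypothetical.
//
//    #include <stdint.h>
//    #include <string.h>
//
//    static void vinserti128_ref ( uint8_t dst[32], const uint8_t v[32],
//                                  const uint8_t e128[16], uint8_t ib )
//    {
//       memcpy(dst, v, 32);                    /* dst = rV            */
//       memcpy(dst + 16*(ib & 1), e128, 16);   /* overwrite one lane  */
//    }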
31761 case 0x39:
31762 /* VEXTRACTI128 $lane_no, rS, r/m
31763 ::: r/m:V128 = a lane of rS:V256 (RM format) */
31764 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
31765 if (have66noF2noF3(pfx)
31766 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
31767 UChar modrm = getUChar(delta);
31768 UInt ib = 0;
31769 UInt rS = gregOfRexRM(pfx, modrm);
31770 IRTemp t128 = newTemp(Ity_V128);
31771 if (epartIsReg(modrm)) {
31772 UInt rD = eregOfRexRM(pfx, modrm);
31773 delta += 1;
31774 ib = getUChar(delta);
31775 assign(t128, getYMMRegLane128(rS, ib & 1));
31776 putYMMRegLoAndZU(rD, mkexpr(t128));
31777 DIP("vextracti128 $%u,%s,%s\n",
31778 ib, nameYMMReg(rS), nameXMMReg(rD));
31779 } else {
31780 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31781 delta += alen;
31782 ib = getUChar(delta);
31783 assign(t128, getYMMRegLane128(rS, ib & 1));
31784 storeLE(mkexpr(addr), mkexpr(t128));
31785 DIP("vextracti128 $%u,%s,%s\n",
31786 ib, nameYMMReg(rS), dis_buf);
31788 delta++;
31789 /* doesn't use vvvv */
31790 goto decode_success;
31792 break;
31794 case 0x40:
31795 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
31796 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31797 UChar modrm = getUChar(delta);
31798 UInt rG = gregOfRexRM(pfx, modrm);
31799 UInt rV = getVexNvvvv(pfx);
31800 IRTemp dst_vec = newTemp(Ity_V128);
31801 Int imm8;
31802 if (epartIsReg( modrm )) {
31803 UInt rE = eregOfRexRM(pfx,modrm);
31804 imm8 = (Int)getUChar(delta+1);
31805 assign( dst_vec, getXMMReg( rE ) );
31806 delta += 1+1;
31807 DIP( "vdpps $%d,%s,%s,%s\n",
31808 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31809 } else {
31810 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31811 imm8 = (Int)getUChar(delta+alen);
31812 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31813 delta += alen+1;
31814 DIP( "vdpps $%d,%s,%s,%s\n",
31815 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31818 IRTemp src_vec = newTemp(Ity_V128);
31819 assign(src_vec, getXMMReg( rV ));
31820 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
31821 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31822 *uses_vvvv = True;
31823 goto decode_success;
31825 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
31826 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31827 UChar modrm = getUChar(delta);
31828 UInt rG = gregOfRexRM(pfx, modrm);
31829 UInt rV = getVexNvvvv(pfx);
31830 IRTemp dst_vec = newTemp(Ity_V256);
31831 Int imm8;
31832 if (epartIsReg( modrm )) {
31833 UInt rE = eregOfRexRM(pfx,modrm);
31834 imm8 = (Int)getUChar(delta+1);
31835 assign( dst_vec, getYMMReg( rE ) );
31836 delta += 1+1;
31837 DIP( "vdpps $%d,%s,%s,%s\n",
31838 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31839 } else {
31840 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31841 imm8 = (Int)getUChar(delta+alen);
31842 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31843 delta += alen+1;
31844 DIP( "vdpps $%d,%s,%s,%s\n",
31845 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31848 IRTemp src_vec = newTemp(Ity_V256);
31849 assign(src_vec, getYMMReg( rV ));
31850 IRTemp s0, s1, d0, d1;
31851 s0 = s1 = d0 = d1 = IRTemp_INVALID;
31852 breakupV256toV128s( dst_vec, &d1, &d0 );
31853 breakupV256toV128s( src_vec, &s1, &s0 );
31854 putYMMReg( rG, binop( Iop_V128HLtoV256,
31855 mkexpr( math_DPPS_128(s1, d1, imm8) ),
31856 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
31857 *uses_vvvv = True;
31858 goto decode_success;
31860 break;
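// Editor's sketch (illustrative only, not part of the decoder): what
// math_DPPS_128 computes for one 128-bit lane, modelled in plain C on
// four floats.  It ignores the exact evaluation order and rounding of
// the hardware instruction; dpps128_ref is a hypothetical name.  The
// 256-bit form above simply applies this per 128-bit lane with the same
// immediate, and VDPPD (case 0x41 below) is the two-lane double-
// precision analogue.
//
//    #include <stdint.h>
//
//    static void dpps128_ref ( float res[4], const float s[4],
//                              const float d[4], uint8_t imm8 )
//    {
//       /* imm8[7:4]: which element pairs contribute to the dot product. */
//       float dot = 0.0f;
//       for (int i = 0; i < 4; i++)
//          if (imm8 & (0x10 << i)) dot += s[i] * d[i];
//       /* imm8[3:0]: which result lanes receive the dot product. */
//       for (int i = 0; i < 4; i++)
//          res[i] = (imm8 & (1 << i)) ? dot : 0.0f;
//    }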
31862 case 0x41:
31863 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
31864 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31865 UChar modrm = getUChar(delta);
31866 UInt rG = gregOfRexRM(pfx, modrm);
31867 UInt rV = getVexNvvvv(pfx);
31868 IRTemp dst_vec = newTemp(Ity_V128);
31869 Int imm8;
31870 if (epartIsReg( modrm )) {
31871 UInt rE = eregOfRexRM(pfx,modrm);
31872 imm8 = (Int)getUChar(delta+1);
31873 assign( dst_vec, getXMMReg( rE ) );
31874 delta += 1+1;
31875 DIP( "vdppd $%d,%s,%s,%s\n",
31876 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31877 } else {
31878 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
31879 imm8 = (Int)getUChar(delta+alen);
31880 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31881 delta += alen+1;
31882 DIP( "vdppd $%d,%s,%s,%s\n",
31883 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31886 IRTemp src_vec = newTemp(Ity_V128);
31887 assign(src_vec, getXMMReg( rV ));
31888 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
31889 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
31890 *uses_vvvv = True;
31891 goto decode_success;
31893 break;
31895 case 0x42:
31896 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
31897 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
31898 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31899 UChar modrm = getUChar(delta);
31900 Int imm8;
31901 IRTemp src_vec = newTemp(Ity_V128);
31902 IRTemp dst_vec = newTemp(Ity_V128);
31903 UInt rG = gregOfRexRM(pfx, modrm);
31904 UInt rV = getVexNvvvv(pfx);
31906 assign( dst_vec, getXMMReg(rV) );
31908 if ( epartIsReg( modrm ) ) {
31909 UInt rE = eregOfRexRM(pfx, modrm);
31911 imm8 = (Int)getUChar(delta+1);
31912 assign( src_vec, getXMMReg(rE) );
31913 delta += 1+1;
31914 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31915 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31916 } else {
31917 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31918 1/* imm8 is 1 byte after the amode */ );
31919 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
31920 imm8 = (Int)getUChar(delta+alen);
31921 delta += alen+1;
31922 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31923 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
31926 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
31927 src_vec, imm8) ) );
31928 *uses_vvvv = True;
31929 goto decode_success;
31931 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
31932 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
31933 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31934 UChar modrm = getUChar(delta);
31935 Int imm8;
31936 IRTemp src_vec = newTemp(Ity_V256);
31937 IRTemp dst_vec = newTemp(Ity_V256);
31938 UInt rG = gregOfRexRM(pfx, modrm);
31939 UInt rV = getVexNvvvv(pfx);
31940 IRTemp sHi, sLo, dHi, dLo;
31941 sHi = sLo = dHi = dLo = IRTemp_INVALID;
31943 assign( dst_vec, getYMMReg(rV) );
31945 if ( epartIsReg( modrm ) ) {
31946 UInt rE = eregOfRexRM(pfx, modrm);
31948 imm8 = (Int)getUChar(delta+1);
31949 assign( src_vec, getYMMReg(rE) );
31950 delta += 1+1;
31951 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31952 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
31953 } else {
31954 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31955 1/* imm8 is 1 byte after the amode */ );
31956 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
31957 imm8 = (Int)getUChar(delta+alen);
31958 delta += alen+1;
31959 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
31960 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
31963 breakupV256toV128s( dst_vec, &dHi, &dLo );
31964 breakupV256toV128s( src_vec, &sHi, &sLo );
31965 putYMMReg( rG, binop( Iop_V128HLtoV256,
31966 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
31967 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
31968 *uses_vvvv = True;
31969 goto decode_success;
31971 break;
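// Editor's sketch (illustrative only, not part of the decoder): for the
// 256-bit VMPSADBW the immediate carries two independent 3-bit control
// fields, bits [2:0] for the low 128-bit lane and bits [5:3] for the
// high lane, which is why the high half above is handed "imm8 >> 3".
// split_mpsadbw_imm is a hypothetical name.
//
//    #include <stdint.h>
//
//    static void split_mpsadbw_imm ( uint8_t imm8,
//                                    uint8_t* imm_lo, uint8_t* imm_hi )
//    {
//       *imm_lo = imm8 & 7;         /* bits [2:0]: low 128-bit lane  */
//       *imm_hi = (imm8 >> 3) & 7;  /* bits [5:3]: high 128-bit lane */
//    }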
31973 case 0x44:
31974 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
31975 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
31976 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
31977 * Carry-less multiplication of selected XMM quadwords into XMM
31978 * registers (a.k.a. multiplication of polynomials over GF(2)) */
31980 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31981 UChar modrm = getUChar(delta);
31982 Int imm8;
31983 IRTemp sV = newTemp(Ity_V128);
31984 IRTemp dV = newTemp(Ity_V128);
31985 UInt rG = gregOfRexRM(pfx, modrm);
31986 UInt rV = getVexNvvvv(pfx);
31988 assign( dV, getXMMReg(rV) );
31990 if ( epartIsReg( modrm ) ) {
31991 UInt rE = eregOfRexRM(pfx, modrm);
31992 imm8 = (Int)getUChar(delta+1);
31993 assign( sV, getXMMReg(rE) );
31994 delta += 1+1;
31995 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
31996 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
31997 } else {
31998 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
31999 1/* imm8 is 1 byte after the amode */ );
32000 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
32001 imm8 = (Int)getUChar(delta+alen);
32002 delta += alen+1;
32003 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
32004 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
32007 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
32008 *uses_vvvv = True;
32009 goto decode_success;
32011 break;
32013 case 0x46:
32014 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
32015 if (have66noF2noF3(pfx)
32016 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
32017 UChar modrm = getUChar(delta);
32018 UInt imm8 = 0;
32019 UInt rG = gregOfRexRM(pfx, modrm);
32020 UInt rV = getVexNvvvv(pfx);
32021 IRTemp s00 = newTemp(Ity_V128);
32022 IRTemp s01 = newTemp(Ity_V128);
32023 IRTemp s10 = newTemp(Ity_V128);
32024 IRTemp s11 = newTemp(Ity_V128);
32025 assign(s00, getYMMRegLane128(rV, 0));
32026 assign(s01, getYMMRegLane128(rV, 1));
32027 if (epartIsReg(modrm)) {
32028 UInt rE = eregOfRexRM(pfx, modrm);
32029 delta += 1;
32030 imm8 = getUChar(delta);
32031 DIP("vperm2i128 $%u,%s,%s,%s\n",
32032 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
32033 assign(s10, getYMMRegLane128(rE, 0));
32034 assign(s11, getYMMRegLane128(rE, 1));
32035 } else {
32036 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
32037 delta += alen;
32038 imm8 = getUChar(delta);
32039 DIP("vperm2i128 $%u,%s,%s,%s\n",
32040 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
32041 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
32042 mkexpr(addr), mkU64(0))));
32043 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
32044 mkexpr(addr), mkU64(16))));
32046 delta++;
32047 # define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
32048 : ((_nn)==2) ? s10 : s11)
32049 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
32050 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
32051 # undef SEL
32052 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
32053 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
32054 *uses_vvvv = True;
32055 goto decode_success;
32057 break;
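// Editor's sketch (illustrative only, not part of the decoder): a
// plain-C model of the lane selection done above, on 16-byte lanes.
// "v" plays the role of rV (lanes s00/s01) and "e" of the E operand
// (lanes s10/s11); vperm2i128_ref is a hypothetical name.
//
//    #include <stdint.h>
//    #include <string.h>
//
//    static void vperm2i128_ref ( uint8_t dst[32], const uint8_t v[32],
//                                 const uint8_t e[32], uint8_t imm8 )
//    {
//       const uint8_t* lane[4] = { v, v + 16, e, e + 16 };
//       memcpy(dst,      lane[ imm8       & 3], 16);   /* result lane 0 */
//       memcpy(dst + 16, lane[(imm8 >> 4) & 3], 16);   /* result lane 1 */
//       if (imm8 & (1 << 3)) memset(dst,      0, 16);  /* zero lane 0   */
//       if (imm8 & (1 << 7)) memset(dst + 16, 0, 16);  /* zero lane 1   */
//    }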
32059 case 0x4A:
32060 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
32061 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32062 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
32063 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32064 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32065 "vblendvps", 4, Iop_SarN32x4 );
32066 *uses_vvvv = True;
32067 goto decode_success;
32069 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
32070 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32071 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
32072 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32073 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32074 "vblendvps", 4, Iop_SarN32x4 );
32075 *uses_vvvv = True;
32076 goto decode_success;
32078 break;
32080 case 0x4B:
32081 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
32082 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32083 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
32084 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32085 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32086 "vblendvpd", 8, Iop_SarN64x2 );
32087 *uses_vvvv = True;
32088 goto decode_success;
32090 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
32091 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32092 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
32093 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32094 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32095 "vblendvpd", 8, Iop_SarN64x2 );
32096 *uses_vvvv = True;
32097 goto decode_success;
32099 break;
32101 case 0x4C:
32102 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
32103 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
32104 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
32105 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32106 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
32107 "vpblendvb", 1, Iop_SarN8x16 );
32108 *uses_vvvv = True;
32109 goto decode_success;
32111 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
32112 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
32113 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
32114 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
32115 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
32116 "vpblendvb", 1, Iop_SarN8x16 );
32117 *uses_vvvv = True;
32118 goto decode_success;
32120 break;
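// Editor's sketch (illustrative only, not part of the decoder): the
// common semantics behind the dis_VBLENDV_128/256 calls above.  Each
// destination element comes from one of the two sources according to
// the top bit of the corresponding element of the /is4 mask register;
// the Iop_SarNxx argument is just the arithmetic shift used to smear
// that sign bit across the whole element.  Modelled here on 32-bit
// lanes, with src1 standing for the vvvv operand and src2 for the r/m
// operand; blendv32_ref is a hypothetical name.
//
//    #include <stdint.h>
//
//    static void blendv32_ref ( uint32_t res[4], const uint32_t src1[4],
//                               const uint32_t src2[4],
//                               const uint32_t mask[4] )
//    {
//       for (int i = 0; i < 4; i++)
//          res[i] = (mask[i] >> 31) ? src2[i] : src1[i];
//    }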
32122 case 0x60:
32123 case 0x61:
32124 case 0x62:
32125 case 0x63:
32126 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
32127 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
32128 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
32129 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
32130 (selected special cases that actually occur in glibc,
32131 not by any means a complete implementation.) */
32133 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32134 Long delta0 = delta;
32135 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
32136 if (delta > delta0) goto decode_success;
32137 /* else fall though; dis_PCMPxSTRx failed to decode it */
32139 break;
32141 case 0x5C ... 0x5F:
32142 case 0x68 ... 0x6F:
32143 case 0x78 ... 0x7F:
32144 /* FIXME: list the instructions decoded here */
32145 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32146 Long delta0 = delta;
32147 delta = dis_FMA4( pfx, delta, opc, uses_vvvv, vbi );
32148 if (delta > delta0) {
32149 dres->hint = Dis_HintVerbose;
32150 goto decode_success;
32152 /* else fall though; dis_FMA4 failed to decode it */
32154 break;
32156 case 0xDF:
32157 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r */
32158 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
32159 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*isAvx*/ );
32160 goto decode_success;
32162 break;
32164 case 0xF0:
32165 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
32166 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
32167 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
32168 Int size = getRexW(pfx) ? 8 : 4;
32169 IRType ty = szToITy(size);
32170 IRTemp src = newTemp(ty);
32171 UChar rm = getUChar(delta);
32172 UChar imm8;
32174 if (epartIsReg(rm)) {
32175 imm8 = getUChar(delta+1);
32176 assign( src, getIRegE(size,pfx,rm) );
32177 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
32178 nameIRegG(size,pfx,rm));
32179 delta += 2;
32180 } else {
32181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
32182 imm8 = getUChar(delta+alen);
32183 assign( src, loadLE(ty, mkexpr(addr)) );
32184 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
32185 delta += alen + 1;
32187 imm8 &= 8*size-1;
32189 /* dst = (src >>u imm8) | (src << (size-imm8)) */
32190 putIRegG( size, pfx, rm,
32191 imm8 == 0 ? mkexpr(src)
32192 : binop( mkSizedOp(ty,Iop_Or8),
32193 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
32194 mkU8(imm8) ),
32195 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
32196 mkU8(8*size-imm8) ) ) );
32197 /* Flags aren't modified. */
32198 goto decode_success;
32200 break;
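// Editor's sketch (illustrative only, not part of the decoder): the
// rotate expressed by the IR above, written in plain C for the 64-bit
// case.  The imm8 == 0 special case mirrors the decoder's, since a
// shift by the full word width would be undefined behaviour in C (and
// pointless IR).  rorx64_ref is a hypothetical name.
//
//    #include <stdint.h>
//
//    static uint64_t rorx64_ref ( uint64_t src, unsigned imm8 )
//    {
//       imm8 &= 63;
//       if (imm8 == 0) return src;
//       return (src >> imm8) | (src << (64 - imm8));
//    }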
32202 default:
32203 break;
32207 //decode_failure:
32208 return deltaIN;
32210 decode_success:
32211 return delta;
32215 /*------------------------------------------------------------*/
32216 /*--- ---*/
32217 /*--- Disassemble a single instruction ---*/
32218 /*--- ---*/
32219 /*------------------------------------------------------------*/
32221 /* Disassemble a single instruction into IR. The instruction is
32222 located in host memory at &guest_code[delta]. */
32224 static
32225 DisResult disInstr_AMD64_WRK (
32226 /*OUT*/Bool* expect_CAS,
32227 Long delta64,
32228 const VexArchInfo* archinfo,
32229 const VexAbiInfo* vbi,
32230 Bool sigill_diag
32233 IRTemp t1, t2;
32234 UChar pre;
32235 Int n, n_prefixes;
32236 DisResult dres;
32238 /* The running delta */
32239 Long delta = delta64;
32241 /* Holds eip at the start of the insn, so that we can print
32242 consistent error messages for unimplemented insns. */
32243 Long delta_start = delta;
32245 /* sz denotes the nominal data-op size of the insn; we change it to
32246 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
32247 conflict REX.W takes precedence. */
32248 Int sz = 4;
32250 /* pfx holds the summary of prefixes. */
32251 Prefix pfx = PFX_EMPTY;
32253 /* Holds the computed opcode-escape indication. */
32254 Escape esc = ESC_NONE;
32256 /* Set result defaults. */
32257 dres.whatNext = Dis_Continue;
32258 dres.len = 0;
32259 dres.jk_StopHere = Ijk_INVALID;
32260 dres.hint = Dis_HintNone;
32261 *expect_CAS = False;
32263 vassert(guest_RIP_next_assumed == 0);
32264 vassert(guest_RIP_next_mustcheck == False);
32266 t1 = t2 = IRTemp_INVALID;
32268 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
32270 /* Spot "Special" instructions (see comment at top of file). */
32272 const UChar* code = guest_code + delta;
32273 /* Spot the 16-byte preamble:
32274 48C1C703 rolq $3, %rdi
32275 48C1C70D rolq $13, %rdi
32276 48C1C73D rolq $61, %rdi
32277 48C1C733 rolq $51, %rdi */
32279 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
32280 && code[ 3] == 0x03 &&
32281 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
32282 && code[ 7] == 0x0D &&
32283 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
32284 && code[11] == 0x3D &&
32285 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
32286 && code[15] == 0x33) {
32287 /* Got a "Special" instruction preamble. Which one is it? */
32288 if (code[16] == 0x48 && code[17] == 0x87
32289 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
32290 /* %RDX = client_request ( %RAX ) */
32291 DIP("%%rdx = client_request ( %%rax )\n");
32292 delta += 19;
32293 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
32294 vassert(dres.whatNext == Dis_StopHere);
32295 goto decode_success;
32297 else
32298 if (code[16] == 0x48 && code[17] == 0x87
32299 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
32300 /* %RAX = guest_NRADDR */
32301 DIP("%%rax = guest_NRADDR\n");
32302 delta += 19;
32303 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
32304 goto decode_success;
32306 else
32307 if (code[16] == 0x48 && code[17] == 0x87
32308 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
32309 /* call-noredir *%RAX */
32310 DIP("call-noredir *%%rax\n");
32311 delta += 19;
32312 t1 = newTemp(Ity_I64);
32313 assign(t1, getIRegRAX(8));
32314 t2 = newTemp(Ity_I64);
32315 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
32316 putIReg64(R_RSP, mkexpr(t2));
32317 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
32318 jmp_treg(&dres, Ijk_NoRedir, t1);
32319 vassert(dres.whatNext == Dis_StopHere);
32320 goto decode_success;
32322 else
32323 if (code[16] == 0x48 && code[17] == 0x87
32324 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
32325 /* IR injection */
32326 DIP("IR injection\n");
32327 vex_inject_ir(irsb, Iend_LE);
32329 // Invalidate the current insn. The reason is that the IRop we're
32330 // injecting here can change. In which case the translation has to
32331 // be redone. For ease of handling, we simply invalidate all the
32332 // time.
32333 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
32334 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
32336 delta += 19;
32338 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32339 dres.whatNext = Dis_StopHere;
32340 dres.jk_StopHere = Ijk_InvalICache;
32341 goto decode_success;
32343 /* We don't know what it is. */
32344 goto decode_failure;
32345 /*NOTREACHED*/
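// Editor's sketch (illustrative only, not part of the decoder): the
// 16-byte preamble above is four "rolq $imm, %rdi" instructions whose
// rotate amounts (3+13+61+51 = 128) sum to a multiple of 64, so the
// sequence leaves %rdi -- and everything else -- unchanged when run
// natively.  A byte-wise check equivalent to the comparison above could
// look like this (is_special_preamble is a hypothetical name):
//
//    #include <string.h>
//
//    static int is_special_preamble ( const unsigned char* code )
//    {
//       static const unsigned char preamble[16] = {
//          0x48, 0xC1, 0xC7, 0x03,   /* rolq $3,  %rdi */
//          0x48, 0xC1, 0xC7, 0x0D,   /* rolq $13, %rdi */
//          0x48, 0xC1, 0xC7, 0x3D,   /* rolq $61, %rdi */
//          0x48, 0xC1, 0xC7, 0x33    /* rolq $51, %rdi */
//       };
//       return memcmp(code, preamble, 16) == 0;
//    }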
32349 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
32350 as many invalid combinations as possible. */
32351 n_prefixes = 0;
32352 while (True) {
32353 if (n_prefixes > 7) goto decode_failure;
32354 pre = getUChar(delta);
32355 switch (pre) {
32356 case 0x66: pfx |= PFX_66; break;
32357 case 0x67: pfx |= PFX_ASO; break;
32358 case 0xF2: pfx |= PFX_F2; break;
32359 case 0xF3: pfx |= PFX_F3; break;
32360 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
32361 case 0x2E: pfx |= PFX_CS; break;
32362 case 0x3E: pfx |= PFX_DS; break;
32363 case 0x26: pfx |= PFX_ES; break;
32364 case 0x64: pfx |= PFX_FS; break;
32365 case 0x65: pfx |= PFX_GS; break;
32366 case 0x36: pfx |= PFX_SS; break;
32367 case 0x40 ... 0x4F:
32368 pfx |= PFX_REX;
32369 if (pre & (1<<3)) pfx |= PFX_REXW;
32370 if (pre & (1<<2)) pfx |= PFX_REXR;
32371 if (pre & (1<<1)) pfx |= PFX_REXX;
32372 if (pre & (1<<0)) pfx |= PFX_REXB;
32373 break;
32374 default:
32375 goto not_a_legacy_prefix;
32377 n_prefixes++;
32378 delta++;
32381 not_a_legacy_prefix:
32382 /* We've used up all the non-VEX prefixes. Parse and validate a
32383 VEX prefix if that's appropriate. */
32384 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
32385 /* Used temporarily for holding VEX prefixes. */
32386 UChar vex0 = getUChar(delta);
32387 if (vex0 == 0xC4) {
32388 /* 3-byte VEX */
32389 UChar vex1 = getUChar(delta+1);
32390 UChar vex2 = getUChar(delta+2);
32391 delta += 3;
32392 pfx |= PFX_VEX;
32393 /* Snarf contents of byte 1 */
32394 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32395 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
32396 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
32397 /* m-mmmm */
32398 switch (vex1 & 0x1F) {
32399 case 1: esc = ESC_0F; break;
32400 case 2: esc = ESC_0F38; break;
32401 case 3: esc = ESC_0F3A; break;
32402 /* Any other m-mmmm field will #UD */
32403 default: goto decode_failure;
32405 /* Snarf contents of byte 2 */
32406 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
32407 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
32408 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
32409 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
32410 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
32411 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
32412 /* pp */
32413 switch (vex2 & 3) {
32414 case 0: break;
32415 case 1: pfx |= PFX_66; break;
32416 case 2: pfx |= PFX_F3; break;
32417 case 3: pfx |= PFX_F2; break;
32418 default: vassert(0);
32421 else if (vex0 == 0xC5) {
32422 /* 2-byte VEX */
32423 UChar vex1 = getUChar(delta+1);
32424 delta += 2;
32425 pfx |= PFX_VEX;
32426 /* Snarf contents of byte 1 */
32427 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
32428 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
32429 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
32430 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
32431 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
32432 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
32433 /* pp */
32434 switch (vex1 & 3) {
32435 case 0: break;
32436 case 1: pfx |= PFX_66; break;
32437 case 2: pfx |= PFX_F3; break;
32438 case 3: pfx |= PFX_F2; break;
32439 default: vassert(0);
32441 /* implied: */
32442 esc = ESC_0F;
32444 /* Can't have both VEX and REX */
32445 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
32446 goto decode_failure; /* can't have both */
32449 /* Dump invalid combinations */
32450 n = 0;
32451 if (pfx & PFX_F2) n++;
32452 if (pfx & PFX_F3) n++;
32453 if (n > 1)
32454 goto decode_failure; /* can't have both */
32456 n = 0;
32457 if (pfx & PFX_CS) n++;
32458 if (pfx & PFX_DS) n++;
32459 if (pfx & PFX_ES) n++;
32460 if (pfx & PFX_FS) n++;
32461 if (pfx & PFX_GS) n++;
32462 if (pfx & PFX_SS) n++;
32463 if (n > 1)
32464 goto decode_failure; /* multiple seg overrides == illegal */
32466 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
32467 that we should accept it. */
32468 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_const)
32469 goto decode_failure;
32471 /* Ditto for %gs prefixes. */
32472 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_const)
32473 goto decode_failure;
32475 /* Set up sz. */
32476 sz = 4;
32477 if (pfx & PFX_66) sz = 2;
32478 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
32480 /* Now we should be looking at the primary opcode byte or the
32481 leading escapes. Check that any LOCK prefix is actually
32482 allowed. */
32483 if (haveLOCK(pfx)) {
32484 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
32485 DIP("lock ");
32486 } else {
32487 *expect_CAS = False;
32488 goto decode_failure;
32492 /* Eat up opcode escape bytes, until we're really looking at the
32493 primary opcode byte. But only if there's no VEX present. */
32494 if (!(pfx & PFX_VEX)) {
32495 vassert(esc == ESC_NONE);
32496 pre = getUChar(delta);
32497 if (pre == 0x0F) {
32498 delta++;
32499 pre = getUChar(delta);
32500 switch (pre) {
32501 case 0x38: esc = ESC_0F38; delta++; break;
32502 case 0x3A: esc = ESC_0F3A; delta++; break;
32503 default: esc = ESC_0F; break;
32508 /* So now we're really really looking at the primary opcode
32509 byte. */
32510 Long delta_at_primary_opcode = delta;
32512 if (!(pfx & PFX_VEX)) {
32513 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
32514 instructions preserve the upper 128 bits of YMM registers;
32515 iow we can simply ignore the presence of the upper halves of
32516 these registers. */
32517 switch (esc) {
32518 case ESC_NONE:
32519 delta = dis_ESC_NONE( &dres, expect_CAS,
32520 archinfo, vbi, pfx, sz, delta );
32521 break;
32522 case ESC_0F:
32523 delta = dis_ESC_0F ( &dres, expect_CAS,
32524 archinfo, vbi, pfx, sz, delta );
32525 break;
32526 case ESC_0F38:
32527 delta = dis_ESC_0F38( &dres,
32528 archinfo, vbi, pfx, sz, delta );
32529 break;
32530 case ESC_0F3A:
32531 delta = dis_ESC_0F3A( &dres,
32532 archinfo, vbi, pfx, sz, delta );
32533 break;
32534 default:
32535 vassert(0);
32537 } else {
32538 /* VEX prefixed instruction */
32539 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
32540 prefix that loads a YMM register operand ..." zeroes out bits
32541 128 and above of the register. */
32542 Bool uses_vvvv = False;
32543 switch (esc) {
32544 case ESC_0F:
32545 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
32546 archinfo, vbi, pfx, sz, delta );
32547 break;
32548 case ESC_0F38:
32549 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
32550 archinfo, vbi, pfx, sz, delta );
32551 break;
32552 case ESC_0F3A:
32553 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
32554 archinfo, vbi, pfx, sz, delta );
32555 break;
32556 case ESC_NONE:
32557 /* The presence of a VEX prefix, by Intel definition,
32558 always implies at least an 0F escape. */
32559 goto decode_failure;
32560 default:
32561 vassert(0);
32563 /* If the insn doesn't use VEX.vvvv then it must be all ones.
32564 Check this. */
32565 if (!uses_vvvv) {
32566 if (getVexNvvvv(pfx) != 0)
32567 goto decode_failure;
32571 vassert(delta - delta_at_primary_opcode >= 0);
32572 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
32574 /* Use delta == delta_at_primary_opcode to denote decode failure.
32575 This implies that any successful decode must use at least one
32576 byte up. */
32577 if (delta == delta_at_primary_opcode)
32578 goto decode_failure;
32579 else
32580 goto decode_success; /* \o/ */
32583 decode_failure:
32584 /* All decode failures end up here. */
32585 if (sigill_diag) {
32586 vex_printf("vex amd64->IR: unhandled instruction bytes: "
32587 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32588 getUChar(delta_start+0),
32589 getUChar(delta_start+1),
32590 getUChar(delta_start+2),
32591 getUChar(delta_start+3),
32592 getUChar(delta_start+4),
32593 getUChar(delta_start+5),
32594 getUChar(delta_start+6),
32595 getUChar(delta_start+7),
32596 getUChar(delta_start+8),
32597 getUChar(delta_start+9) );
32598 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
32599 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
32600 getRexX(pfx), getRexB(pfx));
32601 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
32602 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
32603 getVexNvvvv(pfx),
32604 esc==ESC_NONE ? "NONE" :
32605 esc==ESC_0F ? "0F" :
32606 esc==ESC_0F38 ? "0F38" :
32607 esc==ESC_0F3A ? "0F3A" : "???");
32608 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
32609 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
32610 haveF3(pfx) ? 1 : 0);
32613 /* Tell the dispatcher that this insn cannot be decoded, and so has
32614 not been executed, and (is currently) the next to be executed.
32615 RIP should be up-to-date since it was made so at the start of each
32616 insn, but nevertheless be paranoid and update it again right
32617 now. */
32618 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
32619 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
32620 vassert(dres.whatNext == Dis_StopHere);
32621 dres.len = 0;
32622 /* We also need to say that a CAS is not expected now, regardless
32623 of what it might have been set to at the start of the function,
32624 since the IR that we've emitted just above (to synthesise a
32625 SIGILL) does not involve any CAS, and presumably no other IR has
32626 been emitted for this (non-decoded) insn. */
32627 *expect_CAS = False;
32628 return dres;
32631 decode_success:
32632 /* All decode successes end up here. */
32633 switch (dres.whatNext) {
32634 case Dis_Continue:
32635 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
32636 break;
32637 case Dis_StopHere:
32638 break;
32639 default:
32640 vassert(0);
32643 DIP("\n");
32644 dres.len = toUInt(delta - delta_start);
32645 return dres;
32648 #undef DIP
32649 #undef DIS
32652 /*------------------------------------------------------------*/
32653 /*--- Top-level fn ---*/
32654 /*------------------------------------------------------------*/
32656 /* Disassemble a single instruction into IR. The instruction
32657 is located in host memory at &guest_code[delta]. */
32659 DisResult disInstr_AMD64 ( IRSB* irsb_IN,
32660 const UChar* guest_code_IN,
32661 Long delta,
32662 Addr guest_IP,
32663 VexArch guest_arch,
32664 const VexArchInfo* archinfo,
32665 const VexAbiInfo* abiinfo,
32666 VexEndness host_endness_IN,
32667 Bool sigill_diag_IN )
32669 Int i, x1, x2;
32670 Bool expect_CAS, has_CAS;
32671 DisResult dres;
32673 /* Set globals (see top of this file) */
32674 vassert(guest_arch == VexArchAMD64);
32675 guest_code = guest_code_IN;
32676 irsb = irsb_IN;
32677 host_endness = host_endness_IN;
32678 guest_RIP_curr_instr = guest_IP;
32679 guest_RIP_bbstart = guest_IP - delta;
32681 /* We'll consult these after doing disInstr_AMD64_WRK. */
32682 guest_RIP_next_assumed = 0;
32683 guest_RIP_next_mustcheck = False;
32685 x1 = irsb_IN->stmts_used;
32686 expect_CAS = False;
32687 dres = disInstr_AMD64_WRK ( &expect_CAS,
32688 delta, archinfo, abiinfo, sigill_diag_IN );
32689 x2 = irsb_IN->stmts_used;
32690 vassert(x2 >= x1);
32692 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
32693 got it right. Failure of this assertion is serious and denotes
32694 a bug in disInstr. */
32695 if (guest_RIP_next_mustcheck
32696 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
32697 vex_printf("\n");
32698 vex_printf(" current %%rip = 0x%llx\n",
32699 guest_RIP_curr_instr );
32700 vex_printf("assumed next %%rip = 0x%llx\n",
32701 guest_RIP_next_assumed );
32702 vex_printf(" actual next %%rip = 0x%llx\n",
32703 guest_RIP_curr_instr + dres.len );
32704 vex_printf("instruction bytes: "
32705 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
32706 getUChar(delta+0),
32707 getUChar(delta+1),
32708 getUChar(delta+2),
32709 getUChar(delta+3),
32710 getUChar(delta+4),
32711 getUChar(delta+5),
32712 getUChar(delta+6),
32713 getUChar(delta+7),
32714 getUChar(delta+8),
32715 getUChar(delta+9) );
32717 /* re-disassemble the instruction so as
32718 to generate a useful error message; then assert. */
32719 vex_traceflags |= VEX_TRACE_FE;
32720 guest_RIP_next_assumed = 0;
32721 guest_RIP_next_mustcheck = False;
32722 dres = disInstr_AMD64_WRK ( &expect_CAS,
32723 delta, archinfo, abiinfo, sigill_diag_IN );
32724 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
32727 /* See comment at the top of disInstr_AMD64_WRK for meaning of
32728 expect_CAS. Here, we (sanity-)check for the presence/absence of
32729 IRCAS as directed by the returned expect_CAS value. */
32730 has_CAS = False;
32731 for (i = x1; i < x2; i++) {
32732 if (irsb_IN->stmts[i]->tag == Ist_CAS)
32733 has_CAS = True;
32736 if (expect_CAS != has_CAS) {
32737 /* inconsistency detected. re-disassemble the instruction so as
32738 to generate a useful error message; then assert. */
32739 vex_traceflags |= VEX_TRACE_FE;
32740 dres = disInstr_AMD64_WRK ( &expect_CAS,
32741 delta, archinfo, abiinfo, sigill_diag_IN );
32742 for (i = x1; i < x2; i++) {
32743 vex_printf("\t\t");
32744 ppIRStmt(irsb_IN->stmts[i]);
32745 vex_printf("\n");
32747 /* Failure of this assertion is serious and denotes a bug in
32748 disInstr. */
32749 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
32752 return dres;
32756 /*------------------------------------------------------------*/
32757 /*--- Unused stuff ---*/
32758 /*------------------------------------------------------------*/
32760 // A potentially more Memcheck-friendly version of gen_LZCNT, if
32761 // this should ever be needed.
32763 //static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
32765 // /* Scheme is simple: propagate the most significant 1-bit into all
32766 // lower positions in the word. This gives a word of the form
32767 // 0---01---1. Now invert it, giving a word of the form
32768 // 1---10---0, then do a population-count idiom (to count the 1s,
32769 // which is the number of leading zeroes, or the word size if the
32770 // original word was 0).
32771 // */
32772 // Int i;
32773 // IRTemp t[7];
32774 // for (i = 0; i < 7; i++) {
32775 // t[i] = newTemp(ty);
32776 // }
32777 // if (ty == Ity_I64) {
32778 // assign(t[0], binop(Iop_Or64, mkexpr(src),
32779 // binop(Iop_Shr64, mkexpr(src), mkU8(1))));
32780 // assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
32781 // binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
32782 // assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
32783 // binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
32784 // assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
32785 // binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
32786 // assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
32787 // binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
32788 // assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
32789 // binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
32790 // assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
32791 // return gen_POPCOUNT(ty, t[6]);
32792 // }
32793 // if (ty == Ity_I32) {
32794 // assign(t[0], binop(Iop_Or32, mkexpr(src),
32795 // binop(Iop_Shr32, mkexpr(src), mkU8(1))));
32796 // assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
32797 // binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
32798 // assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
32799 // binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
32800 // assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
32801 // binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
32802 // assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
32803 // binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
32804 // assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
32805 // return gen_POPCOUNT(ty, t[5]);
32806 // }
32807 // if (ty == Ity_I16) {
32808 // assign(t[0], binop(Iop_Or16, mkexpr(src),
32809 // binop(Iop_Shr16, mkexpr(src), mkU8(1))));
32810 // assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
32811 // binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
32812 // assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
32813 // binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
32814 // assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
32815 // binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
32816 // assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
32817 // return gen_POPCOUNT(ty, t[4]);
32818 // }
32819 // vassert(0);
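// Editor's sketch (illustrative only): the same propagate-and-popcount
// idiom written as plain, runnable C for a 32-bit word, with a naive
// popcount so the example is self-contained.  clz32_ref and
// popcount32_ref are hypothetical names.
//
//    #include <stdint.h>
//
//    static unsigned popcount32_ref ( uint32_t x )
//    {
//       unsigned n = 0;
//       while (x) { n += x & 1; x >>= 1; }
//       return n;
//    }
//
//    static unsigned clz32_ref ( uint32_t x )
//    {
//       /* Smear the most significant 1 bit into all lower positions ... */
//       x |= x >> 1;
//       x |= x >> 2;
//       x |= x >> 4;
//       x |= x >> 8;
//       x |= x >> 16;
//       /* ... then the 1s of the inverted word are exactly the leading
//          zeroes of the original (32 of them if it was zero). */
//       return popcount32_ref(~x);
//    }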
32823 /*--------------------------------------------------------------------*/
32824 /*--- end guest_amd64_toIR.c ---*/
32825 /*--------------------------------------------------------------------*/