/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"

/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_X86 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_X86;
   static Bool         rRegUniverse_X86_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_X86;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_X86_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt32] = ru->size;
   ru->regs[ru->size++] = hregX86_EBX();
   ru->regs[ru->size++] = hregX86_ESI();
   ru->regs[ru->size++] = hregX86_EDI();
   ru->regs[ru->size++] = hregX86_EAX();
   ru->regs[ru->size++] = hregX86_ECX();
   ru->regs[ru->size++] = hregX86_EDX();
   ru->allocable_end[HRcInt32] = ru->size - 1;

   ru->allocable_start[HRcFlt64] = ru->size;
   ru->regs[ru->size++] = hregX86_FAKE0();
   ru->regs[ru->size++] = hregX86_FAKE1();
   ru->regs[ru->size++] = hregX86_FAKE2();
   ru->regs[ru->size++] = hregX86_FAKE3();
   ru->regs[ru->size++] = hregX86_FAKE4();
   ru->regs[ru->size++] = hregX86_FAKE5();
   ru->allocable_end[HRcFlt64] = ru->size - 1;

   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregX86_XMM0();
   ru->regs[ru->size++] = hregX86_XMM1();
   ru->regs[ru->size++] = hregX86_XMM2();
   ru->regs[ru->size++] = hregX86_XMM3();
   ru->regs[ru->size++] = hregX86_XMM4();
   ru->regs[ru->size++] = hregX86_XMM5();
   ru->regs[ru->size++] = hregX86_XMM6();
   ru->regs[ru->size++] = hregX86_XMM7();
   ru->allocable_end[HRcVec128] = ru->size - 1;
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregX86_ESP();
   ru->regs[ru->size++] = hregX86_EBP();

   rRegUniverse_X86_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
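
/* Usage sketch (assumed consumer; the real one is the generic allocator
   in host_generic_reg_alloc*.c):

      const RRegUniverse* univ = getRRegUniverse_X86();
      for (UInt k = univ->allocable_start[HRcInt32];
           k <= univ->allocable_end[HRcInt32]; k++) {
         HReg r = univ->regs[k];  // EBX, ESI, EDI, EAX, ECX, EDX in order
      }
*/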

UInt ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%s", ireg32_names[r]);
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 6);
         return vex_printf("%%fake%d", r);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%%xmm%d", r);
      default:
         vpanic("ppHRegX86");
   }
}

/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("ppX86CondCode");
   }
}

/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}
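
/* Printed forms, for reference: an IR amode with imm 0x4 and reg %ebp
   shows as "0x4(%ebp)" (just "(%ebp)" when imm is 0); an IRRS amode shows
   as e.g. "0x0(%esi,%edi,4)", the final field being 1 << shift. */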

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}

/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op         = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag            = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op       = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag          = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op      = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag         = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}

/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op         = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag           = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op       = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag         = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}

/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op       = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag         = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op      = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag        = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}

/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV: return "mov";
      case Xalu_CMP: return "cmp";
      case Xalu_ADD: return "add";
      case Xalu_SUB: return "sub";
      case Xalu_ADC: return "adc";
      case Xalu_SBB: return "sbb";
      case Xalu_AND: return "and";
      case Xalu_OR:  return "or";
      case Xalu_XOR: return "xor";
      case Xalu_MUL: return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}

X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i       = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag            = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
575 X86Instr
* X86Instr_Alu32M ( X86AluOp op
, X86RI
* src
, X86AMode
* dst
) {
576 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
578 i
->Xin
.Alu32M
.op
= op
;
579 i
->Xin
.Alu32M
.src
= src
;
580 i
->Xin
.Alu32M
.dst
= dst
;
581 vassert(op
!= Xalu_MUL
);
584 X86Instr
* X86Instr_Sh32 ( X86ShiftOp op
, UInt src
, HReg dst
) {
585 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
588 i
->Xin
.Sh32
.src
= src
;
589 i
->Xin
.Sh32
.dst
= dst
;
592 X86Instr
* X86Instr_Test32 ( UInt imm32
, X86RM
* dst
) {
593 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
595 i
->Xin
.Test32
.imm32
= imm32
;
596 i
->Xin
.Test32
.dst
= dst
;
599 X86Instr
* X86Instr_Unary32 ( X86UnaryOp op
, HReg dst
) {
600 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
601 i
->tag
= Xin_Unary32
;
602 i
->Xin
.Unary32
.op
= op
;
603 i
->Xin
.Unary32
.dst
= dst
;
606 X86Instr
* X86Instr_Lea32 ( X86AMode
* am
, HReg dst
) {
607 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
609 i
->Xin
.Lea32
.am
= am
;
610 i
->Xin
.Lea32
.dst
= dst
;
613 X86Instr
* X86Instr_MulL ( Bool syned
, X86RM
* src
) {
614 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
616 i
->Xin
.MulL
.syned
= syned
;
617 i
->Xin
.MulL
.src
= src
;
620 X86Instr
* X86Instr_Div ( Bool syned
, X86RM
* src
) {
621 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
623 i
->Xin
.Div
.syned
= syned
;
624 i
->Xin
.Div
.src
= src
;
627 X86Instr
* X86Instr_Sh3232 ( X86ShiftOp op
, UInt amt
, HReg src
, HReg dst
) {
628 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
630 i
->Xin
.Sh3232
.op
= op
;
631 i
->Xin
.Sh3232
.amt
= amt
;
632 i
->Xin
.Sh3232
.src
= src
;
633 i
->Xin
.Sh3232
.dst
= dst
;
634 vassert(op
== Xsh_SHL
|| op
== Xsh_SHR
);
637 X86Instr
* X86Instr_Push( X86RMI
* src
) {
638 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
640 i
->Xin
.Push
.src
= src
;
643 X86Instr
* X86Instr_Call ( X86CondCode cond
, Addr32 target
, Int regparms
,
645 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
647 i
->Xin
.Call
.cond
= cond
;
648 i
->Xin
.Call
.target
= target
;
649 i
->Xin
.Call
.regparms
= regparms
;
650 i
->Xin
.Call
.rloc
= rloc
;
651 vassert(regparms
>= 0 && regparms
<= 3);
652 vassert(is_sane_RetLoc(rloc
));
655 X86Instr
* X86Instr_XDirect ( Addr32 dstGA
, X86AMode
* amEIP
,
656 X86CondCode cond
, Bool toFastEP
) {
657 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
658 i
->tag
= Xin_XDirect
;
659 i
->Xin
.XDirect
.dstGA
= dstGA
;
660 i
->Xin
.XDirect
.amEIP
= amEIP
;
661 i
->Xin
.XDirect
.cond
= cond
;
662 i
->Xin
.XDirect
.toFastEP
= toFastEP
;
665 X86Instr
* X86Instr_XIndir ( HReg dstGA
, X86AMode
* amEIP
,
667 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
669 i
->Xin
.XIndir
.dstGA
= dstGA
;
670 i
->Xin
.XIndir
.amEIP
= amEIP
;
671 i
->Xin
.XIndir
.cond
= cond
;
674 X86Instr
* X86Instr_XAssisted ( HReg dstGA
, X86AMode
* amEIP
,
675 X86CondCode cond
, IRJumpKind jk
) {
676 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
677 i
->tag
= Xin_XAssisted
;
678 i
->Xin
.XAssisted
.dstGA
= dstGA
;
679 i
->Xin
.XAssisted
.amEIP
= amEIP
;
680 i
->Xin
.XAssisted
.cond
= cond
;
681 i
->Xin
.XAssisted
.jk
= jk
;
684 X86Instr
* X86Instr_CMov32 ( X86CondCode cond
, X86RM
* src
, HReg dst
) {
685 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
687 i
->Xin
.CMov32
.cond
= cond
;
688 i
->Xin
.CMov32
.src
= src
;
689 i
->Xin
.CMov32
.dst
= dst
;
690 vassert(cond
!= Xcc_ALWAYS
);
693 X86Instr
* X86Instr_LoadEX ( UChar szSmall
, Bool syned
,
694 X86AMode
* src
, HReg dst
) {
695 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
697 i
->Xin
.LoadEX
.szSmall
= szSmall
;
698 i
->Xin
.LoadEX
.syned
= syned
;
699 i
->Xin
.LoadEX
.src
= src
;
700 i
->Xin
.LoadEX
.dst
= dst
;
701 vassert(szSmall
== 1 || szSmall
== 2);
704 X86Instr
* X86Instr_Store ( UChar sz
, HReg src
, X86AMode
* dst
) {
705 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
707 i
->Xin
.Store
.sz
= sz
;
708 i
->Xin
.Store
.src
= src
;
709 i
->Xin
.Store
.dst
= dst
;
710 vassert(sz
== 1 || sz
== 2);
713 X86Instr
* X86Instr_Set32 ( X86CondCode cond
, HReg dst
) {
714 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
716 i
->Xin
.Set32
.cond
= cond
;
717 i
->Xin
.Set32
.dst
= dst
;
720 X86Instr
* X86Instr_Bsfr32 ( Bool isFwds
, HReg src
, HReg dst
) {
721 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
723 i
->Xin
.Bsfr32
.isFwds
= isFwds
;
724 i
->Xin
.Bsfr32
.src
= src
;
725 i
->Xin
.Bsfr32
.dst
= dst
;
728 X86Instr
* X86Instr_MFence ( UInt hwcaps
) {
729 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
731 i
->Xin
.MFence
.hwcaps
= hwcaps
;
732 vassert(0 == (hwcaps
& ~(VEX_HWCAPS_X86_MMXEXT
736 |VEX_HWCAPS_X86_LZCNT
)));
739 X86Instr
* X86Instr_ACAS ( X86AMode
* addr
, UChar sz
) {
740 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
742 i
->Xin
.ACAS
.addr
= addr
;
744 vassert(sz
== 4 || sz
== 2 || sz
== 1);
747 X86Instr
* X86Instr_DACAS ( X86AMode
* addr
) {
748 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
750 i
->Xin
.DACAS
.addr
= addr
;
754 X86Instr
* X86Instr_FpUnary ( X86FpOp op
, HReg src
, HReg dst
) {
755 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
756 i
->tag
= Xin_FpUnary
;
757 i
->Xin
.FpUnary
.op
= op
;
758 i
->Xin
.FpUnary
.src
= src
;
759 i
->Xin
.FpUnary
.dst
= dst
;
762 X86Instr
* X86Instr_FpBinary ( X86FpOp op
, HReg srcL
, HReg srcR
, HReg dst
) {
763 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
764 i
->tag
= Xin_FpBinary
;
765 i
->Xin
.FpBinary
.op
= op
;
766 i
->Xin
.FpBinary
.srcL
= srcL
;
767 i
->Xin
.FpBinary
.srcR
= srcR
;
768 i
->Xin
.FpBinary
.dst
= dst
;
771 X86Instr
* X86Instr_FpLdSt ( Bool isLoad
, UChar sz
, HReg reg
, X86AMode
* addr
) {
772 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
774 i
->Xin
.FpLdSt
.isLoad
= isLoad
;
775 i
->Xin
.FpLdSt
.sz
= sz
;
776 i
->Xin
.FpLdSt
.reg
= reg
;
777 i
->Xin
.FpLdSt
.addr
= addr
;
778 vassert(sz
== 4 || sz
== 8 || sz
== 10);
781 X86Instr
* X86Instr_FpLdStI ( Bool isLoad
, UChar sz
,
782 HReg reg
, X86AMode
* addr
) {
783 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
784 i
->tag
= Xin_FpLdStI
;
785 i
->Xin
.FpLdStI
.isLoad
= isLoad
;
786 i
->Xin
.FpLdStI
.sz
= sz
;
787 i
->Xin
.FpLdStI
.reg
= reg
;
788 i
->Xin
.FpLdStI
.addr
= addr
;
789 vassert(sz
== 2 || sz
== 4 || sz
== 8);
792 X86Instr
* X86Instr_Fp64to32 ( HReg src
, HReg dst
) {
793 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
794 i
->tag
= Xin_Fp64to32
;
795 i
->Xin
.Fp64to32
.src
= src
;
796 i
->Xin
.Fp64to32
.dst
= dst
;
799 X86Instr
* X86Instr_FpCMov ( X86CondCode cond
, HReg src
, HReg dst
) {
800 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
802 i
->Xin
.FpCMov
.cond
= cond
;
803 i
->Xin
.FpCMov
.src
= src
;
804 i
->Xin
.FpCMov
.dst
= dst
;
805 vassert(cond
!= Xcc_ALWAYS
);
808 X86Instr
* X86Instr_FpLdCW ( X86AMode
* addr
) {
809 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
811 i
->Xin
.FpLdCW
.addr
= addr
;
814 X86Instr
* X86Instr_FpStSW_AX ( void ) {
815 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
816 i
->tag
= Xin_FpStSW_AX
;
819 X86Instr
* X86Instr_FpCmp ( HReg srcL
, HReg srcR
, HReg dst
) {
820 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
822 i
->Xin
.FpCmp
.srcL
= srcL
;
823 i
->Xin
.FpCmp
.srcR
= srcR
;
824 i
->Xin
.FpCmp
.dst
= dst
;
827 X86Instr
* X86Instr_SseConst ( UShort con
, HReg dst
) {
828 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
829 i
->tag
= Xin_SseConst
;
830 i
->Xin
.SseConst
.con
= con
;
831 i
->Xin
.SseConst
.dst
= dst
;
832 vassert(hregClass(dst
) == HRcVec128
);
835 X86Instr
* X86Instr_SseLdSt ( Bool isLoad
, HReg reg
, X86AMode
* addr
) {
836 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
837 i
->tag
= Xin_SseLdSt
;
838 i
->Xin
.SseLdSt
.isLoad
= isLoad
;
839 i
->Xin
.SseLdSt
.reg
= reg
;
840 i
->Xin
.SseLdSt
.addr
= addr
;
843 X86Instr
* X86Instr_SseLdzLO ( Int sz
, HReg reg
, X86AMode
* addr
)
845 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
846 i
->tag
= Xin_SseLdzLO
;
847 i
->Xin
.SseLdzLO
.sz
= toUChar(sz
);
848 i
->Xin
.SseLdzLO
.reg
= reg
;
849 i
->Xin
.SseLdzLO
.addr
= addr
;
850 vassert(sz
== 4 || sz
== 8);
853 X86Instr
* X86Instr_Sse32Fx4 ( X86SseOp op
, HReg src
, HReg dst
) {
854 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
855 i
->tag
= Xin_Sse32Fx4
;
856 i
->Xin
.Sse32Fx4
.op
= op
;
857 i
->Xin
.Sse32Fx4
.src
= src
;
858 i
->Xin
.Sse32Fx4
.dst
= dst
;
859 vassert(op
!= Xsse_MOV
);
862 X86Instr
* X86Instr_Sse32FLo ( X86SseOp op
, HReg src
, HReg dst
) {
863 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
864 i
->tag
= Xin_Sse32FLo
;
865 i
->Xin
.Sse32FLo
.op
= op
;
866 i
->Xin
.Sse32FLo
.src
= src
;
867 i
->Xin
.Sse32FLo
.dst
= dst
;
868 vassert(op
!= Xsse_MOV
);
871 X86Instr
* X86Instr_Sse64Fx2 ( X86SseOp op
, HReg src
, HReg dst
) {
872 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
873 i
->tag
= Xin_Sse64Fx2
;
874 i
->Xin
.Sse64Fx2
.op
= op
;
875 i
->Xin
.Sse64Fx2
.src
= src
;
876 i
->Xin
.Sse64Fx2
.dst
= dst
;
877 vassert(op
!= Xsse_MOV
);
880 X86Instr
* X86Instr_Sse64FLo ( X86SseOp op
, HReg src
, HReg dst
) {
881 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
882 i
->tag
= Xin_Sse64FLo
;
883 i
->Xin
.Sse64FLo
.op
= op
;
884 i
->Xin
.Sse64FLo
.src
= src
;
885 i
->Xin
.Sse64FLo
.dst
= dst
;
886 vassert(op
!= Xsse_MOV
);
889 X86Instr
* X86Instr_SseReRg ( X86SseOp op
, HReg re
, HReg rg
) {
890 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
891 i
->tag
= Xin_SseReRg
;
892 i
->Xin
.SseReRg
.op
= op
;
893 i
->Xin
.SseReRg
.src
= re
;
894 i
->Xin
.SseReRg
.dst
= rg
;
897 X86Instr
* X86Instr_SseCMov ( X86CondCode cond
, HReg src
, HReg dst
) {
898 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
899 i
->tag
= Xin_SseCMov
;
900 i
->Xin
.SseCMov
.cond
= cond
;
901 i
->Xin
.SseCMov
.src
= src
;
902 i
->Xin
.SseCMov
.dst
= dst
;
903 vassert(cond
!= Xcc_ALWAYS
);
906 X86Instr
* X86Instr_SseShuf ( Int order
, HReg src
, HReg dst
) {
907 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
908 i
->tag
= Xin_SseShuf
;
909 i
->Xin
.SseShuf
.order
= order
;
910 i
->Xin
.SseShuf
.src
= src
;
911 i
->Xin
.SseShuf
.dst
= dst
;
912 vassert(order
>= 0 && order
<= 0xFF);
915 X86Instr
* X86Instr_EvCheck ( X86AMode
* amCounter
,
916 X86AMode
* amFailAddr
) {
917 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
918 i
->tag
= Xin_EvCheck
;
919 i
->Xin
.EvCheck
.amCounter
= amCounter
;
920 i
->Xin
.EvCheck
.amFailAddr
= amFailAddr
;
923 X86Instr
* X86Instr_ProfInc ( void ) {
924 X86Instr
* i
= LibVEX_Alloc_inline(sizeof(X86Instr
));
925 i
->tag
= Xin_ProfInc
;
929 void ppX86Instr ( const X86Instr
* i
, Bool mode64
) {
930 vassert(mode64
== False
);
933 vex_printf("%sl ", showX86AluOp(i
->Xin
.Alu32R
.op
));
934 ppX86RMI(i
->Xin
.Alu32R
.src
);
936 ppHRegX86(i
->Xin
.Alu32R
.dst
);
939 vex_printf("%sl ", showX86AluOp(i
->Xin
.Alu32M
.op
));
940 ppX86RI(i
->Xin
.Alu32M
.src
);
942 ppX86AMode(i
->Xin
.Alu32M
.dst
);
945 vex_printf("%sl ", showX86ShiftOp(i
->Xin
.Sh32
.op
));
946 if (i
->Xin
.Sh32
.src
== 0)
949 vex_printf("$%d,", (Int
)i
->Xin
.Sh32
.src
);
950 ppHRegX86(i
->Xin
.Sh32
.dst
);
953 vex_printf("testl $%d,", (Int
)i
->Xin
.Test32
.imm32
);
954 ppX86RM(i
->Xin
.Test32
.dst
);
957 vex_printf("%sl ", showX86UnaryOp(i
->Xin
.Unary32
.op
));
958 ppHRegX86(i
->Xin
.Unary32
.dst
);
962 ppX86AMode(i
->Xin
.Lea32
.am
);
964 ppHRegX86(i
->Xin
.Lea32
.dst
);
967 vex_printf("%cmull ", i
->Xin
.MulL
.syned
? 's' : 'u');
968 ppX86RM(i
->Xin
.MulL
.src
);
971 vex_printf("%cdivl ", i
->Xin
.Div
.syned
? 's' : 'u');
972 ppX86RM(i
->Xin
.Div
.src
);
975 vex_printf("%sdl ", showX86ShiftOp(i
->Xin
.Sh3232
.op
));
976 if (i
->Xin
.Sh3232
.amt
== 0)
977 vex_printf(" %%cl,");
979 vex_printf(" $%d,", (Int
)i
->Xin
.Sh3232
.amt
);
980 ppHRegX86(i
->Xin
.Sh3232
.src
);
982 ppHRegX86(i
->Xin
.Sh3232
.dst
);
985 vex_printf("pushl ");
986 ppX86RMI(i
->Xin
.Push
.src
);
989 vex_printf("call%s[%d,",
990 i
->Xin
.Call
.cond
==Xcc_ALWAYS
991 ? "" : showX86CondCode(i
->Xin
.Call
.cond
),
992 i
->Xin
.Call
.regparms
);
993 ppRetLoc(i
->Xin
.Call
.rloc
);
994 vex_printf("] 0x%x", i
->Xin
.Call
.target
);
997 vex_printf("(xDirect) ");
998 vex_printf("if (%%eflags.%s) { ",
999 showX86CondCode(i
->Xin
.XDirect
.cond
));
1000 vex_printf("movl $0x%x,", i
->Xin
.XDirect
.dstGA
);
1001 ppX86AMode(i
->Xin
.XDirect
.amEIP
);
1003 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
1004 i
->Xin
.XDirect
.toFastEP
? "fast" : "slow");
1007 vex_printf("(xIndir) ");
1008 vex_printf("if (%%eflags.%s) { movl ",
1009 showX86CondCode(i
->Xin
.XIndir
.cond
));
1010 ppHRegX86(i
->Xin
.XIndir
.dstGA
);
1012 ppX86AMode(i
->Xin
.XIndir
.amEIP
);
1013 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
1016 vex_printf("(xAssisted) ");
1017 vex_printf("if (%%eflags.%s) { ",
1018 showX86CondCode(i
->Xin
.XAssisted
.cond
));
1019 vex_printf("movl ");
1020 ppHRegX86(i
->Xin
.XAssisted
.dstGA
);
1022 ppX86AMode(i
->Xin
.XAssisted
.amEIP
);
1023 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
1024 (Int
)i
->Xin
.XAssisted
.jk
);
1025 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
1028 vex_printf("cmov%s ", showX86CondCode(i
->Xin
.CMov32
.cond
));
1029 ppX86RM(i
->Xin
.CMov32
.src
);
1031 ppHRegX86(i
->Xin
.CMov32
.dst
);
1034 vex_printf("mov%c%cl ",
1035 i
->Xin
.LoadEX
.syned
? 's' : 'z',
1036 i
->Xin
.LoadEX
.szSmall
==1 ? 'b' : 'w');
1037 ppX86AMode(i
->Xin
.LoadEX
.src
);
1039 ppHRegX86(i
->Xin
.LoadEX
.dst
);
1042 vex_printf("mov%c ", i
->Xin
.Store
.sz
==1 ? 'b' : 'w');
1043 ppHRegX86(i
->Xin
.Store
.src
);
1045 ppX86AMode(i
->Xin
.Store
.dst
);
1048 vex_printf("setl%s ", showX86CondCode(i
->Xin
.Set32
.cond
));
1049 ppHRegX86(i
->Xin
.Set32
.dst
);
1052 vex_printf("bs%cl ", i
->Xin
.Bsfr32
.isFwds
? 'f' : 'r');
1053 ppHRegX86(i
->Xin
.Bsfr32
.src
);
1055 ppHRegX86(i
->Xin
.Bsfr32
.dst
);
1058 vex_printf("mfence(%s)",
1059 LibVEX_ppVexHwCaps(VexArchX86
,i
->Xin
.MFence
.hwcaps
));
1062 vex_printf("lock cmpxchg%c ",
1063 i
->Xin
.ACAS
.sz
==1 ? 'b'
1064 : i
->Xin
.ACAS
.sz
==2 ? 'w' : 'l');
1065 vex_printf("{%%eax->%%ebx},");
1066 ppX86AMode(i
->Xin
.ACAS
.addr
);
1069 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
1070 ppX86AMode(i
->Xin
.DACAS
.addr
);
1073 vex_printf("g%sD ", showX86FpOp(i
->Xin
.FpUnary
.op
));
1074 ppHRegX86(i
->Xin
.FpUnary
.src
);
1076 ppHRegX86(i
->Xin
.FpUnary
.dst
);
1079 vex_printf("g%sD ", showX86FpOp(i
->Xin
.FpBinary
.op
));
1080 ppHRegX86(i
->Xin
.FpBinary
.srcL
);
1082 ppHRegX86(i
->Xin
.FpBinary
.srcR
);
1084 ppHRegX86(i
->Xin
.FpBinary
.dst
);
1087 if (i
->Xin
.FpLdSt
.isLoad
) {
1088 vex_printf("gld%c " , i
->Xin
.FpLdSt
.sz
==10 ? 'T'
1089 : (i
->Xin
.FpLdSt
.sz
==8 ? 'D' : 'F'));
1090 ppX86AMode(i
->Xin
.FpLdSt
.addr
);
1092 ppHRegX86(i
->Xin
.FpLdSt
.reg
);
1094 vex_printf("gst%c " , i
->Xin
.FpLdSt
.sz
==10 ? 'T'
1095 : (i
->Xin
.FpLdSt
.sz
==8 ? 'D' : 'F'));
1096 ppHRegX86(i
->Xin
.FpLdSt
.reg
);
1098 ppX86AMode(i
->Xin
.FpLdSt
.addr
);
1102 if (i
->Xin
.FpLdStI
.isLoad
) {
1103 vex_printf("gild%s ", i
->Xin
.FpLdStI
.sz
==8 ? "ll" :
1104 i
->Xin
.FpLdStI
.sz
==4 ? "l" : "w");
1105 ppX86AMode(i
->Xin
.FpLdStI
.addr
);
1107 ppHRegX86(i
->Xin
.FpLdStI
.reg
);
1109 vex_printf("gist%s ", i
->Xin
.FpLdStI
.sz
==8 ? "ll" :
1110 i
->Xin
.FpLdStI
.sz
==4 ? "l" : "w");
1111 ppHRegX86(i
->Xin
.FpLdStI
.reg
);
1113 ppX86AMode(i
->Xin
.FpLdStI
.addr
);
1117 vex_printf("gdtof ");
1118 ppHRegX86(i
->Xin
.Fp64to32
.src
);
1120 ppHRegX86(i
->Xin
.Fp64to32
.dst
);
1123 vex_printf("gcmov%s ", showX86CondCode(i
->Xin
.FpCMov
.cond
));
1124 ppHRegX86(i
->Xin
.FpCMov
.src
);
1126 ppHRegX86(i
->Xin
.FpCMov
.dst
);
1129 vex_printf("fldcw ");
1130 ppX86AMode(i
->Xin
.FpLdCW
.addr
);
1133 vex_printf("fstsw %%ax");
1136 vex_printf("gcmp ");
1137 ppHRegX86(i
->Xin
.FpCmp
.srcL
);
1139 ppHRegX86(i
->Xin
.FpCmp
.srcR
);
1141 ppHRegX86(i
->Xin
.FpCmp
.dst
);
1144 vex_printf("const $0x%04x,", (Int
)i
->Xin
.SseConst
.con
);
1145 ppHRegX86(i
->Xin
.SseConst
.dst
);
1148 vex_printf("movups ");
1149 if (i
->Xin
.SseLdSt
.isLoad
) {
1150 ppX86AMode(i
->Xin
.SseLdSt
.addr
);
1152 ppHRegX86(i
->Xin
.SseLdSt
.reg
);
1154 ppHRegX86(i
->Xin
.SseLdSt
.reg
);
1156 ppX86AMode(i
->Xin
.SseLdSt
.addr
);
1160 vex_printf("movs%s ", i
->Xin
.SseLdzLO
.sz
==4 ? "s" : "d");
1161 ppX86AMode(i
->Xin
.SseLdzLO
.addr
);
1163 ppHRegX86(i
->Xin
.SseLdzLO
.reg
);
1166 vex_printf("%sps ", showX86SseOp(i
->Xin
.Sse32Fx4
.op
));
1167 ppHRegX86(i
->Xin
.Sse32Fx4
.src
);
1169 ppHRegX86(i
->Xin
.Sse32Fx4
.dst
);
1172 vex_printf("%sss ", showX86SseOp(i
->Xin
.Sse32FLo
.op
));
1173 ppHRegX86(i
->Xin
.Sse32FLo
.src
);
1175 ppHRegX86(i
->Xin
.Sse32FLo
.dst
);
1178 vex_printf("%spd ", showX86SseOp(i
->Xin
.Sse64Fx2
.op
));
1179 ppHRegX86(i
->Xin
.Sse64Fx2
.src
);
1181 ppHRegX86(i
->Xin
.Sse64Fx2
.dst
);
1184 vex_printf("%ssd ", showX86SseOp(i
->Xin
.Sse64FLo
.op
));
1185 ppHRegX86(i
->Xin
.Sse64FLo
.src
);
1187 ppHRegX86(i
->Xin
.Sse64FLo
.dst
);
1190 vex_printf("%s ", showX86SseOp(i
->Xin
.SseReRg
.op
));
1191 ppHRegX86(i
->Xin
.SseReRg
.src
);
1193 ppHRegX86(i
->Xin
.SseReRg
.dst
);
1196 vex_printf("cmov%s ", showX86CondCode(i
->Xin
.SseCMov
.cond
));
1197 ppHRegX86(i
->Xin
.SseCMov
.src
);
1199 ppHRegX86(i
->Xin
.SseCMov
.dst
);
1202 vex_printf("pshufd $0x%x,", (UInt
)i
->Xin
.SseShuf
.order
);
1203 ppHRegX86(i
->Xin
.SseShuf
.src
);
1205 ppHRegX86(i
->Xin
.SseShuf
.dst
);
1208 vex_printf("(evCheck) decl ");
1209 ppX86AMode(i
->Xin
.EvCheck
.amCounter
);
1210 vex_printf("; jns nofail; jmp *");
1211 ppX86AMode(i
->Xin
.EvCheck
.amFailAddr
);
1212 vex_printf("; nofail:");
1215 vex_printf("(profInc) addl $1,NotKnownYet; "
1216 "adcl $0,NotKnownYet+4");
1219 vpanic("ppX86Instr");
1223 /* --------- Helpers for register allocation. --------- */
1225 void getRegUsage_X86Instr (HRegUsage
* u
, const X86Instr
* i
, Bool mode64
)
1228 vassert(mode64
== False
);
1232 addRegUsage_X86RMI(u
, i
->Xin
.Alu32R
.src
);
1233 if (i
->Xin
.Alu32R
.op
== Xalu_MOV
) {
1234 addHRegUse(u
, HRmWrite
, i
->Xin
.Alu32R
.dst
);
1236 if (i
->Xin
.Alu32R
.src
->tag
== Xrmi_Reg
) {
1237 u
->isRegRegMove
= True
;
1238 u
->regMoveSrc
= i
->Xin
.Alu32R
.src
->Xrmi
.Reg
.reg
;
1239 u
->regMoveDst
= i
->Xin
.Alu32R
.dst
;
1243 if (i
->Xin
.Alu32R
.op
== Xalu_CMP
) {
1244 addHRegUse(u
, HRmRead
, i
->Xin
.Alu32R
.dst
);
1247 addHRegUse(u
, HRmModify
, i
->Xin
.Alu32R
.dst
);
1250 addRegUsage_X86RI(u
, i
->Xin
.Alu32M
.src
);
1251 addRegUsage_X86AMode(u
, i
->Xin
.Alu32M
.dst
);
1254 addHRegUse(u
, HRmModify
, i
->Xin
.Sh32
.dst
);
1255 if (i
->Xin
.Sh32
.src
== 0)
1256 addHRegUse(u
, HRmRead
, hregX86_ECX());
1259 addRegUsage_X86RM(u
, i
->Xin
.Test32
.dst
, HRmRead
);
1262 addHRegUse(u
, HRmModify
, i
->Xin
.Unary32
.dst
);
1265 addRegUsage_X86AMode(u
, i
->Xin
.Lea32
.am
);
1266 addHRegUse(u
, HRmWrite
, i
->Xin
.Lea32
.dst
);
1269 addRegUsage_X86RM(u
, i
->Xin
.MulL
.src
, HRmRead
);
1270 addHRegUse(u
, HRmModify
, hregX86_EAX());
1271 addHRegUse(u
, HRmWrite
, hregX86_EDX());
1274 addRegUsage_X86RM(u
, i
->Xin
.Div
.src
, HRmRead
);
1275 addHRegUse(u
, HRmModify
, hregX86_EAX());
1276 addHRegUse(u
, HRmModify
, hregX86_EDX());
1279 addHRegUse(u
, HRmRead
, i
->Xin
.Sh3232
.src
);
1280 addHRegUse(u
, HRmModify
, i
->Xin
.Sh3232
.dst
);
1281 if (i
->Xin
.Sh3232
.amt
== 0)
1282 addHRegUse(u
, HRmRead
, hregX86_ECX());
1285 addRegUsage_X86RMI(u
, i
->Xin
.Push
.src
);
1286 addHRegUse(u
, HRmModify
, hregX86_ESP());
1289 /* This is a bit subtle. */
1290 /* First off, claim it trashes all the caller-saved regs
1291 which fall within the register allocator's jurisdiction.
1292 These I believe to be %eax %ecx %edx and all the xmm
1294 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1295 addHRegUse(u
, HRmWrite
, hregX86_ECX());
1296 addHRegUse(u
, HRmWrite
, hregX86_EDX());
1297 addHRegUse(u
, HRmWrite
, hregX86_XMM0());
1298 addHRegUse(u
, HRmWrite
, hregX86_XMM1());
1299 addHRegUse(u
, HRmWrite
, hregX86_XMM2());
1300 addHRegUse(u
, HRmWrite
, hregX86_XMM3());
1301 addHRegUse(u
, HRmWrite
, hregX86_XMM4());
1302 addHRegUse(u
, HRmWrite
, hregX86_XMM5());
1303 addHRegUse(u
, HRmWrite
, hregX86_XMM6());
1304 addHRegUse(u
, HRmWrite
, hregX86_XMM7());
1305 /* Now we have to state any parameter-carrying registers
1306 which might be read. This depends on the regparmness. */
1307 switch (i
->Xin
.Call
.regparms
) {
1308 case 3: addHRegUse(u
, HRmRead
, hregX86_ECX()); /*fallthru*/
1309 case 2: addHRegUse(u
, HRmRead
, hregX86_EDX()); /*fallthru*/
1310 case 1: addHRegUse(u
, HRmRead
, hregX86_EAX()); break;
1312 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1314 /* Finally, there is the issue that the insn trashes a
1315 register because the literal target address has to be
1316 loaded into a register. Fortunately, for the 0/1/2
1317 regparm case, we can use EAX, EDX and ECX respectively, so
1318 this does not cause any further damage. For the 3-regparm
1319 case, we'll have to choose another register arbitrarily --
1320 since A, D and C are used for parameters -- and so we might
1321 as well choose EDI. */
1322 if (i
->Xin
.Call
.regparms
== 3)
1323 addHRegUse(u
, HRmWrite
, hregX86_EDI());
1324 /* Upshot of this is that the assembler really must observe
1325 the here-stated convention of which register to use as an
1326 address temporary, depending on the regparmness: 0==EAX,
1327 1==EDX, 2==ECX, 3==EDI. */
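/* Rough illustration only (hypothetical helper): with 1 regparm the call
   site ends up along the lines of "movl $helper,%edx ; call *%edx" with
   the argument already in %eax, so %edx doubles as the address temporary
   exactly as stated above. */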
1329 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1330 conditionally exit the block. Hence we only need to list (1)
1331 the registers that they read, and (2) the registers that they
1332 write in the case where the block is not exited. (2) is
1333 empty, hence only (1) is relevant here. */
1335 addRegUsage_X86AMode(u
, i
->Xin
.XDirect
.amEIP
);
1338 addHRegUse(u
, HRmRead
, i
->Xin
.XIndir
.dstGA
);
1339 addRegUsage_X86AMode(u
, i
->Xin
.XIndir
.amEIP
);
1342 addHRegUse(u
, HRmRead
, i
->Xin
.XAssisted
.dstGA
);
1343 addRegUsage_X86AMode(u
, i
->Xin
.XAssisted
.amEIP
);
1346 addRegUsage_X86RM(u
, i
->Xin
.CMov32
.src
, HRmRead
);
1347 addHRegUse(u
, HRmModify
, i
->Xin
.CMov32
.dst
);
1350 addRegUsage_X86AMode(u
, i
->Xin
.LoadEX
.src
);
1351 addHRegUse(u
, HRmWrite
, i
->Xin
.LoadEX
.dst
);
1354 addHRegUse(u
, HRmRead
, i
->Xin
.Store
.src
);
1355 addRegUsage_X86AMode(u
, i
->Xin
.Store
.dst
);
1358 addHRegUse(u
, HRmWrite
, i
->Xin
.Set32
.dst
);
1361 addHRegUse(u
, HRmRead
, i
->Xin
.Bsfr32
.src
);
1362 addHRegUse(u
, HRmWrite
, i
->Xin
.Bsfr32
.dst
);
1367 addRegUsage_X86AMode(u
, i
->Xin
.ACAS
.addr
);
1368 addHRegUse(u
, HRmRead
, hregX86_EBX());
1369 addHRegUse(u
, HRmModify
, hregX86_EAX());
1372 addRegUsage_X86AMode(u
, i
->Xin
.DACAS
.addr
);
1373 addHRegUse(u
, HRmRead
, hregX86_ECX());
1374 addHRegUse(u
, HRmRead
, hregX86_EBX());
1375 addHRegUse(u
, HRmModify
, hregX86_EDX());
1376 addHRegUse(u
, HRmModify
, hregX86_EAX());
1379 addHRegUse(u
, HRmRead
, i
->Xin
.FpUnary
.src
);
1380 addHRegUse(u
, HRmWrite
, i
->Xin
.FpUnary
.dst
);
1382 if (i
->Xin
.FpUnary
.op
== Xfp_MOV
) {
1383 u
->isRegRegMove
= True
;
1384 u
->regMoveSrc
= i
->Xin
.FpUnary
.src
;
1385 u
->regMoveDst
= i
->Xin
.FpUnary
.dst
;
1389 addHRegUse(u
, HRmRead
, i
->Xin
.FpBinary
.srcL
);
1390 addHRegUse(u
, HRmRead
, i
->Xin
.FpBinary
.srcR
);
1391 addHRegUse(u
, HRmWrite
, i
->Xin
.FpBinary
.dst
);
1394 addRegUsage_X86AMode(u
, i
->Xin
.FpLdSt
.addr
);
1395 addHRegUse(u
, i
->Xin
.FpLdSt
.isLoad
? HRmWrite
: HRmRead
,
1399 addRegUsage_X86AMode(u
, i
->Xin
.FpLdStI
.addr
);
1400 addHRegUse(u
, i
->Xin
.FpLdStI
.isLoad
? HRmWrite
: HRmRead
,
1401 i
->Xin
.FpLdStI
.reg
);
1404 addHRegUse(u
, HRmRead
, i
->Xin
.Fp64to32
.src
);
1405 addHRegUse(u
, HRmWrite
, i
->Xin
.Fp64to32
.dst
);
1408 addHRegUse(u
, HRmRead
, i
->Xin
.FpCMov
.src
);
1409 addHRegUse(u
, HRmModify
, i
->Xin
.FpCMov
.dst
);
1412 addRegUsage_X86AMode(u
, i
->Xin
.FpLdCW
.addr
);
1415 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1418 addHRegUse(u
, HRmRead
, i
->Xin
.FpCmp
.srcL
);
1419 addHRegUse(u
, HRmRead
, i
->Xin
.FpCmp
.srcR
);
1420 addHRegUse(u
, HRmWrite
, i
->Xin
.FpCmp
.dst
);
1421 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1424 addRegUsage_X86AMode(u
, i
->Xin
.SseLdSt
.addr
);
1425 addHRegUse(u
, i
->Xin
.SseLdSt
.isLoad
? HRmWrite
: HRmRead
,
1426 i
->Xin
.SseLdSt
.reg
);
1429 addRegUsage_X86AMode(u
, i
->Xin
.SseLdzLO
.addr
);
1430 addHRegUse(u
, HRmWrite
, i
->Xin
.SseLdzLO
.reg
);
1433 addHRegUse(u
, HRmWrite
, i
->Xin
.SseConst
.dst
);
1436 vassert(i
->Xin
.Sse32Fx4
.op
!= Xsse_MOV
);
1437 unary
= toBool( i
->Xin
.Sse32Fx4
.op
== Xsse_RCPF
1438 || i
->Xin
.Sse32Fx4
.op
== Xsse_RSQRTF
1439 || i
->Xin
.Sse32Fx4
.op
== Xsse_SQRTF
);
1440 addHRegUse(u
, HRmRead
, i
->Xin
.Sse32Fx4
.src
);
1441 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1442 i
->Xin
.Sse32Fx4
.dst
);
1445 vassert(i
->Xin
.Sse32FLo
.op
!= Xsse_MOV
);
1446 unary
= toBool( i
->Xin
.Sse32FLo
.op
== Xsse_RCPF
1447 || i
->Xin
.Sse32FLo
.op
== Xsse_RSQRTF
1448 || i
->Xin
.Sse32FLo
.op
== Xsse_SQRTF
);
1449 addHRegUse(u
, HRmRead
, i
->Xin
.Sse32FLo
.src
);
1450 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1451 i
->Xin
.Sse32FLo
.dst
);
1454 vassert(i
->Xin
.Sse64Fx2
.op
!= Xsse_MOV
);
1455 unary
= toBool( i
->Xin
.Sse64Fx2
.op
== Xsse_RCPF
1456 || i
->Xin
.Sse64Fx2
.op
== Xsse_RSQRTF
1457 || i
->Xin
.Sse64Fx2
.op
== Xsse_SQRTF
);
1458 addHRegUse(u
, HRmRead
, i
->Xin
.Sse64Fx2
.src
);
1459 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1460 i
->Xin
.Sse64Fx2
.dst
);
1463 vassert(i
->Xin
.Sse64FLo
.op
!= Xsse_MOV
);
1464 unary
= toBool( i
->Xin
.Sse64FLo
.op
== Xsse_RCPF
1465 || i
->Xin
.Sse64FLo
.op
== Xsse_RSQRTF
1466 || i
->Xin
.Sse64FLo
.op
== Xsse_SQRTF
);
1467 addHRegUse(u
, HRmRead
, i
->Xin
.Sse64FLo
.src
);
1468 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1469 i
->Xin
.Sse64FLo
.dst
);
1472 if (i
->Xin
.SseReRg
.op
== Xsse_XOR
1473 && sameHReg(i
->Xin
.SseReRg
.src
, i
->Xin
.SseReRg
.dst
)) {
1474 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1475 /* (as opposed to a rite of passage :-) */
1476 addHRegUse(u
, HRmWrite
, i
->Xin
.SseReRg
.dst
);
1478 addHRegUse(u
, HRmRead
, i
->Xin
.SseReRg
.src
);
1479 addHRegUse(u
, i
->Xin
.SseReRg
.op
== Xsse_MOV
1480 ? HRmWrite
: HRmModify
,
1481 i
->Xin
.SseReRg
.dst
);
1483 if (i
->Xin
.SseReRg
.op
== Xsse_MOV
) {
1484 u
->isRegRegMove
= True
;
1485 u
->regMoveSrc
= i
->Xin
.SseReRg
.src
;
1486 u
->regMoveDst
= i
->Xin
.SseReRg
.dst
;
1491 addHRegUse(u
, HRmRead
, i
->Xin
.SseCMov
.src
);
1492 addHRegUse(u
, HRmModify
, i
->Xin
.SseCMov
.dst
);
1495 addHRegUse(u
, HRmRead
, i
->Xin
.SseShuf
.src
);
1496 addHRegUse(u
, HRmWrite
, i
->Xin
.SseShuf
.dst
);
1499 /* We expect both amodes only to mention %ebp, so this is in
1500 fact pointless, since %ebp isn't allocatable, but anyway.. */
1501 addRegUsage_X86AMode(u
, i
->Xin
.EvCheck
.amCounter
);
1502 addRegUsage_X86AMode(u
, i
->Xin
.EvCheck
.amFailAddr
);
1505 /* does not use any registers. */
1508 ppX86Instr(i
, False
);
1509 vpanic("getRegUsage_X86Instr");
1514 static void mapReg( HRegRemap
* m
, HReg
* r
)
1516 *r
= lookupHRegRemap(m
, *r
);
1519 void mapRegs_X86Instr ( HRegRemap
* m
, X86Instr
* i
, Bool mode64
)
1521 vassert(mode64
== False
);
1524 mapRegs_X86RMI(m
, i
->Xin
.Alu32R
.src
);
1525 mapReg(m
, &i
->Xin
.Alu32R
.dst
);
1528 mapRegs_X86RI(m
, i
->Xin
.Alu32M
.src
);
1529 mapRegs_X86AMode(m
, i
->Xin
.Alu32M
.dst
);
1532 mapReg(m
, &i
->Xin
.Sh32
.dst
);
1535 mapRegs_X86RM(m
, i
->Xin
.Test32
.dst
);
1538 mapReg(m
, &i
->Xin
.Unary32
.dst
);
1541 mapRegs_X86AMode(m
, i
->Xin
.Lea32
.am
);
1542 mapReg(m
, &i
->Xin
.Lea32
.dst
);
1545 mapRegs_X86RM(m
, i
->Xin
.MulL
.src
);
1548 mapRegs_X86RM(m
, i
->Xin
.Div
.src
);
1551 mapReg(m
, &i
->Xin
.Sh3232
.src
);
1552 mapReg(m
, &i
->Xin
.Sh3232
.dst
);
1555 mapRegs_X86RMI(m
, i
->Xin
.Push
.src
);
1560 mapRegs_X86AMode(m
, i
->Xin
.XDirect
.amEIP
);
1563 mapReg(m
, &i
->Xin
.XIndir
.dstGA
);
1564 mapRegs_X86AMode(m
, i
->Xin
.XIndir
.amEIP
);
1567 mapReg(m
, &i
->Xin
.XAssisted
.dstGA
);
1568 mapRegs_X86AMode(m
, i
->Xin
.XAssisted
.amEIP
);
1571 mapRegs_X86RM(m
, i
->Xin
.CMov32
.src
);
1572 mapReg(m
, &i
->Xin
.CMov32
.dst
);
1575 mapRegs_X86AMode(m
, i
->Xin
.LoadEX
.src
);
1576 mapReg(m
, &i
->Xin
.LoadEX
.dst
);
1579 mapReg(m
, &i
->Xin
.Store
.src
);
1580 mapRegs_X86AMode(m
, i
->Xin
.Store
.dst
);
1583 mapReg(m
, &i
->Xin
.Set32
.dst
);
1586 mapReg(m
, &i
->Xin
.Bsfr32
.src
);
1587 mapReg(m
, &i
->Xin
.Bsfr32
.dst
);
1592 mapRegs_X86AMode(m
, i
->Xin
.ACAS
.addr
);
1595 mapRegs_X86AMode(m
, i
->Xin
.DACAS
.addr
);
1598 mapReg(m
, &i
->Xin
.FpUnary
.src
);
1599 mapReg(m
, &i
->Xin
.FpUnary
.dst
);
1602 mapReg(m
, &i
->Xin
.FpBinary
.srcL
);
1603 mapReg(m
, &i
->Xin
.FpBinary
.srcR
);
1604 mapReg(m
, &i
->Xin
.FpBinary
.dst
);
1607 mapRegs_X86AMode(m
, i
->Xin
.FpLdSt
.addr
);
1608 mapReg(m
, &i
->Xin
.FpLdSt
.reg
);
1611 mapRegs_X86AMode(m
, i
->Xin
.FpLdStI
.addr
);
1612 mapReg(m
, &i
->Xin
.FpLdStI
.reg
);
1615 mapReg(m
, &i
->Xin
.Fp64to32
.src
);
1616 mapReg(m
, &i
->Xin
.Fp64to32
.dst
);
1619 mapReg(m
, &i
->Xin
.FpCMov
.src
);
1620 mapReg(m
, &i
->Xin
.FpCMov
.dst
);
1623 mapRegs_X86AMode(m
, i
->Xin
.FpLdCW
.addr
);
1628 mapReg(m
, &i
->Xin
.FpCmp
.srcL
);
1629 mapReg(m
, &i
->Xin
.FpCmp
.srcR
);
1630 mapReg(m
, &i
->Xin
.FpCmp
.dst
);
1633 mapReg(m
, &i
->Xin
.SseConst
.dst
);
1636 mapReg(m
, &i
->Xin
.SseLdSt
.reg
);
1637 mapRegs_X86AMode(m
, i
->Xin
.SseLdSt
.addr
);
1640 mapReg(m
, &i
->Xin
.SseLdzLO
.reg
);
1641 mapRegs_X86AMode(m
, i
->Xin
.SseLdzLO
.addr
);
1644 mapReg(m
, &i
->Xin
.Sse32Fx4
.src
);
1645 mapReg(m
, &i
->Xin
.Sse32Fx4
.dst
);
1648 mapReg(m
, &i
->Xin
.Sse32FLo
.src
);
1649 mapReg(m
, &i
->Xin
.Sse32FLo
.dst
);
1652 mapReg(m
, &i
->Xin
.Sse64Fx2
.src
);
1653 mapReg(m
, &i
->Xin
.Sse64Fx2
.dst
);
1656 mapReg(m
, &i
->Xin
.Sse64FLo
.src
);
1657 mapReg(m
, &i
->Xin
.Sse64FLo
.dst
);
1660 mapReg(m
, &i
->Xin
.SseReRg
.src
);
1661 mapReg(m
, &i
->Xin
.SseReRg
.dst
);
1664 mapReg(m
, &i
->Xin
.SseCMov
.src
);
1665 mapReg(m
, &i
->Xin
.SseCMov
.dst
);
1668 mapReg(m
, &i
->Xin
.SseShuf
.src
);
1669 mapReg(m
, &i
->Xin
.SseShuf
.dst
);
1672 /* We expect both amodes only to mention %ebp, so this is in
1673 fact pointless, since %ebp isn't allocatable, but anyway.. */
1674 mapRegs_X86AMode(m
, i
->Xin
.EvCheck
.amCounter
);
1675 mapRegs_X86AMode(m
, i
->Xin
.EvCheck
.amFailAddr
);
1678 /* does not use any registers. */
1682 ppX86Instr(i
, mode64
);
1683 vpanic("mapRegs_X86Instr");

/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
{
   switch (hregClass(from)) {
      case HRcInt32:
         return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
      case HRcVec128:
         return X86Instr_SseReRg(Xsse_MOV, from, to);
      default:
         ppHRegClass(hregClass(from));
         vpanic("genMove_X86: unimplemented regclass");
   }
}
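
/* For orientation (illustrative offset): spilling an HRcInt32 rreg at
   offsetB == 0x18 yields "movl %reg,0x18(%ebp)" and the matching reload
   yields "movl 0x18(%ebp),%reg"; spill slots live in the guest-state
   area addressed off %ebp, which is why %ebp is not allocatable. */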

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction which instead
   references the spill slot directly. */
1759 X86Instr
* directReload_X86( X86Instr
* i
, HReg vreg
, Short spill_off
)
1761 vassert(spill_off
>= 0 && spill_off
< 10000); /* let's say */
1763 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1764 Convert to: src=RMI_Mem, dst=Reg
1766 if (i
->tag
== Xin_Alu32R
1767 && (i
->Xin
.Alu32R
.op
== Xalu_MOV
|| i
->Xin
.Alu32R
.op
== Xalu_OR
1768 || i
->Xin
.Alu32R
.op
== Xalu_XOR
)
1769 && i
->Xin
.Alu32R
.src
->tag
== Xrmi_Reg
1770 && sameHReg(i
->Xin
.Alu32R
.src
->Xrmi
.Reg
.reg
, vreg
)) {
1771 vassert(! sameHReg(i
->Xin
.Alu32R
.dst
, vreg
));
1772 return X86Instr_Alu32R(
1774 X86RMI_Mem( X86AMode_IR( spill_off
, hregX86_EBP())),
1779 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1780 Convert to: src=RI_Imm, dst=Mem
1782 if (i
->tag
== Xin_Alu32R
1783 && (i
->Xin
.Alu32R
.op
== Xalu_CMP
)
1784 && i
->Xin
.Alu32R
.src
->tag
== Xrmi_Imm
1785 && sameHReg(i
->Xin
.Alu32R
.dst
, vreg
)) {
1786 return X86Instr_Alu32M(
1788 X86RI_Imm( i
->Xin
.Alu32R
.src
->Xrmi
.Imm
.imm32
),
1789 X86AMode_IR( spill_off
, hregX86_EBP())
1793 /* Deal with form: Push(RMI_Reg)
1794 Convert to: Push(RMI_Mem)
1796 if (i
->tag
== Xin_Push
1797 && i
->Xin
.Push
.src
->tag
== Xrmi_Reg
1798 && sameHReg(i
->Xin
.Push
.src
->Xrmi
.Reg
.reg
, vreg
)) {
1799 return X86Instr_Push(
1800 X86RMI_Mem( X86AMode_IR( spill_off
, hregX86_EBP()))
1804 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1805 Convert to CMov32(RM_Mem, dst) */
1806 if (i
->tag
== Xin_CMov32
1807 && i
->Xin
.CMov32
.src
->tag
== Xrm_Reg
1808 && sameHReg(i
->Xin
.CMov32
.src
->Xrm
.Reg
.reg
, vreg
)) {
1809 vassert(! sameHReg(i
->Xin
.CMov32
.dst
, vreg
));
1810 return X86Instr_CMov32(
1812 X86RM_Mem( X86AMode_IR( spill_off
, hregX86_EBP() )),
1817 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1818 if (i
->tag
== Xin_Test32
1819 && i
->Xin
.Test32
.dst
->tag
== Xrm_Reg
1820 && sameHReg(i
->Xin
.Test32
.dst
->Xrm
.Reg
.reg
, vreg
)) {
1821 return X86Instr_Test32(
1822 i
->Xin
.Test32
.imm32
,
1823 X86RM_Mem( X86AMode_IR( spill_off
, hregX86_EBP() ) )

/* --------- The x86 assembler (bleh.) --------- */

inline static UInt iregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UInt fregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 5);
   return n;
}

inline static UInt vregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
{
   vassert(mod < 4);
   vassert((reg|regmem) < 8);
   return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
}

inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
{
   vassert(shift < 4);
   vassert((regindex|regbase) < 8);
   return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
}

static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}
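
/* Little-endian check: emit32(p, 0x12345678) stores bytes 78 56 34 12 at
   p[0..3] and returns p+4. */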

/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((Int)(w32 << 24) >> 24));
}
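
/* Examples: fits8bits(0x7F) and fits8bits(0xFFFFFF80) are True (both
   survive the truncate-then-sign-extend round trip); fits8bits(0x80) is
   False. */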

/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
               =  10 greg 100, scale index base, d32
*/
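
/* Worked example (standard IA-32 encoding, for orientation): for
   "movl %eax, 8(%ebx)" the mod-reg-rm byte is
   mkModRegRM(1, 0/*eax*/, 3/*ebx*/) == 0x43, so with opcode 0x89 the
   emitted bytes are 89 43 08. */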
1918 static UChar
* doAMode_M__wrk ( UChar
* p
, UInt gregEnc
, X86AMode
* am
)
1920 if (am
->tag
== Xam_IR
) {
1921 if (am
->Xam
.IR
.imm
== 0
1922 && ! sameHReg(am
->Xam
.IR
.reg
, hregX86_ESP())
1923 && ! sameHReg(am
->Xam
.IR
.reg
, hregX86_EBP()) ) {
1924 *p
++ = mkModRegRM(0, gregEnc
, iregEnc(am
->Xam
.IR
.reg
));
1927 if (fits8bits(am
->Xam
.IR
.imm
)
1928 && ! sameHReg(am
->Xam
.IR
.reg
, hregX86_ESP())) {
1929 *p
++ = mkModRegRM(1, gregEnc
, iregEnc(am
->Xam
.IR
.reg
));
1930 *p
++ = toUChar(am
->Xam
.IR
.imm
& 0xFF);
1933 if (! sameHReg(am
->Xam
.IR
.reg
, hregX86_ESP())) {
1934 *p
++ = mkModRegRM(2, gregEnc
, iregEnc(am
->Xam
.IR
.reg
));
1935 p
= emit32(p
, am
->Xam
.IR
.imm
);
1938 if (sameHReg(am
->Xam
.IR
.reg
, hregX86_ESP())
1939 && fits8bits(am
->Xam
.IR
.imm
)) {
1940 *p
++ = mkModRegRM(1, gregEnc
, 4);
1942 *p
++ = toUChar(am
->Xam
.IR
.imm
& 0xFF);
1946 vpanic("doAMode_M: can't emit amode IR");
1949 if (am
->tag
== Xam_IRRS
) {
1950 if (fits8bits(am
->Xam
.IRRS
.imm
)
1951 && ! sameHReg(am
->Xam
.IRRS
.index
, hregX86_ESP())) {
1952 *p
++ = mkModRegRM(1, gregEnc
, 4);
1953 *p
++ = mkSIB(am
->Xam
.IRRS
.shift
, iregEnc(am
->Xam
.IRRS
.index
),
1954 iregEnc(am
->Xam
.IRRS
.base
));
1955 *p
++ = toUChar(am
->Xam
.IRRS
.imm
& 0xFF);
1958 if (! sameHReg(am
->Xam
.IRRS
.index
, hregX86_ESP())) {
1959 *p
++ = mkModRegRM(2, gregEnc
, 4);
1960 *p
++ = mkSIB(am
->Xam
.IRRS
.shift
, iregEnc(am
->Xam
.IRRS
.index
),
1961 iregEnc(am
->Xam
.IRRS
.base
));
1962 p
= emit32(p
, am
->Xam
.IRRS
.imm
);
1966 vpanic("doAMode_M: can't emit amode IRRS");
1969 vpanic("doAMode_M: unknown amode");
1973 static UChar
* doAMode_M ( UChar
* p
, HReg greg
, X86AMode
* am
)
1975 return doAMode_M__wrk(p
, iregEnc(greg
), am
);
1978 static UChar
* doAMode_M_enc ( UChar
* p
, UInt gregEnc
, X86AMode
* am
)
1980 vassert(gregEnc
< 8);
1981 return doAMode_M__wrk(p
, gregEnc
, am
);
1985 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
1986 inline static UChar
* doAMode_R__wrk ( UChar
* p
, UInt gregEnc
, UInt eregEnc
)
1988 *p
++ = mkModRegRM(3, gregEnc
, eregEnc
);
1992 static UChar
* doAMode_R ( UChar
* p
, HReg greg
, HReg ereg
)
1994 return doAMode_R__wrk(p
, iregEnc(greg
), iregEnc(ereg
));
1997 static UChar
* doAMode_R_enc_reg ( UChar
* p
, UInt gregEnc
, HReg ereg
)
1999 vassert(gregEnc
< 8);
2000 return doAMode_R__wrk(p
, gregEnc
, iregEnc(ereg
));
2003 static UChar
* doAMode_R_enc_enc ( UChar
* p
, UInt gregEnc
, UInt eregEnc
)
2005 vassert( (gregEnc
|eregEnc
) < 8);
2006 return doAMode_R__wrk(p
, gregEnc
, eregEnc
);
2010 /* Emit ffree %st(7) */
2011 static UChar
* do_ffree_st7 ( UChar
* p
)
2018 /* Emit fstp %st(i), 1 <= i <= 7 */
2019 static UChar
* do_fstp_st ( UChar
* p
, Int i
)
2021 vassert(1 <= i
&& i
<= 7);
2023 *p
++ = toUChar(0xD8+i
);
2027 /* Emit fld %st(i), 0 <= i <= 6 */
2028 static UChar
* do_fld_st ( UChar
* p
, Int i
)
2030 vassert(0 <= i
&& i
<= 6);
2032 *p
++ = toUChar(0xC0+i
);
2036 /* Emit f<op> %st(0) */
2037 static UChar
* do_fop1_st ( UChar
* p
, X86FpOp op
)
2040 case Xfp_NEG
: *p
++ = 0xD9; *p
++ = 0xE0; break;
2041 case Xfp_ABS
: *p
++ = 0xD9; *p
++ = 0xE1; break;
2042 case Xfp_SQRT
: *p
++ = 0xD9; *p
++ = 0xFA; break;
2043 case Xfp_ROUND
: *p
++ = 0xD9; *p
++ = 0xFC; break;
2044 case Xfp_SIN
: *p
++ = 0xD9; *p
++ = 0xFE; break;
2045 case Xfp_COS
: *p
++ = 0xD9; *p
++ = 0xFF; break;
2046 case Xfp_2XM1
: *p
++ = 0xD9; *p
++ = 0xF0; break;
2047 case Xfp_MOV
: break;
2049 /* fptan pushes 1.0 on the FP stack, except when the argument
2050 is out of range. Hence we have to do the instruction,
2051 then inspect C2 to see if there is an out of range
2052 condition. If there is, we skip the fincstp that is used
2053 by the in-range case to get rid of this extra 1.0
2055 p
= do_ffree_st7(p
); /* since fptan sometimes pushes 1.0 */
2056 *p
++ = 0xD9; *p
++ = 0xF2; // fptan
2057 *p
++ = 0x50; // pushl %eax
2058 *p
++ = 0xDF; *p
++ = 0xE0; // fnstsw %ax
2059 *p
++ = 0x66; *p
++ = 0xA9;
2060 *p
++ = 0x00; *p
++ = 0x04; // testw $0x400,%ax
2061 *p
++ = 0x75; *p
++ = 0x02; // jnz after_fincstp
2062 *p
++ = 0xD9; *p
++ = 0xF7; // fincstp
2063 *p
++ = 0x58; // after_fincstp: popl %eax
2066 vpanic("do_fop1_st: unknown op");
2071 /* Emit f<op> %st(i), 1 <= i <= 5 */
2072 static UChar
* do_fop2_st ( UChar
* p
, X86FpOp op
, Int i
)
2076 case Xfp_ADD
: subopc
= 0; break;
2077 case Xfp_SUB
: subopc
= 4; break;
2078 case Xfp_MUL
: subopc
= 1; break;
2079 case Xfp_DIV
: subopc
= 6; break;
2080 default: vpanic("do_fop2_st: unknown op");
2083 p
= doAMode_R_enc_enc(p
, subopc
, i
);

/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each byte is either 0x00 or 0xFF depending on the corresponding bit
   in tags[]. */
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}
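
/* e.g. tags == 0x5 would give w == 0x00FF00FF (bytes 0 and 2 set); only
   the all-zeroes and all-ones fast cases are reachable at present. */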
2117 /* Emit an instruction into buf and return the number of bytes used.
2118 Note that buf is not the insn's final place, and therefore it is
2119 imperative to emit position-independent code. If the emitted
2120 instruction was a profiler inc, set *is_profInc to True, else
2121 leave it unchanged. */
2123 Int
emit_X86Instr ( /*MB_MOD*/Bool
* is_profInc
,
2124 UChar
* buf
, Int nbuf
, const X86Instr
* i
,
2125 Bool mode64
, VexEndness endness_host
,
2126 const void* disp_cp_chain_me_to_slowEP
,
2127 const void* disp_cp_chain_me_to_fastEP
,
2128 const void* disp_cp_xindir
,
2129 const void* disp_cp_xassisted
)
2131 UInt irno
, opc
, opc_rr
, subopc_imm
, opc_imma
, opc_cl
, opc_imm
, subopc
;
2136 vassert(nbuf
>= 32);
2137 vassert(mode64
== False
);
2139 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
2144 /* Deal specially with MOV */
2145 if (i
->Xin
.Alu32R
.op
== Xalu_MOV
) {
2146 switch (i
->Xin
.Alu32R
.src
->tag
) {
2148 *p
++ = toUChar(0xB8 + iregEnc(i
->Xin
.Alu32R
.dst
));
2149 p
= emit32(p
, i
->Xin
.Alu32R
.src
->Xrmi
.Imm
.imm32
);
2153 p
= doAMode_R(p
, i
->Xin
.Alu32R
.src
->Xrmi
.Reg
.reg
,
2158 p
= doAMode_M(p
, i
->Xin
.Alu32R
.dst
,
2159 i
->Xin
.Alu32R
.src
->Xrmi
.Mem
.am
);
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
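      /* Example of the reg-reg form selected by this table: for Xalu_ADD
         with a register source, opc_rr = 0x01 is used, so -- assuming
         doAMode_R puts its first argument in the ModRM reg field, as it
         appears to do elsewhere in this file -- "addl %ecx,%ebx" would
         be emitted as the two bytes 01 CB. */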
      switch (i->Xin.Alu32R.src->tag) {
            if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
               p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);

      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
      switch (i->Xin.Alu32M.src->tag) {
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
         *p++ = toUChar(opc_imm);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);

      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         /* testl $imm32, amode */
         p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);

      if (i->Xin.Unary32.op == Xun_NOT) {
         p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
      if (i->Xin.Unary32.op == Xun_NEG) {
         p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);

      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);

      subopc = i->Xin.MulL.syned ? 5 : 4;
      switch (i->Xin.MulL.src->tag) {
            p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
            p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);

      subopc = i->Xin.Div.syned ? 7 : 6;
      switch (i->Xin.Div.src->tag) {
            p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
            p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);

      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         if (i->Xin.Sh3232.op == Xsh_SHL) {
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);

      switch (i->Xin.Push.src->tag) {
            p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
      if (i->Xin.Call.cond != Xcc_ALWAYS
          && i->Xin.Call.rloc.pri != RLPri_None) {
         /* The call might not happen (it isn't unconditional) and it
            returns a result.  In this case we will need to generate a
            control flow diamond to put 0x555..555 in the return
            register(s) in the case where the call doesn't happen.  If
            this ever becomes necessary, maybe copy code from the ARM
            equivalent.  Until that day, just give up. */

      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregEnc(hregX86_EAX()); break;
         case 1: irno = iregEnc(hregX86_EDX()); break;
         case 2: irno = iregEnc(hregX86_ECX()); break;
         case 3: irno = iregEnc(hregX86_EDI()); break;
         default: vpanic(" emit_X86Instr:call:regparms");

      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */

      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      *p++ = toUChar(0xD0 + irno);
      /* NB: what goes on here has to be very closely coordinated with the
         chainXDirect_X86 and unchainXDirect_X86 below. */
      /* We're generating chain-me requests here, so we need to be
         sure this is actually allowed -- no-redir translations can't
         use chain-me's.  Hence: */
      vassert(disp_cp_chain_me_to_slowEP != NULL);
      vassert(disp_cp_chain_me_to_fastEP != NULL);

      /* Use ptmp for backpatching conditional jumps. */

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */

      /* Update the guest EIP. */
      /* movl $dstGA, amEIP */
      p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
      p = emit32(p, i->Xin.XDirect.dstGA);

      /* --- FIRST PATCHABLE BYTE follows --- */
      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
         to) backs up the return address, so as to find the address of
         the first patchable byte.  So: don't change the length of the
         two instructions below. */
      /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
      const void* disp_cp_chain_me
         = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                   : disp_cp_chain_me_to_slowEP;
      p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
      /* --- END of PATCHABLE BYTES --- */

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
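      /* The patchable region laid down here is the 7-byte sequence
            BA <imm32>    movl $disp_cp_chain_me_...,%edx
            FF D2         call *%edx
         -- that is the exact shape chainXDirect_X86 below asserts on
         before overwriting it.  The BA and FF D2 opcode bytes themselves
         are emitted on lines not shown in this excerpt. */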
      /* We're generating transfers that could lead indirectly to a
         chain-me, so we need to be sure this is actually allowed --
         no-redir translations are not allowed to reach normal
         translations without going through the scheduler.  That means
         no XDirects or XIndirs out from no-redir translations.
         Hence: */
      vassert(disp_cp_xindir != NULL);

      /* Use ptmp for backpatching conditional jumps. */

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);

      /* movl $disp_indir, %edx */
      p = emit32(p, (UInt)(Addr)disp_cp_xindir);

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
   case Xin_XAssisted: {
      /* Use ptmp for backpatching conditional jumps. */

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
      /* movl $magic_number, %ebp. */
      switch (i->Xin.XAssisted.jk) {
         case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
         case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
         case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
         case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
         case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
         case Ijk_Sys_int145:   trcval = VEX_TRC_JMP_SYS_INT145;   break;
         case Ijk_Sys_int210:   trcval = VEX_TRC_JMP_SYS_INT210;   break;
         case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
         case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
         case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
         case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
         case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
         case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
         case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
         case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
         case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
         case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
         /* We don't expect to see the following being assisted. */
            ppIRJumpKind(i->Xin.XAssisted.jk);
            vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
      vassert(trcval != 0);
      p = emit32(p, trcval);

      /* movl $disp_indir, %edx */
      p = emit32(p, (UInt)(Addr)disp_cp_xassisted);

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      switch (i->Xin.CMov32.src->tag) {
            /* Big sigh.  This is movl E -> G ... */
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
            /* ... whereas this is movl G -> E.  That's why the args
               to doAMode_R appear to be the wrong way round in the ... */
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
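      /* Both variants leave %dst unchanged when the condition fails.
         The cmov form is 0F 4x /r, where x is the condition code (the
         0x0F escape byte is emitted on a line not shown in this
         excerpt); the fallback form instead emits a Jcc rel8
         (0x70 + inverted condition) around an ordinary movl, with the
         rel8 displacement backpatched via ptmp once the length of the
         movl is known. */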
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregEnc(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
         *p++ = toUChar(0xB8 + iregEnc(hregX86_EAX()));
         /* setb lo8(%eax) */
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
         *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
         /* setb lo8(%dst) */
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
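      /* Why the dance above: SETcc (0F 90+cc /r; the 0x0F escape bytes
         are on lines not shown here) can only address the low bytes of
         %eax/%ecx/%edx/%ebx, i.e. integer encodings 0..3.  For a
         destination with encoding 4..7 the value is therefore staged
         through %eax: xchg is the single byte 0x90+reg, the setcc goes
         to %al, and the exchange is then undone.  Note that 0x90+cond
         here is the second byte of the setcc opcode, not the xchg
         opcode, despite the visual similarity. */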
      if (i->Xin.Bsfr32.isFwds) {
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);

      /* see comment in hdefs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
      vpanic("emit_X86Instr:mfence:hwcaps");
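      /* Encodings used above: 0F AE F0 is mfence (SSE2), 0F AE F8 is
         sfence (MMXEXT), and F0 83 44 24 00 00 is "lock addl $0,0(%esp)"
         -- lock prefix, add-imm8 opcode, ModRM+SIB for 0(%esp), disp8 0
         and imm8 0 -- which acts as a full barrier on CPUs that lack
         mfence. */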
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressible on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);

      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
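      /* For ACAS, 0xB0/0xB1 are the low opcode bytes of cmpxchg{b,w,l}
         (0F B0 / 0F B1); for DACAS, cmpxchg8b is 0F C7 /1, which is why
         doAMode_M_enc is called with sub-opcode 1.  The 0x0F escape
         bytes (and the lock prefix, if one is emitted) are on lines not
         shown in this excerpt. */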
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register. */
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregEnc(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly */
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned ... */
            HReg swap = INVALID_HREG;
            HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
                 ecx = hregX86_ECX(), edx = hregX86_EDX();
            addRegUsage_X86AMode(&u, i->Xin.Store.dst);
            /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
            else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
            else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
            else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
            vassert(! hregIsInvalid(swap));
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8{%swap}, (dst) */
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            p = doAMode_R(p, i->Xin.Store.src, swap);
      } /* if (i->Xin.Store.sz == 1) */
      /* --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));

      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));

      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7 */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM:  *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);

      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                     1+fregEnc(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1) */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
               p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
               p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
               p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
               vpanic("emitX86Instr(FpLdSt,load)");
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
               p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
               p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
               p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
               vpanic("emitX86Instr(FpLdSt,store)");
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1) */
         switch (i->Xin.FpLdStI.sz) {
            case 8: opc = 0xDF; subopc_imm = 5; break;
            case 4: opc = 0xDB; subopc_imm = 0; break;
            case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode */
         switch (i->Xin.FpLdStI.sz) {
            case 8: opc = 0xDF; subopc_imm = 7; break;
            case 4: opc = 0xDB; subopc_imm = 3; break;
            case 2: opc = 0xDF; subopc_imm = 3; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;   /* subl $4, %esp */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;   /* fstps (%esp) -- round to 32 bits */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;   /* flds (%esp) */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;   /* addl $4, %esp */
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */

      /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);

      p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);

      /* note, this emits fnstsw %ax, not fstsw %ax */
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) */
      *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
      /* movl %eax, %dst */
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
   case Xin_SseConst: {
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movl (%esp), %xmm-dst */
      *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
      /* addl $16, %esp */
      *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);

      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
                               vregEnc(i->Xin.Sse32Fx4.src) );
      *p++ = toUChar(xtra & 0xFF);
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
                               vregEnc(i->Xin.Sse64Fx2.src) );
      *p++ = toUChar(xtra & 0xFF);
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
                               vregEnc(i->Xin.Sse32FLo.src) );
      *p++ = toUChar(xtra & 0xFF);
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
                               vregEnc(i->Xin.Sse64FLo.src) );
      *p++ = toUChar(xtra & 0xFF);
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                 XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                XX(0x0F); XX(0x57); break;
         case Xsse_AND:                XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
                               vregEnc(i->Xin.SseReRg.src) );
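      /* The plain 0F xx forms above are SSE1 ops on %xmm registers
         (movups/orps/xorps/andps); the ones carrying a leading 66 prefix
         are the SSE2 integer ops, e.g. Xsse_ADD32 is 66 0F FE /r, i.e.
         paddd.  The ModRM byte produced by doAMode_R_enc_enc then
         selects the dst and src xmm registers. */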
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */

      /* movaps %src, %dst */
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
                               vregEnc(i->Xin.SseCMov.src) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);

      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
                               vregEnc(i->Xin.SseShuf.src) );
      *p++ = (UChar)(i->Xin.SseShuf.order);
      /*
         (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
         (2 bytes)  jns  nofail     expected taken
         (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
      */
      /* This is heavily asserted re instruction lengths.  It needs to
         be.  If we get given unexpected forms of .amCounter or
         .amFailAddr -- basically, anything that's not of the form
         uimm7(%ebp) -- they are likely to fail. */
      /* Note also that after the decl we must be very careful not to
         read the carry flag, else we get a partial flags stall.
         js/jns avoids that, though. */

      /* --- decl 8(%ebp) --- */
      /* "1" because there's no register in this encoding;
         instead the register field is used as a sub opcode.  The
         encoding for "decl r/m32" is FF /1, hence the "1". */
      p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
      vassert(p - p0 == 3);
      /* --- jns nofail --- */
      *p++ = 0x03; /* need to check this 0x03 after the next insn */
      vassert(p - p0 == 5);
      /* --- jmp* 0(%ebp) --- */
      /* The encoding is FF /4. */
      p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
      vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
      /* And crosscheck .. */
      vassert(evCheckSzB_X86() == 8);
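      /* Size arithmetic for the asserts above: FF /1 with a disp8 %ebp
         amode is 3 bytes, jns rel8 is 2 bytes (the 0x79 opcode byte is
         emitted on a line not shown here, followed by the 0x03
         displacement), and FF /4 with a disp8 amode is another 3 bytes,
         giving the 8 that evCheckSzB_X86() reports. */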
      /* We generate   addl $1,NotKnownYet
                       adcl $0,NotKnownYet+4
         in the expectation that a later call to LibVEX_patchProfCtr
         will be used to fill in the immediate fields once the right
         value is known.
            83 05  00 00 00 00  01
            83 15  00 00 00 00  00
      */
      *p++ = 0x83; *p++ = 0x05;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x83; *p++ = 0x15;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      /* Tell the caller .. */
      vassert(!(*is_profInc));
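      /* The four zero bytes after each 0x83 0x05 / 0x83 0x15 pair are
         the not-yet-known 32-bit address of the 64-bit counter; the
         final imm8 of each insn (1 for the addl, 0 for the adcl) is
         emitted on lines not shown in this excerpt.  patchProfInc_X86
         below checks this exact 14-byte shape and fills in the two
         addresses. */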
      ppX86Instr(i, mode64);
      vpanic("emit_X86Instr");

   vassert(p - &buf[0] <= 32);
/* How big is an event check?  See case for Xin_EvCheck in
   emit_X86Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_X86 (void)
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
                                 void* place_to_chain,
                                 const void* disp_cp_chain_me_EXPECTED,
                                 const void* place_to_jump_to )
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movl $disp_cp_chain_me_EXPECTED, %edx
        BA <4 bytes value == disp_cp_chain_me_EXPECTED>
        ... */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0xBA);
   vassert(read_misaligned_UInt_LE(&p[1])
           == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
   vassert(p[5] == 0xFF);
   vassert(p[6] == 0xD2);
   /* And what we want to change it to is:
        jmp disp32   where disp32 is relative to the next insn
        E9 <4 bytes == disp32>
      The replacement has the same length as the original. */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5.  */
   Long delta = (Long)((const UChar*)place_to_jump_to - p) - 5;

   /* And make the modifications. */
   write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
   p[5] = 0x0F; p[6] = 0x0B;
   /* sanity check on the delta -- top 32 are all 0 or all 1 */
   vassert(delta == 0LL || delta == -1LL);
   VexInvalRange vir = { (HWord)place_to_chain, 7 };
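/* A minimal sketch of the rewrite this performs, assuming p points at
   the 7-byte chain-me slot described above:

      before:  BA xx xx xx xx FF D2   movl $disp_cp_chain_me,%edx
                                      call *%edx
      after:   E9 dd dd dd dd 0F 0B   jmp disp32 (to place_to_jump_to)
                                      ud2 (never reached; pads to 7 bytes)

   where dd..dd is place_to_jump_to - (p + 5).  The E9 opcode byte is
   written on a line not shown in this excerpt. */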
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
                                   void* place_to_unchain,
                                   const void* place_to_jump_to_EXPECTED,
                                   const void* disp_cp_chain_me )
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        E9 <4 bytes == disp32>
        ... */
   UChar* p = (UChar*)place_to_unchain;
       && p[5] == 0x0F && p[6] == 0x0B) {
      /* Check the offset is right. */
      Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
      if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
         vex_printf("QQQ unchainXDirect_X86: found valid\n");

   /* And what we want to change it to is:
        movl $disp_cp_chain_me, %edx
        BA <4 bytes value == disp_cp_chain_me_EXPECTED>
      So it's the same length (convenient, huh). */
   write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
   VexInvalRange vir = { (HWord)place_to_unchain, 7 };
/* Patch the counter address into a profile inc point, as previously
   created by the Xin_ProfInc case for emit_X86Instr. */
VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
                                 void* place_to_patch,
                                 const ULong* location_of_counter )
   vassert(endness_host == VexEndnessLE);
   vassert(sizeof(ULong*) == 4);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0] == 0x83);
   vassert(p[1] == 0x05);
   vassert(p[2] == 0x00);
   vassert(p[3] == 0x00);
   vassert(p[4] == 0x00);
   vassert(p[5] == 0x00);
   vassert(p[6] == 0x01);
   vassert(p[7] == 0x83);
   vassert(p[8] == 0x15);
   vassert(p[9] == 0x00);
   vassert(p[10] == 0x00);
   vassert(p[11] == 0x00);
   vassert(p[12] == 0x00);
   vassert(p[13] == 0x00);
   UInt imm32 = (UInt)(Addr)location_of_counter;
   p[2]  = imm32 & 0xFF; imm32 >>= 8;
   p[3]  = imm32 & 0xFF; imm32 >>= 8;
   p[4]  = imm32 & 0xFF; imm32 >>= 8;
   p[5]  = imm32 & 0xFF;
   imm32 = 4 + (UInt)(Addr)location_of_counter;
   p[9]  = imm32 & 0xFF; imm32 >>= 8;
   p[10] = imm32 & 0xFF; imm32 >>= 8;
   p[11] = imm32 & 0xFF; imm32 >>= 8;
   p[12] = imm32 & 0xFF;
   VexInvalRange vir = { (HWord)place_to_patch, 14 };
/*---------------------------------------------------------------*/
/*--- end                                     host_x86_defs.c ---*/
/*---------------------------------------------------------------*/