/*---------------------------------------------------------------*/
/*--- begin                                   host_x86_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_x86_defs.h"
/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_X86 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_X86;
   static Bool         rRegUniverse_X86_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_X86;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_X86_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt32] = ru->size;
   ru->regs[ru->size++] = hregX86_EBX();
   ru->regs[ru->size++] = hregX86_ESI();
   ru->regs[ru->size++] = hregX86_EDI();
   ru->regs[ru->size++] = hregX86_EAX();
   ru->regs[ru->size++] = hregX86_ECX();
   ru->regs[ru->size++] = hregX86_EDX();
   ru->allocable_end[HRcInt32] = ru->size - 1;

   ru->allocable_start[HRcFlt64] = ru->size;
   ru->regs[ru->size++] = hregX86_FAKE0();
   ru->regs[ru->size++] = hregX86_FAKE1();
   ru->regs[ru->size++] = hregX86_FAKE2();
   ru->regs[ru->size++] = hregX86_FAKE3();
   ru->regs[ru->size++] = hregX86_FAKE4();
   ru->regs[ru->size++] = hregX86_FAKE5();
   ru->allocable_end[HRcFlt64] = ru->size - 1;

   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregX86_XMM0();
   ru->regs[ru->size++] = hregX86_XMM1();
   ru->regs[ru->size++] = hregX86_XMM2();
   ru->regs[ru->size++] = hregX86_XMM3();
   ru->regs[ru->size++] = hregX86_XMM4();
   ru->regs[ru->size++] = hregX86_XMM5();
   ru->regs[ru->size++] = hregX86_XMM6();
   ru->regs[ru->size++] = hregX86_XMM7();
   ru->allocable_end[HRcVec128] = ru->size - 1;
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregX86_ESP();
   ru->regs[ru->size++] = hregX86_EBP();

   rRegUniverse_X86_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
UInt ppHRegX86 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[8]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt32:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%s", ireg32_names[r]);
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 6);
         return vex_printf("%%fake%d", r);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 8);
         return vex_printf("%%xmm%d", r);
      default:
         vpanic("ppHRegX86");
   }
}
/* --------- Condition codes, Intel encoding. --------- */

const HChar* showX86CondCode ( X86CondCode cond )
{
   switch (cond) {
      case Xcc_O:      return "o";
      case Xcc_NO:     return "no";
      case Xcc_B:      return "b";
      case Xcc_NB:     return "nb";
      case Xcc_Z:      return "z";
      case Xcc_NZ:     return "nz";
      case Xcc_BE:     return "be";
      case Xcc_NBE:    return "nbe";
      case Xcc_S:      return "s";
      case Xcc_NS:     return "ns";
      case Xcc_P:      return "p";
      case Xcc_NP:     return "np";
      case Xcc_L:      return "l";
      case Xcc_NL:     return "nl";
      case Xcc_LE:     return "le";
      case Xcc_NLE:    return "nle";
      case Xcc_ALWAYS: return "ALWAYS";
      default: vpanic("ppX86CondCode");
   }
}
/* --------- X86AMode: memory address expressions. --------- */

X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IR;
   am->Xam.IR.imm = imm32;
   am->Xam.IR.reg = reg;
   return am;
}
X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
   am->tag = Xam_IRRS;
   am->Xam.IRRS.imm = imm32;
   am->Xam.IRRS.base = base;
   am->Xam.IRRS.index = indEx;
   am->Xam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

X86AMode* dopyX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
      case Xam_IRRS:
         return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
                               am->Xam.IRRS.index, am->Xam.IRRS.shift );
      default:
         vpanic("dopyX86AMode");
   }
}

void ppX86AMode ( X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         if (am->Xam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Xam.IR.imm);
         ppHRegX86(am->Xam.IR.reg);
         vex_printf(")");
         return;
      case Xam_IRRS:
         vex_printf("0x%x(", am->Xam.IRRS.imm);
         ppHRegX86(am->Xam.IRRS.base);
         vex_printf(",");
         ppHRegX86(am->Xam.IRRS.index);
         vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
         return;
      default:
         vpanic("ppX86AMode");
   }
}

static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         addHRegUse(u, HRmRead, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         addHRegUse(u, HRmRead, am->Xam.IRRS.base);
         addHRegUse(u, HRmRead, am->Xam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_X86AMode");
   }
}

static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
   switch (am->tag) {
      case Xam_IR:
         am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
         return;
      case Xam_IRRS:
         am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
         am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_X86AMode");
   }
}
/* --------- Operand, which can be reg, immediate or memory. --------- */

X86RMI* X86RMI_Imm ( UInt imm32 ) {
   X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag = Xrmi_Imm;
   op->Xrmi.Imm.imm32 = imm32;
   return op;
}
X86RMI* X86RMI_Reg ( HReg reg ) {
   X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag = Xrmi_Reg;
   op->Xrmi.Reg.reg = reg;
   return op;
}
X86RMI* X86RMI_Mem ( X86AMode* am ) {
   X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
   op->tag = Xrmi_Mem;
   op->Xrmi.Mem.am = am;
   return op;
}

void ppX86RMI ( X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         vex_printf("$0x%x", op->Xrmi.Imm.imm32);
         return;
      case Xrmi_Reg:
         ppHRegX86(op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         ppX86AMode(op->Xrmi.Mem.am);
         return;
      default:
         vpanic("ppX86RMI");
   }
}

/* An X86RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_X86RMI");
   }
}

static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
   switch (op->tag) {
      case Xrmi_Imm:
         return;
      case Xrmi_Reg:
         op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
         return;
      case Xrmi_Mem:
         mapRegs_X86AMode(m, op->Xrmi.Mem.am);
         return;
      default:
         vpanic("mapRegs_X86RMI");
   }
}
/* --------- Operand, which can be reg or immediate only. --------- */

X86RI* X86RI_Imm ( UInt imm32 ) {
   X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag = Xri_Imm;
   op->Xri.Imm.imm32 = imm32;
   return op;
}
X86RI* X86RI_Reg ( HReg reg ) {
   X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
   op->tag = Xri_Reg;
   op->Xri.Reg.reg = reg;
   return op;
}

void ppX86RI ( X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         vex_printf("$0x%x", op->Xri.Imm.imm32);
         return;
      case Xri_Reg:
         ppHRegX86(op->Xri.Reg.reg);
         return;
      default:
         vpanic("ppX86RI");
   }
}

/* An X86RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         addHRegUse(u, HRmRead, op->Xri.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RI");
   }
}

static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
   switch (op->tag) {
      case Xri_Imm:
         return;
      case Xri_Reg:
         op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RI");
   }
}
/* --------- Operand, which can be reg or memory only. --------- */

X86RM* X86RM_Reg ( HReg reg ) {
   X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag = Xrm_Reg;
   op->Xrm.Reg.reg = reg;
   return op;
}
X86RM* X86RM_Mem ( X86AMode* am ) {
   X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
   op->tag = Xrm_Mem;
   op->Xrm.Mem.am = am;
   return op;
}

void ppX86RM ( X86RM* op ) {
   switch (op->tag) {
      case Xrm_Mem:
         ppX86AMode(op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         ppHRegX86(op->Xrm.Reg.reg);
         return;
      default:
         vpanic("ppX86RM");
   }
}

/* Because an X86RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Xrm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_X86AMode(u, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_X86RM");
   }
}

static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
{
   switch (op->tag) {
      case Xrm_Mem:
         mapRegs_X86AMode(m, op->Xrm.Mem.am);
         return;
      case Xrm_Reg:
         op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_X86RM");
   }
}
/* --------- Instructions. --------- */

const HChar* showX86UnaryOp ( X86UnaryOp op ) {
   switch (op) {
      case Xun_NOT: return "not";
      case Xun_NEG: return "neg";
      default: vpanic("showX86UnaryOp");
   }
}

const HChar* showX86AluOp ( X86AluOp op ) {
   switch (op) {
      case Xalu_MOV: return "mov";
      case Xalu_CMP: return "cmp";
      case Xalu_ADD: return "add";
      case Xalu_SUB: return "sub";
      case Xalu_ADC: return "adc";
      case Xalu_SBB: return "sbb";
      case Xalu_AND: return "and";
      case Xalu_OR:  return "or";
      case Xalu_XOR: return "xor";
      case Xalu_MUL: return "mul";
      default: vpanic("showX86AluOp");
   }
}

const HChar* showX86ShiftOp ( X86ShiftOp op ) {
   switch (op) {
      case Xsh_SHL: return "shl";
      case Xsh_SHR: return "shr";
      case Xsh_SAR: return "sar";
      default: vpanic("showX86ShiftOp");
   }
}

const HChar* showX86FpOp ( X86FpOp op ) {
   switch (op) {
      case Xfp_ADD:    return "add";
      case Xfp_SUB:    return "sub";
      case Xfp_MUL:    return "mul";
      case Xfp_DIV:    return "div";
      case Xfp_SCALE:  return "scale";
      case Xfp_ATAN:   return "atan";
      case Xfp_YL2X:   return "yl2x";
      case Xfp_YL2XP1: return "yl2xp1";
      case Xfp_PREM:   return "prem";
      case Xfp_PREM1:  return "prem1";
      case Xfp_SQRT:   return "sqrt";
      case Xfp_ABS:    return "abs";
      case Xfp_NEG:    return "chs";
      case Xfp_MOV:    return "mov";
      case Xfp_SIN:    return "sin";
      case Xfp_COS:    return "cos";
      case Xfp_TAN:    return "tan";
      case Xfp_ROUND:  return "round";
      case Xfp_2XM1:   return "2xm1";
      default: vpanic("showX86FpOp");
   }
}

const HChar* showX86SseOp ( X86SseOp op ) {
   switch (op) {
      case Xsse_MOV:      return "mov(?!)";
      case Xsse_ADDF:     return "add";
      case Xsse_SUBF:     return "sub";
      case Xsse_MULF:     return "mul";
      case Xsse_DIVF:     return "div";
      case Xsse_MAXF:     return "max";
      case Xsse_MINF:     return "min";
      case Xsse_CMPEQF:   return "cmpFeq";
      case Xsse_CMPLTF:   return "cmpFlt";
      case Xsse_CMPLEF:   return "cmpFle";
      case Xsse_CMPUNF:   return "cmpFun";
      case Xsse_RCPF:     return "rcp";
      case Xsse_RSQRTF:   return "rsqrt";
      case Xsse_SQRTF:    return "sqrt";
      case Xsse_AND:      return "and";
      case Xsse_OR:       return "or";
      case Xsse_XOR:      return "xor";
      case Xsse_ANDN:     return "andn";
      case Xsse_ADD8:     return "paddb";
      case Xsse_ADD16:    return "paddw";
      case Xsse_ADD32:    return "paddd";
      case Xsse_ADD64:    return "paddq";
      case Xsse_QADD8U:   return "paddusb";
      case Xsse_QADD16U:  return "paddusw";
      case Xsse_QADD8S:   return "paddsb";
      case Xsse_QADD16S:  return "paddsw";
      case Xsse_SUB8:     return "psubb";
      case Xsse_SUB16:    return "psubw";
      case Xsse_SUB32:    return "psubd";
      case Xsse_SUB64:    return "psubq";
      case Xsse_QSUB8U:   return "psubusb";
      case Xsse_QSUB16U:  return "psubusw";
      case Xsse_QSUB8S:   return "psubsb";
      case Xsse_QSUB16S:  return "psubsw";
      case Xsse_MUL16:    return "pmullw";
      case Xsse_MULHI16U: return "pmulhuw";
      case Xsse_MULHI16S: return "pmulhw";
      case Xsse_AVG8U:    return "pavgb";
      case Xsse_AVG16U:   return "pavgw";
      case Xsse_MAX16S:   return "pmaxw";
      case Xsse_MAX8U:    return "pmaxub";
      case Xsse_MIN16S:   return "pminw";
      case Xsse_MIN8U:    return "pminub";
      case Xsse_CMPEQ8:   return "pcmpeqb";
      case Xsse_CMPEQ16:  return "pcmpeqw";
      case Xsse_CMPEQ32:  return "pcmpeqd";
      case Xsse_CMPGT8S:  return "pcmpgtb";
      case Xsse_CMPGT16S: return "pcmpgtw";
      case Xsse_CMPGT32S: return "pcmpgtd";
      case Xsse_SHL16:    return "psllw";
      case Xsse_SHL32:    return "pslld";
      case Xsse_SHL64:    return "psllq";
      case Xsse_SHR16:    return "psrlw";
      case Xsse_SHR32:    return "psrld";
      case Xsse_SHR64:    return "psrlq";
      case Xsse_SAR16:    return "psraw";
      case Xsse_SAR32:    return "psrad";
      case Xsse_PACKSSD:  return "packssdw";
      case Xsse_PACKSSW:  return "packsswb";
      case Xsse_PACKUSW:  return "packuswb";
      case Xsse_UNPCKHB:  return "punpckhb";
      case Xsse_UNPCKHW:  return "punpckhw";
      case Xsse_UNPCKHD:  return "punpckhd";
      case Xsse_UNPCKHQ:  return "punpckhq";
      case Xsse_UNPCKLB:  return "punpcklb";
      case Xsse_UNPCKLW:  return "punpcklw";
      case Xsse_UNPCKLD:  return "punpckld";
      case Xsse_UNPCKLQ:  return "punpcklq";
      default: vpanic("showX86SseOp");
   }
}
X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Alu32R;
   i->Xin.Alu32R.op  = op;
   i->Xin.Alu32R.src = src;
   i->Xin.Alu32R.dst = dst;
   return i;
}
X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Alu32M;
   i->Xin.Alu32M.op  = op;
   i->Xin.Alu32M.src = src;
   i->Xin.Alu32M.dst = dst;
   vassert(op != Xalu_MUL);
   return i;
}
X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sh32;
   i->Xin.Sh32.op  = op;
   i->Xin.Sh32.src = src;
   i->Xin.Sh32.dst = dst;
   return i;
}
X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Test32;
   i->Xin.Test32.imm32 = imm32;
   i->Xin.Test32.dst   = dst;
   return i;
}
X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Unary32;
   i->Xin.Unary32.op  = op;
   i->Xin.Unary32.dst = dst;
   return i;
}
X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Lea32;
   i->Xin.Lea32.am  = am;
   i->Xin.Lea32.dst = dst;
   return i;
}
X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_MulL;
   i->Xin.MulL.syned = syned;
   i->Xin.MulL.src   = src;
   return i;
}
X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Div;
   i->Xin.Div.syned = syned;
   i->Xin.Div.src   = src;
   return i;
}
X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sh3232;
   i->Xin.Sh3232.op  = op;
   i->Xin.Sh3232.amt = amt;
   i->Xin.Sh3232.src = src;
   i->Xin.Sh3232.dst = dst;
   vassert(op == Xsh_SHL || op == Xsh_SHR);
   return i;
}
X86Instr* X86Instr_Push( X86RMI* src ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Push;
   i->Xin.Push.src = src;
   return i;
}
X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
                          RetLoc rloc ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Call;
   i->Xin.Call.cond     = cond;
   i->Xin.Call.target   = target;
   i->Xin.Call.regparms = regparms;
   i->Xin.Call.rloc     = rloc;
   vassert(regparms >= 0 && regparms <= 3);
   vassert(is_sane_RetLoc(rloc));
   return i;
}
X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
                             X86CondCode cond, Bool toFastEP ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_XDirect;
   i->Xin.XDirect.dstGA    = dstGA;
   i->Xin.XDirect.amEIP    = amEIP;
   i->Xin.XDirect.cond     = cond;
   i->Xin.XDirect.toFastEP = toFastEP;
   return i;
}
X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
                            X86CondCode cond ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_XIndir;
   i->Xin.XIndir.dstGA = dstGA;
   i->Xin.XIndir.amEIP = amEIP;
   i->Xin.XIndir.cond  = cond;
   return i;
}
X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
                               X86CondCode cond, IRJumpKind jk ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_XAssisted;
   i->Xin.XAssisted.dstGA = dstGA;
   i->Xin.XAssisted.amEIP = amEIP;
   i->Xin.XAssisted.cond  = cond;
   i->Xin.XAssisted.jk    = jk;
   return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_CMov32;
   i->Xin.CMov32.cond = cond;
   i->Xin.CMov32.src  = src;
   i->Xin.CMov32.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
                            X86AMode* src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_LoadEX;
   i->Xin.LoadEX.szSmall = szSmall;
   i->Xin.LoadEX.syned   = syned;
   i->Xin.LoadEX.src     = src;
   i->Xin.LoadEX.dst     = dst;
   vassert(szSmall == 1 || szSmall == 2);
   return i;
}
X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Store;
   i->Xin.Store.sz  = sz;
   i->Xin.Store.src = src;
   i->Xin.Store.dst = dst;
   vassert(sz == 1 || sz == 2);
   return i;
}
X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Set32;
   i->Xin.Set32.cond = cond;
   i->Xin.Set32.dst  = dst;
   return i;
}
X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Bsfr32;
   i->Xin.Bsfr32.isFwds = isFwds;
   i->Xin.Bsfr32.src    = src;
   i->Xin.Bsfr32.dst    = dst;
   return i;
}
X86Instr* X86Instr_MFence ( UInt hwcaps ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_MFence;
   i->Xin.MFence.hwcaps = hwcaps;
   vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
                            |VEX_HWCAPS_X86_SSE1
                            |VEX_HWCAPS_X86_SSE2
                            |VEX_HWCAPS_X86_SSE3
                            |VEX_HWCAPS_X86_LZCNT)));
   return i;
}
X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_ACAS;
   i->Xin.ACAS.addr = addr;
   i->Xin.ACAS.sz   = sz;
   vassert(sz == 4 || sz == 2 || sz == 1);
   return i;
}
X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_DACAS;
   i->Xin.DACAS.addr = addr;
   return i;
}
X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpUnary;
   i->Xin.FpUnary.op  = op;
   i->Xin.FpUnary.src = src;
   i->Xin.FpUnary.dst = dst;
   return i;
}
X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpBinary;
   i->Xin.FpBinary.op   = op;
   i->Xin.FpBinary.srcL = srcL;
   i->Xin.FpBinary.srcR = srcR;
   i->Xin.FpBinary.dst  = dst;
   return i;
}
X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpLdSt;
   i->Xin.FpLdSt.isLoad = isLoad;
   i->Xin.FpLdSt.sz     = sz;
   i->Xin.FpLdSt.reg    = reg;
   i->Xin.FpLdSt.addr   = addr;
   vassert(sz == 4 || sz == 8 || sz == 10);
   return i;
}
X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
                             HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpLdStI;
   i->Xin.FpLdStI.isLoad = isLoad;
   i->Xin.FpLdStI.sz     = sz;
   i->Xin.FpLdStI.reg    = reg;
   i->Xin.FpLdStI.addr   = addr;
   vassert(sz == 2 || sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Fp64to32;
   i->Xin.Fp64to32.src = src;
   i->Xin.Fp64to32.dst = dst;
   return i;
}
X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpCMov;
   i->Xin.FpCMov.cond = cond;
   i->Xin.FpCMov.src  = src;
   i->Xin.FpCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpLdCW;
   i->Xin.FpLdCW.addr = addr;
   return i;
}
X86Instr* X86Instr_FpStSW_AX ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpStSW_AX;
   return i;
}
X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_FpCmp;
   i->Xin.FpCmp.srcL = srcL;
   i->Xin.FpCmp.srcR = srcR;
   i->Xin.FpCmp.dst  = dst;
   return i;
}
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseConst;
   i->Xin.SseConst.con = con;
   i->Xin.SseConst.dst = dst;
   vassert(hregClass(dst) == HRcVec128);
   return i;
}
X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseLdSt;
   i->Xin.SseLdSt.isLoad = isLoad;
   i->Xin.SseLdSt.reg    = reg;
   i->Xin.SseLdSt.addr   = addr;
   return i;
}
X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
{
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseLdzLO;
   i->Xin.SseLdzLO.sz   = toUChar(sz);
   i->Xin.SseLdzLO.reg  = reg;
   i->Xin.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sse32Fx4;
   i->Xin.Sse32Fx4.op  = op;
   i->Xin.Sse32Fx4.src = src;
   i->Xin.Sse32Fx4.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sse32FLo;
   i->Xin.Sse32FLo.op  = op;
   i->Xin.Sse32FLo.src = src;
   i->Xin.Sse32FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sse64Fx2;
   i->Xin.Sse64Fx2.op  = op;
   i->Xin.Sse64Fx2.src = src;
   i->Xin.Sse64Fx2.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_Sse64FLo;
   i->Xin.Sse64FLo.op  = op;
   i->Xin.Sse64FLo.src = src;
   i->Xin.Sse64FLo.dst = dst;
   vassert(op != Xsse_MOV);
   return i;
}
X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseReRg;
   i->Xin.SseReRg.op  = op;
   i->Xin.SseReRg.src = re;
   i->Xin.SseReRg.dst = rg;
   return i;
}
X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseCMov;
   i->Xin.SseCMov.cond = cond;
   i->Xin.SseCMov.src  = src;
   i->Xin.SseCMov.dst  = dst;
   vassert(cond != Xcc_ALWAYS);
   return i;
}
X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_SseShuf;
   i->Xin.SseShuf.order = order;
   i->Xin.SseShuf.src   = src;
   i->Xin.SseShuf.dst   = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
                             X86AMode* amFailAddr ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_EvCheck;
   i->Xin.EvCheck.amCounter  = amCounter;
   i->Xin.EvCheck.amFailAddr = amFailAddr;
   return i;
}
X86Instr* X86Instr_ProfInc ( void ) {
   X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
   i->tag = Xin_ProfInc;
   return i;
}
void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
   vassert(mode64 == False);
   switch (i->tag) {
      case Xin_Alu32R:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
         ppX86RMI(i->Xin.Alu32R.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Alu32R.dst);
         return;
      case Xin_Alu32M:
         vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
         ppX86RI(i->Xin.Alu32M.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Alu32M.dst);
         return;
      case Xin_Sh32:
         vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
         if (i->Xin.Sh32.src == 0)
            vex_printf("%%cl,");
         else
            vex_printf("$%d,", (Int)i->Xin.Sh32.src);
         ppHRegX86(i->Xin.Sh32.dst);
         return;
      case Xin_Test32:
         vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
         ppX86RM(i->Xin.Test32.dst);
         return;
      case Xin_Unary32:
         vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
         ppHRegX86(i->Xin.Unary32.dst);
         return;
      case Xin_Lea32:
         vex_printf("leal ");
         ppX86AMode(i->Xin.Lea32.am);
         vex_printf(",");
         ppHRegX86(i->Xin.Lea32.dst);
         return;
      case Xin_MulL:
         vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
         ppX86RM(i->Xin.MulL.src);
         return;
      case Xin_Div:
         vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
         ppX86RM(i->Xin.Div.src);
         return;
      case Xin_Sh3232:
         vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
         if (i->Xin.Sh3232.amt == 0)
            vex_printf(" %%cl,");
         else
            vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
         ppHRegX86(i->Xin.Sh3232.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sh3232.dst);
         return;
      case Xin_Push:
         vex_printf("pushl ");
         ppX86RMI(i->Xin.Push.src);
         return;
      case Xin_Call:
         vex_printf("call%s[%d,",
                    i->Xin.Call.cond==Xcc_ALWAYS
                       ? "" : showX86CondCode(i->Xin.Call.cond),
                    i->Xin.Call.regparms);
         ppRetLoc(i->Xin.Call.rloc);
         vex_printf("] 0x%x", i->Xin.Call.target);
         return;
      case Xin_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XDirect.cond));
         vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
         ppX86AMode(i->Xin.XDirect.amEIP);
         vex_printf("; ");
         vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
                    i->Xin.XDirect.toFastEP ? "fast" : "slow");
         return;
      case Xin_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%eflags.%s) { movl ",
                    showX86CondCode(i->Xin.XIndir.cond));
         ppHRegX86(i->Xin.XIndir.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XIndir.amEIP);
         vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
         return;
      case Xin_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%eflags.%s) { ",
                    showX86CondCode(i->Xin.XAssisted.cond));
         vex_printf("movl ");
         ppHRegX86(i->Xin.XAssisted.dstGA);
         vex_printf(",");
         ppX86AMode(i->Xin.XAssisted.amEIP);
         vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
                    (Int)i->Xin.XAssisted.jk);
         vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
         return;
      case Xin_CMov32:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
         ppX86RM(i->Xin.CMov32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.CMov32.dst);
         return;
      case Xin_LoadEX:
         vex_printf("mov%c%cl ",
                    i->Xin.LoadEX.syned ? 's' : 'z',
                    i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
         ppX86AMode(i->Xin.LoadEX.src);
         vex_printf(",");
         ppHRegX86(i->Xin.LoadEX.dst);
         return;
      case Xin_Store:
         vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
         ppHRegX86(i->Xin.Store.src);
         vex_printf(",");
         ppX86AMode(i->Xin.Store.dst);
         return;
      case Xin_Set32:
         vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
         ppHRegX86(i->Xin.Set32.dst);
         return;
      case Xin_Bsfr32:
         vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
         ppHRegX86(i->Xin.Bsfr32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Bsfr32.dst);
         return;
      case Xin_MFence:
         vex_printf("mfence(%s)",
                    LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
         return;
      case Xin_ACAS:
         vex_printf("lock cmpxchg%c ",
                    i->Xin.ACAS.sz==1 ? 'b'
                                      : i->Xin.ACAS.sz==2 ? 'w' : 'l');
         vex_printf("{%%eax->%%ebx},");
         ppX86AMode(i->Xin.ACAS.addr);
         return;
      case Xin_DACAS:
         vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
         ppX86AMode(i->Xin.DACAS.addr);
         return;
      case Xin_FpUnary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
         ppHRegX86(i->Xin.FpUnary.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpUnary.dst);
         return;
      case Xin_FpBinary:
         vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
         ppHRegX86(i->Xin.FpBinary.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpBinary.dst);
         return;
      case Xin_FpLdSt:
         if (i->Xin.FpLdSt.isLoad) {
            vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                     : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppX86AMode(i->Xin.FpLdSt.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdSt.reg);
         } else {
            vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
                                     : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
            ppHRegX86(i->Xin.FpLdSt.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdSt.addr);
         }
         return;
      case Xin_FpLdStI:
         if (i->Xin.FpLdStI.isLoad) {
            vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppX86AMode(i->Xin.FpLdStI.addr);
            vex_printf(", ");
            ppHRegX86(i->Xin.FpLdStI.reg);
         } else {
            vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
                                  i->Xin.FpLdStI.sz==4 ? "l" : "w");
            ppHRegX86(i->Xin.FpLdStI.reg);
            vex_printf(", ");
            ppX86AMode(i->Xin.FpLdStI.addr);
         }
         return;
      case Xin_Fp64to32:
         vex_printf("gdtof ");
         ppHRegX86(i->Xin.Fp64to32.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Fp64to32.dst);
         return;
      case Xin_FpCMov:
         vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
         ppHRegX86(i->Xin.FpCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCMov.dst);
         return;
      case Xin_FpLdCW:
         vex_printf("fldcw ");
         ppX86AMode(i->Xin.FpLdCW.addr);
         return;
      case Xin_FpStSW_AX:
         vex_printf("fstsw %%ax");
         return;
      case Xin_FpCmp:
         vex_printf("gcmp ");
         ppHRegX86(i->Xin.FpCmp.srcL);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.srcR);
         vex_printf(",");
         ppHRegX86(i->Xin.FpCmp.dst);
         return;
      case Xin_SseConst:
         vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
         ppHRegX86(i->Xin.SseConst.dst);
         return;
      case Xin_SseLdSt:
         vex_printf("movups ");
         if (i->Xin.SseLdSt.isLoad) {
            ppX86AMode(i->Xin.SseLdSt.addr);
            vex_printf(",");
            ppHRegX86(i->Xin.SseLdSt.reg);
         } else {
            ppHRegX86(i->Xin.SseLdSt.reg);
            vex_printf(",");
            ppX86AMode(i->Xin.SseLdSt.addr);
         }
         return;
      case Xin_SseLdzLO:
         vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
         ppX86AMode(i->Xin.SseLdzLO.addr);
         vex_printf(",");
         ppHRegX86(i->Xin.SseLdzLO.reg);
         return;
      case Xin_Sse32Fx4:
         vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
         ppHRegX86(i->Xin.Sse32Fx4.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32Fx4.dst);
         return;
      case Xin_Sse32FLo:
         vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
         ppHRegX86(i->Xin.Sse32FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse32FLo.dst);
         return;
      case Xin_Sse64Fx2:
         vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
         ppHRegX86(i->Xin.Sse64Fx2.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64Fx2.dst);
         return;
      case Xin_Sse64FLo:
         vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
         ppHRegX86(i->Xin.Sse64FLo.src);
         vex_printf(",");
         ppHRegX86(i->Xin.Sse64FLo.dst);
         return;
      case Xin_SseReRg:
         vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
         ppHRegX86(i->Xin.SseReRg.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseReRg.dst);
         return;
      case Xin_SseCMov:
         vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
         ppHRegX86(i->Xin.SseCMov.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseCMov.dst);
         return;
      case Xin_SseShuf:
         vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order);
         ppHRegX86(i->Xin.SseShuf.src);
         vex_printf(",");
         ppHRegX86(i->Xin.SseShuf.dst);
         return;
      case Xin_EvCheck:
         vex_printf("(evCheck) decl ");
         ppX86AMode(i->Xin.EvCheck.amCounter);
         vex_printf("; jns nofail; jmp *");
         ppX86AMode(i->Xin.EvCheck.amFailAddr);
         vex_printf("; nofail:");
         return;
      case Xin_ProfInc:
         vex_printf("(profInc) addl $1,NotKnownYet; "
                    "adcl $0,NotKnownYet+4");
         return;
      default:
         vpanic("ppX86Instr");
   }
}
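
/* Illustrative example, not part of the original source: for an
   Alu32R ADD of the immediate 0x10 into %eax, the printer above
   renders the instruction as "addl $0x10,%eax". */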
1225 /* --------- Helpers for register allocation. --------- */
1227 void getRegUsage_X86Instr (HRegUsage
* u
, const X86Instr
* i
, Bool mode64
)
1230 vassert(mode64
== False
);
1234 addRegUsage_X86RMI(u
, i
->Xin
.Alu32R
.src
);
1235 if (i
->Xin
.Alu32R
.op
== Xalu_MOV
) {
1236 addHRegUse(u
, HRmWrite
, i
->Xin
.Alu32R
.dst
);
1238 if (i
->Xin
.Alu32R
.src
->tag
== Xrmi_Reg
) {
1239 u
->isRegRegMove
= True
;
1240 u
->regMoveSrc
= i
->Xin
.Alu32R
.src
->Xrmi
.Reg
.reg
;
1241 u
->regMoveDst
= i
->Xin
.Alu32R
.dst
;
1245 if (i
->Xin
.Alu32R
.op
== Xalu_CMP
) {
1246 addHRegUse(u
, HRmRead
, i
->Xin
.Alu32R
.dst
);
1249 addHRegUse(u
, HRmModify
, i
->Xin
.Alu32R
.dst
);
1252 addRegUsage_X86RI(u
, i
->Xin
.Alu32M
.src
);
1253 addRegUsage_X86AMode(u
, i
->Xin
.Alu32M
.dst
);
1256 addHRegUse(u
, HRmModify
, i
->Xin
.Sh32
.dst
);
1257 if (i
->Xin
.Sh32
.src
== 0)
1258 addHRegUse(u
, HRmRead
, hregX86_ECX());
1261 addRegUsage_X86RM(u
, i
->Xin
.Test32
.dst
, HRmRead
);
1264 addHRegUse(u
, HRmModify
, i
->Xin
.Unary32
.dst
);
1267 addRegUsage_X86AMode(u
, i
->Xin
.Lea32
.am
);
1268 addHRegUse(u
, HRmWrite
, i
->Xin
.Lea32
.dst
);
1271 addRegUsage_X86RM(u
, i
->Xin
.MulL
.src
, HRmRead
);
1272 addHRegUse(u
, HRmModify
, hregX86_EAX());
1273 addHRegUse(u
, HRmWrite
, hregX86_EDX());
1276 addRegUsage_X86RM(u
, i
->Xin
.Div
.src
, HRmRead
);
1277 addHRegUse(u
, HRmModify
, hregX86_EAX());
1278 addHRegUse(u
, HRmModify
, hregX86_EDX());
1281 addHRegUse(u
, HRmRead
, i
->Xin
.Sh3232
.src
);
1282 addHRegUse(u
, HRmModify
, i
->Xin
.Sh3232
.dst
);
1283 if (i
->Xin
.Sh3232
.amt
== 0)
1284 addHRegUse(u
, HRmRead
, hregX86_ECX());
1287 addRegUsage_X86RMI(u
, i
->Xin
.Push
.src
);
1288 addHRegUse(u
, HRmModify
, hregX86_ESP());
1291 /* This is a bit subtle. */
1292 /* First off, claim it trashes all the caller-saved regs
1293 which fall within the register allocator's jurisdiction.
1294 These I believe to be %eax %ecx %edx and all the xmm
1296 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1297 addHRegUse(u
, HRmWrite
, hregX86_ECX());
1298 addHRegUse(u
, HRmWrite
, hregX86_EDX());
1299 addHRegUse(u
, HRmWrite
, hregX86_XMM0());
1300 addHRegUse(u
, HRmWrite
, hregX86_XMM1());
1301 addHRegUse(u
, HRmWrite
, hregX86_XMM2());
1302 addHRegUse(u
, HRmWrite
, hregX86_XMM3());
1303 addHRegUse(u
, HRmWrite
, hregX86_XMM4());
1304 addHRegUse(u
, HRmWrite
, hregX86_XMM5());
1305 addHRegUse(u
, HRmWrite
, hregX86_XMM6());
1306 addHRegUse(u
, HRmWrite
, hregX86_XMM7());
1307 /* Now we have to state any parameter-carrying registers
1308 which might be read. This depends on the regparmness. */
1309 switch (i
->Xin
.Call
.regparms
) {
1310 case 3: addHRegUse(u
, HRmRead
, hregX86_ECX()); /*fallthru*/
1311 case 2: addHRegUse(u
, HRmRead
, hregX86_EDX()); /*fallthru*/
1312 case 1: addHRegUse(u
, HRmRead
, hregX86_EAX()); break;
1314 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1316 /* Finally, there is the issue that the insn trashes a
1317 register because the literal target address has to be
1318 loaded into a register. Fortunately, for the 0/1/2
1319 regparm case, we can use EAX, EDX and ECX respectively, so
1320 this does not cause any further damage. For the 3-regparm
1321 case, we'll have to choose another register arbitrarily --
1322 since A, D and C are used for parameters -- and so we might
1323 as well choose EDI. */
1324 if (i
->Xin
.Call
.regparms
== 3)
1325 addHRegUse(u
, HRmWrite
, hregX86_EDI());
1326 /* Upshot of this is that the assembler really must observe
1327 the here-stated convention of which register to use as an
1328 address temporary, depending on the regparmness: 0==EAX,
1329 1==EDX, 2==ECX, 3==EDI. */
1331 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1332 conditionally exit the block. Hence we only need to list (1)
1333 the registers that they read, and (2) the registers that they
1334 write in the case where the block is not exited. (2) is
1335 empty, hence only (1) is relevant here. */
1337 addRegUsage_X86AMode(u
, i
->Xin
.XDirect
.amEIP
);
1340 addHRegUse(u
, HRmRead
, i
->Xin
.XIndir
.dstGA
);
1341 addRegUsage_X86AMode(u
, i
->Xin
.XIndir
.amEIP
);
1344 addHRegUse(u
, HRmRead
, i
->Xin
.XAssisted
.dstGA
);
1345 addRegUsage_X86AMode(u
, i
->Xin
.XAssisted
.amEIP
);
1348 addRegUsage_X86RM(u
, i
->Xin
.CMov32
.src
, HRmRead
);
1349 addHRegUse(u
, HRmModify
, i
->Xin
.CMov32
.dst
);
1352 addRegUsage_X86AMode(u
, i
->Xin
.LoadEX
.src
);
1353 addHRegUse(u
, HRmWrite
, i
->Xin
.LoadEX
.dst
);
1356 addHRegUse(u
, HRmRead
, i
->Xin
.Store
.src
);
1357 addRegUsage_X86AMode(u
, i
->Xin
.Store
.dst
);
1360 addHRegUse(u
, HRmWrite
, i
->Xin
.Set32
.dst
);
1363 addHRegUse(u
, HRmRead
, i
->Xin
.Bsfr32
.src
);
1364 addHRegUse(u
, HRmWrite
, i
->Xin
.Bsfr32
.dst
);
1369 addRegUsage_X86AMode(u
, i
->Xin
.ACAS
.addr
);
1370 addHRegUse(u
, HRmRead
, hregX86_EBX());
1371 addHRegUse(u
, HRmModify
, hregX86_EAX());
1374 addRegUsage_X86AMode(u
, i
->Xin
.DACAS
.addr
);
1375 addHRegUse(u
, HRmRead
, hregX86_ECX());
1376 addHRegUse(u
, HRmRead
, hregX86_EBX());
1377 addHRegUse(u
, HRmModify
, hregX86_EDX());
1378 addHRegUse(u
, HRmModify
, hregX86_EAX());
1381 addHRegUse(u
, HRmRead
, i
->Xin
.FpUnary
.src
);
1382 addHRegUse(u
, HRmWrite
, i
->Xin
.FpUnary
.dst
);
1384 if (i
->Xin
.FpUnary
.op
== Xfp_MOV
) {
1385 u
->isRegRegMove
= True
;
1386 u
->regMoveSrc
= i
->Xin
.FpUnary
.src
;
1387 u
->regMoveDst
= i
->Xin
.FpUnary
.dst
;
1391 addHRegUse(u
, HRmRead
, i
->Xin
.FpBinary
.srcL
);
1392 addHRegUse(u
, HRmRead
, i
->Xin
.FpBinary
.srcR
);
1393 addHRegUse(u
, HRmWrite
, i
->Xin
.FpBinary
.dst
);
1396 addRegUsage_X86AMode(u
, i
->Xin
.FpLdSt
.addr
);
1397 addHRegUse(u
, i
->Xin
.FpLdSt
.isLoad
? HRmWrite
: HRmRead
,
1401 addRegUsage_X86AMode(u
, i
->Xin
.FpLdStI
.addr
);
1402 addHRegUse(u
, i
->Xin
.FpLdStI
.isLoad
? HRmWrite
: HRmRead
,
1403 i
->Xin
.FpLdStI
.reg
);
1406 addHRegUse(u
, HRmRead
, i
->Xin
.Fp64to32
.src
);
1407 addHRegUse(u
, HRmWrite
, i
->Xin
.Fp64to32
.dst
);
1410 addHRegUse(u
, HRmRead
, i
->Xin
.FpCMov
.src
);
1411 addHRegUse(u
, HRmModify
, i
->Xin
.FpCMov
.dst
);
1414 addRegUsage_X86AMode(u
, i
->Xin
.FpLdCW
.addr
);
1417 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1420 addHRegUse(u
, HRmRead
, i
->Xin
.FpCmp
.srcL
);
1421 addHRegUse(u
, HRmRead
, i
->Xin
.FpCmp
.srcR
);
1422 addHRegUse(u
, HRmWrite
, i
->Xin
.FpCmp
.dst
);
1423 addHRegUse(u
, HRmWrite
, hregX86_EAX());
1426 addRegUsage_X86AMode(u
, i
->Xin
.SseLdSt
.addr
);
1427 addHRegUse(u
, i
->Xin
.SseLdSt
.isLoad
? HRmWrite
: HRmRead
,
1428 i
->Xin
.SseLdSt
.reg
);
1431 addRegUsage_X86AMode(u
, i
->Xin
.SseLdzLO
.addr
);
1432 addHRegUse(u
, HRmWrite
, i
->Xin
.SseLdzLO
.reg
);
1435 addHRegUse(u
, HRmWrite
, i
->Xin
.SseConst
.dst
);
1438 vassert(i
->Xin
.Sse32Fx4
.op
!= Xsse_MOV
);
1439 unary
= toBool( i
->Xin
.Sse32Fx4
.op
== Xsse_RCPF
1440 || i
->Xin
.Sse32Fx4
.op
== Xsse_RSQRTF
1441 || i
->Xin
.Sse32Fx4
.op
== Xsse_SQRTF
);
1442 addHRegUse(u
, HRmRead
, i
->Xin
.Sse32Fx4
.src
);
1443 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1444 i
->Xin
.Sse32Fx4
.dst
);
1447 vassert(i
->Xin
.Sse32FLo
.op
!= Xsse_MOV
);
1448 unary
= toBool( i
->Xin
.Sse32FLo
.op
== Xsse_RCPF
1449 || i
->Xin
.Sse32FLo
.op
== Xsse_RSQRTF
1450 || i
->Xin
.Sse32FLo
.op
== Xsse_SQRTF
);
1451 addHRegUse(u
, HRmRead
, i
->Xin
.Sse32FLo
.src
);
1452 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1453 i
->Xin
.Sse32FLo
.dst
);
1456 vassert(i
->Xin
.Sse64Fx2
.op
!= Xsse_MOV
);
1457 unary
= toBool( i
->Xin
.Sse64Fx2
.op
== Xsse_RCPF
1458 || i
->Xin
.Sse64Fx2
.op
== Xsse_RSQRTF
1459 || i
->Xin
.Sse64Fx2
.op
== Xsse_SQRTF
);
1460 addHRegUse(u
, HRmRead
, i
->Xin
.Sse64Fx2
.src
);
1461 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1462 i
->Xin
.Sse64Fx2
.dst
);
1465 vassert(i
->Xin
.Sse64FLo
.op
!= Xsse_MOV
);
1466 unary
= toBool( i
->Xin
.Sse64FLo
.op
== Xsse_RCPF
1467 || i
->Xin
.Sse64FLo
.op
== Xsse_RSQRTF
1468 || i
->Xin
.Sse64FLo
.op
== Xsse_SQRTF
);
1469 addHRegUse(u
, HRmRead
, i
->Xin
.Sse64FLo
.src
);
1470 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1471 i
->Xin
.Sse64FLo
.dst
);
1474 if (i
->Xin
.SseReRg
.op
== Xsse_XOR
1475 && sameHReg(i
->Xin
.SseReRg
.src
, i
->Xin
.SseReRg
.dst
)) {
1476 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1477 /* (as opposed to a rite of passage :-) */
1478 addHRegUse(u
, HRmWrite
, i
->Xin
.SseReRg
.dst
);
1480 addHRegUse(u
, HRmRead
, i
->Xin
.SseReRg
.src
);
1481 addHRegUse(u
, i
->Xin
.SseReRg
.op
== Xsse_MOV
1482 ? HRmWrite
: HRmModify
,
1483 i
->Xin
.SseReRg
.dst
);
1485 if (i
->Xin
.SseReRg
.op
== Xsse_MOV
) {
1486 u
->isRegRegMove
= True
;
1487 u
->regMoveSrc
= i
->Xin
.SseReRg
.src
;
1488 u
->regMoveDst
= i
->Xin
.SseReRg
.dst
;
1493 addHRegUse(u
, HRmRead
, i
->Xin
.SseCMov
.src
);
1494 addHRegUse(u
, HRmModify
, i
->Xin
.SseCMov
.dst
);
1497 addHRegUse(u
, HRmRead
, i
->Xin
.SseShuf
.src
);
1498 addHRegUse(u
, HRmWrite
, i
->Xin
.SseShuf
.dst
);
1501 /* We expect both amodes only to mention %ebp, so this is in
1502 fact pointless, since %ebp isn't allocatable, but anyway.. */
1503 addRegUsage_X86AMode(u
, i
->Xin
.EvCheck
.amCounter
);
1504 addRegUsage_X86AMode(u
, i
->Xin
.EvCheck
.amFailAddr
);
1507 /* does not use any registers. */
1510 ppX86Instr(i
, False
);
1511 vpanic("getRegUsage_X86Instr");
1516 static void mapReg( HRegRemap
* m
, HReg
* r
)
1518 *r
= lookupHRegRemap(m
, *r
);
1521 void mapRegs_X86Instr ( HRegRemap
* m
, X86Instr
* i
, Bool mode64
)
1523 vassert(mode64
== False
);
1526 mapRegs_X86RMI(m
, i
->Xin
.Alu32R
.src
);
1527 mapReg(m
, &i
->Xin
.Alu32R
.dst
);
1530 mapRegs_X86RI(m
, i
->Xin
.Alu32M
.src
);
1531 mapRegs_X86AMode(m
, i
->Xin
.Alu32M
.dst
);
1534 mapReg(m
, &i
->Xin
.Sh32
.dst
);
1537 mapRegs_X86RM(m
, i
->Xin
.Test32
.dst
);
1540 mapReg(m
, &i
->Xin
.Unary32
.dst
);
1543 mapRegs_X86AMode(m
, i
->Xin
.Lea32
.am
);
1544 mapReg(m
, &i
->Xin
.Lea32
.dst
);
1547 mapRegs_X86RM(m
, i
->Xin
.MulL
.src
);
1550 mapRegs_X86RM(m
, i
->Xin
.Div
.src
);
1553 mapReg(m
, &i
->Xin
.Sh3232
.src
);
1554 mapReg(m
, &i
->Xin
.Sh3232
.dst
);
1557 mapRegs_X86RMI(m
, i
->Xin
.Push
.src
);
1562 mapRegs_X86AMode(m
, i
->Xin
.XDirect
.amEIP
);
1565 mapReg(m
, &i
->Xin
.XIndir
.dstGA
);
1566 mapRegs_X86AMode(m
, i
->Xin
.XIndir
.amEIP
);
1569 mapReg(m
, &i
->Xin
.XAssisted
.dstGA
);
1570 mapRegs_X86AMode(m
, i
->Xin
.XAssisted
.amEIP
);
1573 mapRegs_X86RM(m
, i
->Xin
.CMov32
.src
);
1574 mapReg(m
, &i
->Xin
.CMov32
.dst
);
1577 mapRegs_X86AMode(m
, i
->Xin
.LoadEX
.src
);
1578 mapReg(m
, &i
->Xin
.LoadEX
.dst
);
1581 mapReg(m
, &i
->Xin
.Store
.src
);
1582 mapRegs_X86AMode(m
, i
->Xin
.Store
.dst
);
1585 mapReg(m
, &i
->Xin
.Set32
.dst
);
1588 mapReg(m
, &i
->Xin
.Bsfr32
.src
);
1589 mapReg(m
, &i
->Xin
.Bsfr32
.dst
);
1594 mapRegs_X86AMode(m
, i
->Xin
.ACAS
.addr
);
1597 mapRegs_X86AMode(m
, i
->Xin
.DACAS
.addr
);
1600 mapReg(m
, &i
->Xin
.FpUnary
.src
);
1601 mapReg(m
, &i
->Xin
.FpUnary
.dst
);
1604 mapReg(m
, &i
->Xin
.FpBinary
.srcL
);
1605 mapReg(m
, &i
->Xin
.FpBinary
.srcR
);
1606 mapReg(m
, &i
->Xin
.FpBinary
.dst
);
1609 mapRegs_X86AMode(m
, i
->Xin
.FpLdSt
.addr
);
1610 mapReg(m
, &i
->Xin
.FpLdSt
.reg
);
1613 mapRegs_X86AMode(m
, i
->Xin
.FpLdStI
.addr
);
1614 mapReg(m
, &i
->Xin
.FpLdStI
.reg
);
1617 mapReg(m
, &i
->Xin
.Fp64to32
.src
);
1618 mapReg(m
, &i
->Xin
.Fp64to32
.dst
);
1621 mapReg(m
, &i
->Xin
.FpCMov
.src
);
1622 mapReg(m
, &i
->Xin
.FpCMov
.dst
);
1625 mapRegs_X86AMode(m
, i
->Xin
.FpLdCW
.addr
);
1630 mapReg(m
, &i
->Xin
.FpCmp
.srcL
);
1631 mapReg(m
, &i
->Xin
.FpCmp
.srcR
);
1632 mapReg(m
, &i
->Xin
.FpCmp
.dst
);
1635 mapReg(m
, &i
->Xin
.SseConst
.dst
);
1638 mapReg(m
, &i
->Xin
.SseLdSt
.reg
);
1639 mapRegs_X86AMode(m
, i
->Xin
.SseLdSt
.addr
);
1642 mapReg(m
, &i
->Xin
.SseLdzLO
.reg
);
1643 mapRegs_X86AMode(m
, i
->Xin
.SseLdzLO
.addr
);
1646 mapReg(m
, &i
->Xin
.Sse32Fx4
.src
);
1647 mapReg(m
, &i
->Xin
.Sse32Fx4
.dst
);
1650 mapReg(m
, &i
->Xin
.Sse32FLo
.src
);
1651 mapReg(m
, &i
->Xin
.Sse32FLo
.dst
);
1654 mapReg(m
, &i
->Xin
.Sse64Fx2
.src
);
1655 mapReg(m
, &i
->Xin
.Sse64Fx2
.dst
);
1658 mapReg(m
, &i
->Xin
.Sse64FLo
.src
);
1659 mapReg(m
, &i
->Xin
.Sse64FLo
.dst
);
1662 mapReg(m
, &i
->Xin
.SseReRg
.src
);
1663 mapReg(m
, &i
->Xin
.SseReRg
.dst
);
1666 mapReg(m
, &i
->Xin
.SseCMov
.src
);
1667 mapReg(m
, &i
->Xin
.SseCMov
.dst
);
1670 mapReg(m
, &i
->Xin
.SseShuf
.src
);
1671 mapReg(m
, &i
->Xin
.SseShuf
.dst
);
1674 /* We expect both amodes only to mention %ebp, so this is in
1675 fact pointless, since %ebp isn't allocatable, but anyway.. */
1676 mapRegs_X86AMode(m
, i
->Xin
.EvCheck
.amCounter
);
1677 mapRegs_X86AMode(m
, i
->Xin
.EvCheck
.amFailAddr
);
1680 /* does not use any registers. */
1684 ppX86Instr(i
, mode64
);
1685 vpanic("mapRegs_X86Instr");
/* Generate x86 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                    HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_X86: unimplemented regclass");
   }
}

void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                     HReg rreg, Int offsetB, Bool mode64 )
{
   X86AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == False);
   *i1 = *i2 = NULL;
   am = X86AMode_IR(offsetB, hregX86_EBP());
   switch (hregClass(rreg)) {
      case HRcInt32:
         *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
         return;
      case HRcFlt64:
         *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
         return;
      case HRcVec128:
         *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_X86: unimplemented regclass");
   }
}

X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
{
   switch (hregClass(from)) {
      case HRcInt32:
         return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
      case HRcVec128:
         return X86Instr_SseReRg(Xsse_MOV, from, to);
      default:
         ppHRegClass(hregClass(from));
         vpanic("genMove_X86: unimplemented regclass");
   }
}

/* The given instruction reads the specified vreg exactly once, and
   that vreg is currently located at the given spill offset.  If
   possible, return a variant of the instruction to one which instead
   references the spill slot directly. */
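
/* Illustrative example, not part of the original source, using a
   hypothetical vreg %vr42: if %vr42 is spilled at offset 0x18 from
   %ebp, an instruction like "orl %vr42,%eax" can be returned in the
   rewritten form "orl 0x18(%ebp),%eax", saving an explicit reload. */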
X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
{
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
           || i->Xin.Alu32R.op == Xalu_XOR)
       && i->Xin.Alu32R.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
      return X86Instr_Alu32R(
                i->Xin.Alu32R.op,
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
                i->Xin.Alu32R.dst
             );
   }

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Xin_Alu32R
       && (i->Xin.Alu32R.op == Xalu_CMP)
       && i->Xin.Alu32R.src->tag == Xrmi_Imm
       && sameHReg(i->Xin.Alu32R.dst, vreg)) {
      return X86Instr_Alu32M(
                i->Xin.Alu32R.op,
                X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
                X86AMode_IR( spill_off, hregX86_EBP())
             );
   }

   /* Deal with form: Push(RMI_Reg)
      Convert to: Push(RMI_Mem)
   */
   if (i->tag == Xin_Push
       && i->Xin.Push.src->tag == Xrmi_Reg
       && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
      return X86Instr_Push(
                X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
             );
   }

   /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
      Convert to CMov32(RM_Mem, dst) */
   if (i->tag == Xin_CMov32
       && i->Xin.CMov32.src->tag == Xrm_Reg
       && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
      return X86Instr_CMov32(
                i->Xin.CMov32.cond,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
                i->Xin.CMov32.dst
             );
   }

   /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
   if (i->tag == Xin_Test32
       && i->Xin.Test32.dst->tag == Xrm_Reg
       && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
      return X86Instr_Test32(
                i->Xin.Test32.imm32,
                X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
             );
   }

   return NULL;
}
/* --------- The x86 assembler (bleh.) --------- */

inline static UInt iregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt32);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UInt fregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 5);
   return n;
}

inline static UInt vregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 7);
   return n;
}

inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
{
   vassert(mod < 4);
   vassert((reg|regmem) < 8);
   return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
}

inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
{
   vassert(shift < 4);
   vassert((regindex|regbase) < 8);
   return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
}
static UChar* emit32 ( UChar* p, UInt w32 )
{
   *p++ = toUChar( w32        & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);
   return p;
}
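
/* Little-endian example, not part of the original source:
   emit32(p, 0x12345678) writes the bytes 78 56 34 12. */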
/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
{
   Int i32 = (Int)w32;
   return toBool(i32 == ((Int)(w32 << 24) >> 24));
}
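
/* Examples, not part of the original source: 0xFFFFFF80 (-128) and
   0x0000007F (127) survive the round trip, so they fit in 8 signed
   bits; 0x00000080 (128) does not. */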
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg != ESP && ereg != EBP
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg != ESP
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg != ESP
                       =  10 greg ereg, d32

     greg,  d8(%esp)   =  01 greg 100, 0x24, d8

     -----------------------------------------------

     greg,  d8(base,index,scale)
               |  index != ESP
               =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
               |  index != ESP
               =  10 greg 100, scale index base, d32
*/
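
/* Worked example, not part of the original source: for greg = %eax
   (encoding 0) and the amode 4(%ebx), the "01 greg ereg, d8" form
   gives mod=01 reg=000 rm=011 followed by the displacement byte,
   i.e. the two bytes 0x43 0x04. */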
static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
{
   if (am->tag == Xam_IR) {
      if (am->Xam.IR.imm == 0
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
         *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
         return p;
      }
      if (fits8bits(am->Xam.IR.imm)
          && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
         p = emit32(p, am->Xam.IR.imm);
         return p;
      }
      if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
          && fits8bits(am->Xam.IR.imm)) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = 0x24;
         *p++ = toUChar(am->Xam.IR.imm & 0xFF);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IR");
      /*NOTREACHED*/
   }
   if (am->tag == Xam_IRRS) {
      if (fits8bits(am->Xam.IRRS.imm)
          && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(1, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
         return p;
      }
      if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
         *p++ = mkModRegRM(2, gregEnc, 4);
         *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
                                          iregEnc(am->Xam.IRRS.base));
         p = emit32(p, am->Xam.IRRS.imm);
         return p;
      }
      ppX86AMode(am);
      vpanic("doAMode_M: can't emit amode IRRS");
      /*NOTREACHED*/
   }
   vpanic("doAMode_M: unknown amode");
   /*NOTREACHED*/
}

static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
{
   return doAMode_M__wrk(p, iregEnc(greg), am);
}

static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
{
   vassert(gregEnc < 8);
   return doAMode_M__wrk(p, gregEnc, am);
}
/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   *p++ = mkModRegRM(3, gregEnc, eregEnc);
   return p;
}

static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
{
   return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
}

static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
{
   vassert(gregEnc < 8);
   return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
}

static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
{
   vassert( (gregEnc|eregEnc) < 8);
   return doAMode_R__wrk(p, gregEnc, eregEnc);
}
/* Emit ffree %st(7) */
static UChar* do_ffree_st7 ( UChar* p )
{
   *p++ = 0xDD;
   *p++ = 0xC7;
   return p;
}

/* Emit fstp %st(i), 1 <= i <= 7 */
static UChar* do_fstp_st ( UChar* p, Int i )
{
   vassert(1 <= i && i <= 7);
   *p++ = 0xDD;
   *p++ = toUChar(0xD8+i);
   return p;
}

/* Emit fld %st(i), 0 <= i <= 6 */
static UChar* do_fld_st ( UChar* p, Int i )
{
   vassert(0 <= i && i <= 6);
   *p++ = 0xD9;
   *p++ = toUChar(0xC0+i);
   return p;
}

/* Emit f<op> %st(0) */
static UChar* do_fop1_st ( UChar* p, X86FpOp op )
{
   switch (op) {
      case Xfp_NEG:   *p++ = 0xD9; *p++ = 0xE0; break;
      case Xfp_ABS:   *p++ = 0xD9; *p++ = 0xE1; break;
      case Xfp_SQRT:  *p++ = 0xD9; *p++ = 0xFA; break;
      case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
      case Xfp_SIN:   *p++ = 0xD9; *p++ = 0xFE; break;
      case Xfp_COS:   *p++ = 0xD9; *p++ = 0xFF; break;
      case Xfp_2XM1:  *p++ = 0xD9; *p++ = 0xF0; break;
      case Xfp_MOV:   break;
      case Xfp_TAN:
         /* fptan pushes 1.0 on the FP stack, except when the argument
            is out of range.  Hence we have to do the instruction,
            then inspect C2 to see if there is an out of range
            condition.  If there is, we skip the fincstp that is used
            by the in-range case to get rid of this extra 1.0
            value. */
         p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
         *p++ = 0xD9; *p++ = 0xF2; // fptan
         *p++ = 0x50;              // pushl %eax
         *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
         *p++ = 0x66; *p++ = 0xA9;
         *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
         *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
         *p++ = 0xD9; *p++ = 0xF7; // fincstp
         *p++ = 0x58;              // after_fincstp: popl %eax
         break;
      default:
         vpanic("do_fop1_st: unknown op");
   }
   return p;
}
/* Emit f<op> %st(i), 1 <= i <= 5 */
static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
{
   Int subopc;
   switch (op) {
      case Xfp_ADD: subopc = 0; break;
      case Xfp_SUB: subopc = 4; break;
      case Xfp_MUL: subopc = 1; break;
      case Xfp_DIV: subopc = 6; break;
      default: vpanic("do_fop2_st: unknown op");
   }
   *p++ = 0xD8;
   p    = doAMode_R_enc_enc(p, subopc, i);
   return p;
}
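
/* Illustrative example, not part of the original source: for Xfp_ADD
   and i == 3 this emits the bytes D8 C3, the encoding of
   "fadd %st(3),%st(0)" (add %st(3) into %st(0)). */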
/* Push a 32-bit word on the stack.  The word depends on tags[3:0];
   each byte is either 0x00 or 0xFF depending on the corresponding
   bit in tags[]. */
static UChar* push_word_from_tags ( UChar* p, UShort tags )
{
   UInt w;
   vassert(0 == (tags & ~0xF));
   if (tags == 0) {
      /* pushl $0x00000000 */
      *p++ = 0x6A;
      *p++ = 0x00;
   }
   else
   /* pushl $0xFFFFFFFF */
   if (tags == 0xF) {
      *p++ = 0x6A;
      *p++ = 0xFF;
   } else {
      vassert(0); /* awaiting test case */
      w = 0;
      if (tags & 1) w |= 0x000000FF;
      if (tags & 2) w |= 0x0000FF00;
      if (tags & 4) w |= 0x00FF0000;
      if (tags & 8) w |= 0xFF000000;
      *p++ = 0x68;
      p = emit32(p, w);
   }
   return p;
}
2119 /* Emit an instruction into buf and return the number of bytes used.
2120 Note that buf is not the insn's final place, and therefore it is
2121 imperative to emit position-independent code. If the emitted
2122 instruction was a profiler inc, set *is_profInc to True, else
2123 leave it unchanged. */
2125 Int
emit_X86Instr ( /*MB_MOD*/Bool
* is_profInc
,
2126 UChar
* buf
, Int nbuf
, const X86Instr
* i
,
2127 Bool mode64
, VexEndness endness_host
,
2128 const void* disp_cp_chain_me_to_slowEP
,
2129 const void* disp_cp_chain_me_to_fastEP
,
2130 const void* disp_cp_xindir
,
2131 const void* disp_cp_xassisted
)
2133 UInt irno
, opc
, opc_rr
, subopc_imm
, opc_imma
, opc_cl
, opc_imm
, subopc
;
2138 vassert(nbuf
>= 32);
2139 vassert(mode64
== False
);
2141 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
   case Xin_Alu32R:
      /* Deal specially with MOV */
      if (i->Xin.Alu32R.op == Xalu_MOV) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Imm:
               *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst));
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               goto done;
            case Xrmi_Reg:
               *p++ = 0x89;
               p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                                i->Xin.Alu32R.dst);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x8B;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            default:
               goto bad;
         }
      }
      /* MUL */
      if (i->Xin.Alu32R.op == Xalu_MUL) {
         switch (i->Xin.Alu32R.src->tag) {
            case Xrmi_Reg:
               *p++ = 0x0F; *p++ = 0xAF;
               p = doAMode_R(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Reg.reg);
               goto done;
            case Xrmi_Mem:
               *p++ = 0x0F; *p++ = 0xAF;
               p = doAMode_M(p, i->Xin.Alu32R.dst,
                                i->Xin.Alu32R.src->Xrmi.Mem.am);
               goto done;
            case Xrmi_Imm:
               if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
                  *p++ = 0x6B;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               } else {
                  *p++ = 0x69;
                  p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
                  p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
               }
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32R.op) {
         case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Xalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Xalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32R.src->tag) {
         case Xrmi_Imm:
            if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
                && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else
            if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
               *p++ = 0x83;
               p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            } else {
               *p++ = 0x81;
               p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
               p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
            }
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
                             i->Xin.Alu32R.dst);
            goto done;
         case Xrmi_Mem:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32R.dst,
                             i->Xin.Alu32R.src->Xrmi.Mem.am);
            goto done;
         default:
            goto bad;
      }
      break;
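      /* A minimal illustrative sketch, kept out of the build: the
         fits8bits choice above selects between the short group-1 form
         (opcode 0x83, imm8 sign-extended to 32 bits) and the long form
         (opcode 0x81, full imm32).  The hypothetical predicate below
         just restates when the short form is usable. */
#if 0
      static int imm32_fits_in_signed_imm8 ( unsigned int imm32 )
      {
         int s = (int)imm32;
         return s >= -128 && s <= 127;   /* survives sign-extension round trip */
      }
#endif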
   case Xin_Alu32M:
      /* Deal specially with MOV */
      if (i->Xin.Alu32M.op == Xalu_MOV) {
         switch (i->Xin.Alu32M.src->tag) {
            case Xri_Reg:
               *p++ = 0x89;
               p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                                i->Xin.Alu32M.dst);
               goto done;
            case Xri_Imm:
               *p++ = 0xC7;
               p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            default:
               goto bad;
         }
      }
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Xin.Alu32M.op) {
         case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
         case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
         case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
         default: goto bad;
      }
      switch (i->Xin.Alu32M.src->tag) {
         case Xri_Reg:
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
                             i->Xin.Alu32M.dst);
            goto done;
         case Xri_Imm:
            if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
               *p++ = 0x83;
               p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            } else {
               *p++ = 0x81;
               p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
               p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
               goto done;
            }
         default:
            goto bad;
      }
      break;
   case Xin_Sh32:
      opc_cl = opc_imm = subopc = 0;
      switch (i->Xin.Sh32.op) {
         case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
         default: goto bad;
      }
      if (i->Xin.Sh32.src == 0) {
         *p++ = toUChar(opc_cl);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
      } else {
         *p++ = toUChar(opc_imm);
         p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
         *p++ = (UChar)(i->Xin.Sh32.src);
      }
      goto done;

   case Xin_Test32:
      if (i->Xin.Test32.dst->tag == Xrm_Reg) {
         /* testl $imm32, %reg */
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      } else {
         /* testl $imm32, amode */
         *p++ = 0xF7;
         p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
         p = emit32(p, i->Xin.Test32.imm32);
         goto done;
      }

   case Xin_Unary32:
      if (i->Xin.Unary32.op == Xun_NOT) {
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
         goto done;
      }
      if (i->Xin.Unary32.op == Xun_NEG) {
         *p++ = 0xF7;
         p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);
         goto done;
      }
      break;

   case Xin_Lea32:
      *p++ = 0x8D;
      p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
      goto done;

   case Xin_MulL:
      subopc = i->Xin.MulL.syned ? 5 : 4;
      *p++ = 0xF7;
      switch (i->Xin.MulL.src->tag) {
         case Xrm_Mem:
            p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Div:
      subopc = i->Xin.Div.syned ? 7 : 6;
      *p++ = 0xF7;
      switch (i->Xin.Div.src->tag) {
         case Xrm_Mem:
            p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
            goto done;
         case Xrm_Reg:
            p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);
            goto done;
         default:
            goto bad;
      }
      break;

   case Xin_Sh3232:
      vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
      if (i->Xin.Sh3232.amt == 0) {
         /* shldl/shrdl by %cl */
         *p++ = 0x0F;
         if (i->Xin.Sh3232.op == Xsh_SHL) {
            *p++ = 0xA5;
         } else {
            *p++ = 0xAD;
         }
         p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
         goto done;
      }
      break;

   case Xin_Push:
      switch (i->Xin.Push.src->tag) {
         case Xrmi_Mem:
            *p++ = 0xFF;
            p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
            goto done;
         case Xrmi_Imm:
            *p++ = 0x68;
            p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
            goto done;
         case Xrmi_Reg:
            *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
            goto done;
         default:
            goto bad;
      }
   case Xin_Call:
      if (i->Xin.Call.cond != Xcc_ALWAYS
          && i->Xin.Call.rloc.pri != RLPri_None) {
         /* The call might not happen (it isn't unconditional) and it
            returns a result.  In this case we will need to generate a
            control flow diamond to put 0x555..555 in the return
            register(s) in the case where the call doesn't happen.  If
            this ever becomes necessary, maybe copy code from the ARM
            equivalent.  Until that day, just give up. */
         goto bad;
      }
      /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
         for explanation of this. */
      switch (i->Xin.Call.regparms) {
         case 0: irno = iregEnc(hregX86_EAX()); break;
         case 1: irno = iregEnc(hregX86_EDX()); break;
         case 2: irno = iregEnc(hregX86_ECX()); break;
         case 3: irno = iregEnc(hregX86_EDI()); break;
         default: vpanic(" emit_X86Instr:call:regparms");
      }
      /* jump over the following two insns if the condition does not
         hold */
      if (i->Xin.Call.cond != Xcc_ALWAYS) {
         *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
         *p++ = 0x07; /* 7 bytes in the next two insns */
      }
      /* movl $target, %tmp */
      *p++ = toUChar(0xB8 + irno);
      p = emit32(p, i->Xin.Call.target);
      /* call *%tmp */
      *p++ = 0xFF;
      *p++ = toUChar(0xD0 + irno);
      goto done;
   case Xin_XDirect: {
      /* NB: what goes on here has to be very closely coordinated with the
         chainXDirect_X86 and unchainXDirect_X86 below. */
      /* We're generating chain-me requests here, so we need to be
         sure this is actually allowed -- no-redir translations can't
         use chain-me's.  Hence: */
      vassert(disp_cp_chain_me_to_slowEP != NULL);
      vassert(disp_cp_chain_me_to_fastEP != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* Update the guest EIP. */
      /* movl $dstGA, amEIP */
      *p++ = 0xC7;
      p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
      p = emit32(p, i->Xin.XDirect.dstGA);

      /* --- FIRST PATCHABLE BYTE follows --- */
      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
         to) backs up the return address, so as to find the address of
         the first patchable byte.  So: don't change the length of the
         two instructions below. */
      /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
      *p++ = 0xBA;
      const void* disp_cp_chain_me
         = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                   : disp_cp_chain_me_to_slowEP;
      p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
      /* call *%edx */
      *p++ = 0xFF;
      *p++ = 0xD2;
      /* --- END of PATCHABLE BYTES --- */

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }
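   /* A minimal illustrative sketch, kept out of the build: the patchable
      tail emitted just above is always the 7-byte sequence
         BA <imm32>   movl $disp_cp_chain_me_..., %edx
         FF D2        call *%edx
      which is exactly what chainXDirect_X86/unchainXDirect_X86, later in
      this file, expect to find and rewrite in place.  The helper name is
      hypothetical. */
#if 0
   static int looks_like_unchained_site ( const unsigned char* q )
   {
      return q[0] == 0xBA && q[5] == 0xFF && q[6] == 0xD2;
   }
#endif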
   case Xin_XIndir: {
      /* We're generating transfers that could lead indirectly to a
         chain-me, so we need to be sure this is actually allowed --
         no-redir translations are not allowed to reach normal
         translations without going through the scheduler.  That means
         no XDirects or XIndirs out from no-redir translations.
         Hence: */
      vassert(disp_cp_xindir != NULL);

      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)(Addr)disp_cp_xindir);
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }
   case Xin_XAssisted: {
      /* Use ptmp for backpatching conditional jumps. */
      ptmp = NULL;

      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      }

      /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
      *p++ = 0x89;
      p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
      /* movl $magic_number, %ebp. */
      UInt trcval = 0;
      switch (i->Xin.XAssisted.jk) {
         case Ijk_ClientReq:    trcval = VEX_TRC_JMP_CLIENTREQ;    break;
         case Ijk_Sys_syscall:  trcval = VEX_TRC_JMP_SYS_SYSCALL;  break;
         case Ijk_Sys_int128:   trcval = VEX_TRC_JMP_SYS_INT128;   break;
         case Ijk_Sys_int129:   trcval = VEX_TRC_JMP_SYS_INT129;   break;
         case Ijk_Sys_int130:   trcval = VEX_TRC_JMP_SYS_INT130;   break;
         case Ijk_Sys_int145:   trcval = VEX_TRC_JMP_SYS_INT145;   break;
         case Ijk_Sys_int210:   trcval = VEX_TRC_JMP_SYS_INT210;   break;
         case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
         case Ijk_Yield:        trcval = VEX_TRC_JMP_YIELD;        break;
         case Ijk_EmWarn:       trcval = VEX_TRC_JMP_EMWARN;       break;
         case Ijk_MapFail:      trcval = VEX_TRC_JMP_MAPFAIL;      break;
         case Ijk_NoDecode:     trcval = VEX_TRC_JMP_NODECODE;     break;
         case Ijk_InvalICache:  trcval = VEX_TRC_JMP_INVALICACHE;  break;
         case Ijk_NoRedir:      trcval = VEX_TRC_JMP_NOREDIR;      break;
         case Ijk_SigTRAP:      trcval = VEX_TRC_JMP_SIGTRAP;      break;
         case Ijk_SigSEGV:      trcval = VEX_TRC_JMP_SIGSEGV;      break;
         case Ijk_Boring:       trcval = VEX_TRC_JMP_BORING;       break;
         /* We don't expect to see the following being assisted. */
         default:
            ppIRJumpKind(i->Xin.XAssisted.jk);
            vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
      }
      vassert(trcval != 0);
      *p++ = 0xBD;
      p = emit32(p, trcval);

      /* movl $disp_indir, %edx */
      *p++ = 0xBA;
      p = emit32(p, (UInt)(Addr)disp_cp_xassisted);
      /* jmp *%edx */
      *p++ = 0xFF;
      *p++ = 0xE2;

      /* Fix up the conditional jump, if there was one. */
      if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      }
      goto done;
   }
   case Xin_CMov32:
      vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);

      /* This generates cmov, which is illegal on P54/P55. */
      /*
      *p++ = 0x0F;
      *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
      if (i->Xin.CMov32.src->tag == Xrm_Reg) {
         p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
         goto done;
      }
      if (i->Xin.CMov32.src->tag == Xrm_Mem) {
         p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
         goto done;
      }
      */

      /* Alternative version which works on any x86 variant. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      switch (i->Xin.CMov32.src->tag) {
         case Xrm_Reg:
            /* Big sigh.  This is movl E -> G ... */
            *p++ = 0x89;
            p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
                             i->Xin.CMov32.dst);
            break;
         case Xrm_Mem:
            /* ... whereas this is movl G -> E.  That's why the args
               to doAMode_R appear to be the wrong way round in the
               Xrm_Reg case. */
            *p++ = 0x8B;
            p = doAMode_M(p, i->Xin.CMov32.dst,
                             i->Xin.CMov32.src->Xrm.Mem.am);
            break;
         default:
            goto bad;
      }
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;
   case Xin_LoadEX:
      if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
         /* movzbl */
         *p++ = 0x0F;
         *p++ = 0xB6;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
         /* movzwl */
         *p++ = 0x0F;
         *p++ = 0xB7;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
         /* movsbl */
         *p++ = 0x0F;
         *p++ = 0xBE;
         p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
         goto done;
      }
      break;
   case Xin_Set32:
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  We have to dodge and weave
         when the destination is %esi or %edi as we cannot directly
         emit the native 'setb %reg' for those.  Further complication:
         the top 24 bits of the destination should be forced to zero,
         but doing 'xor %r,%r' kills the flag(s) we are about to read.
         Sigh.  So start off by moving $0 into the dest. */

      /* Do we need to swap in %eax? */
      if (iregEnc(i->Xin.Set32.dst) >= 4) {
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
         /* movl $0, %eax */
         *p++ = toUChar(0xB8 + iregEnc(hregX86_EAX()));
         p = emit32(p, 0);
         /* setb lo8(%eax) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
         /* xchg %eax, %dst */
         *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
      } else {
         /* movl $0, %dst */
         *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
         p = emit32(p, 0);
         /* setb lo8(%dst) */
         *p++ = 0x0F;
         *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
         p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
      }
      goto done;
   case Xin_Bsfr32:
      *p++ = 0x0F;
      if (i->Xin.Bsfr32.isFwds) {
         *p++ = 0xBC;
      } else {
         *p++ = 0xBD;
      }
      p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
      goto done;

   case Xin_MFence:
      /* see comment in hdefs.h re this insn */
      if (0) vex_printf("EMIT FENCE\n");
      if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
                                  |VEX_HWCAPS_X86_SSE2)) {
         /* mfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
         goto done;
      }
      if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
         /* sfence */
         *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
         /* lock addl $0,0(%esp) */
         *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
         *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
         goto done;
      }
      vpanic("emit_X86Instr:mfence:hwcaps");
      /*NOTREACHED*/
      break;

   case Xin_ACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg{b,w,l} %ebx,mem.  Expected-value in %eax, new value
         in %ebx.  The new-value register is hardwired to be %ebx
         since letting it be any integer register gives the problem
         that %sil and %dil are unaddressible on x86 and hence we
         would have to resort to the same kind of trickery as with
         byte-sized Xin.Store, just below.  Given that this isn't
         performance critical, it is simpler just to force the
         register operand to %ebx (could equally be %ecx or %edx).
         (Although %ebx is more consistent with cmpxchg8b.) */
      if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
      *p++ = 0x0F;
      if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
      goto done;

   case Xin_DACAS:
      /* lock */
      *p++ = 0xF0;
      /* cmpxchg8b m64.  Expected-value in %edx:%eax, new value
         in %ecx:%ebx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      *p++ = 0x0F;
      *p++ = 0xC7;
      p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
      goto done;
   case Xin_Store:
      if (i->Xin.Store.sz == 2) {
         /* This case, at least, is simple, given that we can
            reference the low 16 bits of any integer register. */
         *p++ = 0x66;
         *p++ = 0x89;
         p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
         goto done;
      }

      if (i->Xin.Store.sz == 1) {
         /* We have to do complex dodging and weaving if src is not
            the low 8 bits of %eax/%ebx/%ecx/%edx. */
         if (iregEnc(i->Xin.Store.src) < 4) {
            /* we're OK, can do it directly */
            *p++ = 0x88;
            p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
            goto done;
         } else {
            /* Bleh.  This means the source is %edi or %esi.  Since
               the address mode can only mention three registers, at
               least one of %eax/%ebx/%ecx/%edx must be available to
               temporarily swap the source into, so the store can
               happen.  So we have to look at the regs mentioned
               in the amode. */
            HReg swap = INVALID_HREG;
            HReg  eax = hregX86_EAX(), ebx = hregX86_EBX(),
                  ecx = hregX86_ECX(), edx = hregX86_EDX();
            HRegUsage u;
            initHRegUsage(&u);
            addRegUsage_X86AMode(&u, i->Xin.Store.dst);
            /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
            else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
            else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
            else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
            vassert(! hregIsInvalid(swap));
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            /* movb lo8{%swap}, (dst) */
            *p++ = 0x88;
            p = doAMode_M(p, swap, i->Xin.Store.dst);
            /* xchgl %source, %swap. Could do better if swap is %eax. */
            *p++ = 0x87;
            p = doAMode_R(p, i->Xin.Store.src, swap);
            goto done;
         }
      } /* if (i->Xin.Store.sz == 1) */
      break;
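      /* A minimal illustrative sketch, kept out of the build: the
         scratch-register choice made above.  An amode names only a few
         registers, so at least one of %eax/%ebx/%ecx/%edx is always free
         to be swapped with %esi/%edi for the byte store.  The helper
         below is hypothetical and works on a register bitmask (bit n set
         means register with encoding n is mentioned by the amode). */
#if 0
      static int pick_scratch_not_in ( unsigned int regs_in_amode )
      {
         int r;
         for (r = 0; r < 4; r++)          /* %eax=0 %ecx=1 %edx=2 %ebx=3 */
            if (!(regs_in_amode & (1u << r)))
               return r;
         return -1; /* unreachable if the amode mentions fewer than 4 regs */
      }
#endif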
   case Xin_FpUnary:
      /* gop %src, %dst
         --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
      p = do_fop1_st(p, i->Xin.FpUnary.op);
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
      goto done;
   case Xin_FpBinary:
      if (i->Xin.FpBinary.op == Xfp_YL2X
          || i->Xin.FpBinary.op == Xfp_YL2XP1) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
         *p++ = 0xD9;
         *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_ATAN) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcL) ;
            ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
         *p++ = 0xD9; *p++ = 0xF3;
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
         goto done;
      }
      if (i->Xin.FpBinary.op == Xfp_PREM
          || i->Xin.FpBinary.op == Xfp_PREM1
          || i->Xin.FpBinary.op == Xfp_SCALE) {
         /* Have to do this specially. */
         /* ffree %st7 ; fld %st(srcR) ;
            ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
            fincstp ; ffree %st7 */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
         p = do_ffree_st7(p);
         p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
         *p++ = 0xD9;
         switch (i->Xin.FpBinary.op) {
            case Xfp_PREM:  *p++ = 0xF8; break;
            case Xfp_PREM1: *p++ = 0xF5; break;
            case Xfp_SCALE: *p++ = 0xFD; break;
            default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
         }
         p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
         *p++ = 0xD9; *p++ = 0xF7;
         p = do_ffree_st7(p);
         goto done;
      }
      /* General case */
      /* gop %srcL, %srcR, %dst
         --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
      */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
      p = do_fop2_st(p, i->Xin.FpBinary.op,
                        1+fregEnc(i->Xin.FpBinary.srcR));
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
      goto done;
   case Xin_FpLdSt:
      if (i->Xin.FpLdSt.isLoad) {
         /* Load from memory into %fakeN.
            --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
         */
         p = do_ffree_st7(p);
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,load)");
         }
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory.
            --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
         */
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
         switch (i->Xin.FpLdSt.sz) {
            case 4:
               *p++ = 0xD9;
               p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 8:
               *p++ = 0xDD;
               p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            case 10:
               *p++ = 0xDB;
               p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
               break;
            default:
               vpanic("emitX86Instr(FpLdSt,store)");
         }
         goto done;
      }
      break;
   case Xin_FpLdStI:
      if (i->Xin.FpLdStI.isLoad) {
         /* Load from memory into %fakeN, converting from an int.
            --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 5; break;
            case 4:  opc = 0xDB; subopc_imm = 0; break;
            case 2:  vassert(0); opc = 0xDF; subopc_imm = 0; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
         }
         p = do_ffree_st7(p);
         *p++ = toUChar(opc);
         p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
         p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
         goto done;
      } else {
         /* Store from %fakeN into memory, converting to an int.
            --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
         */
         switch (i->Xin.FpLdStI.sz) {
            case 8:  opc = 0xDF; subopc_imm = 7; break;
            case 4:  opc = 0xDB; subopc_imm = 3; break;
            case 2:  opc = 0xDF; subopc_imm = 3; break;
            default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
         }
         p = do_ffree_st7(p);
         p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
         *p++ = toUChar(opc);
         p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
         goto done;
      }
      break;
   case Xin_Fp64to32:
      /* ffree %st7 ; fld %st(src) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
      /* subl $4, %esp */
      *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
      /* fstps (%esp) */
      *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
      /* flds (%esp) */
      *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
      /* addl $4, %esp */
      *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
      /* fstp %st(1+dst) */
      p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
      goto done;
   case Xin_FpCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
      p = do_ffree_st7(p);
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
      p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;
   case Xin_FpLdCW:
      *p++ = 0xD9;
      p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);
      goto done;

   case Xin_FpStSW_AX:
      /* note, this emits fnstsw %ax, not fstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      goto done;

   case Xin_FpCmp:
      /* gcmp %fL, %fR, %dst
         -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
            fnstsw %ax ; movl %eax, %dst
      */
      /* ffree %st7 */
      p = do_ffree_st7(p);
      /* fpush %fL */
      p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
      /* fucomp %(fR+1) */
      *p++ = 0xDD;
      *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
      /* fnstsw %ax */
      *p++ = 0xDF;
      *p++ = 0xE0;
      /* movl %eax, %dst */
      *p++ = 0x89;
      p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
      goto done;
   case Xin_SseConst: {
      UShort con = i->Xin.SseConst.con;
      p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
      p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
      p = push_word_from_tags(p, toUShort(con & 0xF));
      /* movl (%esp), %xmm-dst */
      *p++ = 0x0F;
      *p++ = 0x10;
      *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
      *p++ = 0x24;
      /* addl $16, %esp */
      *p++ = 0x83;
      *p++ = 0xC4;
      *p++ = 0x10;
      goto done;
   }
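   /* A minimal illustrative sketch, kept out of the build: the 16 tag
      bits describe the 16 bytes of the 128-bit constant, one nibble per
      32-bit lane.  The pushes above go from the highest nibble down, so
      the lowest lane ends up at (%esp) where the 16-byte load picks the
      whole value up.  The helper below is hypothetical and only spells
      out that nibble order. */
#if 0
   static void split_con_into_nibbles ( unsigned short con,
                                        unsigned short nibbles[4] )
   {
      nibbles[0] = (con >> 12) & 0xF;  /* pushed first: highest lane  */
      nibbles[1] = (con >> 8)  & 0xF;
      nibbles[2] = (con >> 4)  & 0xF;
      nibbles[3] = con & 0xF;          /* pushed last: lane at (%esp) */
   }
#endif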
   case Xin_SseLdSt:
      *p++ = 0x0F;
      *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);
      goto done;

   case Xin_SseLdzLO:
      vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
      *p++ = 0x0F;
      *p++ = 0x10;
      p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
      goto done;
   case Xin_Sse32Fx4:
      xtra = 0;
      *p++ = 0x0F;
      switch (i->Xin.Sse32Fx4.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
                               vregEnc(i->Xin.Sse32Fx4.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64Fx2:
      xtra = 0;
      *p++ = 0x66;
      *p++ = 0x0F;
      switch (i->Xin.Sse64Fx2.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
                               vregEnc(i->Xin.Sse64Fx2.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse32FLo:
      xtra = 0;
      *p++ = 0xF3;
      *p++ = 0x0F;
      switch (i->Xin.Sse32FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
                               vregEnc(i->Xin.Sse32FLo.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;

   case Xin_Sse64FLo:
      xtra = 0;
      *p++ = 0xF2;
      *p++ = 0x0F;
      switch (i->Xin.Sse64FLo.op) {
         case Xsse_ADDF:   *p++ = 0x58; break;
         case Xsse_DIVF:   *p++ = 0x5E; break;
         case Xsse_MAXF:   *p++ = 0x5F; break;
         case Xsse_MINF:   *p++ = 0x5D; break;
         case Xsse_MULF:   *p++ = 0x59; break;
         case Xsse_RCPF:   *p++ = 0x53; break;
         case Xsse_RSQRTF: *p++ = 0x52; break;
         case Xsse_SQRTF:  *p++ = 0x51; break;
         case Xsse_SUBF:   *p++ = 0x5C; break;
         case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
         default: goto bad;
      }
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
                               vregEnc(i->Xin.Sse64FLo.src) );
      if (xtra & 0x100)
         *p++ = toUChar(xtra & 0xFF);
      goto done;
   case Xin_SseReRg: {
#     define XX(_n) *p++ = (_n)
      switch (i->Xin.SseReRg.op) {
         case Xsse_MOV:     /*movups*/ XX(0x0F); XX(0x10); break;
         case Xsse_OR:                 XX(0x0F); XX(0x56); break;
         case Xsse_XOR:                XX(0x0F); XX(0x57); break;
         case Xsse_AND:                XX(0x0F); XX(0x54); break;
         case Xsse_PACKSSD:  XX(0x66); XX(0x0F); XX(0x6B); break;
         case Xsse_PACKSSW:  XX(0x66); XX(0x0F); XX(0x63); break;
         case Xsse_PACKUSW:  XX(0x66); XX(0x0F); XX(0x67); break;
         case Xsse_ADD8:     XX(0x66); XX(0x0F); XX(0xFC); break;
         case Xsse_ADD16:    XX(0x66); XX(0x0F); XX(0xFD); break;
         case Xsse_ADD32:    XX(0x66); XX(0x0F); XX(0xFE); break;
         case Xsse_ADD64:    XX(0x66); XX(0x0F); XX(0xD4); break;
         case Xsse_QADD8S:   XX(0x66); XX(0x0F); XX(0xEC); break;
         case Xsse_QADD16S:  XX(0x66); XX(0x0F); XX(0xED); break;
         case Xsse_QADD8U:   XX(0x66); XX(0x0F); XX(0xDC); break;
         case Xsse_QADD16U:  XX(0x66); XX(0x0F); XX(0xDD); break;
         case Xsse_AVG8U:    XX(0x66); XX(0x0F); XX(0xE0); break;
         case Xsse_AVG16U:   XX(0x66); XX(0x0F); XX(0xE3); break;
         case Xsse_CMPEQ8:   XX(0x66); XX(0x0F); XX(0x74); break;
         case Xsse_CMPEQ16:  XX(0x66); XX(0x0F); XX(0x75); break;
         case Xsse_CMPEQ32:  XX(0x66); XX(0x0F); XX(0x76); break;
         case Xsse_CMPGT8S:  XX(0x66); XX(0x0F); XX(0x64); break;
         case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
         case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
         case Xsse_MAX16S:   XX(0x66); XX(0x0F); XX(0xEE); break;
         case Xsse_MAX8U:    XX(0x66); XX(0x0F); XX(0xDE); break;
         case Xsse_MIN16S:   XX(0x66); XX(0x0F); XX(0xEA); break;
         case Xsse_MIN8U:    XX(0x66); XX(0x0F); XX(0xDA); break;
         case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
         case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
         case Xsse_MUL16:    XX(0x66); XX(0x0F); XX(0xD5); break;
         case Xsse_SHL16:    XX(0x66); XX(0x0F); XX(0xF1); break;
         case Xsse_SHL32:    XX(0x66); XX(0x0F); XX(0xF2); break;
         case Xsse_SHL64:    XX(0x66); XX(0x0F); XX(0xF3); break;
         case Xsse_SAR16:    XX(0x66); XX(0x0F); XX(0xE1); break;
         case Xsse_SAR32:    XX(0x66); XX(0x0F); XX(0xE2); break;
         case Xsse_SHR16:    XX(0x66); XX(0x0F); XX(0xD1); break;
         case Xsse_SHR32:    XX(0x66); XX(0x0F); XX(0xD2); break;
         case Xsse_SHR64:    XX(0x66); XX(0x0F); XX(0xD3); break;
         case Xsse_SUB8:     XX(0x66); XX(0x0F); XX(0xF8); break;
         case Xsse_SUB16:    XX(0x66); XX(0x0F); XX(0xF9); break;
         case Xsse_SUB32:    XX(0x66); XX(0x0F); XX(0xFA); break;
         case Xsse_SUB64:    XX(0x66); XX(0x0F); XX(0xFB); break;
         case Xsse_QSUB8S:   XX(0x66); XX(0x0F); XX(0xE8); break;
         case Xsse_QSUB16S:  XX(0x66); XX(0x0F); XX(0xE9); break;
         case Xsse_QSUB8U:   XX(0x66); XX(0x0F); XX(0xD8); break;
         case Xsse_QSUB16U:  XX(0x66); XX(0x0F); XX(0xD9); break;
         case Xsse_UNPCKHB:  XX(0x66); XX(0x0F); XX(0x68); break;
         case Xsse_UNPCKHW:  XX(0x66); XX(0x0F); XX(0x69); break;
         case Xsse_UNPCKHD:  XX(0x66); XX(0x0F); XX(0x6A); break;
         case Xsse_UNPCKHQ:  XX(0x66); XX(0x0F); XX(0x6D); break;
         case Xsse_UNPCKLB:  XX(0x66); XX(0x0F); XX(0x60); break;
         case Xsse_UNPCKLW:  XX(0x66); XX(0x0F); XX(0x61); break;
         case Xsse_UNPCKLD:  XX(0x66); XX(0x0F); XX(0x62); break;
         case Xsse_UNPCKLQ:  XX(0x66); XX(0x0F); XX(0x6C); break;
         default: goto bad;
      }
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
                               vregEnc(i->Xin.SseReRg.src) );
#     undef XX
      goto done;
   }
   case Xin_SseCMov:
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      ptmp = p;

      /* movaps %src, %dst */
      *p++ = 0x0F;
      *p++ = 0x28;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
                               vregEnc(i->Xin.SseCMov.src) );

      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
      goto done;

   case Xin_SseShuf:
      *p++ = 0x66;
      *p++ = 0x0F;
      *p++ = 0x70;
      p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
                               vregEnc(i->Xin.SseShuf.src) );
      *p++ = (UChar)(i->Xin.SseShuf.order);
      goto done;
   case Xin_EvCheck: {
      /* We generate:
            (3 bytes)  decl 4(%ebp)    4 == offsetof(host_EvC_COUNTER)
            (2 bytes)  jns  nofail     expected taken
            (3 bytes)  jmp* 0(%ebp)    0 == offsetof(host_EvC_FAILADDR)
            nofail:
      */
      /* This is heavily asserted re instruction lengths.  It needs to
         be.  If we get given unexpected forms of .amCounter or
         .amFailAddr -- basically, anything that's not of the form
         uimm7(%ebp) -- they are likely to fail. */
      /* Note also that after the decl we must be very careful not to
         read the carry flag, else we get a partial flags stall.
         js/jns avoids that, though. */
      UChar* p0 = p;
      /* --- decl 8(%ebp) --- */
      /* "1" because there's no register in this encoding; instead the
         register field is used as a sub opcode.  The encoding for
         "decl r/m32" is FF /1, hence the "1". */
      *p++ = 0xFF;
      p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
      vassert(p - p0 == 3);
      /* --- jns nofail --- */
      *p++ = 0x79;
      *p++ = 0x03; /* need to check this 0x03 after the next insn */
      vassert(p - p0 == 5);
      /* --- jmp* 0(%ebp) --- */
      /* The encoding is FF /4. */
      *p++ = 0xFF;
      p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
      vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
      /* And crosscheck .. */
      vassert(evCheckSzB_X86() == 8);
      goto done;
   }
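   /* A minimal illustrative sketch, kept out of the build: the size
      accounting the asserts above rely on.  FF /1 with a small
      disp(%ebp) amode is 3 bytes, jns rel8 is 2, FF /4 with a small
      disp(%ebp) amode is 3, giving the fixed total of 8 that
      evCheckSzB_X86 (below) reports.  Hypothetical helper. */
#if 0
   static int evcheck_size_bytes ( void )
   {
      return 3 /* decl disp(%ebp) */
           + 2 /* jns rel8         */
           + 3 /* jmp* disp(%ebp)  */;
   }
#endif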
   case Xin_ProfInc: {
      /* We generate   addl $1,NotKnownYet
                       adcl $0,NotKnownYet+4
         in the expectation that a later call to LibVEX_patchProfCtr
         will be used to fill in the immediate fields once the right
         value is known.
            83 05  00 00 00 00  01
            83 15  00 00 00 00  00
      */
      *p++ = 0x83; *p++ = 0x05;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x01;
      *p++ = 0x83; *p++ = 0x15;
      *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
      *p++ = 0x00;
      /* Tell the caller .. */
      vassert(!(*is_profInc));
      *is_profInc = True;
      goto done;
   }

   default:
      goto bad;
   }

  bad:
   ppX86Instr(i, mode64);
   vpanic("emit_X86Instr");
   /*NOTREACHED*/

  done:
   vassert(p - &buf[0] <= 32);
   return p - &buf[0];
}
/* How big is an event check?  See case for Xin_EvCheck in
   emit_X86Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_X86 (void)
{
   return 8;
}
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
                                 void* place_to_chain,
                                 const void* disp_cp_chain_me_EXPECTED,
                                 const void* place_to_jump_to )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movl $disp_cp_chain_me_EXPECTED, %edx
        call *%edx
      viz
        BA <4 bytes value == disp_cp_chain_me_EXPECTED>
        FF D2
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0xBA);
   vassert(read_misaligned_UInt_LE(&p[1])
           == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
   vassert(p[5] == 0xFF);
   vassert(p[6] == 0xD2);
   /* And what we want to change it to is:
        jmp disp32   where disp32 is relative to the next insn
        ud2;
      viz
        E9 <4 bytes == disp32>
        0F 0B
      The replacement has the same length as the original.
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5. */
   Long delta = (Long)((const UChar*)place_to_jump_to - p) - 5;

   /* And make the modifications. */
   p[0] = 0xE9;
   write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
   p[5] = 0x0F; p[6] = 0x0B;
   /* sanity check on the delta -- top 32 are all 0 or all 1 */
   delta >>= 32;
   vassert(delta == 0LL || delta == -1LL);
   VexInvalRange vir = { (HWord)place_to_chain, 7 };
   return vir;
}
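/* A minimal illustrative sketch, kept out of the build: the rel32 the
   chaining writes above.  A JMP rel32 is 5 bytes long and its
   displacement is measured from the end of the instruction, hence the
   "- 5" used in the code.  Hypothetical helper. */
#if 0
static int jmp_rel32_displacement ( const unsigned char* site,
                                    const unsigned char* target )
{
   return (int)(target - (site + 5));
}
#endif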
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
                                   void* place_to_unchain,
                                   const void* place_to_jump_to_EXPECTED,
                                   const void* disp_cp_chain_me )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        jmp d32
        ud2;
      viz
        E9 <4 bytes == disp32>
        0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   Bool valid = False;
   if (p[0] == 0xE9
       && p[5] == 0x0F && p[6] == 0x0B) {
      /* Check the offset is right. */
      Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
      if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
         valid = True;
         if (0)
            vex_printf("QQQ unchainXDirect_X86: found valid\n");
      }
   }
   vassert(valid);

   /* And what we want to change it to is:
        movl $disp_cp_chain_me, %edx
        call *%edx
      viz
        BA <4 bytes value == disp_cp_chain_me_EXPECTED>
        FF D2
      So it's the same length (convenient, huh).
   */
   p[0] = 0xBA;
   write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
   p[5] = 0xFF;
   p[6] = 0xD2;
   VexInvalRange vir = { (HWord)place_to_unchain, 7 };
   return vir;
}
/* Patch the counter address into a profile inc point, as previously
   created by the Xin_ProfInc case for emit_X86Instr. */
VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
                                 void*  place_to_patch,
                                 const ULong* location_of_counter )
{
   vassert(endness_host == VexEndnessLE);
   vassert(sizeof(ULong*) == 4);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0] == 0x83);
   vassert(p[1] == 0x05);
   vassert(p[2] == 0x00);
   vassert(p[3] == 0x00);
   vassert(p[4] == 0x00);
   vassert(p[5] == 0x00);
   vassert(p[6] == 0x01);
   vassert(p[7] == 0x83);
   vassert(p[8] == 0x15);
   vassert(p[9] == 0x00);
   vassert(p[10] == 0x00);
   vassert(p[11] == 0x00);
   vassert(p[12] == 0x00);
   vassert(p[13] == 0x00);
   UInt imm32 = (UInt)(Addr)location_of_counter;
   p[2]  = imm32 & 0xFF; imm32 >>= 8;
   p[3]  = imm32 & 0xFF; imm32 >>= 8;
   p[4]  = imm32 & 0xFF; imm32 >>= 8;
   p[5]  = imm32 & 0xFF;
   imm32 = 4 + (UInt)(Addr)location_of_counter;
   p[9]  = imm32 & 0xFF; imm32 >>= 8;
   p[10] = imm32 & 0xFF; imm32 >>= 8;
   p[11] = imm32 & 0xFF; imm32 >>= 8;
   p[12] = imm32 & 0xFF;
   VexInvalRange vir = { (HWord)place_to_patch, 14 };
   return vir;
}
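/* A minimal illustrative sketch, kept out of the build: the patching
   above stores two little-endian 32-bit immediates, the counter address
   into bytes 2..5 (the addl) and address+4 into bytes 9..12 (the adcl).
   The hypothetical helper below only shows the byte order used. */
#if 0
static void store_le32 ( unsigned char* q, unsigned int v )
{
   q[0] = v & 0xFF;  v >>= 8;
   q[1] = v & 0xFF;  v >>= 8;
   q[2] = v & 0xFF;  v >>= 8;
   q[3] = v & 0xFF;
}
#endif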
/*---------------------------------------------------------------*/
/*--- end                                     host_x86_defs.c ---*/
/*---------------------------------------------------------------*/