2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
28 The GNU General Public License is contained in the file COPYING.
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
34 */
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_x86_defs.h"
45 /* --------- Registers. --------- */
47 const RRegUniverse* getRRegUniverse_X86 ( void )
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_X86;
52 static Bool rRegUniverse_X86_initted = False;
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_X86;
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_X86_initted))
59 return ru;
61 RRegUniverse__init(ru);
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
66 ru->allocable_start[HRcInt32] = ru->size;
67 ru->regs[ru->size++] = hregX86_EBX();
68 ru->regs[ru->size++] = hregX86_ESI();
69 ru->regs[ru->size++] = hregX86_EDI();
70 ru->regs[ru->size++] = hregX86_EAX();
71 ru->regs[ru->size++] = hregX86_ECX();
72 ru->regs[ru->size++] = hregX86_EDX();
73 ru->allocable_end[HRcInt32] = ru->size - 1;
75 ru->allocable_start[HRcFlt64] = ru->size;
76 ru->regs[ru->size++] = hregX86_FAKE0();
77 ru->regs[ru->size++] = hregX86_FAKE1();
78 ru->regs[ru->size++] = hregX86_FAKE2();
79 ru->regs[ru->size++] = hregX86_FAKE3();
80 ru->regs[ru->size++] = hregX86_FAKE4();
81 ru->regs[ru->size++] = hregX86_FAKE5();
82 ru->allocable_end[HRcFlt64] = ru->size - 1;
84 ru->allocable_start[HRcVec128] = ru->size;
85 ru->regs[ru->size++] = hregX86_XMM0();
86 ru->regs[ru->size++] = hregX86_XMM1();
87 ru->regs[ru->size++] = hregX86_XMM2();
88 ru->regs[ru->size++] = hregX86_XMM3();
89 ru->regs[ru->size++] = hregX86_XMM4();
90 ru->regs[ru->size++] = hregX86_XMM5();
91 ru->regs[ru->size++] = hregX86_XMM6();
92 ru->regs[ru->size++] = hregX86_XMM7();
93 ru->allocable_end[HRcVec128] = ru->size - 1;
94 ru->allocable = ru->size;
96 /* And other regs, not available to the allocator. */
97 ru->regs[ru->size++] = hregX86_ESP();
98 ru->regs[ru->size++] = hregX86_EBP();
100 rRegUniverse_X86_initted = True;
102 RRegUniverse__check_is_sane(ru);
103 return ru;
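/* Note: %esp and %ebp are registered in the universe but kept outside
   the allocatable range.  %esp is the host stack pointer, and %ebp
   appears to be reserved as the base register for guest state and
   spill slots -- genSpill_X86, genReload_X86 and the EvCheck amodes
   below all address memory relative to %ebp. */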
107 UInt ppHRegX86 ( HReg reg )
109 Int r;
110 static const HChar* ireg32_names[8]
111 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
112 /* Be generic for all virtual regs. */
113 if (hregIsVirtual(reg)) {
114 return ppHReg(reg);
116 /* But specific for real regs. */
117 switch (hregClass(reg)) {
118 case HRcInt32:
119 r = hregEncoding(reg);
120 vassert(r >= 0 && r < 8);
121 return vex_printf("%s", ireg32_names[r]);
122 case HRcFlt64:
123 r = hregEncoding(reg);
124 vassert(r >= 0 && r < 6);
125 return vex_printf("%%fake%d", r);
126 case HRcVec128:
127 r = hregEncoding(reg);
128 vassert(r >= 0 && r < 8);
129 return vex_printf("%%xmm%d", r);
130 default:
131 vpanic("ppHRegX86");
136 /* --------- Condition codes, Intel encoding. --------- */
138 const HChar* showX86CondCode ( X86CondCode cond )
140 switch (cond) {
141 case Xcc_O: return "o";
142 case Xcc_NO: return "no";
143 case Xcc_B: return "b";
144 case Xcc_NB: return "nb";
145 case Xcc_Z: return "z";
146 case Xcc_NZ: return "nz";
147 case Xcc_BE: return "be";
148 case Xcc_NBE: return "nbe";
149 case Xcc_S: return "s";
150 case Xcc_NS: return "ns";
151 case Xcc_P: return "p";
152 case Xcc_NP: return "np";
153 case Xcc_L: return "l";
154 case Xcc_NL: return "nl";
155 case Xcc_LE: return "le";
156 case Xcc_NLE: return "nle";
157 case Xcc_ALWAYS: return "ALWAYS";
158 default: vpanic("ppX86CondCode");
163 /* --------- X86AMode: memory address expressions. --------- */
165 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
166 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
167 am->tag = Xam_IR;
168 am->Xam.IR.imm = imm32;
169 am->Xam.IR.reg = reg;
170 return am;
172 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
173 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
174 am->tag = Xam_IRRS;
175 am->Xam.IRRS.imm = imm32;
176 am->Xam.IRRS.base = base;
177 am->Xam.IRRS.index = indEx;
178 am->Xam.IRRS.shift = shift;
179 vassert(shift >= 0 && shift <= 3);
180 return am;
183 X86AMode* dopyX86AMode ( X86AMode* am ) {
184 switch (am->tag) {
185 case Xam_IR:
186 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
187 case Xam_IRRS:
188 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
189 am->Xam.IRRS.index, am->Xam.IRRS.shift );
190 default:
191 vpanic("dopyX86AMode");
195 void ppX86AMode ( X86AMode* am ) {
196 switch (am->tag) {
197 case Xam_IR:
198 if (am->Xam.IR.imm == 0)
199 vex_printf("(");
200 else
201 vex_printf("0x%x(", am->Xam.IR.imm);
202 ppHRegX86(am->Xam.IR.reg);
203 vex_printf(")");
204 return;
205 case Xam_IRRS:
206 vex_printf("0x%x(", am->Xam.IRRS.imm);
207 ppHRegX86(am->Xam.IRRS.base);
208 vex_printf(",");
209 ppHRegX86(am->Xam.IRRS.index);
210 vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
211 return;
212 default:
213 vpanic("ppX86AMode");
217 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
218 switch (am->tag) {
219 case Xam_IR:
220 addHRegUse(u, HRmRead, am->Xam.IR.reg);
221 return;
222 case Xam_IRRS:
223 addHRegUse(u, HRmRead, am->Xam.IRRS.base);
224 addHRegUse(u, HRmRead, am->Xam.IRRS.index);
225 return;
226 default:
227 vpanic("addRegUsage_X86AMode");
231 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
232 switch (am->tag) {
233 case Xam_IR:
234 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
235 return;
236 case Xam_IRRS:
237 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
238 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
239 return;
240 default:
241 vpanic("mapRegs_X86AMode");
245 /* --------- Operand, which can be reg, immediate or memory. --------- */
247 X86RMI* X86RMI_Imm ( UInt imm32 ) {
248 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
249 op->tag = Xrmi_Imm;
250 op->Xrmi.Imm.imm32 = imm32;
251 return op;
253 X86RMI* X86RMI_Reg ( HReg reg ) {
254 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
255 op->tag = Xrmi_Reg;
256 op->Xrmi.Reg.reg = reg;
257 return op;
259 X86RMI* X86RMI_Mem ( X86AMode* am ) {
260 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
261 op->tag = Xrmi_Mem;
262 op->Xrmi.Mem.am = am;
263 return op;
266 void ppX86RMI ( X86RMI* op ) {
267 switch (op->tag) {
268 case Xrmi_Imm:
269 vex_printf("$0x%x", op->Xrmi.Imm.imm32);
270 return;
271 case Xrmi_Reg:
272 ppHRegX86(op->Xrmi.Reg.reg);
273 return;
274 case Xrmi_Mem:
275 ppX86AMode(op->Xrmi.Mem.am);
276 return;
277 default:
278 vpanic("ppX86RMI");
282 /* An X86RMI can only be used in a "read" context (what would it mean
283 to write or modify a literal?) and so we enumerate its registers
284 accordingly. */
285 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
286 switch (op->tag) {
287 case Xrmi_Imm:
288 return;
289 case Xrmi_Reg:
290 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
291 return;
292 case Xrmi_Mem:
293 addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
294 return;
295 default:
296 vpanic("addRegUsage_X86RMI");
300 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
301 switch (op->tag) {
302 case Xrmi_Imm:
303 return;
304 case Xrmi_Reg:
305 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
306 return;
307 case Xrmi_Mem:
308 mapRegs_X86AMode(m, op->Xrmi.Mem.am);
309 return;
310 default:
311 vpanic("mapRegs_X86RMI");
316 /* --------- Operand, which can be reg or immediate only. --------- */
318 X86RI* X86RI_Imm ( UInt imm32 ) {
319 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
320 op->tag = Xri_Imm;
321 op->Xri.Imm.imm32 = imm32;
322 return op;
324 X86RI* X86RI_Reg ( HReg reg ) {
325 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
326 op->tag = Xri_Reg;
327 op->Xri.Reg.reg = reg;
328 return op;
331 void ppX86RI ( X86RI* op ) {
332 switch (op->tag) {
333 case Xri_Imm:
334 vex_printf("$0x%x", op->Xri.Imm.imm32);
335 return;
336 case Xri_Reg:
337 ppHRegX86(op->Xri.Reg.reg);
338 return;
339 default:
340 vpanic("ppX86RI");
344 /* An X86RI can only be used in a "read" context (what would it mean
345 to write or modify a literal?) and so we enumerate its registers
346 accordingly. */
347 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
348 switch (op->tag) {
349 case Xri_Imm:
350 return;
351 case Xri_Reg:
352 addHRegUse(u, HRmRead, op->Xri.Reg.reg);
353 return;
354 default:
355 vpanic("addRegUsage_X86RI");
359 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
360 switch (op->tag) {
361 case Xri_Imm:
362 return;
363 case Xri_Reg:
364 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
365 return;
366 default:
367 vpanic("mapRegs_X86RI");
372 /* --------- Operand, which can be reg or memory only. --------- */
374 X86RM* X86RM_Reg ( HReg reg ) {
375 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
376 op->tag = Xrm_Reg;
377 op->Xrm.Reg.reg = reg;
378 return op;
380 X86RM* X86RM_Mem ( X86AMode* am ) {
381 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
382 op->tag = Xrm_Mem;
383 op->Xrm.Mem.am = am;
384 return op;
387 void ppX86RM ( X86RM* op ) {
388 switch (op->tag) {
389 case Xrm_Mem:
390 ppX86AMode(op->Xrm.Mem.am);
391 return;
392 case Xrm_Reg:
393 ppHRegX86(op->Xrm.Reg.reg);
394 return;
395 default:
396 vpanic("ppX86RM");
400 /* Because an X86RM can be both a source or destination operand, we
401 have to supply a mode -- pertaining to the operand as a whole --
402 indicating how it's being used. */
403 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
404 switch (op->tag) {
405 case Xrm_Mem:
406 /* Memory is read, written or modified. So we just want to
407 know the regs read by the amode. */
408 addRegUsage_X86AMode(u, op->Xrm.Mem.am);
409 return;
410 case Xrm_Reg:
411 /* reg is read, written or modified. Add it in the
412 appropriate way. */
413 addHRegUse(u, mode, op->Xrm.Reg.reg);
414 return;
415 default:
416 vpanic("addRegUsage_X86RM");
420 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
422 switch (op->tag) {
423 case Xrm_Mem:
424 mapRegs_X86AMode(m, op->Xrm.Mem.am);
425 return;
426 case Xrm_Reg:
427 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
428 return;
429 default:
430 vpanic("mapRegs_X86RM");
435 /* --------- Instructions. --------- */
437 const HChar* showX86UnaryOp ( X86UnaryOp op ) {
438 switch (op) {
439 case Xun_NOT: return "not";
440 case Xun_NEG: return "neg";
441 default: vpanic("showX86UnaryOp");
445 const HChar* showX86AluOp ( X86AluOp op ) {
446 switch (op) {
447 case Xalu_MOV: return "mov";
448 case Xalu_CMP: return "cmp";
449 case Xalu_ADD: return "add";
450 case Xalu_SUB: return "sub";
451 case Xalu_ADC: return "adc";
452 case Xalu_SBB: return "sbb";
453 case Xalu_AND: return "and";
454 case Xalu_OR: return "or";
455 case Xalu_XOR: return "xor";
456 case Xalu_MUL: return "mul";
457 default: vpanic("showX86AluOp");
461 const HChar* showX86ShiftOp ( X86ShiftOp op ) {
462 switch (op) {
463 case Xsh_SHL: return "shl";
464 case Xsh_SHR: return "shr";
465 case Xsh_SAR: return "sar";
466 default: vpanic("showX86ShiftOp");
470 const HChar* showX86FpOp ( X86FpOp op ) {
471 switch (op) {
472 case Xfp_ADD: return "add";
473 case Xfp_SUB: return "sub";
474 case Xfp_MUL: return "mul";
475 case Xfp_DIV: return "div";
476 case Xfp_SCALE: return "scale";
477 case Xfp_ATAN: return "atan";
478 case Xfp_YL2X: return "yl2x";
479 case Xfp_YL2XP1: return "yl2xp1";
480 case Xfp_PREM: return "prem";
481 case Xfp_PREM1: return "prem1";
482 case Xfp_SQRT: return "sqrt";
483 case Xfp_ABS: return "abs";
484 case Xfp_NEG: return "chs";
485 case Xfp_MOV: return "mov";
486 case Xfp_SIN: return "sin";
487 case Xfp_COS: return "cos";
488 case Xfp_TAN: return "tan";
489 case Xfp_ROUND: return "round";
490 case Xfp_2XM1: return "2xm1";
491 default: vpanic("showX86FpOp");
495 const HChar* showX86SseOp ( X86SseOp op ) {
496 switch (op) {
497 case Xsse_MOV: return "mov(?!)";
498 case Xsse_ADDF: return "add";
499 case Xsse_SUBF: return "sub";
500 case Xsse_MULF: return "mul";
501 case Xsse_DIVF: return "div";
502 case Xsse_MAXF: return "max";
503 case Xsse_MINF: return "min";
504 case Xsse_CMPEQF: return "cmpFeq";
505 case Xsse_CMPLTF: return "cmpFlt";
506 case Xsse_CMPLEF: return "cmpFle";
507 case Xsse_CMPUNF: return "cmpFun";
508 case Xsse_RCPF: return "rcp";
509 case Xsse_RSQRTF: return "rsqrt";
510 case Xsse_SQRTF: return "sqrt";
511 case Xsse_AND: return "and";
512 case Xsse_OR: return "or";
513 case Xsse_XOR: return "xor";
514 case Xsse_ANDN: return "andn";
515 case Xsse_ADD8: return "paddb";
516 case Xsse_ADD16: return "paddw";
517 case Xsse_ADD32: return "paddd";
518 case Xsse_ADD64: return "paddq";
519 case Xsse_QADD8U: return "paddusb";
520 case Xsse_QADD16U: return "paddusw";
521 case Xsse_QADD8S: return "paddsb";
522 case Xsse_QADD16S: return "paddsw";
523 case Xsse_SUB8: return "psubb";
524 case Xsse_SUB16: return "psubw";
525 case Xsse_SUB32: return "psubd";
526 case Xsse_SUB64: return "psubq";
527 case Xsse_QSUB8U: return "psubusb";
528 case Xsse_QSUB16U: return "psubusw";
529 case Xsse_QSUB8S: return "psubsb";
530 case Xsse_QSUB16S: return "psubsw";
531 case Xsse_MUL16: return "pmullw";
532 case Xsse_MULHI16U: return "pmulhuw";
533 case Xsse_MULHI16S: return "pmulhw";
534 case Xsse_AVG8U: return "pavgb";
535 case Xsse_AVG16U: return "pavgw";
536 case Xsse_MAX16S: return "pmaxw";
537 case Xsse_MAX8U: return "pmaxub";
538 case Xsse_MIN16S: return "pminw";
539 case Xsse_MIN8U: return "pminub";
540 case Xsse_CMPEQ8: return "pcmpeqb";
541 case Xsse_CMPEQ16: return "pcmpeqw";
542 case Xsse_CMPEQ32: return "pcmpeqd";
543 case Xsse_CMPGT8S: return "pcmpgtb";
544 case Xsse_CMPGT16S: return "pcmpgtw";
545 case Xsse_CMPGT32S: return "pcmpgtd";
546 case Xsse_SHL16: return "psllw";
547 case Xsse_SHL32: return "pslld";
548 case Xsse_SHL64: return "psllq";
549 case Xsse_SHR16: return "psrlw";
550 case Xsse_SHR32: return "psrld";
551 case Xsse_SHR64: return "psrlq";
552 case Xsse_SAR16: return "psraw";
553 case Xsse_SAR32: return "psrad";
554 case Xsse_PACKSSD: return "packssdw";
555 case Xsse_PACKSSW: return "packsswb";
556 case Xsse_PACKUSW: return "packuswb";
557 case Xsse_UNPCKHB: return "punpckhb";
558 case Xsse_UNPCKHW: return "punpckhw";
559 case Xsse_UNPCKHD: return "punpckhd";
560 case Xsse_UNPCKHQ: return "punpckhq";
561 case Xsse_UNPCKLB: return "punpcklb";
562 case Xsse_UNPCKLW: return "punpcklw";
563 case Xsse_UNPCKLD: return "punpckld";
564 case Xsse_UNPCKLQ: return "punpcklq";
565 default: vpanic("showX86SseOp");
569 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
570 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
571 i->tag = Xin_Alu32R;
572 i->Xin.Alu32R.op = op;
573 i->Xin.Alu32R.src = src;
574 i->Xin.Alu32R.dst = dst;
575 return i;
577 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
578 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
579 i->tag = Xin_Alu32M;
580 i->Xin.Alu32M.op = op;
581 i->Xin.Alu32M.src = src;
582 i->Xin.Alu32M.dst = dst;
583 vassert(op != Xalu_MUL);
584 return i;
586 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
587 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
588 i->tag = Xin_Sh32;
589 i->Xin.Sh32.op = op;
590 i->Xin.Sh32.src = src;
591 i->Xin.Sh32.dst = dst;
592 return i;
594 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
595 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
596 i->tag = Xin_Test32;
597 i->Xin.Test32.imm32 = imm32;
598 i->Xin.Test32.dst = dst;
599 return i;
601 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
602 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
603 i->tag = Xin_Unary32;
604 i->Xin.Unary32.op = op;
605 i->Xin.Unary32.dst = dst;
606 return i;
608 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
609 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
610 i->tag = Xin_Lea32;
611 i->Xin.Lea32.am = am;
612 i->Xin.Lea32.dst = dst;
613 return i;
615 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
616 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
617 i->tag = Xin_MulL;
618 i->Xin.MulL.syned = syned;
619 i->Xin.MulL.src = src;
620 return i;
622 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
623 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
624 i->tag = Xin_Div;
625 i->Xin.Div.syned = syned;
626 i->Xin.Div.src = src;
627 return i;
629 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
630 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
631 i->tag = Xin_Sh3232;
632 i->Xin.Sh3232.op = op;
633 i->Xin.Sh3232.amt = amt;
634 i->Xin.Sh3232.src = src;
635 i->Xin.Sh3232.dst = dst;
636 vassert(op == Xsh_SHL || op == Xsh_SHR);
637 return i;
639 X86Instr* X86Instr_Push( X86RMI* src ) {
640 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
641 i->tag = Xin_Push;
642 i->Xin.Push.src = src;
643 return i;
645 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
646 RetLoc rloc ) {
647 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
648 i->tag = Xin_Call;
649 i->Xin.Call.cond = cond;
650 i->Xin.Call.target = target;
651 i->Xin.Call.regparms = regparms;
652 i->Xin.Call.rloc = rloc;
653 vassert(regparms >= 0 && regparms <= 3);
654 vassert(is_sane_RetLoc(rloc));
655 return i;
657 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
658 X86CondCode cond, Bool toFastEP ) {
659 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
660 i->tag = Xin_XDirect;
661 i->Xin.XDirect.dstGA = dstGA;
662 i->Xin.XDirect.amEIP = amEIP;
663 i->Xin.XDirect.cond = cond;
664 i->Xin.XDirect.toFastEP = toFastEP;
665 return i;
667 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
668 X86CondCode cond ) {
669 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
670 i->tag = Xin_XIndir;
671 i->Xin.XIndir.dstGA = dstGA;
672 i->Xin.XIndir.amEIP = amEIP;
673 i->Xin.XIndir.cond = cond;
674 return i;
676 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
677 X86CondCode cond, IRJumpKind jk ) {
678 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
679 i->tag = Xin_XAssisted;
680 i->Xin.XAssisted.dstGA = dstGA;
681 i->Xin.XAssisted.amEIP = amEIP;
682 i->Xin.XAssisted.cond = cond;
683 i->Xin.XAssisted.jk = jk;
684 return i;
686 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
687 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
688 i->tag = Xin_CMov32;
689 i->Xin.CMov32.cond = cond;
690 i->Xin.CMov32.src = src;
691 i->Xin.CMov32.dst = dst;
692 vassert(cond != Xcc_ALWAYS);
693 return i;
695 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
696 X86AMode* src, HReg dst ) {
697 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
698 i->tag = Xin_LoadEX;
699 i->Xin.LoadEX.szSmall = szSmall;
700 i->Xin.LoadEX.syned = syned;
701 i->Xin.LoadEX.src = src;
702 i->Xin.LoadEX.dst = dst;
703 vassert(szSmall == 1 || szSmall == 2);
704 return i;
706 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
707 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
708 i->tag = Xin_Store;
709 i->Xin.Store.sz = sz;
710 i->Xin.Store.src = src;
711 i->Xin.Store.dst = dst;
712 vassert(sz == 1 || sz == 2);
713 return i;
715 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
716 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
717 i->tag = Xin_Set32;
718 i->Xin.Set32.cond = cond;
719 i->Xin.Set32.dst = dst;
720 return i;
722 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
723 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
724 i->tag = Xin_Bsfr32;
725 i->Xin.Bsfr32.isFwds = isFwds;
726 i->Xin.Bsfr32.src = src;
727 i->Xin.Bsfr32.dst = dst;
728 return i;
730 X86Instr* X86Instr_MFence ( UInt hwcaps ) {
731 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
732 i->tag = Xin_MFence;
733 i->Xin.MFence.hwcaps = hwcaps;
734 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
735 |VEX_HWCAPS_X86_SSE1
736 |VEX_HWCAPS_X86_SSE2
737 |VEX_HWCAPS_X86_SSE3
738 |VEX_HWCAPS_X86_LZCNT)));
739 return i;
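/* Note: the hwcaps recorded here presumably let the emitter choose a
   host-appropriate fence sequence (a real MFENCE only when SSE2 is
   available, something weaker otherwise).  This constructor does not
   pick the sequence itself; it only checks that the capability bits
   are plausible. */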
741 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
742 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
743 i->tag = Xin_ACAS;
744 i->Xin.ACAS.addr = addr;
745 i->Xin.ACAS.sz = sz;
746 vassert(sz == 4 || sz == 2 || sz == 1);
747 return i;
749 X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
750 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
751 i->tag = Xin_DACAS;
752 i->Xin.DACAS.addr = addr;
753 return i;
756 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
757 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
758 i->tag = Xin_FpUnary;
759 i->Xin.FpUnary.op = op;
760 i->Xin.FpUnary.src = src;
761 i->Xin.FpUnary.dst = dst;
762 return i;
764 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
765 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
766 i->tag = Xin_FpBinary;
767 i->Xin.FpBinary.op = op;
768 i->Xin.FpBinary.srcL = srcL;
769 i->Xin.FpBinary.srcR = srcR;
770 i->Xin.FpBinary.dst = dst;
771 return i;
773 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
774 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
775 i->tag = Xin_FpLdSt;
776 i->Xin.FpLdSt.isLoad = isLoad;
777 i->Xin.FpLdSt.sz = sz;
778 i->Xin.FpLdSt.reg = reg;
779 i->Xin.FpLdSt.addr = addr;
780 vassert(sz == 4 || sz == 8 || sz == 10);
781 return i;
783 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
784 HReg reg, X86AMode* addr ) {
785 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
786 i->tag = Xin_FpLdStI;
787 i->Xin.FpLdStI.isLoad = isLoad;
788 i->Xin.FpLdStI.sz = sz;
789 i->Xin.FpLdStI.reg = reg;
790 i->Xin.FpLdStI.addr = addr;
791 vassert(sz == 2 || sz == 4 || sz == 8);
792 return i;
794 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
795 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
796 i->tag = Xin_Fp64to32;
797 i->Xin.Fp64to32.src = src;
798 i->Xin.Fp64to32.dst = dst;
799 return i;
801 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
802 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
803 i->tag = Xin_FpCMov;
804 i->Xin.FpCMov.cond = cond;
805 i->Xin.FpCMov.src = src;
806 i->Xin.FpCMov.dst = dst;
807 vassert(cond != Xcc_ALWAYS);
808 return i;
810 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
811 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
812 i->tag = Xin_FpLdCW;
813 i->Xin.FpLdCW.addr = addr;
814 return i;
816 X86Instr* X86Instr_FpStSW_AX ( void ) {
817 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
818 i->tag = Xin_FpStSW_AX;
819 return i;
821 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
822 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
823 i->tag = Xin_FpCmp;
824 i->Xin.FpCmp.srcL = srcL;
825 i->Xin.FpCmp.srcR = srcR;
826 i->Xin.FpCmp.dst = dst;
827 return i;
829 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
830 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
831 i->tag = Xin_SseConst;
832 i->Xin.SseConst.con = con;
833 i->Xin.SseConst.dst = dst;
834 vassert(hregClass(dst) == HRcVec128);
835 return i;
837 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
838 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
839 i->tag = Xin_SseLdSt;
840 i->Xin.SseLdSt.isLoad = isLoad;
841 i->Xin.SseLdSt.reg = reg;
842 i->Xin.SseLdSt.addr = addr;
843 return i;
845 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
847 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
848 i->tag = Xin_SseLdzLO;
849 i->Xin.SseLdzLO.sz = toUChar(sz);
850 i->Xin.SseLdzLO.reg = reg;
851 i->Xin.SseLdzLO.addr = addr;
852 vassert(sz == 4 || sz == 8);
853 return i;
855 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
856 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
857 i->tag = Xin_Sse32Fx4;
858 i->Xin.Sse32Fx4.op = op;
859 i->Xin.Sse32Fx4.src = src;
860 i->Xin.Sse32Fx4.dst = dst;
861 vassert(op != Xsse_MOV);
862 return i;
864 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
865 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
866 i->tag = Xin_Sse32FLo;
867 i->Xin.Sse32FLo.op = op;
868 i->Xin.Sse32FLo.src = src;
869 i->Xin.Sse32FLo.dst = dst;
870 vassert(op != Xsse_MOV);
871 return i;
873 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
874 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
875 i->tag = Xin_Sse64Fx2;
876 i->Xin.Sse64Fx2.op = op;
877 i->Xin.Sse64Fx2.src = src;
878 i->Xin.Sse64Fx2.dst = dst;
879 vassert(op != Xsse_MOV);
880 return i;
882 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
883 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
884 i->tag = Xin_Sse64FLo;
885 i->Xin.Sse64FLo.op = op;
886 i->Xin.Sse64FLo.src = src;
887 i->Xin.Sse64FLo.dst = dst;
888 vassert(op != Xsse_MOV);
889 return i;
891 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
892 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
893 i->tag = Xin_SseReRg;
894 i->Xin.SseReRg.op = op;
895 i->Xin.SseReRg.src = re;
896 i->Xin.SseReRg.dst = rg;
897 return i;
899 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
900 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
901 i->tag = Xin_SseCMov;
902 i->Xin.SseCMov.cond = cond;
903 i->Xin.SseCMov.src = src;
904 i->Xin.SseCMov.dst = dst;
905 vassert(cond != Xcc_ALWAYS);
906 return i;
908 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
909 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
910 i->tag = Xin_SseShuf;
911 i->Xin.SseShuf.order = order;
912 i->Xin.SseShuf.src = src;
913 i->Xin.SseShuf.dst = dst;
914 vassert(order >= 0 && order <= 0xFF);
915 return i;
917 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
918 X86AMode* amFailAddr ) {
919 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
920 i->tag = Xin_EvCheck;
921 i->Xin.EvCheck.amCounter = amCounter;
922 i->Xin.EvCheck.amFailAddr = amFailAddr;
923 return i;
925 X86Instr* X86Instr_ProfInc ( void ) {
926 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
927 i->tag = Xin_ProfInc;
928 return i;
931 void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
932 vassert(mode64 == False);
933 switch (i->tag) {
934 case Xin_Alu32R:
935 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
936 ppX86RMI(i->Xin.Alu32R.src);
937 vex_printf(",");
938 ppHRegX86(i->Xin.Alu32R.dst);
939 return;
940 case Xin_Alu32M:
941 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
942 ppX86RI(i->Xin.Alu32M.src);
943 vex_printf(",");
944 ppX86AMode(i->Xin.Alu32M.dst);
945 return;
946 case Xin_Sh32:
947 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
948 if (i->Xin.Sh32.src == 0)
949 vex_printf("%%cl,");
950 else
951 vex_printf("$%d,", (Int)i->Xin.Sh32.src);
952 ppHRegX86(i->Xin.Sh32.dst);
953 return;
954 case Xin_Test32:
955 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
956 ppX86RM(i->Xin.Test32.dst);
957 return;
958 case Xin_Unary32:
959 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
960 ppHRegX86(i->Xin.Unary32.dst);
961 return;
962 case Xin_Lea32:
963 vex_printf("leal ");
964 ppX86AMode(i->Xin.Lea32.am);
965 vex_printf(",");
966 ppHRegX86(i->Xin.Lea32.dst);
967 return;
968 case Xin_MulL:
969 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
970 ppX86RM(i->Xin.MulL.src);
971 return;
972 case Xin_Div:
973 vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
974 ppX86RM(i->Xin.Div.src);
975 return;
976 case Xin_Sh3232:
977 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
978 if (i->Xin.Sh3232.amt == 0)
979 vex_printf(" %%cl,");
980 else
981 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
982 ppHRegX86(i->Xin.Sh3232.src);
983 vex_printf(",");
984 ppHRegX86(i->Xin.Sh3232.dst);
985 return;
986 case Xin_Push:
987 vex_printf("pushl ");
988 ppX86RMI(i->Xin.Push.src);
989 return;
990 case Xin_Call:
991 vex_printf("call%s[%d,",
992 i->Xin.Call.cond==Xcc_ALWAYS
993 ? "" : showX86CondCode(i->Xin.Call.cond),
994 i->Xin.Call.regparms);
995 ppRetLoc(i->Xin.Call.rloc);
996 vex_printf("] 0x%x", i->Xin.Call.target);
997 break;
998 case Xin_XDirect:
999 vex_printf("(xDirect) ");
1000 vex_printf("if (%%eflags.%s) { ",
1001 showX86CondCode(i->Xin.XDirect.cond));
1002 vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
1003 ppX86AMode(i->Xin.XDirect.amEIP);
1004 vex_printf("; ");
1005 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
1006 i->Xin.XDirect.toFastEP ? "fast" : "slow");
1007 return;
1008 case Xin_XIndir:
1009 vex_printf("(xIndir) ");
1010 vex_printf("if (%%eflags.%s) { movl ",
1011 showX86CondCode(i->Xin.XIndir.cond));
1012 ppHRegX86(i->Xin.XIndir.dstGA);
1013 vex_printf(",");
1014 ppX86AMode(i->Xin.XIndir.amEIP);
1015 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
1016 return;
1017 case Xin_XAssisted:
1018 vex_printf("(xAssisted) ");
1019 vex_printf("if (%%eflags.%s) { ",
1020 showX86CondCode(i->Xin.XAssisted.cond));
1021 vex_printf("movl ");
1022 ppHRegX86(i->Xin.XAssisted.dstGA);
1023 vex_printf(",");
1024 ppX86AMode(i->Xin.XAssisted.amEIP);
1025 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
1026 (Int)i->Xin.XAssisted.jk);
1027 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
1028 return;
1029 case Xin_CMov32:
1030 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
1031 ppX86RM(i->Xin.CMov32.src);
1032 vex_printf(",");
1033 ppHRegX86(i->Xin.CMov32.dst);
1034 return;
1035 case Xin_LoadEX:
1036 vex_printf("mov%c%cl ",
1037 i->Xin.LoadEX.syned ? 's' : 'z',
1038 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
1039 ppX86AMode(i->Xin.LoadEX.src);
1040 vex_printf(",");
1041 ppHRegX86(i->Xin.LoadEX.dst);
1042 return;
1043 case Xin_Store:
1044 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
1045 ppHRegX86(i->Xin.Store.src);
1046 vex_printf(",");
1047 ppX86AMode(i->Xin.Store.dst);
1048 return;
1049 case Xin_Set32:
1050 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
1051 ppHRegX86(i->Xin.Set32.dst);
1052 return;
1053 case Xin_Bsfr32:
1054 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
1055 ppHRegX86(i->Xin.Bsfr32.src);
1056 vex_printf(",");
1057 ppHRegX86(i->Xin.Bsfr32.dst);
1058 return;
1059 case Xin_MFence:
1060 vex_printf("mfence(%s)",
1061 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
1062 return;
1063 case Xin_ACAS:
1064 vex_printf("lock cmpxchg%c ",
1065 i->Xin.ACAS.sz==1 ? 'b'
1066 : i->Xin.ACAS.sz==2 ? 'w' : 'l');
1067 vex_printf("{%%eax->%%ebx},");
1068 ppX86AMode(i->Xin.ACAS.addr);
1069 return;
1070 case Xin_DACAS:
1071 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
1072 ppX86AMode(i->Xin.DACAS.addr);
1073 return;
1074 case Xin_FpUnary:
1075 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
1076 ppHRegX86(i->Xin.FpUnary.src);
1077 vex_printf(",");
1078 ppHRegX86(i->Xin.FpUnary.dst);
1079 break;
1080 case Xin_FpBinary:
1081 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
1082 ppHRegX86(i->Xin.FpBinary.srcL);
1083 vex_printf(",");
1084 ppHRegX86(i->Xin.FpBinary.srcR);
1085 vex_printf(",");
1086 ppHRegX86(i->Xin.FpBinary.dst);
1087 break;
1088 case Xin_FpLdSt:
1089 if (i->Xin.FpLdSt.isLoad) {
1090 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1091 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1092 ppX86AMode(i->Xin.FpLdSt.addr);
1093 vex_printf(", ");
1094 ppHRegX86(i->Xin.FpLdSt.reg);
1095 } else {
1096 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1097 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1098 ppHRegX86(i->Xin.FpLdSt.reg);
1099 vex_printf(", ");
1100 ppX86AMode(i->Xin.FpLdSt.addr);
1102 return;
1103 case Xin_FpLdStI:
1104 if (i->Xin.FpLdStI.isLoad) {
1105 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1106 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1107 ppX86AMode(i->Xin.FpLdStI.addr);
1108 vex_printf(", ");
1109 ppHRegX86(i->Xin.FpLdStI.reg);
1110 } else {
1111 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1112 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1113 ppHRegX86(i->Xin.FpLdStI.reg);
1114 vex_printf(", ");
1115 ppX86AMode(i->Xin.FpLdStI.addr);
1117 return;
1118 case Xin_Fp64to32:
1119 vex_printf("gdtof ");
1120 ppHRegX86(i->Xin.Fp64to32.src);
1121 vex_printf(",");
1122 ppHRegX86(i->Xin.Fp64to32.dst);
1123 return;
1124 case Xin_FpCMov:
1125 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
1126 ppHRegX86(i->Xin.FpCMov.src);
1127 vex_printf(",");
1128 ppHRegX86(i->Xin.FpCMov.dst);
1129 return;
1130 case Xin_FpLdCW:
1131 vex_printf("fldcw ");
1132 ppX86AMode(i->Xin.FpLdCW.addr);
1133 return;
1134 case Xin_FpStSW_AX:
1135 vex_printf("fstsw %%ax");
1136 return;
1137 case Xin_FpCmp:
1138 vex_printf("gcmp ");
1139 ppHRegX86(i->Xin.FpCmp.srcL);
1140 vex_printf(",");
1141 ppHRegX86(i->Xin.FpCmp.srcR);
1142 vex_printf(",");
1143 ppHRegX86(i->Xin.FpCmp.dst);
1144 break;
1145 case Xin_SseConst:
1146 vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
1147 ppHRegX86(i->Xin.SseConst.dst);
1148 break;
1149 case Xin_SseLdSt:
1150 vex_printf("movups ");
1151 if (i->Xin.SseLdSt.isLoad) {
1152 ppX86AMode(i->Xin.SseLdSt.addr);
1153 vex_printf(",");
1154 ppHRegX86(i->Xin.SseLdSt.reg);
1155 } else {
1156 ppHRegX86(i->Xin.SseLdSt.reg);
1157 vex_printf(",");
1158 ppX86AMode(i->Xin.SseLdSt.addr);
1160 return;
1161 case Xin_SseLdzLO:
1162 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
1163 ppX86AMode(i->Xin.SseLdzLO.addr);
1164 vex_printf(",");
1165 ppHRegX86(i->Xin.SseLdzLO.reg);
1166 return;
1167 case Xin_Sse32Fx4:
1168 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
1169 ppHRegX86(i->Xin.Sse32Fx4.src);
1170 vex_printf(",");
1171 ppHRegX86(i->Xin.Sse32Fx4.dst);
1172 return;
1173 case Xin_Sse32FLo:
1174 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
1175 ppHRegX86(i->Xin.Sse32FLo.src);
1176 vex_printf(",");
1177 ppHRegX86(i->Xin.Sse32FLo.dst);
1178 return;
1179 case Xin_Sse64Fx2:
1180 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
1181 ppHRegX86(i->Xin.Sse64Fx2.src);
1182 vex_printf(",");
1183 ppHRegX86(i->Xin.Sse64Fx2.dst);
1184 return;
1185 case Xin_Sse64FLo:
1186 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
1187 ppHRegX86(i->Xin.Sse64FLo.src);
1188 vex_printf(",");
1189 ppHRegX86(i->Xin.Sse64FLo.dst);
1190 return;
1191 case Xin_SseReRg:
1192 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
1193 ppHRegX86(i->Xin.SseReRg.src);
1194 vex_printf(",");
1195 ppHRegX86(i->Xin.SseReRg.dst);
1196 return;
1197 case Xin_SseCMov:
1198 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
1199 ppHRegX86(i->Xin.SseCMov.src);
1200 vex_printf(",");
1201 ppHRegX86(i->Xin.SseCMov.dst);
1202 return;
1203 case Xin_SseShuf:
1204 vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order);
1205 ppHRegX86(i->Xin.SseShuf.src);
1206 vex_printf(",");
1207 ppHRegX86(i->Xin.SseShuf.dst);
1208 return;
1209 case Xin_EvCheck:
1210 vex_printf("(evCheck) decl ");
1211 ppX86AMode(i->Xin.EvCheck.amCounter);
1212 vex_printf("; jns nofail; jmp *");
1213 ppX86AMode(i->Xin.EvCheck.amFailAddr);
1214 vex_printf("; nofail:");
1215 return;
1216 case Xin_ProfInc:
1217 vex_printf("(profInc) addl $1,NotKnownYet; "
1218 "adcl $0,NotKnownYet+4");
1219 return;
1220 default:
1221 vpanic("ppX86Instr");
1225 /* --------- Helpers for register allocation. --------- */
1227 void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
1229 Bool unary;
1230 vassert(mode64 == False);
1231 initHRegUsage(u);
1232 switch (i->tag) {
1233 case Xin_Alu32R:
1234 addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
1235 if (i->Xin.Alu32R.op == Xalu_MOV) {
1236 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
1238 if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
1239 u->isRegRegMove = True;
1240 u->regMoveSrc = i->Xin.Alu32R.src->Xrmi.Reg.reg;
1241 u->regMoveDst = i->Xin.Alu32R.dst;
1243 return;
1245 if (i->Xin.Alu32R.op == Xalu_CMP) {
1246 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
1247 return;
1249 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
1250 return;
1251 case Xin_Alu32M:
1252 addRegUsage_X86RI(u, i->Xin.Alu32M.src);
1253 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
1254 return;
1255 case Xin_Sh32:
1256 addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
1257 if (i->Xin.Sh32.src == 0)
1258 addHRegUse(u, HRmRead, hregX86_ECX());
1259 return;
1260 case Xin_Test32:
1261 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
1262 return;
1263 case Xin_Unary32:
1264 addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
1265 return;
1266 case Xin_Lea32:
1267 addRegUsage_X86AMode(u, i->Xin.Lea32.am);
1268 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
1269 return;
1270 case Xin_MulL:
1271 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
1272 addHRegUse(u, HRmModify, hregX86_EAX());
1273 addHRegUse(u, HRmWrite, hregX86_EDX());
1274 return;
1275 case Xin_Div:
1276 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
1277 addHRegUse(u, HRmModify, hregX86_EAX());
1278 addHRegUse(u, HRmModify, hregX86_EDX());
1279 return;
1280 case Xin_Sh3232:
1281 addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
1282 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
1283 if (i->Xin.Sh3232.amt == 0)
1284 addHRegUse(u, HRmRead, hregX86_ECX());
1285 return;
1286 case Xin_Push:
1287 addRegUsage_X86RMI(u, i->Xin.Push.src);
1288 addHRegUse(u, HRmModify, hregX86_ESP());
1289 return;
1290 case Xin_Call:
1291 /* This is a bit subtle. */
1292 /* First off, claim it trashes all the caller-saved regs
1293 which fall within the register allocator's jurisdiction.
1294 These I believe to be %eax %ecx %edx and all the xmm
1295 registers. */
1296 addHRegUse(u, HRmWrite, hregX86_EAX());
1297 addHRegUse(u, HRmWrite, hregX86_ECX());
1298 addHRegUse(u, HRmWrite, hregX86_EDX());
1299 addHRegUse(u, HRmWrite, hregX86_XMM0());
1300 addHRegUse(u, HRmWrite, hregX86_XMM1());
1301 addHRegUse(u, HRmWrite, hregX86_XMM2());
1302 addHRegUse(u, HRmWrite, hregX86_XMM3());
1303 addHRegUse(u, HRmWrite, hregX86_XMM4());
1304 addHRegUse(u, HRmWrite, hregX86_XMM5());
1305 addHRegUse(u, HRmWrite, hregX86_XMM6());
1306 addHRegUse(u, HRmWrite, hregX86_XMM7());
1307 /* Now we have to state any parameter-carrying registers
1308 which might be read. This depends on the regparmness. */
1309 switch (i->Xin.Call.regparms) {
1310 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
1311 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
1312 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
1313 case 0: break;
1314 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1316 /* Finally, there is the issue that the insn trashes a
1317 register because the literal target address has to be
1318 loaded into a register. Fortunately, for the 0/1/2
1319 regparm case, we can use EAX, EDX and ECX respectively, so
1320 this does not cause any further damage. For the 3-regparm
1321 case, we'll have to choose another register arbitrarily --
1322 since A, D and C are used for parameters -- and so we might
1323 as well choose EDI. */
1324 if (i->Xin.Call.regparms == 3)
1325 addHRegUse(u, HRmWrite, hregX86_EDI());
1326 /* Upshot of this is that the assembler really must observe
1327 the here-stated convention of which register to use as an
1328 address temporary, depending on the regparmness: 0==EAX,
1329 1==EDX, 2==ECX, 3==EDI. */
1330 return;
1331 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1332 conditionally exit the block. Hence we only need to list (1)
1333 the registers that they read, and (2) the registers that they
1334 write in the case where the block is not exited. (2) is
1335 empty, hence only (1) is relevant here. */
1336 case Xin_XDirect:
1337 addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
1338 return;
1339 case Xin_XIndir:
1340 addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
1341 addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
1342 return;
1343 case Xin_XAssisted:
1344 addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
1345 addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
1346 return;
1347 case Xin_CMov32:
1348 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
1349 addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
1350 return;
1351 case Xin_LoadEX:
1352 addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
1353 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
1354 return;
1355 case Xin_Store:
1356 addHRegUse(u, HRmRead, i->Xin.Store.src);
1357 addRegUsage_X86AMode(u, i->Xin.Store.dst);
1358 return;
1359 case Xin_Set32:
1360 addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
1361 return;
1362 case Xin_Bsfr32:
1363 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
1364 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
1365 return;
1366 case Xin_MFence:
1367 return;
1368 case Xin_ACAS:
1369 addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
1370 addHRegUse(u, HRmRead, hregX86_EBX());
1371 addHRegUse(u, HRmModify, hregX86_EAX());
1372 return;
1373 case Xin_DACAS:
1374 addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
1375 addHRegUse(u, HRmRead, hregX86_ECX());
1376 addHRegUse(u, HRmRead, hregX86_EBX());
1377 addHRegUse(u, HRmModify, hregX86_EDX());
1378 addHRegUse(u, HRmModify, hregX86_EAX());
1379 return;
1380 case Xin_FpUnary:
1381 addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
1382 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
1384 if (i->Xin.FpUnary.op == Xfp_MOV) {
1385 u->isRegRegMove = True;
1386 u->regMoveSrc = i->Xin.FpUnary.src;
1387 u->regMoveDst = i->Xin.FpUnary.dst;
1389 return;
1390 case Xin_FpBinary:
1391 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
1392 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
1393 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
1394 return;
1395 case Xin_FpLdSt:
1396 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
1397 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
1398 i->Xin.FpLdSt.reg);
1399 return;
1400 case Xin_FpLdStI:
1401 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
1402 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
1403 i->Xin.FpLdStI.reg);
1404 return;
1405 case Xin_Fp64to32:
1406 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
1407 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
1408 return;
1409 case Xin_FpCMov:
1410 addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
1411 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
1412 return;
1413 case Xin_FpLdCW:
1414 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
1415 return;
1416 case Xin_FpStSW_AX:
1417 addHRegUse(u, HRmWrite, hregX86_EAX());
1418 return;
1419 case Xin_FpCmp:
1420 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
1421 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
1422 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
1423 addHRegUse(u, HRmWrite, hregX86_EAX());
1424 return;
1425 case Xin_SseLdSt:
1426 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
1427 addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
1428 i->Xin.SseLdSt.reg);
1429 return;
1430 case Xin_SseLdzLO:
1431 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
1432 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
1433 return;
1434 case Xin_SseConst:
1435 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
1436 return;
1437 case Xin_Sse32Fx4:
1438 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
1439 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
1440 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
1441 || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
1442 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
1443 addHRegUse(u, unary ? HRmWrite : HRmModify,
1444 i->Xin.Sse32Fx4.dst);
1445 return;
1446 case Xin_Sse32FLo:
1447 vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
1448 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
1449 || i->Xin.Sse32FLo.op == Xsse_RSQRTF
1450 || i->Xin.Sse32FLo.op == Xsse_SQRTF );
1451 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
1452 addHRegUse(u, unary ? HRmWrite : HRmModify,
1453 i->Xin.Sse32FLo.dst);
1454 return;
1455 case Xin_Sse64Fx2:
1456 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
1457 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
1458 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
1459 || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
1460 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
1461 addHRegUse(u, unary ? HRmWrite : HRmModify,
1462 i->Xin.Sse64Fx2.dst);
1463 return;
1464 case Xin_Sse64FLo:
1465 vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
1466 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
1467 || i->Xin.Sse64FLo.op == Xsse_RSQRTF
1468 || i->Xin.Sse64FLo.op == Xsse_SQRTF );
1469 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
1470 addHRegUse(u, unary ? HRmWrite : HRmModify,
1471 i->Xin.Sse64FLo.dst);
1472 return;
1473 case Xin_SseReRg:
1474 if (i->Xin.SseReRg.op == Xsse_XOR
1475 && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
1476 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1477 /* (as opposed to a rite of passage :-) */
1478 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
1479 } else {
1480 addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
1481 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
1482 ? HRmWrite : HRmModify,
1483 i->Xin.SseReRg.dst);
1485 if (i->Xin.SseReRg.op == Xsse_MOV) {
1486 u->isRegRegMove = True;
1487 u->regMoveSrc = i->Xin.SseReRg.src;
1488 u->regMoveDst = i->Xin.SseReRg.dst;
1491 return;
1492 case Xin_SseCMov:
1493 addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
1494 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
1495 return;
1496 case Xin_SseShuf:
1497 addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
1498 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1499 return;
1500 case Xin_EvCheck:
1501 /* We expect both amodes only to mention %ebp, so this is in
1502 fact pointless, since %ebp isn't allocatable, but anyway.. */
1503 addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
1504 addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
1505 return;
1506 case Xin_ProfInc:
1507 /* does not use any registers. */
1508 return;
1509 default:
1510 ppX86Instr(i, False);
1511 vpanic("getRegUsage_X86Instr");
1515 /* local helper */
1516 static void mapReg( HRegRemap* m, HReg* r )
1518 *r = lookupHRegRemap(m, *r);
1521 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
1523 vassert(mode64 == False);
1524 switch (i->tag) {
1525 case Xin_Alu32R:
1526 mapRegs_X86RMI(m, i->Xin.Alu32R.src);
1527 mapReg(m, &i->Xin.Alu32R.dst);
1528 return;
1529 case Xin_Alu32M:
1530 mapRegs_X86RI(m, i->Xin.Alu32M.src);
1531 mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
1532 return;
1533 case Xin_Sh32:
1534 mapReg(m, &i->Xin.Sh32.dst);
1535 return;
1536 case Xin_Test32:
1537 mapRegs_X86RM(m, i->Xin.Test32.dst);
1538 return;
1539 case Xin_Unary32:
1540 mapReg(m, &i->Xin.Unary32.dst);
1541 return;
1542 case Xin_Lea32:
1543 mapRegs_X86AMode(m, i->Xin.Lea32.am);
1544 mapReg(m, &i->Xin.Lea32.dst);
1545 return;
1546 case Xin_MulL:
1547 mapRegs_X86RM(m, i->Xin.MulL.src);
1548 return;
1549 case Xin_Div:
1550 mapRegs_X86RM(m, i->Xin.Div.src);
1551 return;
1552 case Xin_Sh3232:
1553 mapReg(m, &i->Xin.Sh3232.src);
1554 mapReg(m, &i->Xin.Sh3232.dst);
1555 return;
1556 case Xin_Push:
1557 mapRegs_X86RMI(m, i->Xin.Push.src);
1558 return;
1559 case Xin_Call:
1560 return;
1561 case Xin_XDirect:
1562 mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
1563 return;
1564 case Xin_XIndir:
1565 mapReg(m, &i->Xin.XIndir.dstGA);
1566 mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
1567 return;
1568 case Xin_XAssisted:
1569 mapReg(m, &i->Xin.XAssisted.dstGA);
1570 mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
1571 return;
1572 case Xin_CMov32:
1573 mapRegs_X86RM(m, i->Xin.CMov32.src);
1574 mapReg(m, &i->Xin.CMov32.dst);
1575 return;
1576 case Xin_LoadEX:
1577 mapRegs_X86AMode(m, i->Xin.LoadEX.src);
1578 mapReg(m, &i->Xin.LoadEX.dst);
1579 return;
1580 case Xin_Store:
1581 mapReg(m, &i->Xin.Store.src);
1582 mapRegs_X86AMode(m, i->Xin.Store.dst);
1583 return;
1584 case Xin_Set32:
1585 mapReg(m, &i->Xin.Set32.dst);
1586 return;
1587 case Xin_Bsfr32:
1588 mapReg(m, &i->Xin.Bsfr32.src);
1589 mapReg(m, &i->Xin.Bsfr32.dst);
1590 return;
1591 case Xin_MFence:
1592 return;
1593 case Xin_ACAS:
1594 mapRegs_X86AMode(m, i->Xin.ACAS.addr);
1595 return;
1596 case Xin_DACAS:
1597 mapRegs_X86AMode(m, i->Xin.DACAS.addr);
1598 return;
1599 case Xin_FpUnary:
1600 mapReg(m, &i->Xin.FpUnary.src);
1601 mapReg(m, &i->Xin.FpUnary.dst);
1602 return;
1603 case Xin_FpBinary:
1604 mapReg(m, &i->Xin.FpBinary.srcL);
1605 mapReg(m, &i->Xin.FpBinary.srcR);
1606 mapReg(m, &i->Xin.FpBinary.dst);
1607 return;
1608 case Xin_FpLdSt:
1609 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
1610 mapReg(m, &i->Xin.FpLdSt.reg);
1611 return;
1612 case Xin_FpLdStI:
1613 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
1614 mapReg(m, &i->Xin.FpLdStI.reg);
1615 return;
1616 case Xin_Fp64to32:
1617 mapReg(m, &i->Xin.Fp64to32.src);
1618 mapReg(m, &i->Xin.Fp64to32.dst);
1619 return;
1620 case Xin_FpCMov:
1621 mapReg(m, &i->Xin.FpCMov.src);
1622 mapReg(m, &i->Xin.FpCMov.dst);
1623 return;
1624 case Xin_FpLdCW:
1625 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
1626 return;
1627 case Xin_FpStSW_AX:
1628 return;
1629 case Xin_FpCmp:
1630 mapReg(m, &i->Xin.FpCmp.srcL);
1631 mapReg(m, &i->Xin.FpCmp.srcR);
1632 mapReg(m, &i->Xin.FpCmp.dst);
1633 return;
1634 case Xin_SseConst:
1635 mapReg(m, &i->Xin.SseConst.dst);
1636 return;
1637 case Xin_SseLdSt:
1638 mapReg(m, &i->Xin.SseLdSt.reg);
1639 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
1640 break;
1641 case Xin_SseLdzLO:
1642 mapReg(m, &i->Xin.SseLdzLO.reg);
1643 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
1644 break;
1645 case Xin_Sse32Fx4:
1646 mapReg(m, &i->Xin.Sse32Fx4.src);
1647 mapReg(m, &i->Xin.Sse32Fx4.dst);
1648 return;
1649 case Xin_Sse32FLo:
1650 mapReg(m, &i->Xin.Sse32FLo.src);
1651 mapReg(m, &i->Xin.Sse32FLo.dst);
1652 return;
1653 case Xin_Sse64Fx2:
1654 mapReg(m, &i->Xin.Sse64Fx2.src);
1655 mapReg(m, &i->Xin.Sse64Fx2.dst);
1656 return;
1657 case Xin_Sse64FLo:
1658 mapReg(m, &i->Xin.Sse64FLo.src);
1659 mapReg(m, &i->Xin.Sse64FLo.dst);
1660 return;
1661 case Xin_SseReRg:
1662 mapReg(m, &i->Xin.SseReRg.src);
1663 mapReg(m, &i->Xin.SseReRg.dst);
1664 return;
1665 case Xin_SseCMov:
1666 mapReg(m, &i->Xin.SseCMov.src);
1667 mapReg(m, &i->Xin.SseCMov.dst);
1668 return;
1669 case Xin_SseShuf:
1670 mapReg(m, &i->Xin.SseShuf.src);
1671 mapReg(m, &i->Xin.SseShuf.dst);
1672 return;
1673 case Xin_EvCheck:
1674 /* We expect both amodes only to mention %ebp, so this is in
1675 fact pointless, since %ebp isn't allocatable, but anyway.. */
1676 mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
1677 mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
1678 return;
1679 case Xin_ProfInc:
1680 /* does not use any registers. */
1681 return;
1683 default:
1684 ppX86Instr(i, mode64);
1685 vpanic("mapRegs_X86Instr");
1689 /* Generate x86 spill/reload instructions under the direction of the
1690 register allocator. Note it's critical these don't write the
1691 condition codes. */
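/* For illustration, the instructions generated here are plain
   %ebp-relative moves.  For an integer rreg spilled at offset 0x18,
   say, the pair is roughly

      spill:   movl  %<rreg>, 0x18(%ebp)
      reload:  movl  0x18(%ebp), %<rreg>

   FP registers are spilled as 80-bit x87 values (sz == 10) so the
   full register contents survive, and vector registers use movups.
   None of these touch %eflags, which is the property the comment
   above insists on. */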
1693 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1694 HReg rreg, Int offsetB, Bool mode64 )
1696 X86AMode* am;
1697 vassert(offsetB >= 0);
1698 vassert(!hregIsVirtual(rreg));
1699 vassert(mode64 == False);
1700 *i1 = *i2 = NULL;
1701 am = X86AMode_IR(offsetB, hregX86_EBP());
1702 switch (hregClass(rreg)) {
1703 case HRcInt32:
1704 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
1705 return;
1706 case HRcFlt64:
1707 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
1708 return;
1709 case HRcVec128:
1710 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
1711 return;
1712 default:
1713 ppHRegClass(hregClass(rreg));
1714 vpanic("genSpill_X86: unimplemented regclass");
1718 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1719 HReg rreg, Int offsetB, Bool mode64 )
1721 X86AMode* am;
1722 vassert(offsetB >= 0);
1723 vassert(!hregIsVirtual(rreg));
1724 vassert(mode64 == False);
1725 *i1 = *i2 = NULL;
1726 am = X86AMode_IR(offsetB, hregX86_EBP());
1727 switch (hregClass(rreg)) {
1728 case HRcInt32:
1729 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
1730 return;
1731 case HRcFlt64:
1732 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
1733 return;
1734 case HRcVec128:
1735 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
1736 return;
1737 default:
1738 ppHRegClass(hregClass(rreg));
1739 vpanic("genReload_X86: unimplemented regclass");
1743 X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
1745 switch (hregClass(from)) {
1746 case HRcInt32:
1747 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
1748 case HRcVec128:
1749 return X86Instr_SseReRg(Xsse_MOV, from, to);
1750 default:
1751 ppHRegClass(hregClass(from));
1752 vpanic("genMove_X86: unimplemented regclass");
1756 /* The given instruction reads the specified vreg exactly once, and
1757 that vreg is currently located at the given spill offset. If
1758 possible, return a variant of the instruction to one which instead
1759 references the spill slot directly. */
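/* Illustrative example: if vreg v is currently spilled at offset 0x40
   and the instruction is

      orl %v, %edst           -- Alu32R(Xalu_OR, Xrmi_Reg(v), edst)

   then the first case below rewrites it as

      orl 0x40(%ebp), %edst   -- Alu32R(Xalu_OR, Xrmi_Mem(..), edst)

   so the register allocator can avoid emitting a separate reload. */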
1761 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
1763 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
1765 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1766 Convert to: src=RMI_Mem, dst=Reg
1768 if (i->tag == Xin_Alu32R
1769 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
1770 || i->Xin.Alu32R.op == Xalu_XOR)
1771 && i->Xin.Alu32R.src->tag == Xrmi_Reg
1772 && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
1773 vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
1774 return X86Instr_Alu32R(
1775 i->Xin.Alu32R.op,
1776 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
1777 i->Xin.Alu32R.dst
1781 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1782 Convert to: src=RI_Imm, dst=Mem
1784 if (i->tag == Xin_Alu32R
1785 && (i->Xin.Alu32R.op == Xalu_CMP)
1786 && i->Xin.Alu32R.src->tag == Xrmi_Imm
1787 && sameHReg(i->Xin.Alu32R.dst, vreg)) {
1788 return X86Instr_Alu32M(
1789 i->Xin.Alu32R.op,
1790 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
1791 X86AMode_IR( spill_off, hregX86_EBP())
1795 /* Deal with form: Push(RMI_Reg)
1796 Convert to: Push(RMI_Mem)
1798 if (i->tag == Xin_Push
1799 && i->Xin.Push.src->tag == Xrmi_Reg
1800 && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
1801 return X86Instr_Push(
1802 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
1806 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1807 Convert to CMov32(RM_Mem, dst) */
1808 if (i->tag == Xin_CMov32
1809 && i->Xin.CMov32.src->tag == Xrm_Reg
1810 && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
1811 vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
1812 return X86Instr_CMov32(
1813 i->Xin.CMov32.cond,
1814 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
1815 i->Xin.CMov32.dst
1819 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1820 if (i->tag == Xin_Test32
1821 && i->Xin.Test32.dst->tag == Xrm_Reg
1822 && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
1823 return X86Instr_Test32(
1824 i->Xin.Test32.imm32,
1825 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
1829 return NULL;
1833 /* --------- The x86 assembler (bleh.) --------- */
1835 inline static UInt iregEnc ( HReg r )
1837 UInt n;
1838 vassert(hregClass(r) == HRcInt32);
1839 vassert(!hregIsVirtual(r));
1840 n = hregEncoding(r);
1841 vassert(n <= 7);
1842 return n;
1845 inline static UInt fregEnc ( HReg r )
1847 UInt n;
1848 vassert(hregClass(r) == HRcFlt64);
1849 vassert(!hregIsVirtual(r));
1850 n = hregEncoding(r);
1851 vassert(n <= 5);
1852 return n;
1855 inline static UInt vregEnc ( HReg r )
1857 UInt n;
1858 vassert(hregClass(r) == HRcVec128);
1859 vassert(!hregIsVirtual(r));
1860 n = hregEncoding(r);
1861 vassert(n <= 7);
1862 return n;
1865 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1867 vassert(mod < 4);
1868 vassert((reg|regmem) < 8);
1869 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
1872 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1874 vassert(shift < 4);
1875 vassert((regindex|regbase) < 8);
1876 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
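/* Worked example: ModRM and SIB bytes are just 2-3-3 bit fields.
   With the standard x86 encodings eax=0, ecx=1, edx=2, ebx=3, esp=4,
   ebp=5, esi=6, edi=7 (cf. ireg32_names above):

      mkModRegRM(3, 0, 1) = 0xC1   -- mod=11 (reg-reg), reg=%eax, rm=%ecx
      mkSIB(1, 7, 6)      = 0x7E   -- scale x2, index=%edi, base=%esi   */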
1879 static UChar* emit32 ( UChar* p, UInt w32 )
1881 *p++ = toUChar( w32 & 0x000000FF);
1882 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1883 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1884 *p++ = toUChar((w32 >> 24) & 0x000000FF);
1885 return p;
1888 /* Does a sign-extend of the lowest 8 bits give
1889 the original number? */
1890 static Bool fits8bits ( UInt w32 )
1892 Int i32 = (Int)w32;
1893 return toBool(i32 == ((Int)(w32 << 24) >> 24));
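/* For example: fits8bits(0x7F) and fits8bits(0xFFFFFFFF) (i.e. -1)
   are True, since truncating to 8 bits and sign-extending gives the
   original value back; fits8bits(0x80) is False, because 0x80
   sign-extends to 0xFFFFFF80. */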
1897 /* Forming mod-reg-rm bytes and scale-index-base bytes.
1899 greg, 0(ereg) | ereg != ESP && ereg != EBP
1900 = 00 greg ereg
1902 greg, d8(ereg) | ereg != ESP
1903 = 01 greg ereg, d8
1905 greg, d32(ereg) | ereg != ESP
1906 = 10 greg ereg, d32
1908 greg, d8(%esp) = 01 greg 100, 0x24, d8
1910 -----------------------------------------------
1912 greg, d8(base,index,scale)
1913 | index != ESP
1914 = 01 greg 100, scale index base, d8
1916 greg, d32(base,index,scale)
1917 | index != ESP
1918 = 10 greg 100, scale index base, d32
1920 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
1922 if (am->tag == Xam_IR) {
1923 if (am->Xam.IR.imm == 0
1924 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
1925 && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
1926 *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
1927 return p;
1929 if (fits8bits(am->Xam.IR.imm)
1930 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1931 *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
1932 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1933 return p;
1935 if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1936 *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
1937 p = emit32(p, am->Xam.IR.imm);
1938 return p;
1940 if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
1941 && fits8bits(am->Xam.IR.imm)) {
1942 *p++ = mkModRegRM(1, gregEnc, 4);
1943 *p++ = 0x24;
1944 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1945 return p;
1947 ppX86AMode(am);
1948 vpanic("doAMode_M: can't emit amode IR");
1949 /*NOTREACHED*/
1951 if (am->tag == Xam_IRRS) {
1952 if (fits8bits(am->Xam.IRRS.imm)
1953 && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1954 *p++ = mkModRegRM(1, gregEnc, 4);
1955 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
1956 iregEnc(am->Xam.IRRS.base));
1957 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
1958 return p;
1960 if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1961 *p++ = mkModRegRM(2, gregEnc, 4);
1962 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
1963 iregEnc(am->Xam.IRRS.base));
1964 p = emit32(p, am->Xam.IRRS.imm);
1965 return p;
1967 ppX86AMode(am);
1968 vpanic("doAMode_M: can't emit amode IRRS");
1969 /*NOTREACHED*/
1971 vpanic("doAMode_M: unknown amode");
1972 /*NOTREACHED*/
1975 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
1977 return doAMode_M__wrk(p, iregEnc(greg), am);
1980 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
1982 vassert(gregEnc < 8);
1983 return doAMode_M__wrk(p, gregEnc, am);
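/* Illustrative encodings produced by the above (register operand
   assumed to be %eax, encoding 0): the amode 8(%ebp) encodes as
   45 08 (mod=01, rm=EBP, disp8), so "movl 8(%ebp),%eax" is 8B 45 08;
   the amode 4(%esp) needs the SIB escape and encodes as 44 24 04,
   so "movl 4(%esp),%eax" is 8B 44 24 04. */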
1987 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
1988 inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
1990 *p++ = mkModRegRM(3, gregEnc, eregEnc);
1991 return p;
1994 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
1996 return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
1999 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
2001 vassert(gregEnc < 8);
2002 return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
2005 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
2007 vassert( (gregEnc|eregEnc) < 8);
2008 return doAMode_R__wrk(p, gregEnc, eregEnc);
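/* For example, doAMode_R with greg=%eax and ereg=%ebx produces the
   single byte mkModRegRM(3,0,3) == 0xC3; preceded by opcode 0x01
   that is "addl %eax,%ebx". */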
2012 /* Emit ffree %st(7) */
2013 static UChar* do_ffree_st7 ( UChar* p )
2015 *p++ = 0xDD;
2016 *p++ = 0xC7;
2017 return p;
2020 /* Emit fstp %st(i), 1 <= i <= 7 */
2021 static UChar* do_fstp_st ( UChar* p, Int i )
2023 vassert(1 <= i && i <= 7);
2024 *p++ = 0xDD;
2025 *p++ = toUChar(0xD8+i);
2026 return p;
2029 /* Emit fld %st(i), 0 <= i <= 6 */
2030 static UChar* do_fld_st ( UChar* p, Int i )
2032 vassert(0 <= i && i <= 6);
2033 *p++ = 0xD9;
2034 *p++ = toUChar(0xC0+i);
2035 return p;
2038 /* Emit f<op> %st(0) */
2039 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
2041 switch (op) {
2042 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
2043 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
2044 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
2045 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
2046 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
2047 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
2048 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
2049 case Xfp_MOV: break;
2050 case Xfp_TAN:
2051 /* fptan pushes 1.0 on the FP stack, except when the argument
2052 is out of range. Hence we have to do the instruction,
2053 then inspect C2 to see if there is an out of range
2054 condition. If there is, we skip the fincstp that is used
2055 by the in-range case to get rid of this extra 1.0
2056 value. */
2057 p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
2058 *p++ = 0xD9; *p++ = 0xF2; // fptan
2059 *p++ = 0x50; // pushl %eax
2060 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
2061 *p++ = 0x66; *p++ = 0xA9;
2062 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
2063 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
2064 *p++ = 0xD9; *p++ = 0xF7; // fincstp
2065 *p++ = 0x58; // after_fincstp: popl %eax
2066 break;
2067 default:
2068 vpanic("do_fop1_st: unknown op");
2070 return p;
2073 /* Emit f<op> %st(i), 1 <= i <= 5 */
2074 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
2076 Int subopc;
2077 switch (op) {
2078 case Xfp_ADD: subopc = 0; break;
2079 case Xfp_SUB: subopc = 4; break;
2080 case Xfp_MUL: subopc = 1; break;
2081 case Xfp_DIV: subopc = 6; break;
2082 default: vpanic("do_fop2_st: unknown op");
2084 *p++ = 0xD8;
2085 p = doAMode_R_enc_enc(p, subopc, i);
2086 return p;
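/* For example, do_fop2_st(p, Xfp_ADD, 1) emits D8 C1, i.e.
   "fadd %st(1),%st(0)".  The sub-opcode values 0/1/4/6 chosen above
   match the D8 opcode-extension encodings for fadd/fmul/fsub/fdiv. */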
2089 /* Push a 32-bit word on the stack. The word depends on tags[3:0];
2090 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
2092 static UChar* push_word_from_tags ( UChar* p, UShort tags )
2094 UInt w;
2095 vassert(0 == (tags & ~0xF));
2096 if (tags == 0) {
2097 /* pushl $0x00000000 */
2098 *p++ = 0x6A;
2099 *p++ = 0x00;
2101 else
2102 /* pushl $0xFFFFFFFF */
2103 if (tags == 0xF) {
2104 *p++ = 0x6A;
2105 *p++ = 0xFF;
2106 } else {
2107 vassert(0); /* awaiting test case */
2108 w = 0;
2109 if (tags & 1) w |= 0x000000FF;
2110 if (tags & 2) w |= 0x0000FF00;
2111 if (tags & 4) w |= 0x00FF0000;
2112 if (tags & 8) w |= 0xFF000000;
2113 *p++ = 0x68;
2114 p = emit32(p, w);
2116 return p;
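/* E.g. tags == 0xF pushes 0xFFFFFFFF via "pushl $-1" (6A FF, the imm8
   being sign-extended), and tags == 0 pushes zero (6A 00); the
   mixed-tag case builds the word a byte at a time and pushes it with
   the imm32 form (68). */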
2119 /* Emit an instruction into buf and return the number of bytes used.
2120 Note that buf is not the insn's final place, and therefore it is
2121 imperative to emit position-independent code. If the emitted
2122 instruction was a profiler inc, set *is_profInc to True, else
2123 leave it unchanged. */
2125 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
2126 UChar* buf, Int nbuf, const X86Instr* i,
2127 Bool mode64, VexEndness endness_host,
2128 const void* disp_cp_chain_me_to_slowEP,
2129 const void* disp_cp_chain_me_to_fastEP,
2130 const void* disp_cp_xindir,
2131 const void* disp_cp_xassisted )
2133 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2135 UInt xtra;
2136 UChar* p = &buf[0];
2137 UChar* ptmp;
2138 vassert(nbuf >= 32);
2139 vassert(mode64 == False);
2141 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
2143 switch (i->tag) {
2145 case Xin_Alu32R:
2146 /* Deal specially with MOV */
2147 if (i->Xin.Alu32R.op == Xalu_MOV) {
2148 switch (i->Xin.Alu32R.src->tag) {
2149 case Xrmi_Imm:
2150 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst));
2151 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2152 goto done;
2153 case Xrmi_Reg:
2154 *p++ = 0x89;
2155 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2156 i->Xin.Alu32R.dst);
2157 goto done;
2158 case Xrmi_Mem:
2159 *p++ = 0x8B;
2160 p = doAMode_M(p, i->Xin.Alu32R.dst,
2161 i->Xin.Alu32R.src->Xrmi.Mem.am);
2162 goto done;
2163 default:
2164 goto bad;
2167 /* MUL */
2168 if (i->Xin.Alu32R.op == Xalu_MUL) {
2169 switch (i->Xin.Alu32R.src->tag) {
2170 case Xrmi_Reg:
2171 *p++ = 0x0F;
2172 *p++ = 0xAF;
2173 p = doAMode_R(p, i->Xin.Alu32R.dst,
2174 i->Xin.Alu32R.src->Xrmi.Reg.reg);
2175 goto done;
2176 case Xrmi_Mem:
2177 *p++ = 0x0F;
2178 *p++ = 0xAF;
2179 p = doAMode_M(p, i->Xin.Alu32R.dst,
2180 i->Xin.Alu32R.src->Xrmi.Mem.am);
2181 goto done;
2182 case Xrmi_Imm:
2183 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2184 *p++ = 0x6B;
2185 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2186 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2187 } else {
2188 *p++ = 0x69;
2189 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2190 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2192 goto done;
2193 default:
2194 goto bad;
2197 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2198 opc = opc_rr = subopc_imm = opc_imma = 0;
2199 switch (i->Xin.Alu32R.op) {
2200 case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
2201 subopc_imm = 2; opc_imma = 0x15; break;
2202 case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
2203 subopc_imm = 0; opc_imma = 0x05; break;
2204 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
2205 subopc_imm = 5; opc_imma = 0x2D; break;
2206 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
2207 subopc_imm = 3; opc_imma = 0x1D; break;
2208 case Xalu_AND: opc = 0x23; opc_rr = 0x21;
2209 subopc_imm = 4; opc_imma = 0x25; break;
2210 case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
2211 subopc_imm = 6; opc_imma = 0x35; break;
2212 case Xalu_OR: opc = 0x0B; opc_rr = 0x09;
2213 subopc_imm = 1; opc_imma = 0x0D; break;
2214 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
2215 subopc_imm = 7; opc_imma = 0x3D; break;
2216 default: goto bad;
2218 switch (i->Xin.Alu32R.src->tag) {
2219 case Xrmi_Imm:
2220 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
2221 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2222 *p++ = toUChar(opc_imma);
2223 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2224 } else
2225 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2226 *p++ = 0x83;
2227 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
2228 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2229 } else {
2230 *p++ = 0x81;
2231 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
2232 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2234 goto done;
2235 case Xrmi_Reg:
2236 *p++ = toUChar(opc_rr);
2237 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2238 i->Xin.Alu32R.dst);
2239 goto done;
2240 case Xrmi_Mem:
2241 *p++ = toUChar(opc);
2242 p = doAMode_M(p, i->Xin.Alu32R.dst,
2243 i->Xin.Alu32R.src->Xrmi.Mem.am);
2244 goto done;
2245 default:
2246 goto bad;
2248 break;
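/* Worked examples for the immediate forms above (hypothetical
   operands): "addl $1,%ebx" encodes as 83 C3 01 (imm8 form),
   "addl $0x12345678,%ebx" as 81 C3 78 56 34 12, and
   "addl $0x12345678,%eax" takes the short accumulator form
   05 78 56 34 12. */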
2250 case Xin_Alu32M:
2251 /* Deal specially with MOV */
2252 if (i->Xin.Alu32M.op == Xalu_MOV) {
2253 switch (i->Xin.Alu32M.src->tag) {
2254 case Xri_Reg:
2255 *p++ = 0x89;
2256 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2257 i->Xin.Alu32M.dst);
2258 goto done;
2259 case Xri_Imm:
2260 *p++ = 0xC7;
2261 p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
2262 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2263 goto done;
2264 default:
2265 goto bad;
2268 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2269 allowed here. */
2270 opc = subopc_imm = opc_imma = 0;
2271 switch (i->Xin.Alu32M.op) {
2272 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2273 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2274 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
2275 default: goto bad;
2277 switch (i->Xin.Alu32M.src->tag) {
2278 case Xri_Reg:
2279 *p++ = toUChar(opc);
2280 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2281 i->Xin.Alu32M.dst);
2282 goto done;
2283 case Xri_Imm:
2284 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2285 *p++ = 0x83;
2286 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
2287 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
2288 goto done;
2289 } else {
2290 *p++ = 0x81;
2291 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
2292 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2293 goto done;
2295 default:
2296 goto bad;
2298 break;
2300 case Xin_Sh32:
2301 opc_cl = opc_imm = subopc = 0;
2302 switch (i->Xin.Sh32.op) {
2303 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2304 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2305 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2306 default: goto bad;
2308 if (i->Xin.Sh32.src == 0) {
2309 *p++ = toUChar(opc_cl);
2310 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
2311 } else {
2312 *p++ = toUChar(opc_imm);
2313 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
2314 *p++ = (UChar)(i->Xin.Sh32.src);
2316 goto done;
2318 case Xin_Test32:
2319 if (i->Xin.Test32.dst->tag == Xrm_Reg) {
2320 /* testl $imm32, %reg */
2321 *p++ = 0xF7;
2322 p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
2323 p = emit32(p, i->Xin.Test32.imm32);
2324 goto done;
2325 } else {
2326 /* testl $imm32, amode */
2327 *p++ = 0xF7;
2328 p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
2329 p = emit32(p, i->Xin.Test32.imm32);
2330 goto done;
2333 case Xin_Unary32:
2334 if (i->Xin.Unary32.op == Xun_NOT) {
2335 *p++ = 0xF7;
2336 p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
2337 goto done;
2339 if (i->Xin.Unary32.op == Xun_NEG) {
2340 *p++ = 0xF7;
2341 p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);
2342 goto done;
2344 break;
2346 case Xin_Lea32:
2347 *p++ = 0x8D;
2348 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
2349 goto done;
2351 case Xin_MulL:
2352 subopc = i->Xin.MulL.syned ? 5 : 4;
2353 *p++ = 0xF7;
2354 switch (i->Xin.MulL.src->tag) {
2355 case Xrm_Mem:
2356 p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
2357 goto done;
2358 case Xrm_Reg:
2359 p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);
2360 goto done;
2361 default:
2362 goto bad;
2364 break;
2366 case Xin_Div:
2367 subopc = i->Xin.Div.syned ? 7 : 6;
2368 *p++ = 0xF7;
2369 switch (i->Xin.Div.src->tag) {
2370 case Xrm_Mem:
2371 p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
2372 goto done;
2373 case Xrm_Reg:
2374 p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);
2375 goto done;
2376 default:
2377 goto bad;
2379 break;
2381 case Xin_Sh3232:
2382 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2383 if (i->Xin.Sh3232.amt == 0) {
2384 /* shldl/shrdl by %cl */
2385 *p++ = 0x0F;
2386 if (i->Xin.Sh3232.op == Xsh_SHL) {
2387 *p++ = 0xA5;
2388 } else {
2389 *p++ = 0xAD;
2391 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2392 goto done;
2394 break;
2396 case Xin_Push:
2397 switch (i->Xin.Push.src->tag) {
2398 case Xrmi_Mem:
2399 *p++ = 0xFF;
2400 p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
2401 goto done;
2402 case Xrmi_Imm:
2403 *p++ = 0x68;
2404 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
2405 goto done;
2406 case Xrmi_Reg:
2407 *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
2408 goto done;
2409 default:
2410 goto bad;
2413 case Xin_Call:
2414 if (i->Xin.Call.cond != Xcc_ALWAYS
2415 && i->Xin.Call.rloc.pri != RLPri_None) {
2416 /* The call might not happen (it isn't unconditional) and it
2417 returns a result. In this case we will need to generate a
2418 control flow diamond to put 0x555..555 in the return
2419 register(s) in the case where the call doesn't happen. If
2420 this ever becomes necessary, maybe copy code from the ARM
2421 equivalent. Until that day, just give up. */
2422 goto bad;
2424 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
2425 for explanation of this. */
2426 switch (i->Xin.Call.regparms) {
2427 case 0: irno = iregEnc(hregX86_EAX()); break;
2428 case 1: irno = iregEnc(hregX86_EDX()); break;
2429 case 2: irno = iregEnc(hregX86_ECX()); break;
2430 case 3: irno = iregEnc(hregX86_EDI()); break;
2431 default: vpanic("emit_X86Instr:call:regparms");
2433 /* jump over the following two insns if the condition does not
2434 hold */
2435 if (i->Xin.Call.cond != Xcc_ALWAYS) {
2436 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
2437 *p++ = 0x07; /* 7 bytes in the next two insns */
2439 /* movl $target, %tmp */
2440 *p++ = toUChar(0xB8 + irno);
2441 p = emit32(p, i->Xin.Call.target);
2442 /* call *%tmp */
2443 *p++ = 0xFF;
2444 *p++ = toUChar(0xD0 + irno);
2445 goto done;
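/* For instance (hypothetical operands), a conditional call with
   regparms==0 and cond==Xcc_Z would come out as 75 07 (jnz over the
   next 7 bytes), B8 <target> (movl into %eax, 5 bytes) and FF D0
   (call *%eax, 2 bytes) -- which is why the skip byte above is
   hardwired to 0x07. */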
2447 case Xin_XDirect: {
2448 /* NB: what goes on here has to be very closely coordinated with the
2449 chainXDirect_X86 and unchainXDirect_X86 below. */
2450 /* We're generating chain-me requests here, so we need to be
2451 sure this is actually allowed -- no-redir translations can't
2452 use chain-me's. Hence: */
2453 vassert(disp_cp_chain_me_to_slowEP != NULL);
2454 vassert(disp_cp_chain_me_to_fastEP != NULL);
2456 /* Use ptmp for backpatching conditional jumps. */
2457 ptmp = NULL;
2459 /* First off, if this is conditional, create a conditional
2460 jump over the rest of it. */
2461 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2462 /* jmp fwds if !condition */
2463 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
2464 ptmp = p; /* fill in this bit later */
2465 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2468 /* Update the guest EIP. */
2469 /* movl $dstGA, amEIP */
2470 *p++ = 0xC7;
2471 p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
2472 p = emit32(p, i->Xin.XDirect.dstGA);
2474 /* --- FIRST PATCHABLE BYTE follows --- */
2475 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2476 to) backs up the return address, so as to find the address of
2477 the first patchable byte. So: don't change the length of the
2478 two instructions below. */
2479 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
2480 *p++ = 0xBA;
2481 const void* disp_cp_chain_me
2482 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2483 : disp_cp_chain_me_to_slowEP;
2484 p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
2485 /* call *%edx */
2486 *p++ = 0xFF;
2487 *p++ = 0xD2;
2488 /* --- END of PATCHABLE BYTES --- */
2490 /* Fix up the conditional jump, if there was one. */
2491 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2492 Int delta = p - ptmp;
2493 vassert(delta > 0 && delta < 40);
2494 *ptmp = toUChar(delta-1);
2496 goto done;
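/* Note: the patchable tail emitted above is BA <imm32> FF D2
   (7 bytes).  chainXDirect_X86 below checks for exactly this pattern
   and overwrites it in place with E9 <disp32> 0F 0B, which has the
   same length. */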
2499 case Xin_XIndir: {
2500 /* We're generating transfers that could lead indirectly to a
2501 chain-me, so we need to be sure this is actually allowed --
2502 no-redir translations are not allowed to reach normal
2503 translations without going through the scheduler. That means
2504 no XDirects or XIndirs out from no-redir translations.
2505 Hence: */
2506 vassert(disp_cp_xindir != NULL);
2508 /* Use ptmp for backpatching conditional jumps. */
2509 ptmp = NULL;
2511 /* First off, if this is conditional, create a conditional
2512 jump over the rest of it. */
2513 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2514 /* jmp fwds if !condition */
2515 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
2516 ptmp = p; /* fill in this bit later */
2517 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2520 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2521 *p++ = 0x89;
2522 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2524 /* movl $disp_indir, %edx */
2525 *p++ = 0xBA;
2526 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
2527 /* jmp *%edx */
2528 *p++ = 0xFF;
2529 *p++ = 0xE2;
2531 /* Fix up the conditional jump, if there was one. */
2532 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2533 Int delta = p - ptmp;
2534 vassert(delta > 0 && delta < 40);
2535 *ptmp = toUChar(delta-1);
2537 goto done;
2540 case Xin_XAssisted: {
2541 /* Use ptmp for backpatching conditional jumps. */
2542 ptmp = NULL;
2544 /* First off, if this is conditional, create a conditional
2545 jump over the rest of it. */
2546 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2547 /* jmp fwds if !condition */
2548 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
2549 ptmp = p; /* fill in this bit later */
2550 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2553 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2554 *p++ = 0x89;
2555 p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
2556 /* movl $magic_number, %ebp. */
2557 UInt trcval = 0;
2558 switch (i->Xin.XAssisted.jk) {
2559 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2560 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2561 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
2562 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break;
2563 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break;
2564 case Ijk_Sys_int145: trcval = VEX_TRC_JMP_SYS_INT145; break;
2565 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
2566 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
2567 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2568 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2569 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2570 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
2571 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
2572 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2573 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2574 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2575 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2576 /* We don't expect to see the following being assisted. */
2577 case Ijk_Ret:
2578 case Ijk_Call:
2579 /* fallthrough */
2580 default:
2581 ppIRJumpKind(i->Xin.XAssisted.jk);
2582 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
2584 vassert(trcval != 0);
2585 *p++ = 0xBD;
2586 p = emit32(p, trcval);
2588 /* movl $disp_indir, %edx */
2589 *p++ = 0xBA;
2590 p = emit32(p, (UInt)(Addr)disp_cp_xassisted);
2591 /* jmp *%edx */
2592 *p++ = 0xFF;
2593 *p++ = 0xE2;
2595 /* Fix up the conditional jump, if there was one. */
2596 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2597 Int delta = p - ptmp;
2598 vassert(delta > 0 && delta < 40);
2599 *ptmp = toUChar(delta-1);
2601 goto done;
2604 case Xin_CMov32:
2605 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
2607 /* This generates cmov, which is illegal on P54/P55, so the
following sequence is disabled; the alternative version further
below is used instead. */
2608 /*
2609 *p++ = 0x0F;
2610 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
2611 if (i->Xin.CMov32.src->tag == Xrm_Reg) {
2612 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
2613 goto done;
2615 if (i->Xin.CMov32.src->tag == Xrm_Mem) {
2616 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
2617 goto done;
*/
2621 /* Alternative version which works on any x86 variant. */
2622 /* jmp fwds if !condition */
2623 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
2624 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2625 ptmp = p;
2627 switch (i->Xin.CMov32.src->tag) {
2628 case Xrm_Reg:
2629 /* Big sigh. This is movl E -> G ... */
2630 *p++ = 0x89;
2631 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
2632 i->Xin.CMov32.dst);
2634 break;
2635 case Xrm_Mem:
2636 /* ... whereas this is movl G -> E. That's why the args
2637 to doAMode_R appear to be the wrong way round in the
2638 Xrm_Reg case. */
2639 *p++ = 0x8B;
2640 p = doAMode_M(p, i->Xin.CMov32.dst,
2641 i->Xin.CMov32.src->Xrm.Mem.am);
2642 break;
2643 default:
2644 goto bad;
2646 /* Fill in the jump offset. */
2647 *(ptmp-1) = toUChar(p - ptmp);
2648 goto done;
2650 break;
2652 case Xin_LoadEX:
2653 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2654 /* movzbl */
2655 *p++ = 0x0F;
2656 *p++ = 0xB6;
2657 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2658 goto done;
2660 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2661 /* movzwl */
2662 *p++ = 0x0F;
2663 *p++ = 0xB7;
2664 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2665 goto done;
2667 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2668 /* movsbl */
2669 *p++ = 0x0F;
2670 *p++ = 0xBE;
2671 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2672 goto done;
2674 break;
2676 case Xin_Set32:
2677 /* Make the destination register be 1 or 0, depending on whether
2678 the relevant condition holds. We have to dodge and weave
2679 when the destination is %esi or %edi as we cannot directly
2680 emit the native 'setb %reg' for those. Further complication:
2681 the top 24 bits of the destination should be forced to zero,
2682 but doing 'xor %r,%r' kills the flag(s) we are about to read.
2683 Sigh. So start off by moving $0 into the dest. */
2685 /* Do we need to swap in %eax? */
2686 if (iregEnc(i->Xin.Set32.dst) >= 4) {
2687 /* xchg %eax, %dst */
2688 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2689 /* movl $0, %eax */
2690 *p++ =toUChar(0xB8 + iregEnc(hregX86_EAX()));
2691 p = emit32(p, 0);
2692 /* setb lo8(%eax) */
2693 *p++ = 0x0F;
2694 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2695 p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
2696 /* xchg %eax, %dst */
2697 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2698 } else {
2699 /* movl $0, %dst */
2700 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
2701 p = emit32(p, 0);
2702 /* setb lo8(%dst) */
2703 *p++ = 0x0F;
2704 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2705 p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
2707 goto done;
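/* Example of the %esi/%edi dodge (hypothetical operands): for
   dst=%esi and cond==Xcc_Z this produces 96 (xchg %eax,%esi),
   B8 00 00 00 00, 0F 94 C0 (setz %al), 96 -- leaving the 0/1 result
   zero-extended in %esi with %eax restored. */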
2709 case Xin_Bsfr32:
2710 *p++ = 0x0F;
2711 if (i->Xin.Bsfr32.isFwds) {
2712 *p++ = 0xBC;
2713 } else {
2714 *p++ = 0xBD;
2716 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2717 goto done;
2719 case Xin_MFence:
2720 /* see comment in hdefs.h re this insn */
2721 if (0) vex_printf("EMIT FENCE\n");
2722 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2723 |VEX_HWCAPS_X86_SSE2)) {
2724 /* mfence */
2725 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2726 goto done;
2728 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
2729 /* sfence */
2730 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2731 /* lock addl $0,0(%esp) */
2732 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2733 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2734 goto done;
2736 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2737 /* lock addl $0,0(%esp) */
2738 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2739 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2740 goto done;
2742 vpanic("emit_X86Instr:mfence:hwcaps");
2743 /*NOTREACHED*/
2744 break;
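/* The no-SSE fallback above, "lock addl $0,0(%esp)"
   (F0 83 44 24 00 00), is the usual idiom for a full memory barrier
   on processors that predate mfence/sfence. */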
2746 case Xin_ACAS:
2747 /* lock */
2748 *p++ = 0xF0;
2749 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
2750 in %ebx. The new-value register is hardwired to be %ebx
2751 since letting it be any integer register gives the problem
2752 that %sil and %dil are unaddressable on x86 and hence we
2753 would have to resort to the same kind of trickery as with
2754 byte-sized Xin.Store, just below. Given that this isn't
2755 performance critical, it is simpler just to force the
2756 register operand to %ebx (could equally be %ecx or %edx).
2757 (Although %ebx is more consistent with cmpxchg8b.) */
2758 if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2759 *p++ = 0x0F;
2760 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2761 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2762 goto done;
2764 case Xin_DACAS:
2765 /* lock */
2766 *p++ = 0xF0;
2767 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
2768 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
2769 aren't encoded in the insn. */
2770 *p++ = 0x0F;
2771 *p++ = 0xC7;
2772 p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
2773 goto done;
2775 case Xin_Store:
2776 if (i->Xin.Store.sz == 2) {
2777 /* This case, at least, is simple, given that we can
2778 reference the low 16 bits of any integer register. */
2779 *p++ = 0x66;
2780 *p++ = 0x89;
2781 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2782 goto done;
2785 if (i->Xin.Store.sz == 1) {
2786 /* We have to do complex dodging and weaving if src is not
2787 the low 8 bits of %eax/%ebx/%ecx/%edx. */
2788 if (iregEnc(i->Xin.Store.src) < 4) {
2789 /* we're OK, can do it directly */
2790 *p++ = 0x88;
2791 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2792 goto done;
2793 } else {
2794 /* Bleh. This means the source is %edi or %esi. Since
2795 the address mode can only mention three registers, at
2796 least one of %eax/%ebx/%ecx/%edx must be available to
2797 temporarily swap the source into, so the store can
2798 happen. So we have to look at the regs mentioned
2799 in the amode. */
2800 HReg swap = INVALID_HREG;
2801 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
2802 ecx = hregX86_ECX(), edx = hregX86_EDX();
2803 HRegUsage u;
2804 initHRegUsage(&u);
2805 addRegUsage_X86AMode(&u, i->Xin.Store.dst);
2806 /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
2807 else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
2808 else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
2809 else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
2810 vassert(! hregIsInvalid(swap));
2811 /* xchgl %source, %swap. Could do better if swap is %eax. */
2812 *p++ = 0x87;
2813 p = doAMode_R(p, i->Xin.Store.src, swap);
2814 /* movb lo8{%swap}, (dst) */
2815 *p++ = 0x88;
2816 p = doAMode_M(p, swap, i->Xin.Store.dst);
2817 /* xchgl %source, %swap. Could do better if swap is %eax. */
2818 *p++ = 0x87;
2819 p = doAMode_R(p, i->Xin.Store.src, swap);
2820 goto done;
2822 } /* if (i->Xin.Store.sz == 1) */
2823 break;
2825 case Xin_FpUnary:
2826 /* gop %src, %dst
2827 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2829 p = do_ffree_st7(p);
2830 p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
2831 p = do_fop1_st(p, i->Xin.FpUnary.op);
2832 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
2833 goto done;
2835 case Xin_FpBinary:
2836 if (i->Xin.FpBinary.op == Xfp_YL2X
2837 || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2838 /* Have to do this specially. */
2839 /* ffree %st7 ; fld %st(srcL) ;
2840 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2841 p = do_ffree_st7(p);
2842 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2843 p = do_ffree_st7(p);
2844 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2845 *p++ = 0xD9;
2846 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2847 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2848 goto done;
2850 if (i->Xin.FpBinary.op == Xfp_ATAN) {
2851 /* Have to do this specially. */
2852 /* ffree %st7 ; fld %st(srcL) ;
2853 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2854 p = do_ffree_st7(p);
2855 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2856 p = do_ffree_st7(p);
2857 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2858 *p++ = 0xD9; *p++ = 0xF3;
2859 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2860 goto done;
2862 if (i->Xin.FpBinary.op == Xfp_PREM
2863 || i->Xin.FpBinary.op == Xfp_PREM1
2864 || i->Xin.FpBinary.op == Xfp_SCALE) {
2865 /* Have to do this specially. */
2866 /* ffree %st7 ; fld %st(srcR) ;
2867 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2868 fincstp ; ffree %st7 */
2869 p = do_ffree_st7(p);
2870 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
2871 p = do_ffree_st7(p);
2872 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
2873 *p++ = 0xD9;
2874 switch (i->Xin.FpBinary.op) {
2875 case Xfp_PREM: *p++ = 0xF8; break;
2876 case Xfp_PREM1: *p++ = 0xF5; break;
2877 case Xfp_SCALE: *p++ = 0xFD; break;
2878 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2880 p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
2881 *p++ = 0xD9; *p++ = 0xF7;
2882 p = do_ffree_st7(p);
2883 goto done;
2885 /* General case */
2886 /* gop %srcL, %srcR, %dst
2887 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2889 p = do_ffree_st7(p);
2890 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2891 p = do_fop2_st(p, i->Xin.FpBinary.op,
2892 1+fregEnc(i->Xin.FpBinary.srcR));
2893 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2894 goto done;
2896 case Xin_FpLdSt:
2897 if (i->Xin.FpLdSt.isLoad) {
2898 /* Load from memory into %fakeN.
2899 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2901 p = do_ffree_st7(p);
2902 switch (i->Xin.FpLdSt.sz) {
2903 case 4:
2904 *p++ = 0xD9;
2905 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2906 break;
2907 case 8:
2908 *p++ = 0xDD;
2909 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2910 break;
2911 case 10:
2912 *p++ = 0xDB;
2913 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
2914 break;
2915 default:
2916 vpanic("emitX86Instr(FpLdSt,load)");
2918 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
2919 goto done;
2920 } else {
2921 /* Store from %fakeN into memory.
2922 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2924 p = do_ffree_st7(p);
2925 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
2926 switch (i->Xin.FpLdSt.sz) {
2927 case 4:
2928 *p++ = 0xD9;
2929 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2930 break;
2931 case 8:
2932 *p++ = 0xDD;
2933 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2934 break;
2935 case 10:
2936 *p++ = 0xDB;
2937 p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
2938 break;
2939 default:
2940 vpanic("emitX86Instr(FpLdSt,store)");
2942 goto done;
2944 break;
2946 case Xin_FpLdStI:
2947 if (i->Xin.FpLdStI.isLoad) {
2948 /* Load from memory into %fakeN, converting from an int.
2949 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2951 switch (i->Xin.FpLdStI.sz) {
2952 case 8: opc = 0xDF; subopc_imm = 5; break;
2953 case 4: opc = 0xDB; subopc_imm = 0; break;
2954 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
2955 default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2957 p = do_ffree_st7(p);
2958 *p++ = toUChar(opc);
2959 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2960 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
2961 goto done;
2962 } else {
2963 /* Store from %fakeN into memory, converting to an int.
2964 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2966 switch (i->Xin.FpLdStI.sz) {
2967 case 8: opc = 0xDF; subopc_imm = 7; break;
2968 case 4: opc = 0xDB; subopc_imm = 3; break;
2969 case 2: opc = 0xDF; subopc_imm = 3; break;
2970 default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2972 p = do_ffree_st7(p);
2973 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
2974 *p++ = toUChar(opc);
2975 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2976 goto done;
2978 break;
2980 case Xin_Fp64to32:
2981 /* ffree %st7 ; fld %st(src) */
2982 p = do_ffree_st7(p);
2983 p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
2984 /* subl $4, %esp */
2985 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2986 /* fstps (%esp) */
2987 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2988 /* flds (%esp) */
2989 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2990 /* addl $4, %esp */
2991 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2992 /* fstp %st(1+dst) */
2993 p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
2994 goto done;
2996 case Xin_FpCMov:
2997 /* jmp fwds if !condition */
2998 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2999 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3000 ptmp = p;
3002 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
3003 p = do_ffree_st7(p);
3004 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
3005 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));
3007 /* Fill in the jump offset. */
3008 *(ptmp-1) = toUChar(p - ptmp);
3009 goto done;
3011 case Xin_FpLdCW:
3012 *p++ = 0xD9;
3013 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);
3014 goto done;
3016 case Xin_FpStSW_AX:
3017 /* note, this emits fnstsw %ax, not fstsw %ax */
3018 *p++ = 0xDF;
3019 *p++ = 0xE0;
3020 goto done;
3022 case Xin_FpCmp:
3023 /* gcmp %fL, %fR, %dst
3024 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3025 fnstsw %ax ; movl %eax, %dst
3027 /* ffree %st7 */
3028 p = do_ffree_st7(p);
3029 /* fpush %fL */
3030 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
3031 /* fucomp %(fR+1) */
3032 *p++ = 0xDD;
3033 *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
3034 /* fnstsw %ax */
3035 *p++ = 0xDF;
3036 *p++ = 0xE0;
3037 /* movl %eax, %dst */
3038 *p++ = 0x89;
3039 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
3040 goto done;
3042 case Xin_SseConst: {
3043 UShort con = i->Xin.SseConst.con;
3044 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
3045 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
3046 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
3047 p = push_word_from_tags(p, toUShort(con & 0xF));
3048 /* movl (%esp), %xmm-dst */
3049 *p++ = 0x0F;
3050 *p++ = 0x10;
3051 *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
3052 *p++ = 0x24;
3053 /* addl $16, %esp */
3054 *p++ = 0x83;
3055 *p++ = 0xC4;
3056 *p++ = 0x10;
3057 goto done;
3060 case Xin_SseLdSt:
3061 *p++ = 0x0F;
3062 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
3063 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);
3064 goto done;
3066 case Xin_SseLdzLO:
3067 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
3068 /* movs[sd] amode, %xmm-dst */
3069 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3070 *p++ = 0x0F;
3071 *p++ = 0x10;
3072 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
3073 goto done;
3075 case Xin_Sse32Fx4:
3076 xtra = 0;
3077 *p++ = 0x0F;
3078 switch (i->Xin.Sse32Fx4.op) {
3079 case Xsse_ADDF: *p++ = 0x58; break;
3080 case Xsse_DIVF: *p++ = 0x5E; break;
3081 case Xsse_MAXF: *p++ = 0x5F; break;
3082 case Xsse_MINF: *p++ = 0x5D; break;
3083 case Xsse_MULF: *p++ = 0x59; break;
3084 case Xsse_RCPF: *p++ = 0x53; break;
3085 case Xsse_RSQRTF: *p++ = 0x52; break;
3086 case Xsse_SQRTF: *p++ = 0x51; break;
3087 case Xsse_SUBF: *p++ = 0x5C; break;
3088 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3089 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3090 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3091 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3092 default: goto bad;
3094 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
3095 vregEnc(i->Xin.Sse32Fx4.src) );
3096 if (xtra & 0x100)
3097 *p++ = toUChar(xtra & 0xFF);
3098 goto done;
3100 case Xin_Sse64Fx2:
3101 xtra = 0;
3102 *p++ = 0x66;
3103 *p++ = 0x0F;
3104 switch (i->Xin.Sse64Fx2.op) {
3105 case Xsse_ADDF: *p++ = 0x58; break;
3106 case Xsse_DIVF: *p++ = 0x5E; break;
3107 case Xsse_MAXF: *p++ = 0x5F; break;
3108 case Xsse_MINF: *p++ = 0x5D; break;
3109 case Xsse_MULF: *p++ = 0x59; break;
3110 case Xsse_RCPF: *p++ = 0x53; break;
3111 case Xsse_RSQRTF: *p++ = 0x52; break;
3112 case Xsse_SQRTF: *p++ = 0x51; break;
3113 case Xsse_SUBF: *p++ = 0x5C; break;
3114 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3115 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3116 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3117 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3118 default: goto bad;
3120 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
3121 vregEnc(i->Xin.Sse64Fx2.src) );
3122 if (xtra & 0x100)
3123 *p++ = toUChar(xtra & 0xFF);
3124 goto done;
3126 case Xin_Sse32FLo:
3127 xtra = 0;
3128 *p++ = 0xF3;
3129 *p++ = 0x0F;
3130 switch (i->Xin.Sse32FLo.op) {
3131 case Xsse_ADDF: *p++ = 0x58; break;
3132 case Xsse_DIVF: *p++ = 0x5E; break;
3133 case Xsse_MAXF: *p++ = 0x5F; break;
3134 case Xsse_MINF: *p++ = 0x5D; break;
3135 case Xsse_MULF: *p++ = 0x59; break;
3136 case Xsse_RCPF: *p++ = 0x53; break;
3137 case Xsse_RSQRTF: *p++ = 0x52; break;
3138 case Xsse_SQRTF: *p++ = 0x51; break;
3139 case Xsse_SUBF: *p++ = 0x5C; break;
3140 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3141 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3142 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3143 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3144 default: goto bad;
3146 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
3147 vregEnc(i->Xin.Sse32FLo.src) );
3148 if (xtra & 0x100)
3149 *p++ = toUChar(xtra & 0xFF);
3150 goto done;
3152 case Xin_Sse64FLo:
3153 xtra = 0;
3154 *p++ = 0xF2;
3155 *p++ = 0x0F;
3156 switch (i->Xin.Sse64FLo.op) {
3157 case Xsse_ADDF: *p++ = 0x58; break;
3158 case Xsse_DIVF: *p++ = 0x5E; break;
3159 case Xsse_MAXF: *p++ = 0x5F; break;
3160 case Xsse_MINF: *p++ = 0x5D; break;
3161 case Xsse_MULF: *p++ = 0x59; break;
3162 case Xsse_RCPF: *p++ = 0x53; break;
3163 case Xsse_RSQRTF: *p++ = 0x52; break;
3164 case Xsse_SQRTF: *p++ = 0x51; break;
3165 case Xsse_SUBF: *p++ = 0x5C; break;
3166 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3167 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3168 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3169 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3170 default: goto bad;
3172 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
3173 vregEnc(i->Xin.Sse64FLo.src) );
3174 if (xtra & 0x100)
3175 *p++ = toUChar(xtra & 0xFF);
3176 goto done;
3178 case Xin_SseReRg:
3179 # define XX(_n) *p++ = (_n)
3180 switch (i->Xin.SseReRg.op) {
3181 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break;
3182 case Xsse_OR: XX(0x0F); XX(0x56); break;
3183 case Xsse_XOR: XX(0x0F); XX(0x57); break;
3184 case Xsse_AND: XX(0x0F); XX(0x54); break;
3185 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break;
3186 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break;
3187 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break;
3188 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break;
3189 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break;
3190 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break;
3191 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break;
3192 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break;
3193 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break;
3194 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break;
3195 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break;
3196 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break;
3197 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break;
3198 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break;
3199 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break;
3200 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break;
3201 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break;
3202 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3203 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3204 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break;
3205 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break;
3206 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break;
3207 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break;
3208 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3209 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3210 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break;
3211 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break;
3212 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break;
3213 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break;
3214 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break;
3215 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break;
3216 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break;
3217 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break;
3218 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break;
3219 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break;
3220 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break;
3221 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break;
3222 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break;
3223 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break;
3224 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break;
3225 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break;
3226 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break;
3227 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break;
3228 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break;
3229 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break;
3230 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break;
3231 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break;
3232 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break;
3233 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break;
3234 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break;
3235 default: goto bad;
3237 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
3238 vregEnc(i->Xin.SseReRg.src) );
3239 # undef XX
3240 goto done;
3242 case Xin_SseCMov:
3243 /* jmp fwds if !condition */
3244 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3245 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3246 ptmp = p;
3248 /* movaps %src, %dst */
3249 *p++ = 0x0F;
3250 *p++ = 0x28;
3251 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
3252 vregEnc(i->Xin.SseCMov.src) );
3254 /* Fill in the jump offset. */
3255 *(ptmp-1) = toUChar(p - ptmp);
3256 goto done;
3258 case Xin_SseShuf:
3259 *p++ = 0x66;
3260 *p++ = 0x0F;
3261 *p++ = 0x70;
3262 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
3263 vregEnc(i->Xin.SseShuf.src) );
3264 *p++ = (UChar)(i->Xin.SseShuf.order);
3265 goto done;
3267 case Xin_EvCheck: {
3268 /* We generate:
3269 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
3270 (2 bytes) jns nofail expected taken
3271 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
3272 nofail:
3274 /* This is heavily asserted re instruction lengths. It needs to
3275 be. If we get given unexpected forms of .amCounter or
3276 .amFailAddr -- basically, anything that's not of the form
3277 uimm7(%ebp) -- they are likely to fail. */
3278 /* Note also that after the decl we must be very careful not to
3279 read the carry flag, else we get a partial flags stall.
3280 js/jns avoids that, though. */
3281 UChar* p0 = p;
3282 /* --- decl 4(%ebp) --- */
3283 /* "1" because there's no register in this encoding;
3284 instead the register field is used as a sub opcode. The
3285 encoding for "decl r/m32" is FF /1, hence the "1". */
3286 *p++ = 0xFF;
3287 p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
3288 vassert(p - p0 == 3);
3289 /* --- jns nofail --- */
3290 *p++ = 0x79;
3291 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3292 vassert(p - p0 == 5);
3293 /* --- jmp* 0(%ebp) --- */
3294 /* The encoding is FF /4. */
3295 *p++ = 0xFF;
3296 p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
3297 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3298 /* And crosscheck .. */
3299 vassert(evCheckSzB_X86() == 8);
3300 goto done;
3303 case Xin_ProfInc: {
3304 /* We generate addl $1,NotKnownYet
3305 adcl $0,NotKnownYet+4
3306 in the expectation that a later call to LibVEX_patchProfCtr
3307 will be used to fill in the immediate fields once the right
3308 value is known.
3309 83 05 00 00 00 00 01
3310 83 15 00 00 00 00 00
3312 *p++ = 0x83; *p++ = 0x05;
3313 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3314 *p++ = 0x01;
3315 *p++ = 0x83; *p++ = 0x15;
3316 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3317 *p++ = 0x00;
3318 /* Tell the caller .. */
3319 vassert(!(*is_profInc));
3320 *is_profInc = True;
3321 goto done;
3324 default:
3325 goto bad;
3328 bad:
3329 ppX86Instr(i, mode64);
3330 vpanic("emit_X86Instr");
3331 /*NOTREACHED*/
3333 done:
3334 vassert(p - &buf[0] <= 32);
3335 return p - &buf[0];
3339 /* How big is an event check? See case for Xin_EvCheck in
3340 emit_X86Instr just above. That crosschecks what this returns, so
3341 we can tell if we're inconsistent. */
3342 Int evCheckSzB_X86 (void)
3344 return 8;
3348 /* NB: what goes on here has to be very closely coordinated with the
3349 emitInstr case for XDirect, above. */
3350 VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
3351 void* place_to_chain,
3352 const void* disp_cp_chain_me_EXPECTED,
3353 const void* place_to_jump_to )
3355 vassert(endness_host == VexEndnessLE);
3357 /* What we're expecting to see is:
3358 movl $disp_cp_chain_me_EXPECTED, %edx
3359 call *%edx
3361 BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3362 FF D2
3364 UChar* p = (UChar*)place_to_chain;
3365 vassert(p[0] == 0xBA);
3366 vassert(read_misaligned_UInt_LE(&p[1])
3367 == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
3368 vassert(p[5] == 0xFF);
3369 vassert(p[6] == 0xD2);
3370 /* And what we want to change it to is:
3371 jmp disp32 where disp32 is relative to the next insn
3372 ud2;
3374 E9 <4 bytes == disp32>
3375 0F 0B
3376 The replacement has the same length as the original.
3378 /* This is the delta we need to put into a JMP d32 insn. It's
3379 relative to the start of the next insn, hence the -5. */
3380 Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5;
3382 /* And make the modifications. */
3383 p[0] = 0xE9;
3384 write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
3385 p[5] = 0x0F; p[6] = 0x0B;
3386 /* sanity check on the delta -- top 32 bits are all 0 or all 1 */
3387 delta >>= 32;
3388 vassert(delta == 0LL || delta == -1LL);
3389 VexInvalRange vir = { (HWord)place_to_chain, 7 };
3390 return vir;
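/* For example, if place_to_jump_to happened to equal place_to_chain,
   delta would be -5 and the stored disp32 would be 0xFFFFFFFB.  The
   top-32-bits check above is just a sanity check that the pointer
   difference is sane for a 32-bit host. */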
3394 /* NB: what goes on here has to be very closely coordinated with the
3395 emitInstr case for XDirect, above. */
3396 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
3397 void* place_to_unchain,
3398 const void* place_to_jump_to_EXPECTED,
3399 const void* disp_cp_chain_me )
3401 vassert(endness_host == VexEndnessLE);
3403 /* What we're expecting to see is:
3404 jmp d32
3405 ud2;
3407 E9 <4 bytes == disp32>
3408 0F 0B
3410 UChar* p = (UChar*)place_to_unchain;
3411 Bool valid = False;
3412 if (p[0] == 0xE9
3413 && p[5] == 0x0F && p[6] == 0x0B) {
3414 /* Check the offset is right. */
3415 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
3416 if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
3417 valid = True;
3418 if (0)
3419 vex_printf("QQQ unchainXDirect_X86: found valid\n");
3422 vassert(valid);
3423 /* And what we want to change it to is:
3424 movl $disp_cp_chain_me, %edx
3425 call *%edx
3427 BA <4 bytes value == disp_cp_chain_me>
3428 FF D2
3429 So it's the same length (convenient, huh).
3431 p[0] = 0xBA;
3432 write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
3433 p[5] = 0xFF;
3434 p[6] = 0xD2;
3435 VexInvalRange vir = { (HWord)place_to_unchain, 7 };
3436 return vir;
3440 /* Patch the counter address into a profile inc point, as previously
3441 created by the Xin_ProfInc case for emit_X86Instr. */
3442 VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
3443 void* place_to_patch,
3444 const ULong* location_of_counter )
3446 vassert(endness_host == VexEndnessLE);
3447 vassert(sizeof(ULong*) == 4);
3448 UChar* p = (UChar*)place_to_patch;
3449 vassert(p[0] == 0x83);
3450 vassert(p[1] == 0x05);
3451 vassert(p[2] == 0x00);
3452 vassert(p[3] == 0x00);
3453 vassert(p[4] == 0x00);
3454 vassert(p[5] == 0x00);
3455 vassert(p[6] == 0x01);
3456 vassert(p[7] == 0x83);
3457 vassert(p[8] == 0x15);
3458 vassert(p[9] == 0x00);
3459 vassert(p[10] == 0x00);
3460 vassert(p[11] == 0x00);
3461 vassert(p[12] == 0x00);
3462 vassert(p[13] == 0x00);
3463 UInt imm32 = (UInt)(Addr)location_of_counter;
3464 p[2] = imm32 & 0xFF; imm32 >>= 8;
3465 p[3] = imm32 & 0xFF; imm32 >>= 8;
3466 p[4] = imm32 & 0xFF; imm32 >>= 8;
3467 p[5] = imm32 & 0xFF;
3468 imm32 = 4 + (UInt)(Addr)location_of_counter;
3469 p[9] = imm32 & 0xFF; imm32 >>= 8;
3470 p[10] = imm32 & 0xFF; imm32 >>= 8;
3471 p[11] = imm32 & 0xFF; imm32 >>= 8;
3472 p[12] = imm32 & 0xFF;
3473 VexInvalRange vir = { (HWord)place_to_patch, 14 };
3474 return vir;
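/* The two imm32 patches land at byte offsets 2..5 and 9..12 of the
   template emitted by Xin_ProfInc: the addl targets the low 32 bits
   of the 64-bit counter, and the adcl targets the high 32 bits at
   location_of_counter + 4, carrying between them. */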
3478 /*---------------------------------------------------------------*/
3479 /*--- end host_x86_defs.c ---*/
3480 /*---------------------------------------------------------------*/