Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / host_x86_defs.c
blob5497efdf7a7fdf0821eabb09b90459db79ebc521
2 /*---------------------------------------------------------------*/
3 /*--- begin host_x86_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex.h"
36 #include "libvex_trc_values.h"
38 #include "main_util.h"
39 #include "host_generic_regs.h"
40 #include "host_x86_defs.h"
43 /* --------- Registers. --------- */
45 const RRegUniverse* getRRegUniverse_X86 ( void )
47 /* The real-register universe is a big constant, so we just want to
48 initialise it once. */
49 static RRegUniverse rRegUniverse_X86;
50 static Bool rRegUniverse_X86_initted = False;
52 /* Handy shorthand, nothing more */
53 RRegUniverse* ru = &rRegUniverse_X86;
55 /* This isn't thread-safe. Sigh. */
56 if (LIKELY(rRegUniverse_X86_initted))
57 return ru;
59 RRegUniverse__init(ru);
61 /* Add the registers. The initial segment of this array must be
62 those available for allocation by reg-alloc, and those that
63 follow are not available for allocation. */
64 ru->allocable_start[HRcInt32] = ru->size;
65 ru->regs[ru->size++] = hregX86_EBX();
66 ru->regs[ru->size++] = hregX86_ESI();
67 ru->regs[ru->size++] = hregX86_EDI();
68 ru->regs[ru->size++] = hregX86_EAX();
69 ru->regs[ru->size++] = hregX86_ECX();
70 ru->regs[ru->size++] = hregX86_EDX();
71 ru->allocable_end[HRcInt32] = ru->size - 1;
73 ru->allocable_start[HRcFlt64] = ru->size;
74 ru->regs[ru->size++] = hregX86_FAKE0();
75 ru->regs[ru->size++] = hregX86_FAKE1();
76 ru->regs[ru->size++] = hregX86_FAKE2();
77 ru->regs[ru->size++] = hregX86_FAKE3();
78 ru->regs[ru->size++] = hregX86_FAKE4();
79 ru->regs[ru->size++] = hregX86_FAKE5();
80 ru->allocable_end[HRcFlt64] = ru->size - 1;
82 ru->allocable_start[HRcVec128] = ru->size;
83 ru->regs[ru->size++] = hregX86_XMM0();
84 ru->regs[ru->size++] = hregX86_XMM1();
85 ru->regs[ru->size++] = hregX86_XMM2();
86 ru->regs[ru->size++] = hregX86_XMM3();
87 ru->regs[ru->size++] = hregX86_XMM4();
88 ru->regs[ru->size++] = hregX86_XMM5();
89 ru->regs[ru->size++] = hregX86_XMM6();
90 ru->regs[ru->size++] = hregX86_XMM7();
91 ru->allocable_end[HRcVec128] = ru->size - 1;
92 ru->allocable = ru->size;
94 /* And other regs, not available to the allocator. */
95 ru->regs[ru->size++] = hregX86_ESP();
96 ru->regs[ru->size++] = hregX86_EBP();
98 rRegUniverse_X86_initted = True;
100 RRegUniverse__check_is_sane(ru);
101 return ru;
105 UInt ppHRegX86 ( HReg reg )
107 Int r;
108 static const HChar* ireg32_names[8]
109 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
110 /* Be generic for all virtual regs. */
111 if (hregIsVirtual(reg)) {
112 return ppHReg(reg);
114 /* But specific for real regs. */
115 switch (hregClass(reg)) {
116 case HRcInt32:
117 r = hregEncoding(reg);
118 vassert(r >= 0 && r < 8);
119 return vex_printf("%s", ireg32_names[r]);
120 case HRcFlt64:
121 r = hregEncoding(reg);
122 vassert(r >= 0 && r < 6);
123 return vex_printf("%%fake%d", r);
124 case HRcVec128:
125 r = hregEncoding(reg);
126 vassert(r >= 0 && r < 8);
127 return vex_printf("%%xmm%d", r);
128 default:
129 vpanic("ppHRegX86");
134 /* --------- Condition codes, Intel encoding. --------- */
136 const HChar* showX86CondCode ( X86CondCode cond )
138 switch (cond) {
139 case Xcc_O: return "o";
140 case Xcc_NO: return "no";
141 case Xcc_B: return "b";
142 case Xcc_NB: return "nb";
143 case Xcc_Z: return "z";
144 case Xcc_NZ: return "nz";
145 case Xcc_BE: return "be";
146 case Xcc_NBE: return "nbe";
147 case Xcc_S: return "s";
148 case Xcc_NS: return "ns";
149 case Xcc_P: return "p";
150 case Xcc_NP: return "np";
151 case Xcc_L: return "l";
152 case Xcc_NL: return "nl";
153 case Xcc_LE: return "le";
154 case Xcc_NLE: return "nle";
155 case Xcc_ALWAYS: return "ALWAYS";
156 default: vpanic("ppX86CondCode");
161 /* --------- X86AMode: memory address expressions. --------- */
163 X86AMode* X86AMode_IR ( UInt imm32, HReg reg ) {
164 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
165 am->tag = Xam_IR;
166 am->Xam.IR.imm = imm32;
167 am->Xam.IR.reg = reg;
168 return am;
170 X86AMode* X86AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
171 X86AMode* am = LibVEX_Alloc_inline(sizeof(X86AMode));
172 am->tag = Xam_IRRS;
173 am->Xam.IRRS.imm = imm32;
174 am->Xam.IRRS.base = base;
175 am->Xam.IRRS.index = indEx;
176 am->Xam.IRRS.shift = shift;
177 vassert(shift >= 0 && shift <= 3);
178 return am;
181 X86AMode* dopyX86AMode ( X86AMode* am ) {
182 switch (am->tag) {
183 case Xam_IR:
184 return X86AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
185 case Xam_IRRS:
186 return X86AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
187 am->Xam.IRRS.index, am->Xam.IRRS.shift );
188 default:
189 vpanic("dopyX86AMode");
193 void ppX86AMode ( X86AMode* am ) {
194 switch (am->tag) {
195 case Xam_IR:
196 if (am->Xam.IR.imm == 0)
197 vex_printf("(");
198 else
199 vex_printf("0x%x(", am->Xam.IR.imm);
200 ppHRegX86(am->Xam.IR.reg);
201 vex_printf(")");
202 return;
203 case Xam_IRRS:
204 vex_printf("0x%x(", am->Xam.IRRS.imm);
205 ppHRegX86(am->Xam.IRRS.base);
206 vex_printf(",");
207 ppHRegX86(am->Xam.IRRS.index);
208 vex_printf(",%d)", 1 << am->Xam.IRRS.shift);
209 return;
210 default:
211 vpanic("ppX86AMode");
215 static void addRegUsage_X86AMode ( HRegUsage* u, X86AMode* am ) {
216 switch (am->tag) {
217 case Xam_IR:
218 addHRegUse(u, HRmRead, am->Xam.IR.reg);
219 return;
220 case Xam_IRRS:
221 addHRegUse(u, HRmRead, am->Xam.IRRS.base);
222 addHRegUse(u, HRmRead, am->Xam.IRRS.index);
223 return;
224 default:
225 vpanic("addRegUsage_X86AMode");
229 static void mapRegs_X86AMode ( HRegRemap* m, X86AMode* am ) {
230 switch (am->tag) {
231 case Xam_IR:
232 am->Xam.IR.reg = lookupHRegRemap(m, am->Xam.IR.reg);
233 return;
234 case Xam_IRRS:
235 am->Xam.IRRS.base = lookupHRegRemap(m, am->Xam.IRRS.base);
236 am->Xam.IRRS.index = lookupHRegRemap(m, am->Xam.IRRS.index);
237 return;
238 default:
239 vpanic("mapRegs_X86AMode");
243 /* --------- Operand, which can be reg, immediate or memory. --------- */
245 X86RMI* X86RMI_Imm ( UInt imm32 ) {
246 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
247 op->tag = Xrmi_Imm;
248 op->Xrmi.Imm.imm32 = imm32;
249 return op;
251 X86RMI* X86RMI_Reg ( HReg reg ) {
252 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
253 op->tag = Xrmi_Reg;
254 op->Xrmi.Reg.reg = reg;
255 return op;
257 X86RMI* X86RMI_Mem ( X86AMode* am ) {
258 X86RMI* op = LibVEX_Alloc_inline(sizeof(X86RMI));
259 op->tag = Xrmi_Mem;
260 op->Xrmi.Mem.am = am;
261 return op;
264 void ppX86RMI ( X86RMI* op ) {
265 switch (op->tag) {
266 case Xrmi_Imm:
267 vex_printf("$0x%x", op->Xrmi.Imm.imm32);
268 return;
269 case Xrmi_Reg:
270 ppHRegX86(op->Xrmi.Reg.reg);
271 return;
272 case Xrmi_Mem:
273 ppX86AMode(op->Xrmi.Mem.am);
274 return;
275 default:
276 vpanic("ppX86RMI");
280 /* An X86RMI can only be used in a "read" context (what would it mean
281 to write or modify a literal?) and so we enumerate its registers
282 accordingly. */
283 static void addRegUsage_X86RMI ( HRegUsage* u, X86RMI* op ) {
284 switch (op->tag) {
285 case Xrmi_Imm:
286 return;
287 case Xrmi_Reg:
288 addHRegUse(u, HRmRead, op->Xrmi.Reg.reg);
289 return;
290 case Xrmi_Mem:
291 addRegUsage_X86AMode(u, op->Xrmi.Mem.am);
292 return;
293 default:
294 vpanic("addRegUsage_X86RMI");
298 static void mapRegs_X86RMI ( HRegRemap* m, X86RMI* op ) {
299 switch (op->tag) {
300 case Xrmi_Imm:
301 return;
302 case Xrmi_Reg:
303 op->Xrmi.Reg.reg = lookupHRegRemap(m, op->Xrmi.Reg.reg);
304 return;
305 case Xrmi_Mem:
306 mapRegs_X86AMode(m, op->Xrmi.Mem.am);
307 return;
308 default:
309 vpanic("mapRegs_X86RMI");
314 /* --------- Operand, which can be reg or immediate only. --------- */
316 X86RI* X86RI_Imm ( UInt imm32 ) {
317 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
318 op->tag = Xri_Imm;
319 op->Xri.Imm.imm32 = imm32;
320 return op;
322 X86RI* X86RI_Reg ( HReg reg ) {
323 X86RI* op = LibVEX_Alloc_inline(sizeof(X86RI));
324 op->tag = Xri_Reg;
325 op->Xri.Reg.reg = reg;
326 return op;
329 void ppX86RI ( X86RI* op ) {
330 switch (op->tag) {
331 case Xri_Imm:
332 vex_printf("$0x%x", op->Xri.Imm.imm32);
333 return;
334 case Xri_Reg:
335 ppHRegX86(op->Xri.Reg.reg);
336 return;
337 default:
338 vpanic("ppX86RI");
342 /* An X86RI can only be used in a "read" context (what would it mean
343 to write or modify a literal?) and so we enumerate its registers
344 accordingly. */
345 static void addRegUsage_X86RI ( HRegUsage* u, X86RI* op ) {
346 switch (op->tag) {
347 case Xri_Imm:
348 return;
349 case Xri_Reg:
350 addHRegUse(u, HRmRead, op->Xri.Reg.reg);
351 return;
352 default:
353 vpanic("addRegUsage_X86RI");
357 static void mapRegs_X86RI ( HRegRemap* m, X86RI* op ) {
358 switch (op->tag) {
359 case Xri_Imm:
360 return;
361 case Xri_Reg:
362 op->Xri.Reg.reg = lookupHRegRemap(m, op->Xri.Reg.reg);
363 return;
364 default:
365 vpanic("mapRegs_X86RI");
370 /* --------- Operand, which can be reg or memory only. --------- */
372 X86RM* X86RM_Reg ( HReg reg ) {
373 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
374 op->tag = Xrm_Reg;
375 op->Xrm.Reg.reg = reg;
376 return op;
378 X86RM* X86RM_Mem ( X86AMode* am ) {
379 X86RM* op = LibVEX_Alloc_inline(sizeof(X86RM));
380 op->tag = Xrm_Mem;
381 op->Xrm.Mem.am = am;
382 return op;
385 void ppX86RM ( X86RM* op ) {
386 switch (op->tag) {
387 case Xrm_Mem:
388 ppX86AMode(op->Xrm.Mem.am);
389 return;
390 case Xrm_Reg:
391 ppHRegX86(op->Xrm.Reg.reg);
392 return;
393 default:
394 vpanic("ppX86RM");
398 /* Because an X86RM can be both a source or destination operand, we
399 have to supply a mode -- pertaining to the operand as a whole --
400 indicating how it's being used. */
401 static void addRegUsage_X86RM ( HRegUsage* u, X86RM* op, HRegMode mode ) {
402 switch (op->tag) {
403 case Xrm_Mem:
404 /* Memory is read, written or modified. So we just want to
405 know the regs read by the amode. */
406 addRegUsage_X86AMode(u, op->Xrm.Mem.am);
407 return;
408 case Xrm_Reg:
409 /* reg is read, written or modified. Add it in the
410 appropriate way. */
411 addHRegUse(u, mode, op->Xrm.Reg.reg);
412 return;
413 default:
414 vpanic("addRegUsage_X86RM");
418 static void mapRegs_X86RM ( HRegRemap* m, X86RM* op )
420 switch (op->tag) {
421 case Xrm_Mem:
422 mapRegs_X86AMode(m, op->Xrm.Mem.am);
423 return;
424 case Xrm_Reg:
425 op->Xrm.Reg.reg = lookupHRegRemap(m, op->Xrm.Reg.reg);
426 return;
427 default:
428 vpanic("mapRegs_X86RM");
433 /* --------- Instructions. --------- */
435 const HChar* showX86UnaryOp ( X86UnaryOp op ) {
436 switch (op) {
437 case Xun_NOT: return "not";
438 case Xun_NEG: return "neg";
439 default: vpanic("showX86UnaryOp");
443 const HChar* showX86AluOp ( X86AluOp op ) {
444 switch (op) {
445 case Xalu_MOV: return "mov";
446 case Xalu_CMP: return "cmp";
447 case Xalu_ADD: return "add";
448 case Xalu_SUB: return "sub";
449 case Xalu_ADC: return "adc";
450 case Xalu_SBB: return "sbb";
451 case Xalu_AND: return "and";
452 case Xalu_OR: return "or";
453 case Xalu_XOR: return "xor";
454 case Xalu_MUL: return "mul";
455 default: vpanic("showX86AluOp");
459 const HChar* showX86ShiftOp ( X86ShiftOp op ) {
460 switch (op) {
461 case Xsh_SHL: return "shl";
462 case Xsh_SHR: return "shr";
463 case Xsh_SAR: return "sar";
464 default: vpanic("showX86ShiftOp");
468 const HChar* showX86FpOp ( X86FpOp op ) {
469 switch (op) {
470 case Xfp_ADD: return "add";
471 case Xfp_SUB: return "sub";
472 case Xfp_MUL: return "mul";
473 case Xfp_DIV: return "div";
474 case Xfp_SCALE: return "scale";
475 case Xfp_ATAN: return "atan";
476 case Xfp_YL2X: return "yl2x";
477 case Xfp_YL2XP1: return "yl2xp1";
478 case Xfp_PREM: return "prem";
479 case Xfp_PREM1: return "prem1";
480 case Xfp_SQRT: return "sqrt";
481 case Xfp_ABS: return "abs";
482 case Xfp_NEG: return "chs";
483 case Xfp_MOV: return "mov";
484 case Xfp_SIN: return "sin";
485 case Xfp_COS: return "cos";
486 case Xfp_TAN: return "tan";
487 case Xfp_ROUND: return "round";
488 case Xfp_2XM1: return "2xm1";
489 default: vpanic("showX86FpOp");
493 const HChar* showX86SseOp ( X86SseOp op ) {
494 switch (op) {
495 case Xsse_MOV: return "mov(?!)";
496 case Xsse_ADDF: return "add";
497 case Xsse_SUBF: return "sub";
498 case Xsse_MULF: return "mul";
499 case Xsse_DIVF: return "div";
500 case Xsse_MAXF: return "max";
501 case Xsse_MINF: return "min";
502 case Xsse_CMPEQF: return "cmpFeq";
503 case Xsse_CMPLTF: return "cmpFlt";
504 case Xsse_CMPLEF: return "cmpFle";
505 case Xsse_CMPUNF: return "cmpFun";
506 case Xsse_RCPF: return "rcp";
507 case Xsse_RSQRTF: return "rsqrt";
508 case Xsse_SQRTF: return "sqrt";
509 case Xsse_AND: return "and";
510 case Xsse_OR: return "or";
511 case Xsse_XOR: return "xor";
512 case Xsse_ANDN: return "andn";
513 case Xsse_ADD8: return "paddb";
514 case Xsse_ADD16: return "paddw";
515 case Xsse_ADD32: return "paddd";
516 case Xsse_ADD64: return "paddq";
517 case Xsse_QADD8U: return "paddusb";
518 case Xsse_QADD16U: return "paddusw";
519 case Xsse_QADD8S: return "paddsb";
520 case Xsse_QADD16S: return "paddsw";
521 case Xsse_SUB8: return "psubb";
522 case Xsse_SUB16: return "psubw";
523 case Xsse_SUB32: return "psubd";
524 case Xsse_SUB64: return "psubq";
525 case Xsse_QSUB8U: return "psubusb";
526 case Xsse_QSUB16U: return "psubusw";
527 case Xsse_QSUB8S: return "psubsb";
528 case Xsse_QSUB16S: return "psubsw";
529 case Xsse_MUL16: return "pmullw";
530 case Xsse_MULHI16U: return "pmulhuw";
531 case Xsse_MULHI16S: return "pmulhw";
532 case Xsse_AVG8U: return "pavgb";
533 case Xsse_AVG16U: return "pavgw";
534 case Xsse_MAX16S: return "pmaxw";
535 case Xsse_MAX8U: return "pmaxub";
536 case Xsse_MIN16S: return "pminw";
537 case Xsse_MIN8U: return "pminub";
538 case Xsse_CMPEQ8: return "pcmpeqb";
539 case Xsse_CMPEQ16: return "pcmpeqw";
540 case Xsse_CMPEQ32: return "pcmpeqd";
541 case Xsse_CMPGT8S: return "pcmpgtb";
542 case Xsse_CMPGT16S: return "pcmpgtw";
543 case Xsse_CMPGT32S: return "pcmpgtd";
544 case Xsse_SHL16: return "psllw";
545 case Xsse_SHL32: return "pslld";
546 case Xsse_SHL64: return "psllq";
547 case Xsse_SHR16: return "psrlw";
548 case Xsse_SHR32: return "psrld";
549 case Xsse_SHR64: return "psrlq";
550 case Xsse_SAR16: return "psraw";
551 case Xsse_SAR32: return "psrad";
552 case Xsse_PACKSSD: return "packssdw";
553 case Xsse_PACKSSW: return "packsswb";
554 case Xsse_PACKUSW: return "packuswb";
555 case Xsse_UNPCKHB: return "punpckhb";
556 case Xsse_UNPCKHW: return "punpckhw";
557 case Xsse_UNPCKHD: return "punpckhd";
558 case Xsse_UNPCKHQ: return "punpckhq";
559 case Xsse_UNPCKLB: return "punpcklb";
560 case Xsse_UNPCKLW: return "punpcklw";
561 case Xsse_UNPCKLD: return "punpckld";
562 case Xsse_UNPCKLQ: return "punpcklq";
563 default: vpanic("showX86SseOp");
567 X86Instr* X86Instr_Alu32R ( X86AluOp op, X86RMI* src, HReg dst ) {
568 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
569 i->tag = Xin_Alu32R;
570 i->Xin.Alu32R.op = op;
571 i->Xin.Alu32R.src = src;
572 i->Xin.Alu32R.dst = dst;
573 return i;
575 X86Instr* X86Instr_Alu32M ( X86AluOp op, X86RI* src, X86AMode* dst ) {
576 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
577 i->tag = Xin_Alu32M;
578 i->Xin.Alu32M.op = op;
579 i->Xin.Alu32M.src = src;
580 i->Xin.Alu32M.dst = dst;
581 vassert(op != Xalu_MUL);
582 return i;
584 X86Instr* X86Instr_Sh32 ( X86ShiftOp op, UInt src, HReg dst ) {
585 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
586 i->tag = Xin_Sh32;
587 i->Xin.Sh32.op = op;
588 i->Xin.Sh32.src = src;
589 i->Xin.Sh32.dst = dst;
590 return i;
592 X86Instr* X86Instr_Test32 ( UInt imm32, X86RM* dst ) {
593 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
594 i->tag = Xin_Test32;
595 i->Xin.Test32.imm32 = imm32;
596 i->Xin.Test32.dst = dst;
597 return i;
599 X86Instr* X86Instr_Unary32 ( X86UnaryOp op, HReg dst ) {
600 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
601 i->tag = Xin_Unary32;
602 i->Xin.Unary32.op = op;
603 i->Xin.Unary32.dst = dst;
604 return i;
606 X86Instr* X86Instr_Lea32 ( X86AMode* am, HReg dst ) {
607 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
608 i->tag = Xin_Lea32;
609 i->Xin.Lea32.am = am;
610 i->Xin.Lea32.dst = dst;
611 return i;
613 X86Instr* X86Instr_MulL ( Bool syned, X86RM* src ) {
614 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
615 i->tag = Xin_MulL;
616 i->Xin.MulL.syned = syned;
617 i->Xin.MulL.src = src;
618 return i;
620 X86Instr* X86Instr_Div ( Bool syned, X86RM* src ) {
621 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
622 i->tag = Xin_Div;
623 i->Xin.Div.syned = syned;
624 i->Xin.Div.src = src;
625 return i;
627 X86Instr* X86Instr_Sh3232 ( X86ShiftOp op, UInt amt, HReg src, HReg dst ) {
628 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
629 i->tag = Xin_Sh3232;
630 i->Xin.Sh3232.op = op;
631 i->Xin.Sh3232.amt = amt;
632 i->Xin.Sh3232.src = src;
633 i->Xin.Sh3232.dst = dst;
634 vassert(op == Xsh_SHL || op == Xsh_SHR);
635 return i;
637 X86Instr* X86Instr_Push( X86RMI* src ) {
638 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
639 i->tag = Xin_Push;
640 i->Xin.Push.src = src;
641 return i;
643 X86Instr* X86Instr_Call ( X86CondCode cond, Addr32 target, Int regparms,
644 RetLoc rloc ) {
645 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
646 i->tag = Xin_Call;
647 i->Xin.Call.cond = cond;
648 i->Xin.Call.target = target;
649 i->Xin.Call.regparms = regparms;
650 i->Xin.Call.rloc = rloc;
651 vassert(regparms >= 0 && regparms <= 3);
652 vassert(is_sane_RetLoc(rloc));
653 return i;
655 X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
656 X86CondCode cond, Bool toFastEP ) {
657 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
658 i->tag = Xin_XDirect;
659 i->Xin.XDirect.dstGA = dstGA;
660 i->Xin.XDirect.amEIP = amEIP;
661 i->Xin.XDirect.cond = cond;
662 i->Xin.XDirect.toFastEP = toFastEP;
663 return i;
665 X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
666 X86CondCode cond ) {
667 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
668 i->tag = Xin_XIndir;
669 i->Xin.XIndir.dstGA = dstGA;
670 i->Xin.XIndir.amEIP = amEIP;
671 i->Xin.XIndir.cond = cond;
672 return i;
674 X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
675 X86CondCode cond, IRJumpKind jk ) {
676 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
677 i->tag = Xin_XAssisted;
678 i->Xin.XAssisted.dstGA = dstGA;
679 i->Xin.XAssisted.amEIP = amEIP;
680 i->Xin.XAssisted.cond = cond;
681 i->Xin.XAssisted.jk = jk;
682 return i;
684 X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
685 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
686 i->tag = Xin_CMov32;
687 i->Xin.CMov32.cond = cond;
688 i->Xin.CMov32.src = src;
689 i->Xin.CMov32.dst = dst;
690 vassert(cond != Xcc_ALWAYS);
691 return i;
693 X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
694 X86AMode* src, HReg dst ) {
695 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
696 i->tag = Xin_LoadEX;
697 i->Xin.LoadEX.szSmall = szSmall;
698 i->Xin.LoadEX.syned = syned;
699 i->Xin.LoadEX.src = src;
700 i->Xin.LoadEX.dst = dst;
701 vassert(szSmall == 1 || szSmall == 2);
702 return i;
704 X86Instr* X86Instr_Store ( UChar sz, HReg src, X86AMode* dst ) {
705 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
706 i->tag = Xin_Store;
707 i->Xin.Store.sz = sz;
708 i->Xin.Store.src = src;
709 i->Xin.Store.dst = dst;
710 vassert(sz == 1 || sz == 2);
711 return i;
713 X86Instr* X86Instr_Set32 ( X86CondCode cond, HReg dst ) {
714 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
715 i->tag = Xin_Set32;
716 i->Xin.Set32.cond = cond;
717 i->Xin.Set32.dst = dst;
718 return i;
720 X86Instr* X86Instr_Bsfr32 ( Bool isFwds, HReg src, HReg dst ) {
721 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
722 i->tag = Xin_Bsfr32;
723 i->Xin.Bsfr32.isFwds = isFwds;
724 i->Xin.Bsfr32.src = src;
725 i->Xin.Bsfr32.dst = dst;
726 return i;
728 X86Instr* X86Instr_MFence ( UInt hwcaps ) {
729 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
730 i->tag = Xin_MFence;
731 i->Xin.MFence.hwcaps = hwcaps;
732 vassert(0 == (hwcaps & ~(VEX_HWCAPS_X86_MMXEXT
733 |VEX_HWCAPS_X86_SSE1
734 |VEX_HWCAPS_X86_SSE2
735 |VEX_HWCAPS_X86_SSE3
736 |VEX_HWCAPS_X86_LZCNT)));
737 return i;
739 X86Instr* X86Instr_ACAS ( X86AMode* addr, UChar sz ) {
740 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
741 i->tag = Xin_ACAS;
742 i->Xin.ACAS.addr = addr;
743 i->Xin.ACAS.sz = sz;
744 vassert(sz == 4 || sz == 2 || sz == 1);
745 return i;
747 X86Instr* X86Instr_DACAS ( X86AMode* addr ) {
748 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
749 i->tag = Xin_DACAS;
750 i->Xin.DACAS.addr = addr;
751 return i;
754 X86Instr* X86Instr_FpUnary ( X86FpOp op, HReg src, HReg dst ) {
755 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
756 i->tag = Xin_FpUnary;
757 i->Xin.FpUnary.op = op;
758 i->Xin.FpUnary.src = src;
759 i->Xin.FpUnary.dst = dst;
760 return i;
762 X86Instr* X86Instr_FpBinary ( X86FpOp op, HReg srcL, HReg srcR, HReg dst ) {
763 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
764 i->tag = Xin_FpBinary;
765 i->Xin.FpBinary.op = op;
766 i->Xin.FpBinary.srcL = srcL;
767 i->Xin.FpBinary.srcR = srcR;
768 i->Xin.FpBinary.dst = dst;
769 return i;
771 X86Instr* X86Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, X86AMode* addr ) {
772 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
773 i->tag = Xin_FpLdSt;
774 i->Xin.FpLdSt.isLoad = isLoad;
775 i->Xin.FpLdSt.sz = sz;
776 i->Xin.FpLdSt.reg = reg;
777 i->Xin.FpLdSt.addr = addr;
778 vassert(sz == 4 || sz == 8 || sz == 10);
779 return i;
781 X86Instr* X86Instr_FpLdStI ( Bool isLoad, UChar sz,
782 HReg reg, X86AMode* addr ) {
783 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
784 i->tag = Xin_FpLdStI;
785 i->Xin.FpLdStI.isLoad = isLoad;
786 i->Xin.FpLdStI.sz = sz;
787 i->Xin.FpLdStI.reg = reg;
788 i->Xin.FpLdStI.addr = addr;
789 vassert(sz == 2 || sz == 4 || sz == 8);
790 return i;
792 X86Instr* X86Instr_Fp64to32 ( HReg src, HReg dst ) {
793 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
794 i->tag = Xin_Fp64to32;
795 i->Xin.Fp64to32.src = src;
796 i->Xin.Fp64to32.dst = dst;
797 return i;
799 X86Instr* X86Instr_FpCMov ( X86CondCode cond, HReg src, HReg dst ) {
800 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
801 i->tag = Xin_FpCMov;
802 i->Xin.FpCMov.cond = cond;
803 i->Xin.FpCMov.src = src;
804 i->Xin.FpCMov.dst = dst;
805 vassert(cond != Xcc_ALWAYS);
806 return i;
808 X86Instr* X86Instr_FpLdCW ( X86AMode* addr ) {
809 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
810 i->tag = Xin_FpLdCW;
811 i->Xin.FpLdCW.addr = addr;
812 return i;
814 X86Instr* X86Instr_FpStSW_AX ( void ) {
815 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
816 i->tag = Xin_FpStSW_AX;
817 return i;
819 X86Instr* X86Instr_FpCmp ( HReg srcL, HReg srcR, HReg dst ) {
820 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
821 i->tag = Xin_FpCmp;
822 i->Xin.FpCmp.srcL = srcL;
823 i->Xin.FpCmp.srcR = srcR;
824 i->Xin.FpCmp.dst = dst;
825 return i;
827 X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
828 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
829 i->tag = Xin_SseConst;
830 i->Xin.SseConst.con = con;
831 i->Xin.SseConst.dst = dst;
832 vassert(hregClass(dst) == HRcVec128);
833 return i;
835 X86Instr* X86Instr_SseLdSt ( Bool isLoad, HReg reg, X86AMode* addr ) {
836 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
837 i->tag = Xin_SseLdSt;
838 i->Xin.SseLdSt.isLoad = isLoad;
839 i->Xin.SseLdSt.reg = reg;
840 i->Xin.SseLdSt.addr = addr;
841 return i;
843 X86Instr* X86Instr_SseLdzLO ( Int sz, HReg reg, X86AMode* addr )
845 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
846 i->tag = Xin_SseLdzLO;
847 i->Xin.SseLdzLO.sz = toUChar(sz);
848 i->Xin.SseLdzLO.reg = reg;
849 i->Xin.SseLdzLO.addr = addr;
850 vassert(sz == 4 || sz == 8);
851 return i;
853 X86Instr* X86Instr_Sse32Fx4 ( X86SseOp op, HReg src, HReg dst ) {
854 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
855 i->tag = Xin_Sse32Fx4;
856 i->Xin.Sse32Fx4.op = op;
857 i->Xin.Sse32Fx4.src = src;
858 i->Xin.Sse32Fx4.dst = dst;
859 vassert(op != Xsse_MOV);
860 return i;
862 X86Instr* X86Instr_Sse32FLo ( X86SseOp op, HReg src, HReg dst ) {
863 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
864 i->tag = Xin_Sse32FLo;
865 i->Xin.Sse32FLo.op = op;
866 i->Xin.Sse32FLo.src = src;
867 i->Xin.Sse32FLo.dst = dst;
868 vassert(op != Xsse_MOV);
869 return i;
871 X86Instr* X86Instr_Sse64Fx2 ( X86SseOp op, HReg src, HReg dst ) {
872 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
873 i->tag = Xin_Sse64Fx2;
874 i->Xin.Sse64Fx2.op = op;
875 i->Xin.Sse64Fx2.src = src;
876 i->Xin.Sse64Fx2.dst = dst;
877 vassert(op != Xsse_MOV);
878 return i;
880 X86Instr* X86Instr_Sse64FLo ( X86SseOp op, HReg src, HReg dst ) {
881 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
882 i->tag = Xin_Sse64FLo;
883 i->Xin.Sse64FLo.op = op;
884 i->Xin.Sse64FLo.src = src;
885 i->Xin.Sse64FLo.dst = dst;
886 vassert(op != Xsse_MOV);
887 return i;
889 X86Instr* X86Instr_SseReRg ( X86SseOp op, HReg re, HReg rg ) {
890 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
891 i->tag = Xin_SseReRg;
892 i->Xin.SseReRg.op = op;
893 i->Xin.SseReRg.src = re;
894 i->Xin.SseReRg.dst = rg;
895 return i;
897 X86Instr* X86Instr_SseCMov ( X86CondCode cond, HReg src, HReg dst ) {
898 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
899 i->tag = Xin_SseCMov;
900 i->Xin.SseCMov.cond = cond;
901 i->Xin.SseCMov.src = src;
902 i->Xin.SseCMov.dst = dst;
903 vassert(cond != Xcc_ALWAYS);
904 return i;
906 X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst ) {
907 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
908 i->tag = Xin_SseShuf;
909 i->Xin.SseShuf.order = order;
910 i->Xin.SseShuf.src = src;
911 i->Xin.SseShuf.dst = dst;
912 vassert(order >= 0 && order <= 0xFF);
913 return i;
915 X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
916 X86AMode* amFailAddr ) {
917 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
918 i->tag = Xin_EvCheck;
919 i->Xin.EvCheck.amCounter = amCounter;
920 i->Xin.EvCheck.amFailAddr = amFailAddr;
921 return i;
923 X86Instr* X86Instr_ProfInc ( void ) {
924 X86Instr* i = LibVEX_Alloc_inline(sizeof(X86Instr));
925 i->tag = Xin_ProfInc;
926 return i;
929 void ppX86Instr ( const X86Instr* i, Bool mode64 ) {
930 vassert(mode64 == False);
931 switch (i->tag) {
932 case Xin_Alu32R:
933 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32R.op));
934 ppX86RMI(i->Xin.Alu32R.src);
935 vex_printf(",");
936 ppHRegX86(i->Xin.Alu32R.dst);
937 return;
938 case Xin_Alu32M:
939 vex_printf("%sl ", showX86AluOp(i->Xin.Alu32M.op));
940 ppX86RI(i->Xin.Alu32M.src);
941 vex_printf(",");
942 ppX86AMode(i->Xin.Alu32M.dst);
943 return;
944 case Xin_Sh32:
945 vex_printf("%sl ", showX86ShiftOp(i->Xin.Sh32.op));
946 if (i->Xin.Sh32.src == 0)
947 vex_printf("%%cl,");
948 else
949 vex_printf("$%d,", (Int)i->Xin.Sh32.src);
950 ppHRegX86(i->Xin.Sh32.dst);
951 return;
952 case Xin_Test32:
953 vex_printf("testl $%d,", (Int)i->Xin.Test32.imm32);
954 ppX86RM(i->Xin.Test32.dst);
955 return;
956 case Xin_Unary32:
957 vex_printf("%sl ", showX86UnaryOp(i->Xin.Unary32.op));
958 ppHRegX86(i->Xin.Unary32.dst);
959 return;
960 case Xin_Lea32:
961 vex_printf("leal ");
962 ppX86AMode(i->Xin.Lea32.am);
963 vex_printf(",");
964 ppHRegX86(i->Xin.Lea32.dst);
965 return;
966 case Xin_MulL:
967 vex_printf("%cmull ", i->Xin.MulL.syned ? 's' : 'u');
968 ppX86RM(i->Xin.MulL.src);
969 return;
970 case Xin_Div:
971 vex_printf("%cdivl ", i->Xin.Div.syned ? 's' : 'u');
972 ppX86RM(i->Xin.Div.src);
973 return;
974 case Xin_Sh3232:
975 vex_printf("%sdl ", showX86ShiftOp(i->Xin.Sh3232.op));
976 if (i->Xin.Sh3232.amt == 0)
977 vex_printf(" %%cl,");
978 else
979 vex_printf(" $%d,", (Int)i->Xin.Sh3232.amt);
980 ppHRegX86(i->Xin.Sh3232.src);
981 vex_printf(",");
982 ppHRegX86(i->Xin.Sh3232.dst);
983 return;
984 case Xin_Push:
985 vex_printf("pushl ");
986 ppX86RMI(i->Xin.Push.src);
987 return;
988 case Xin_Call:
989 vex_printf("call%s[%d,",
990 i->Xin.Call.cond==Xcc_ALWAYS
991 ? "" : showX86CondCode(i->Xin.Call.cond),
992 i->Xin.Call.regparms);
993 ppRetLoc(i->Xin.Call.rloc);
994 vex_printf("] 0x%x", i->Xin.Call.target);
995 break;
996 case Xin_XDirect:
997 vex_printf("(xDirect) ");
998 vex_printf("if (%%eflags.%s) { ",
999 showX86CondCode(i->Xin.XDirect.cond));
1000 vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
1001 ppX86AMode(i->Xin.XDirect.amEIP);
1002 vex_printf("; ");
1003 vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
1004 i->Xin.XDirect.toFastEP ? "fast" : "slow");
1005 return;
1006 case Xin_XIndir:
1007 vex_printf("(xIndir) ");
1008 vex_printf("if (%%eflags.%s) { movl ",
1009 showX86CondCode(i->Xin.XIndir.cond));
1010 ppHRegX86(i->Xin.XIndir.dstGA);
1011 vex_printf(",");
1012 ppX86AMode(i->Xin.XIndir.amEIP);
1013 vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
1014 return;
1015 case Xin_XAssisted:
1016 vex_printf("(xAssisted) ");
1017 vex_printf("if (%%eflags.%s) { ",
1018 showX86CondCode(i->Xin.XAssisted.cond));
1019 vex_printf("movl ");
1020 ppHRegX86(i->Xin.XAssisted.dstGA);
1021 vex_printf(",");
1022 ppX86AMode(i->Xin.XAssisted.amEIP);
1023 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
1024 (Int)i->Xin.XAssisted.jk);
1025 vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
1026 return;
1027 case Xin_CMov32:
1028 vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
1029 ppX86RM(i->Xin.CMov32.src);
1030 vex_printf(",");
1031 ppHRegX86(i->Xin.CMov32.dst);
1032 return;
1033 case Xin_LoadEX:
1034 vex_printf("mov%c%cl ",
1035 i->Xin.LoadEX.syned ? 's' : 'z',
1036 i->Xin.LoadEX.szSmall==1 ? 'b' : 'w');
1037 ppX86AMode(i->Xin.LoadEX.src);
1038 vex_printf(",");
1039 ppHRegX86(i->Xin.LoadEX.dst);
1040 return;
1041 case Xin_Store:
1042 vex_printf("mov%c ", i->Xin.Store.sz==1 ? 'b' : 'w');
1043 ppHRegX86(i->Xin.Store.src);
1044 vex_printf(",");
1045 ppX86AMode(i->Xin.Store.dst);
1046 return;
1047 case Xin_Set32:
1048 vex_printf("setl%s ", showX86CondCode(i->Xin.Set32.cond));
1049 ppHRegX86(i->Xin.Set32.dst);
1050 return;
1051 case Xin_Bsfr32:
1052 vex_printf("bs%cl ", i->Xin.Bsfr32.isFwds ? 'f' : 'r');
1053 ppHRegX86(i->Xin.Bsfr32.src);
1054 vex_printf(",");
1055 ppHRegX86(i->Xin.Bsfr32.dst);
1056 return;
1057 case Xin_MFence:
1058 vex_printf("mfence(%s)",
1059 LibVEX_ppVexHwCaps(VexArchX86,i->Xin.MFence.hwcaps));
1060 return;
1061 case Xin_ACAS:
1062 vex_printf("lock cmpxchg%c ",
1063 i->Xin.ACAS.sz==1 ? 'b'
1064 : i->Xin.ACAS.sz==2 ? 'w' : 'l');
1065 vex_printf("{%%eax->%%ebx},");
1066 ppX86AMode(i->Xin.ACAS.addr);
1067 return;
1068 case Xin_DACAS:
1069 vex_printf("lock cmpxchg8b {%%edx:%%eax->%%ecx:%%ebx},");
1070 ppX86AMode(i->Xin.DACAS.addr);
1071 return;
1072 case Xin_FpUnary:
1073 vex_printf("g%sD ", showX86FpOp(i->Xin.FpUnary.op));
1074 ppHRegX86(i->Xin.FpUnary.src);
1075 vex_printf(",");
1076 ppHRegX86(i->Xin.FpUnary.dst);
1077 break;
1078 case Xin_FpBinary:
1079 vex_printf("g%sD ", showX86FpOp(i->Xin.FpBinary.op));
1080 ppHRegX86(i->Xin.FpBinary.srcL);
1081 vex_printf(",");
1082 ppHRegX86(i->Xin.FpBinary.srcR);
1083 vex_printf(",");
1084 ppHRegX86(i->Xin.FpBinary.dst);
1085 break;
1086 case Xin_FpLdSt:
1087 if (i->Xin.FpLdSt.isLoad) {
1088 vex_printf("gld%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1089 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1090 ppX86AMode(i->Xin.FpLdSt.addr);
1091 vex_printf(", ");
1092 ppHRegX86(i->Xin.FpLdSt.reg);
1093 } else {
1094 vex_printf("gst%c " , i->Xin.FpLdSt.sz==10 ? 'T'
1095 : (i->Xin.FpLdSt.sz==8 ? 'D' : 'F'));
1096 ppHRegX86(i->Xin.FpLdSt.reg);
1097 vex_printf(", ");
1098 ppX86AMode(i->Xin.FpLdSt.addr);
1100 return;
1101 case Xin_FpLdStI:
1102 if (i->Xin.FpLdStI.isLoad) {
1103 vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1104 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1105 ppX86AMode(i->Xin.FpLdStI.addr);
1106 vex_printf(", ");
1107 ppHRegX86(i->Xin.FpLdStI.reg);
1108 } else {
1109 vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
1110 i->Xin.FpLdStI.sz==4 ? "l" : "w");
1111 ppHRegX86(i->Xin.FpLdStI.reg);
1112 vex_printf(", ");
1113 ppX86AMode(i->Xin.FpLdStI.addr);
1115 return;
1116 case Xin_Fp64to32:
1117 vex_printf("gdtof ");
1118 ppHRegX86(i->Xin.Fp64to32.src);
1119 vex_printf(",");
1120 ppHRegX86(i->Xin.Fp64to32.dst);
1121 return;
1122 case Xin_FpCMov:
1123 vex_printf("gcmov%s ", showX86CondCode(i->Xin.FpCMov.cond));
1124 ppHRegX86(i->Xin.FpCMov.src);
1125 vex_printf(",");
1126 ppHRegX86(i->Xin.FpCMov.dst);
1127 return;
1128 case Xin_FpLdCW:
1129 vex_printf("fldcw ");
1130 ppX86AMode(i->Xin.FpLdCW.addr);
1131 return;
1132 case Xin_FpStSW_AX:
1133 vex_printf("fstsw %%ax");
1134 return;
1135 case Xin_FpCmp:
1136 vex_printf("gcmp ");
1137 ppHRegX86(i->Xin.FpCmp.srcL);
1138 vex_printf(",");
1139 ppHRegX86(i->Xin.FpCmp.srcR);
1140 vex_printf(",");
1141 ppHRegX86(i->Xin.FpCmp.dst);
1142 break;
1143 case Xin_SseConst:
1144 vex_printf("const $0x%04x,", (UInt)i->Xin.SseConst.con);
1145 ppHRegX86(i->Xin.SseConst.dst);
1146 break;
1147 case Xin_SseLdSt:
1148 vex_printf("movups ");
1149 if (i->Xin.SseLdSt.isLoad) {
1150 ppX86AMode(i->Xin.SseLdSt.addr);
1151 vex_printf(",");
1152 ppHRegX86(i->Xin.SseLdSt.reg);
1153 } else {
1154 ppHRegX86(i->Xin.SseLdSt.reg);
1155 vex_printf(",");
1156 ppX86AMode(i->Xin.SseLdSt.addr);
1158 return;
1159 case Xin_SseLdzLO:
1160 vex_printf("movs%s ", i->Xin.SseLdzLO.sz==4 ? "s" : "d");
1161 ppX86AMode(i->Xin.SseLdzLO.addr);
1162 vex_printf(",");
1163 ppHRegX86(i->Xin.SseLdzLO.reg);
1164 return;
1165 case Xin_Sse32Fx4:
1166 vex_printf("%sps ", showX86SseOp(i->Xin.Sse32Fx4.op));
1167 ppHRegX86(i->Xin.Sse32Fx4.src);
1168 vex_printf(",");
1169 ppHRegX86(i->Xin.Sse32Fx4.dst);
1170 return;
1171 case Xin_Sse32FLo:
1172 vex_printf("%sss ", showX86SseOp(i->Xin.Sse32FLo.op));
1173 ppHRegX86(i->Xin.Sse32FLo.src);
1174 vex_printf(",");
1175 ppHRegX86(i->Xin.Sse32FLo.dst);
1176 return;
1177 case Xin_Sse64Fx2:
1178 vex_printf("%spd ", showX86SseOp(i->Xin.Sse64Fx2.op));
1179 ppHRegX86(i->Xin.Sse64Fx2.src);
1180 vex_printf(",");
1181 ppHRegX86(i->Xin.Sse64Fx2.dst);
1182 return;
1183 case Xin_Sse64FLo:
1184 vex_printf("%ssd ", showX86SseOp(i->Xin.Sse64FLo.op));
1185 ppHRegX86(i->Xin.Sse64FLo.src);
1186 vex_printf(",");
1187 ppHRegX86(i->Xin.Sse64FLo.dst);
1188 return;
1189 case Xin_SseReRg:
1190 vex_printf("%s ", showX86SseOp(i->Xin.SseReRg.op));
1191 ppHRegX86(i->Xin.SseReRg.src);
1192 vex_printf(",");
1193 ppHRegX86(i->Xin.SseReRg.dst);
1194 return;
1195 case Xin_SseCMov:
1196 vex_printf("cmov%s ", showX86CondCode(i->Xin.SseCMov.cond));
1197 ppHRegX86(i->Xin.SseCMov.src);
1198 vex_printf(",");
1199 ppHRegX86(i->Xin.SseCMov.dst);
1200 return;
1201 case Xin_SseShuf:
1202 vex_printf("pshufd $0x%x,", (UInt)i->Xin.SseShuf.order);
1203 ppHRegX86(i->Xin.SseShuf.src);
1204 vex_printf(",");
1205 ppHRegX86(i->Xin.SseShuf.dst);
1206 return;
1207 case Xin_EvCheck:
1208 vex_printf("(evCheck) decl ");
1209 ppX86AMode(i->Xin.EvCheck.amCounter);
1210 vex_printf("; jns nofail; jmp *");
1211 ppX86AMode(i->Xin.EvCheck.amFailAddr);
1212 vex_printf("; nofail:");
1213 return;
1214 case Xin_ProfInc:
1215 vex_printf("(profInc) addl $1,NotKnownYet; "
1216 "adcl $0,NotKnownYet+4");
1217 return;
1218 default:
1219 vpanic("ppX86Instr");
1223 /* --------- Helpers for register allocation. --------- */
/* Describe to the register allocator which registers instruction i
   uses.  *u is first reset with initHRegUsage and then populated with
   one entry per register operand, tagged HRmRead, HRmWrite or
   HRmModify.  mode64 must be False.  For the register-to-register
   move forms handled below (Alu32R MOV with a register source,
   FpUnary MOV, SseReRg MOV) the isRegRegMove/regMoveSrc/regMoveDst
   fields of *u are filled in as well.  An unrecognised tag prints the
   instruction and panics. */
1225 void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
1227 Bool unary;
1228 vassert(mode64 == False);
1229 initHRegUsage(u);
1230 switch (i->tag) {
1231 case Xin_Alu32R:
1232 addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
/* MOV only writes dst; CMP only reads it; every other op both
   reads and writes it. */
1233 if (i->Xin.Alu32R.op == Xalu_MOV) {
1234 addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
1236 if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
1237 u->isRegRegMove = True;
1238 u->regMoveSrc = i->Xin.Alu32R.src->Xrmi.Reg.reg;
1239 u->regMoveDst = i->Xin.Alu32R.dst;
1241 return;
1243 if (i->Xin.Alu32R.op == Xalu_CMP) {
1244 addHRegUse(u, HRmRead, i->Xin.Alu32R.dst);
1245 return;
1247 addHRegUse(u, HRmModify, i->Xin.Alu32R.dst);
1248 return;
1249 case Xin_Alu32M:
1250 addRegUsage_X86RI(u, i->Xin.Alu32M.src);
1251 addRegUsage_X86AMode(u, i->Xin.Alu32M.dst);
1252 return;
1253 case Xin_Sh32:
1254 addHRegUse(u, HRmModify, i->Xin.Sh32.dst);
/* A zero src additionally records a read of ECX -- presumably
   because the shift count then comes from %cl; confirm against
   the emitter. */
1255 if (i->Xin.Sh32.src == 0)
1256 addHRegUse(u, HRmRead, hregX86_ECX());
1257 return;
1258 case Xin_Test32:
1259 addRegUsage_X86RM(u, i->Xin.Test32.dst, HRmRead);
1260 return;
1261 case Xin_Unary32:
1262 addHRegUse(u, HRmModify, i->Xin.Unary32.dst);
1263 return;
1264 case Xin_Lea32:
1265 addRegUsage_X86AMode(u, i->Xin.Lea32.am);
1266 addHRegUse(u, HRmWrite, i->Xin.Lea32.dst);
1267 return;
1268 case Xin_MulL:
1269 addRegUsage_X86RM(u, i->Xin.MulL.src, HRmRead);
1270 addHRegUse(u, HRmModify, hregX86_EAX());
1271 addHRegUse(u, HRmWrite, hregX86_EDX());
1272 return;
1273 case Xin_Div:
1274 addRegUsage_X86RM(u, i->Xin.Div.src, HRmRead);
1275 addHRegUse(u, HRmModify, hregX86_EAX());
1276 addHRegUse(u, HRmModify, hregX86_EDX());
1277 return;
1278 case Xin_Sh3232:
1279 addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
1280 addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
/* As for Xin_Sh32: a zero amt records a read of ECX. */
1281 if (i->Xin.Sh3232.amt == 0)
1282 addHRegUse(u, HRmRead, hregX86_ECX());
1283 return;
1284 case Xin_Push:
1285 addRegUsage_X86RMI(u, i->Xin.Push.src);
1286 addHRegUse(u, HRmModify, hregX86_ESP());
1287 return;
1288 case Xin_Call:
1289 /* This is a bit subtle. */
1290 /* First off, claim it trashes all the caller-saved regs
1291 which fall within the register allocator's jurisdiction.
1292 These I believe to be %eax %ecx %edx and all the xmm
1293 registers. */
1294 addHRegUse(u, HRmWrite, hregX86_EAX());
1295 addHRegUse(u, HRmWrite, hregX86_ECX());
1296 addHRegUse(u, HRmWrite, hregX86_EDX());
1297 addHRegUse(u, HRmWrite, hregX86_XMM0());
1298 addHRegUse(u, HRmWrite, hregX86_XMM1());
1299 addHRegUse(u, HRmWrite, hregX86_XMM2());
1300 addHRegUse(u, HRmWrite, hregX86_XMM3());
1301 addHRegUse(u, HRmWrite, hregX86_XMM4());
1302 addHRegUse(u, HRmWrite, hregX86_XMM5());
1303 addHRegUse(u, HRmWrite, hregX86_XMM6());
1304 addHRegUse(u, HRmWrite, hregX86_XMM7());
1305 /* Now we have to state any parameter-carrying registers
1306 which might be read. This depends on the regparmness. */
1307 switch (i->Xin.Call.regparms) {
1308 case 3: addHRegUse(u, HRmRead, hregX86_ECX()); /*fallthru*/
1309 case 2: addHRegUse(u, HRmRead, hregX86_EDX()); /*fallthru*/
1310 case 1: addHRegUse(u, HRmRead, hregX86_EAX()); break;
1311 case 0: break;
1312 default: vpanic("getRegUsage_X86Instr:Call:regparms");
1314 /* Finally, there is the issue that the insn trashes a
1315 register because the literal target address has to be
1316 loaded into a register. Fortunately, for the 0/1/2
1317 regparm case, we can use EAX, EDX and ECX respectively, so
1318 this does not cause any further damage. For the 3-regparm
1319 case, we'll have to choose another register arbitrarily --
1320 since A, D and C are used for parameters -- and so we might
1321 as well choose EDI. */
1322 if (i->Xin.Call.regparms == 3)
1323 addHRegUse(u, HRmWrite, hregX86_EDI());
1324 /* Upshot of this is that the assembler really must observe
1325 the here-stated convention of which register to use as an
1326 address temporary, depending on the regparmness: 0==EAX,
1327 1==EDX, 2==ECX, 3==EDI. */
1328 return;
1329 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1330 conditionally exit the block. Hence we only need to list (1)
1331 the registers that they read, and (2) the registers that they
1332 write in the case where the block is not exited. (2) is
1333 empty, hence only (1) is relevant here. */
1334 case Xin_XDirect:
1335 addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
1336 return;
1337 case Xin_XIndir:
1338 addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
1339 addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
1340 return;
1341 case Xin_XAssisted:
1342 addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
1343 addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
1344 return;
1345 case Xin_CMov32:
1346 addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
1347 addHRegUse(u, HRmModify, i->Xin.CMov32.dst);
1348 return;
1349 case Xin_LoadEX:
1350 addRegUsage_X86AMode(u, i->Xin.LoadEX.src);
1351 addHRegUse(u, HRmWrite, i->Xin.LoadEX.dst);
1352 return;
1353 case Xin_Store:
1354 addHRegUse(u, HRmRead, i->Xin.Store.src);
1355 addRegUsage_X86AMode(u, i->Xin.Store.dst);
1356 return;
1357 case Xin_Set32:
1358 addHRegUse(u, HRmWrite, i->Xin.Set32.dst);
1359 return;
1360 case Xin_Bsfr32:
1361 addHRegUse(u, HRmRead, i->Xin.Bsfr32.src);
1362 addHRegUse(u, HRmWrite, i->Xin.Bsfr32.dst);
1363 return;
1364 case Xin_MFence:
1365 return;
1366 case Xin_ACAS:
1367 addRegUsage_X86AMode(u, i->Xin.ACAS.addr);
1368 addHRegUse(u, HRmRead, hregX86_EBX());
1369 addHRegUse(u, HRmModify, hregX86_EAX());
1370 return;
1371 case Xin_DACAS:
1372 addRegUsage_X86AMode(u, i->Xin.DACAS.addr);
1373 addHRegUse(u, HRmRead, hregX86_ECX());
1374 addHRegUse(u, HRmRead, hregX86_EBX());
1375 addHRegUse(u, HRmModify, hregX86_EDX());
1376 addHRegUse(u, HRmModify, hregX86_EAX());
1377 return;
1378 case Xin_FpUnary:
1379 addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
1380 addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
/* An FP MOV is reported as a reg-reg move. */
1382 if (i->Xin.FpUnary.op == Xfp_MOV) {
1383 u->isRegRegMove = True;
1384 u->regMoveSrc = i->Xin.FpUnary.src;
1385 u->regMoveDst = i->Xin.FpUnary.dst;
1387 return;
1388 case Xin_FpBinary:
1389 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
1390 addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
1391 addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
1392 return;
1393 case Xin_FpLdSt:
1394 addRegUsage_X86AMode(u, i->Xin.FpLdSt.addr);
1395 addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
1396 i->Xin.FpLdSt.reg);
1397 return;
1398 case Xin_FpLdStI:
1399 addRegUsage_X86AMode(u, i->Xin.FpLdStI.addr);
1400 addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
1401 i->Xin.FpLdStI.reg);
1402 return;
1403 case Xin_Fp64to32:
1404 addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
1405 addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
1406 return;
1407 case Xin_FpCMov:
1408 addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
1409 addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
1410 return;
1411 case Xin_FpLdCW:
1412 addRegUsage_X86AMode(u, i->Xin.FpLdCW.addr);
1413 return;
1414 case Xin_FpStSW_AX:
1415 addHRegUse(u, HRmWrite, hregX86_EAX());
1416 return;
1417 case Xin_FpCmp:
1418 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcL);
1419 addHRegUse(u, HRmRead, i->Xin.FpCmp.srcR);
1420 addHRegUse(u, HRmWrite, i->Xin.FpCmp.dst);
1421 addHRegUse(u, HRmWrite, hregX86_EAX());
1422 return;
1423 case Xin_SseLdSt:
1424 addRegUsage_X86AMode(u, i->Xin.SseLdSt.addr);
1425 addHRegUse(u, i->Xin.SseLdSt.isLoad ? HRmWrite : HRmRead,
1426 i->Xin.SseLdSt.reg);
1427 return;
1428 case Xin_SseLdzLO:
1429 addRegUsage_X86AMode(u, i->Xin.SseLdzLO.addr);
1430 addHRegUse(u, HRmWrite, i->Xin.SseLdzLO.reg);
1431 return;
1432 case Xin_SseConst:
1433 addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
1434 return;
/* The four SSE arithmetic cases below share one pattern: MOV is
   asserted not to occur; for RCPF/RSQRTF/SQRTF dst is recorded as
   written only, for every other op as modified. */
1435 case Xin_Sse32Fx4:
1436 vassert(i->Xin.Sse32Fx4.op != Xsse_MOV);
1437 unary = toBool( i->Xin.Sse32Fx4.op == Xsse_RCPF
1438 || i->Xin.Sse32Fx4.op == Xsse_RSQRTF
1439 || i->Xin.Sse32Fx4.op == Xsse_SQRTF );
1440 addHRegUse(u, HRmRead, i->Xin.Sse32Fx4.src);
1441 addHRegUse(u, unary ? HRmWrite : HRmModify,
1442 i->Xin.Sse32Fx4.dst);
1443 return;
1444 case Xin_Sse32FLo:
1445 vassert(i->Xin.Sse32FLo.op != Xsse_MOV);
1446 unary = toBool( i->Xin.Sse32FLo.op == Xsse_RCPF
1447 || i->Xin.Sse32FLo.op == Xsse_RSQRTF
1448 || i->Xin.Sse32FLo.op == Xsse_SQRTF );
1449 addHRegUse(u, HRmRead, i->Xin.Sse32FLo.src);
1450 addHRegUse(u, unary ? HRmWrite : HRmModify,
1451 i->Xin.Sse32FLo.dst);
1452 return;
1453 case Xin_Sse64Fx2:
1454 vassert(i->Xin.Sse64Fx2.op != Xsse_MOV);
1455 unary = toBool( i->Xin.Sse64Fx2.op == Xsse_RCPF
1456 || i->Xin.Sse64Fx2.op == Xsse_RSQRTF
1457 || i->Xin.Sse64Fx2.op == Xsse_SQRTF );
1458 addHRegUse(u, HRmRead, i->Xin.Sse64Fx2.src);
1459 addHRegUse(u, unary ? HRmWrite : HRmModify,
1460 i->Xin.Sse64Fx2.dst);
1461 return;
1462 case Xin_Sse64FLo:
1463 vassert(i->Xin.Sse64FLo.op != Xsse_MOV);
1464 unary = toBool( i->Xin.Sse64FLo.op == Xsse_RCPF
1465 || i->Xin.Sse64FLo.op == Xsse_RSQRTF
1466 || i->Xin.Sse64FLo.op == Xsse_SQRTF );
1467 addHRegUse(u, HRmRead, i->Xin.Sse64FLo.src);
1468 addHRegUse(u, unary ? HRmWrite : HRmModify,
1469 i->Xin.Sse64FLo.dst);
1470 return;
1471 case Xin_SseReRg:
1472 if (i->Xin.SseReRg.op == Xsse_XOR
1473 && sameHReg(i->Xin.SseReRg.src, i->Xin.SseReRg.dst)) {
1474 /* reg-alloc needs to understand 'xor r,r' as a write of r */
1475 /* (as opposed to a rite of passage :-) */
1476 addHRegUse(u, HRmWrite, i->Xin.SseReRg.dst);
1477 } else {
1478 addHRegUse(u, HRmRead, i->Xin.SseReRg.src);
1479 addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
1480 ? HRmWrite : HRmModify,
1481 i->Xin.SseReRg.dst);
/* An SSE MOV is reported as a reg-reg move. */
1483 if (i->Xin.SseReRg.op == Xsse_MOV) {
1484 u->isRegRegMove = True;
1485 u->regMoveSrc = i->Xin.SseReRg.src;
1486 u->regMoveDst = i->Xin.SseReRg.dst;
1489 return;
1490 case Xin_SseCMov:
1491 addHRegUse(u, HRmRead, i->Xin.SseCMov.src);
1492 addHRegUse(u, HRmModify, i->Xin.SseCMov.dst);
1493 return;
1494 case Xin_SseShuf:
1495 addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
1496 addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
1497 return;
1498 case Xin_EvCheck:
1499 /* We expect both amodes only to mention %ebp, so this is in
1500 fact pointless, since %ebp isn't allocatable, but anyway.. */
1501 addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
1502 addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
1503 return;
1504 case Xin_ProfInc:
1505 /* does not use any registers. */
1506 return;
1507 default:
/* Unknown tag: show the offending instruction, then give up. */
1508 ppX86Instr(i, False);
1509 vpanic("getRegUsage_X86Instr");
1513 /* local helper */
1514 static void mapReg( HRegRemap* m, HReg* r )
1516 *r = lookupHRegRemap(m, *r);
/* Rewrite, in place, every register operand stored inside instruction
   i according to the remapping m.  Individual HReg fields go through
   mapReg; compound operands (amodes, RMI/RM/RI operands) go through
   their own mapRegs_* helpers.  mode64 must be False.  Tags that carry
   no register fields (Xin_Call, Xin_MFence, Xin_FpStSW_AX,
   Xin_ProfInc) are accepted and left untouched.  An unrecognised tag
   prints the instruction and panics. */
1519 void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
1521 vassert(mode64 == False);
1522 switch (i->tag) {
1523 case Xin_Alu32R:
1524 mapRegs_X86RMI(m, i->Xin.Alu32R.src);
1525 mapReg(m, &i->Xin.Alu32R.dst);
1526 return;
1527 case Xin_Alu32M:
1528 mapRegs_X86RI(m, i->Xin.Alu32M.src);
1529 mapRegs_X86AMode(m, i->Xin.Alu32M.dst);
1530 return;
1531 case Xin_Sh32:
/* Only dst is a register field here; src is not remapped. */
1532 mapReg(m, &i->Xin.Sh32.dst);
1533 return;
1534 case Xin_Test32:
1535 mapRegs_X86RM(m, i->Xin.Test32.dst);
1536 return;
1537 case Xin_Unary32:
1538 mapReg(m, &i->Xin.Unary32.dst);
1539 return;
1540 case Xin_Lea32:
1541 mapRegs_X86AMode(m, i->Xin.Lea32.am);
1542 mapReg(m, &i->Xin.Lea32.dst);
1543 return;
1544 case Xin_MulL:
1545 mapRegs_X86RM(m, i->Xin.MulL.src);
1546 return;
1547 case Xin_Div:
1548 mapRegs_X86RM(m, i->Xin.Div.src);
1549 return;
1550 case Xin_Sh3232:
1551 mapReg(m, &i->Xin.Sh3232.src);
1552 mapReg(m, &i->Xin.Sh3232.dst);
1553 return;
1554 case Xin_Push:
1555 mapRegs_X86RMI(m, i->Xin.Push.src);
1556 return;
1557 case Xin_Call:
/* Nothing to remap. */
1558 return;
1559 case Xin_XDirect:
1560 mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
1561 return;
1562 case Xin_XIndir:
1563 mapReg(m, &i->Xin.XIndir.dstGA);
1564 mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
1565 return;
1566 case Xin_XAssisted:
1567 mapReg(m, &i->Xin.XAssisted.dstGA);
1568 mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
1569 return;
1570 case Xin_CMov32:
1571 mapRegs_X86RM(m, i->Xin.CMov32.src);
1572 mapReg(m, &i->Xin.CMov32.dst);
1573 return;
1574 case Xin_LoadEX:
1575 mapRegs_X86AMode(m, i->Xin.LoadEX.src);
1576 mapReg(m, &i->Xin.LoadEX.dst);
1577 return;
1578 case Xin_Store:
1579 mapReg(m, &i->Xin.Store.src);
1580 mapRegs_X86AMode(m, i->Xin.Store.dst);
1581 return;
1582 case Xin_Set32:
1583 mapReg(m, &i->Xin.Set32.dst);
1584 return;
1585 case Xin_Bsfr32:
1586 mapReg(m, &i->Xin.Bsfr32.src);
1587 mapReg(m, &i->Xin.Bsfr32.dst);
1588 return;
1589 case Xin_MFence:
1590 return;
1591 case Xin_ACAS:
1592 mapRegs_X86AMode(m, i->Xin.ACAS.addr);
1593 return;
1594 case Xin_DACAS:
1595 mapRegs_X86AMode(m, i->Xin.DACAS.addr);
1596 return;
1597 case Xin_FpUnary:
1598 mapReg(m, &i->Xin.FpUnary.src);
1599 mapReg(m, &i->Xin.FpUnary.dst);
1600 return;
1601 case Xin_FpBinary:
1602 mapReg(m, &i->Xin.FpBinary.srcL);
1603 mapReg(m, &i->Xin.FpBinary.srcR);
1604 mapReg(m, &i->Xin.FpBinary.dst);
1605 return;
1606 case Xin_FpLdSt:
1607 mapRegs_X86AMode(m, i->Xin.FpLdSt.addr);
1608 mapReg(m, &i->Xin.FpLdSt.reg);
1609 return;
1610 case Xin_FpLdStI:
1611 mapRegs_X86AMode(m, i->Xin.FpLdStI.addr);
1612 mapReg(m, &i->Xin.FpLdStI.reg);
1613 return;
1614 case Xin_Fp64to32:
1615 mapReg(m, &i->Xin.Fp64to32.src);
1616 mapReg(m, &i->Xin.Fp64to32.dst);
1617 return;
1618 case Xin_FpCMov:
1619 mapReg(m, &i->Xin.FpCMov.src);
1620 mapReg(m, &i->Xin.FpCMov.dst);
1621 return;
1622 case Xin_FpLdCW:
1623 mapRegs_X86AMode(m, i->Xin.FpLdCW.addr);
1624 return;
1625 case Xin_FpStSW_AX:
1626 return;
1627 case Xin_FpCmp:
1628 mapReg(m, &i->Xin.FpCmp.srcL);
1629 mapReg(m, &i->Xin.FpCmp.srcR);
1630 mapReg(m, &i->Xin.FpCmp.dst);
1631 return;
1632 case Xin_SseConst:
1633 mapReg(m, &i->Xin.SseConst.dst);
1634 return;
1635 case Xin_SseLdSt:
1636 mapReg(m, &i->Xin.SseLdSt.reg);
1637 mapRegs_X86AMode(m, i->Xin.SseLdSt.addr);
1638 break;
1639 case Xin_SseLdzLO:
1640 mapReg(m, &i->Xin.SseLdzLO.reg);
1641 mapRegs_X86AMode(m, i->Xin.SseLdzLO.addr);
1642 break;
1643 case Xin_Sse32Fx4:
1644 mapReg(m, &i->Xin.Sse32Fx4.src);
1645 mapReg(m, &i->Xin.Sse32Fx4.dst);
1646 return;
1647 case Xin_Sse32FLo:
1648 mapReg(m, &i->Xin.Sse32FLo.src);
1649 mapReg(m, &i->Xin.Sse32FLo.dst);
1650 return;
1651 case Xin_Sse64Fx2:
1652 mapReg(m, &i->Xin.Sse64Fx2.src);
1653 mapReg(m, &i->Xin.Sse64Fx2.dst);
1654 return;
1655 case Xin_Sse64FLo:
1656 mapReg(m, &i->Xin.Sse64FLo.src);
1657 mapReg(m, &i->Xin.Sse64FLo.dst);
1658 return;
1659 case Xin_SseReRg:
1660 mapReg(m, &i->Xin.SseReRg.src);
1661 mapReg(m, &i->Xin.SseReRg.dst);
1662 return;
1663 case Xin_SseCMov:
1664 mapReg(m, &i->Xin.SseCMov.src);
1665 mapReg(m, &i->Xin.SseCMov.dst);
1666 return;
1667 case Xin_SseShuf:
1668 mapReg(m, &i->Xin.SseShuf.src);
1669 mapReg(m, &i->Xin.SseShuf.dst);
1670 return;
1671 case Xin_EvCheck:
1672 /* We expect both amodes only to mention %ebp, so this is in
1673 fact pointless, since %ebp isn't allocatable, but anyway.. */
1674 mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
1675 mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
1676 return;
1677 case Xin_ProfInc:
1678 /* does not use any registers. */
1679 return;
1681 default:
/* Unknown tag: show the offending instruction, then give up. */
1682 ppX86Instr(i, mode64);
1683 vpanic("mapRegs_X86Instr");
1687 /* Generate x86 spill/reload instructions under the direction of the
1688 register allocator. Note it's critical these don't write the
1689 condition codes. */
1691 void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1692 HReg rreg, Int offsetB, Bool mode64 )
1694 X86AMode* am;
1695 vassert(offsetB >= 0);
1696 vassert(!hregIsVirtual(rreg));
1697 vassert(mode64 == False);
1698 *i1 = *i2 = NULL;
1699 am = X86AMode_IR(offsetB, hregX86_EBP());
1700 switch (hregClass(rreg)) {
1701 case HRcInt32:
1702 *i1 = X86Instr_Alu32M ( Xalu_MOV, X86RI_Reg(rreg), am );
1703 return;
1704 case HRcFlt64:
1705 *i1 = X86Instr_FpLdSt ( False/*store*/, 10, rreg, am );
1706 return;
1707 case HRcVec128:
1708 *i1 = X86Instr_SseLdSt ( False/*store*/, rreg, am );
1709 return;
1710 default:
1711 ppHRegClass(hregClass(rreg));
1712 vpanic("genSpill_X86: unimplemented regclass");
1716 void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1717 HReg rreg, Int offsetB, Bool mode64 )
1719 X86AMode* am;
1720 vassert(offsetB >= 0);
1721 vassert(!hregIsVirtual(rreg));
1722 vassert(mode64 == False);
1723 *i1 = *i2 = NULL;
1724 am = X86AMode_IR(offsetB, hregX86_EBP());
1725 switch (hregClass(rreg)) {
1726 case HRcInt32:
1727 *i1 = X86Instr_Alu32R ( Xalu_MOV, X86RMI_Mem(am), rreg );
1728 return;
1729 case HRcFlt64:
1730 *i1 = X86Instr_FpLdSt ( True/*load*/, 10, rreg, am );
1731 return;
1732 case HRcVec128:
1733 *i1 = X86Instr_SseLdSt ( True/*load*/, rreg, am );
1734 return;
1735 default:
1736 ppHRegClass(hregClass(rreg));
1737 vpanic("genReload_X86: unimplemented regclass");
1741 X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
1743 switch (hregClass(from)) {
1744 case HRcInt32:
1745 return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
1746 case HRcVec128:
1747 return X86Instr_SseReRg(Xsse_MOV, from, to);
1748 default:
1749 ppHRegClass(hregClass(from));
1750 vpanic("genMove_X86: unimplemented regclass");
1754 /* The given instruction reads the specified vreg exactly once, and
1755 that vreg is currently located at the given spill offset. If
1756 possible, return a variant of the instruction to one which instead
1757 references the spill slot directly. */
1759 X86Instr* directReload_X86( X86Instr* i, HReg vreg, Short spill_off )
1761 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
1763 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
1764 Convert to: src=RMI_Mem, dst=Reg
1766 if (i->tag == Xin_Alu32R
1767 && (i->Xin.Alu32R.op == Xalu_MOV || i->Xin.Alu32R.op == Xalu_OR
1768 || i->Xin.Alu32R.op == Xalu_XOR)
1769 && i->Xin.Alu32R.src->tag == Xrmi_Reg
1770 && sameHReg(i->Xin.Alu32R.src->Xrmi.Reg.reg, vreg)) {
1771 vassert(! sameHReg(i->Xin.Alu32R.dst, vreg));
1772 return X86Instr_Alu32R(
1773 i->Xin.Alu32R.op,
1774 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP())),
1775 i->Xin.Alu32R.dst
1779 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
1780 Convert to: src=RI_Imm, dst=Mem
1782 if (i->tag == Xin_Alu32R
1783 && (i->Xin.Alu32R.op == Xalu_CMP)
1784 && i->Xin.Alu32R.src->tag == Xrmi_Imm
1785 && sameHReg(i->Xin.Alu32R.dst, vreg)) {
1786 return X86Instr_Alu32M(
1787 i->Xin.Alu32R.op,
1788 X86RI_Imm( i->Xin.Alu32R.src->Xrmi.Imm.imm32 ),
1789 X86AMode_IR( spill_off, hregX86_EBP())
1793 /* Deal with form: Push(RMI_Reg)
1794 Convert to: Push(RMI_Mem)
1796 if (i->tag == Xin_Push
1797 && i->Xin.Push.src->tag == Xrmi_Reg
1798 && sameHReg(i->Xin.Push.src->Xrmi.Reg.reg, vreg)) {
1799 return X86Instr_Push(
1800 X86RMI_Mem( X86AMode_IR( spill_off, hregX86_EBP()))
1804 /* Deal with form: CMov32(src=RM_Reg, dst) where vreg == src
1805 Convert to CMov32(RM_Mem, dst) */
1806 if (i->tag == Xin_CMov32
1807 && i->Xin.CMov32.src->tag == Xrm_Reg
1808 && sameHReg(i->Xin.CMov32.src->Xrm.Reg.reg, vreg)) {
1809 vassert(! sameHReg(i->Xin.CMov32.dst, vreg));
1810 return X86Instr_CMov32(
1811 i->Xin.CMov32.cond,
1812 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() )),
1813 i->Xin.CMov32.dst
1817 /* Deal with form: Test32(imm,RM_Reg vreg) -> Test32(imm,amode) */
1818 if (i->tag == Xin_Test32
1819 && i->Xin.Test32.dst->tag == Xrm_Reg
1820 && sameHReg(i->Xin.Test32.dst->Xrm.Reg.reg, vreg)) {
1821 return X86Instr_Test32(
1822 i->Xin.Test32.imm32,
1823 X86RM_Mem( X86AMode_IR( spill_off, hregX86_EBP() ) )
1827 return NULL;
1831 /* --------- The x86 assembler (bleh.) --------- */
1833 inline static UInt iregEnc ( HReg r )
1835 UInt n;
1836 vassert(hregClass(r) == HRcInt32);
1837 vassert(!hregIsVirtual(r));
1838 n = hregEncoding(r);
1839 vassert(n <= 7);
1840 return n;
1843 inline static UInt fregEnc ( HReg r )
1845 UInt n;
1846 vassert(hregClass(r) == HRcFlt64);
1847 vassert(!hregIsVirtual(r));
1848 n = hregEncoding(r);
1849 vassert(n <= 5);
1850 return n;
1853 inline static UInt vregEnc ( HReg r )
1855 UInt n;
1856 vassert(hregClass(r) == HRcVec128);
1857 vassert(!hregIsVirtual(r));
1858 n = hregEncoding(r);
1859 vassert(n <= 7);
1860 return n;
1863 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
1865 vassert(mod < 4);
1866 vassert((reg|regmem) < 8);
1867 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
1870 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
1872 vassert(shift < 4);
1873 vassert((regindex|regbase) < 8);
1874 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
1877 static UChar* emit32 ( UChar* p, UInt w32 )
1879 *p++ = toUChar( w32 & 0x000000FF);
1880 *p++ = toUChar((w32 >> 8) & 0x000000FF);
1881 *p++ = toUChar((w32 >> 16) & 0x000000FF);
1882 *p++ = toUChar((w32 >> 24) & 0x000000FF);
1883 return p;
1886 /* Does a sign-extend of the lowest 8 bits give
1887 the original number? */
1888 static Bool fits8bits ( UInt w32 )
1890 Int i32 = (Int)w32;
1891 return toBool(i32 == ((Int)(w32 << 24) >> 24));
1895 /* Forming mod-reg-rm bytes and scale-index-base bytes.
1897 greg, 0(ereg) | ereg != ESP && ereg != EBP
1898 = 00 greg ereg
1900 greg, d8(ereg) | ereg != ESP
1901 = 01 greg ereg, d8
1903 greg, d32(ereg) | ereg != ESP
1904 = 10 greg ereg, d32
1906 greg, d8(%esp) = 01 greg 100, 0x24, d8
1908 -----------------------------------------------
1910 greg, d8(base,index,scale)
1911 | index != ESP
1912 = 01 greg 100, scale index base, d8
1914 greg, d32(base,index,scale)
1915 | index != ESP
1916 = 10 greg 100, scale index base, d32
1918 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc, X86AMode* am )
1920 if (am->tag == Xam_IR) {
1921 if (am->Xam.IR.imm == 0
1922 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())
1923 && ! sameHReg(am->Xam.IR.reg, hregX86_EBP()) ) {
1924 *p++ = mkModRegRM(0, gregEnc, iregEnc(am->Xam.IR.reg));
1925 return p;
1927 if (fits8bits(am->Xam.IR.imm)
1928 && ! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1929 *p++ = mkModRegRM(1, gregEnc, iregEnc(am->Xam.IR.reg));
1930 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1931 return p;
1933 if (! sameHReg(am->Xam.IR.reg, hregX86_ESP())) {
1934 *p++ = mkModRegRM(2, gregEnc, iregEnc(am->Xam.IR.reg));
1935 p = emit32(p, am->Xam.IR.imm);
1936 return p;
1938 if (sameHReg(am->Xam.IR.reg, hregX86_ESP())
1939 && fits8bits(am->Xam.IR.imm)) {
1940 *p++ = mkModRegRM(1, gregEnc, 4);
1941 *p++ = 0x24;
1942 *p++ = toUChar(am->Xam.IR.imm & 0xFF);
1943 return p;
1945 ppX86AMode(am);
1946 vpanic("doAMode_M: can't emit amode IR");
1947 /*NOTREACHED*/
1949 if (am->tag == Xam_IRRS) {
1950 if (fits8bits(am->Xam.IRRS.imm)
1951 && ! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1952 *p++ = mkModRegRM(1, gregEnc, 4);
1953 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
1954 iregEnc(am->Xam.IRRS.base));
1955 *p++ = toUChar(am->Xam.IRRS.imm & 0xFF);
1956 return p;
1958 if (! sameHReg(am->Xam.IRRS.index, hregX86_ESP())) {
1959 *p++ = mkModRegRM(2, gregEnc, 4);
1960 *p++ = mkSIB(am->Xam.IRRS.shift, iregEnc(am->Xam.IRRS.index),
1961 iregEnc(am->Xam.IRRS.base));
1962 p = emit32(p, am->Xam.IRRS.imm);
1963 return p;
1965 ppX86AMode(am);
1966 vpanic("doAMode_M: can't emit amode IRRS");
1967 /*NOTREACHED*/
1969 vpanic("doAMode_M: unknown amode");
1970 /*NOTREACHED*/
1973 static UChar* doAMode_M ( UChar* p, HReg greg, X86AMode* am )
1975 return doAMode_M__wrk(p, iregEnc(greg), am);
1978 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc, X86AMode* am )
1980 vassert(gregEnc < 8);
1981 return doAMode_M__wrk(p, gregEnc, am);
1985 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
1986 inline static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc, UInt eregEnc )
1988 *p++ = mkModRegRM(3, gregEnc, eregEnc);
1989 return p;
1992 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
1994 return doAMode_R__wrk(p, iregEnc(greg), iregEnc(ereg));
1997 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc, HReg ereg )
1999 vassert(gregEnc < 8);
2000 return doAMode_R__wrk(p, gregEnc, iregEnc(ereg));
2003 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc, UInt eregEnc )
2005 vassert( (gregEnc|eregEnc) < 8);
2006 return doAMode_R__wrk(p, gregEnc, eregEnc);
2010 /* Emit ffree %st(7) */
2011 static UChar* do_ffree_st7 ( UChar* p )
2013 *p++ = 0xDD;
2014 *p++ = 0xC7;
2015 return p;
2018 /* Emit fstp %st(i), 1 <= i <= 7 */
2019 static UChar* do_fstp_st ( UChar* p, Int i )
2021 vassert(1 <= i && i <= 7);
2022 *p++ = 0xDD;
2023 *p++ = toUChar(0xD8+i);
2024 return p;
2027 /* Emit fld %st(i), 0 <= i <= 6 */
2028 static UChar* do_fld_st ( UChar* p, Int i )
2030 vassert(0 <= i && i <= 6);
2031 *p++ = 0xD9;
2032 *p++ = toUChar(0xC0+i);
2033 return p;
2036 /* Emit f<op> %st(0) */
2037 static UChar* do_fop1_st ( UChar* p, X86FpOp op )
2039 switch (op) {
2040 case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
2041 case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
2042 case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
2043 case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
2044 case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
2045 case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
2046 case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
2047 case Xfp_MOV: break;
2048 case Xfp_TAN:
2049 /* fptan pushes 1.0 on the FP stack, except when the argument
2050 is out of range. Hence we have to do the instruction,
2051 then inspect C2 to see if there is an out of range
2052 condition. If there is, we skip the fincstp that is used
2053 by the in-range case to get rid of this extra 1.0
2054 value. */
2055 p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */
2056 *p++ = 0xD9; *p++ = 0xF2; // fptan
2057 *p++ = 0x50; // pushl %eax
2058 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
2059 *p++ = 0x66; *p++ = 0xA9;
2060 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
2061 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
2062 *p++ = 0xD9; *p++ = 0xF7; // fincstp
2063 *p++ = 0x58; // after_fincstp: popl %eax
2064 break;
2065 default:
2066 vpanic("do_fop1_st: unknown op");
2068 return p;
2071 /* Emit f<op> %st(i), 1 <= i <= 5 */
2072 static UChar* do_fop2_st ( UChar* p, X86FpOp op, Int i )
2074 Int subopc;
2075 switch (op) {
2076 case Xfp_ADD: subopc = 0; break;
2077 case Xfp_SUB: subopc = 4; break;
2078 case Xfp_MUL: subopc = 1; break;
2079 case Xfp_DIV: subopc = 6; break;
2080 default: vpanic("do_fop2_st: unknown op");
2082 *p++ = 0xD8;
2083 p = doAMode_R_enc_enc(p, subopc, i);
2084 return p;
2087 /* Push a 32-bit word on the stack. The word depends on tags[3:0];
2088 each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
2090 static UChar* push_word_from_tags ( UChar* p, UShort tags )
2092 UInt w;
2093 vassert(0 == (tags & ~0xF));
2094 if (tags == 0) {
2095 /* pushl $0x00000000 */
2096 *p++ = 0x6A;
2097 *p++ = 0x00;
2099 else
2100 /* pushl $0xFFFFFFFF */
2101 if (tags == 0xF) {
2102 *p++ = 0x6A;
2103 *p++ = 0xFF;
2104 } else {
2105 vassert(0); /* awaiting test case */
2106 w = 0;
2107 if (tags & 1) w |= 0x000000FF;
2108 if (tags & 2) w |= 0x0000FF00;
2109 if (tags & 4) w |= 0x00FF0000;
2110 if (tags & 8) w |= 0xFF000000;
2111 *p++ = 0x68;
2112 p = emit32(p, w);
2114 return p;
2117 /* Emit an instruction into buf and return the number of bytes used.
2118 Note that buf is not the insn's final place, and therefore it is
2119 imperative to emit position-independent code. If the emitted
2120 instruction was a profiler inc, set *is_profInc to True, else
2121 leave it unchanged. */
2123 Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
2124 UChar* buf, Int nbuf, const X86Instr* i,
2125 Bool mode64, VexEndness endness_host,
2126 const void* disp_cp_chain_me_to_slowEP,
2127 const void* disp_cp_chain_me_to_fastEP,
2128 const void* disp_cp_xindir,
2129 const void* disp_cp_xassisted )
2131 UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2133 UInt xtra;
2134 UChar* p = &buf[0];
2135 UChar* ptmp;
2136 vassert(nbuf >= 32);
2137 vassert(mode64 == False);
2139 /* vex_printf("asm ");ppX86Instr(i, mode64); vex_printf("\n"); */
2141 switch (i->tag) {
2143 case Xin_Alu32R:
2144 /* Deal specially with MOV */
2145 if (i->Xin.Alu32R.op == Xalu_MOV) {
2146 switch (i->Xin.Alu32R.src->tag) {
2147 case Xrmi_Imm:
2148 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Alu32R.dst));
2149 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2150 goto done;
2151 case Xrmi_Reg:
2152 *p++ = 0x89;
2153 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2154 i->Xin.Alu32R.dst);
2155 goto done;
2156 case Xrmi_Mem:
2157 *p++ = 0x8B;
2158 p = doAMode_M(p, i->Xin.Alu32R.dst,
2159 i->Xin.Alu32R.src->Xrmi.Mem.am);
2160 goto done;
2161 default:
2162 goto bad;
2165 /* MUL */
2166 if (i->Xin.Alu32R.op == Xalu_MUL) {
2167 switch (i->Xin.Alu32R.src->tag) {
2168 case Xrmi_Reg:
2169 *p++ = 0x0F;
2170 *p++ = 0xAF;
2171 p = doAMode_R(p, i->Xin.Alu32R.dst,
2172 i->Xin.Alu32R.src->Xrmi.Reg.reg);
2173 goto done;
2174 case Xrmi_Mem:
2175 *p++ = 0x0F;
2176 *p++ = 0xAF;
2177 p = doAMode_M(p, i->Xin.Alu32R.dst,
2178 i->Xin.Alu32R.src->Xrmi.Mem.am);
2179 goto done;
2180 case Xrmi_Imm:
2181 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2182 *p++ = 0x6B;
2183 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2184 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2185 } else {
2186 *p++ = 0x69;
2187 p = doAMode_R(p, i->Xin.Alu32R.dst, i->Xin.Alu32R.dst);
2188 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2190 goto done;
2191 default:
2192 goto bad;
2195 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2196 opc = opc_rr = subopc_imm = opc_imma = 0;
2197 switch (i->Xin.Alu32R.op) {
2198 case Xalu_ADC: opc = 0x13; opc_rr = 0x11;
2199 subopc_imm = 2; opc_imma = 0x15; break;
2200 case Xalu_ADD: opc = 0x03; opc_rr = 0x01;
2201 subopc_imm = 0; opc_imma = 0x05; break;
2202 case Xalu_SUB: opc = 0x2B; opc_rr = 0x29;
2203 subopc_imm = 5; opc_imma = 0x2D; break;
2204 case Xalu_SBB: opc = 0x1B; opc_rr = 0x19;
2205 subopc_imm = 3; opc_imma = 0x1D; break;
2206 case Xalu_AND: opc = 0x23; opc_rr = 0x21;
2207 subopc_imm = 4; opc_imma = 0x25; break;
2208 case Xalu_XOR: opc = 0x33; opc_rr = 0x31;
2209 subopc_imm = 6; opc_imma = 0x35; break;
2210 case Xalu_OR: opc = 0x0B; opc_rr = 0x09;
2211 subopc_imm = 1; opc_imma = 0x0D; break;
2212 case Xalu_CMP: opc = 0x3B; opc_rr = 0x39;
2213 subopc_imm = 7; opc_imma = 0x3D; break;
2214 default: goto bad;
2216 switch (i->Xin.Alu32R.src->tag) {
2217 case Xrmi_Imm:
2218 if (sameHReg(i->Xin.Alu32R.dst, hregX86_EAX())
2219 && !fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2220 *p++ = toUChar(opc_imma);
2221 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2222 } else
2223 if (fits8bits(i->Xin.Alu32R.src->Xrmi.Imm.imm32)) {
2224 *p++ = 0x83;
2225 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
2226 *p++ = toUChar(0xFF & i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2227 } else {
2228 *p++ = 0x81;
2229 p = doAMode_R_enc_reg(p, subopc_imm, i->Xin.Alu32R.dst);
2230 p = emit32(p, i->Xin.Alu32R.src->Xrmi.Imm.imm32);
2232 goto done;
2233 case Xrmi_Reg:
2234 *p++ = toUChar(opc_rr);
2235 p = doAMode_R(p, i->Xin.Alu32R.src->Xrmi.Reg.reg,
2236 i->Xin.Alu32R.dst);
2237 goto done;
2238 case Xrmi_Mem:
2239 *p++ = toUChar(opc);
2240 p = doAMode_M(p, i->Xin.Alu32R.dst,
2241 i->Xin.Alu32R.src->Xrmi.Mem.am);
2242 goto done;
2243 default:
2244 goto bad;
2246 break;
2248 case Xin_Alu32M:
2249 /* Deal specially with MOV */
2250 if (i->Xin.Alu32M.op == Xalu_MOV) {
2251 switch (i->Xin.Alu32M.src->tag) {
2252 case Xri_Reg:
2253 *p++ = 0x89;
2254 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2255 i->Xin.Alu32M.dst);
2256 goto done;
2257 case Xri_Imm:
2258 *p++ = 0xC7;
2259 p = doAMode_M_enc(p, 0, i->Xin.Alu32M.dst);
2260 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2261 goto done;
2262 default:
2263 goto bad;
2266 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2267 allowed here. */
2268 opc = subopc_imm = opc_imma = 0;
2269 switch (i->Xin.Alu32M.op) {
2270 case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
2271 case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
2272 case Xalu_CMP: opc = 0x39; subopc_imm = 7; break;
2273 default: goto bad;
2275 switch (i->Xin.Alu32M.src->tag) {
2276 case Xri_Reg:
2277 *p++ = toUChar(opc);
2278 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2279 i->Xin.Alu32M.dst);
2280 goto done;
2281 case Xri_Imm:
2282 if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
2283 *p++ = 0x83;
2284 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
2285 *p++ = toUChar(0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32);
2286 goto done;
2287 } else {
2288 *p++ = 0x81;
2289 p = doAMode_M_enc(p, subopc_imm, i->Xin.Alu32M.dst);
2290 p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
2291 goto done;
2293 default:
2294 goto bad;
2296 break;
2298 case Xin_Sh32:
2299 opc_cl = opc_imm = subopc = 0;
2300 switch (i->Xin.Sh32.op) {
2301 case Xsh_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2302 case Xsh_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2303 case Xsh_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2304 default: goto bad;
2306 if (i->Xin.Sh32.src == 0) {
2307 *p++ = toUChar(opc_cl);
2308 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
2309 } else {
2310 *p++ = toUChar(opc_imm);
2311 p = doAMode_R_enc_reg(p, subopc, i->Xin.Sh32.dst);
2312 *p++ = (UChar)(i->Xin.Sh32.src);
2314 goto done;
2316 case Xin_Test32:
2317 if (i->Xin.Test32.dst->tag == Xrm_Reg) {
2318 /* testl $imm32, %reg */
2319 *p++ = 0xF7;
2320 p = doAMode_R_enc_reg(p, 0, i->Xin.Test32.dst->Xrm.Reg.reg);
2321 p = emit32(p, i->Xin.Test32.imm32);
2322 goto done;
2323 } else {
2324 /* testl $imm32, amode */
2325 *p++ = 0xF7;
2326 p = doAMode_M_enc(p, 0, i->Xin.Test32.dst->Xrm.Mem.am);
2327 p = emit32(p, i->Xin.Test32.imm32);
2328 goto done;
2331 case Xin_Unary32:
2332 if (i->Xin.Unary32.op == Xun_NOT) {
2333 *p++ = 0xF7;
2334 p = doAMode_R_enc_reg(p, 2, i->Xin.Unary32.dst);
2335 goto done;
2337 if (i->Xin.Unary32.op == Xun_NEG) {
2338 *p++ = 0xF7;
2339 p = doAMode_R_enc_reg(p, 3, i->Xin.Unary32.dst);
2340 goto done;
2342 break;
2344 case Xin_Lea32:
2345 *p++ = 0x8D;
2346 p = doAMode_M(p, i->Xin.Lea32.dst, i->Xin.Lea32.am);
2347 goto done;
2349 case Xin_MulL:
2350 subopc = i->Xin.MulL.syned ? 5 : 4;
2351 *p++ = 0xF7;
2352 switch (i->Xin.MulL.src->tag) {
2353 case Xrm_Mem:
2354 p = doAMode_M_enc(p, subopc, i->Xin.MulL.src->Xrm.Mem.am);
2355 goto done;
2356 case Xrm_Reg:
2357 p = doAMode_R_enc_reg(p, subopc, i->Xin.MulL.src->Xrm.Reg.reg);
2358 goto done;
2359 default:
2360 goto bad;
2362 break;
2364 case Xin_Div:
2365 subopc = i->Xin.Div.syned ? 7 : 6;
2366 *p++ = 0xF7;
2367 switch (i->Xin.Div.src->tag) {
2368 case Xrm_Mem:
2369 p = doAMode_M_enc(p, subopc, i->Xin.Div.src->Xrm.Mem.am);
2370 goto done;
2371 case Xrm_Reg:
2372 p = doAMode_R_enc_reg(p, subopc, i->Xin.Div.src->Xrm.Reg.reg);
2373 goto done;
2374 default:
2375 goto bad;
2377 break;
2379 case Xin_Sh3232:
2380 vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
2381 if (i->Xin.Sh3232.amt == 0) {
2382 /* shldl/shrdl by %cl */
2383 *p++ = 0x0F;
2384 if (i->Xin.Sh3232.op == Xsh_SHL) {
2385 *p++ = 0xA5;
2386 } else {
2387 *p++ = 0xAD;
2389 p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
2390 goto done;
2392 break;
2394 case Xin_Push:
2395 switch (i->Xin.Push.src->tag) {
2396 case Xrmi_Mem:
2397 *p++ = 0xFF;
2398 p = doAMode_M_enc(p, 6, i->Xin.Push.src->Xrmi.Mem.am);
2399 goto done;
2400 case Xrmi_Imm:
2401 *p++ = 0x68;
2402 p = emit32(p, i->Xin.Push.src->Xrmi.Imm.imm32);
2403 goto done;
2404 case Xrmi_Reg:
2405 *p++ = toUChar(0x50 + iregEnc(i->Xin.Push.src->Xrmi.Reg.reg));
2406 goto done;
2407 default:
2408 goto bad;
2411 case Xin_Call:
2412 if (i->Xin.Call.cond != Xcc_ALWAYS
2413 && i->Xin.Call.rloc.pri != RLPri_None) {
2414 /* The call might not happen (it isn't unconditional) and it
2415 returns a result. In this case we will need to generate a
2416 control flow diamond to put 0x555..555 in the return
2417 register(s) in the case where the call doesn't happen. If
2418 this ever becomes necessary, maybe copy code from the ARM
2419 equivalent. Until that day, just give up. */
2420 goto bad;
2422 /* See detailed comment for Xin_Call in getRegUsage_X86Instr above
2423 for explanation of this. */
2424 switch (i->Xin.Call.regparms) {
2425 case 0: irno = iregEnc(hregX86_EAX()); break;
2426 case 1: irno = iregEnc(hregX86_EDX()); break;
2427 case 2: irno = iregEnc(hregX86_ECX()); break;
2428 case 3: irno = iregEnc(hregX86_EDI()); break;
2429 default: vpanic(" emit_X86Instr:call:regparms");
2431 /* jump over the following two insns if the condition does not
2432 hold */
2433 if (i->Xin.Call.cond != Xcc_ALWAYS) {
2434 *p++ = toUChar(0x70 + (0xF & (i->Xin.Call.cond ^ 1)));
2435 *p++ = 0x07; /* 7 bytes in the next two insns */
2437 /* movl $target, %tmp */
2438 *p++ = toUChar(0xB8 + irno);
2439 p = emit32(p, i->Xin.Call.target);
2440 /* call *%tmp */
2441 *p++ = 0xFF;
2442 *p++ = toUChar(0xD0 + irno);
2443 goto done;
2445 case Xin_XDirect: {
2446 /* NB: what goes on here has to be very closely coordinated with the
2447 chainXDirect_X86 and unchainXDirect_X86 below. */
2448 /* We're generating chain-me requests here, so we need to be
2449 sure this is actually allowed -- no-redir translations can't
2450 use chain-me's. Hence: */
2451 vassert(disp_cp_chain_me_to_slowEP != NULL);
2452 vassert(disp_cp_chain_me_to_fastEP != NULL);
2454 /* Use ptmp for backpatching conditional jumps. */
2455 ptmp = NULL;
2457 /* First off, if this is conditional, create a conditional
2458 jump over the rest of it. */
2459 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2460 /* jmp fwds if !condition */
2461 *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
2462 ptmp = p; /* fill in this bit later */
2463 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2466 /* Update the guest EIP. */
2467 /* movl $dstGA, amEIP */
2468 *p++ = 0xC7;
2469 p = doAMode_M_enc(p, 0, i->Xin.XDirect.amEIP);
2470 p = emit32(p, i->Xin.XDirect.dstGA);
2472 /* --- FIRST PATCHABLE BYTE follows --- */
2473 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
2474 to) backs up the return address, so as to find the address of
2475 the first patchable byte. So: don't change the length of the
2476 two instructions below. */
2477 /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
2478 *p++ = 0xBA;
2479 const void* disp_cp_chain_me
2480 = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
2481 : disp_cp_chain_me_to_slowEP;
2482 p = emit32(p, (UInt)(Addr)disp_cp_chain_me);
2483 /* call *%edx */
2484 *p++ = 0xFF;
2485 *p++ = 0xD2;
2486 /* --- END of PATCHABLE BYTES --- */
2488 /* Fix up the conditional jump, if there was one. */
2489 if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
2490 Int delta = p - ptmp;
2491 vassert(delta > 0 && delta < 40);
2492 *ptmp = toUChar(delta-1);
2494 goto done;
2497 case Xin_XIndir: {
2498 /* We're generating transfers that could lead indirectly to a
2499 chain-me, so we need to be sure this is actually allowed --
2500 no-redir translations are not allowed to reach normal
2501 translations without going through the scheduler. That means
2502 no XDirects or XIndirs out from no-redir translations.
2503 Hence: */
2504 vassert(disp_cp_xindir != NULL);
2506 /* Use ptmp for backpatching conditional jumps. */
2507 ptmp = NULL;
2509 /* First off, if this is conditional, create a conditional
2510 jump over the rest of it. */
2511 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2512 /* jmp fwds if !condition */
2513 *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
2514 ptmp = p; /* fill in this bit later */
2515 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2518 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2519 *p++ = 0x89;
2520 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2522 /* movl $disp_indir, %edx */
2523 *p++ = 0xBA;
2524 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
2525 /* jmp *%edx */
2526 *p++ = 0xFF;
2527 *p++ = 0xE2;
2529 /* Fix up the conditional jump, if there was one. */
2530 if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
2531 Int delta = p - ptmp;
2532 vassert(delta > 0 && delta < 40);
2533 *ptmp = toUChar(delta-1);
2535 goto done;
2538 case Xin_XAssisted: {
2539 /* Use ptmp for backpatching conditional jumps. */
2540 ptmp = NULL;
2542 /* First off, if this is conditional, create a conditional
2543 jump over the rest of it. */
2544 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2545 /* jmp fwds if !condition */
2546 *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
2547 ptmp = p; /* fill in this bit later */
2548 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2551 /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
2552 *p++ = 0x89;
2553 p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
2554 /* movl $magic_number, %ebp. */
2555 UInt trcval = 0;
2556 switch (i->Xin.XAssisted.jk) {
2557 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
2558 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
2559 case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
2560 case Ijk_Sys_int129: trcval = VEX_TRC_JMP_SYS_INT129; break;
2561 case Ijk_Sys_int130: trcval = VEX_TRC_JMP_SYS_INT130; break;
2562 case Ijk_Sys_int145: trcval = VEX_TRC_JMP_SYS_INT145; break;
2563 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
2564 case Ijk_Sys_sysenter: trcval = VEX_TRC_JMP_SYS_SYSENTER; break;
2565 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
2566 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
2567 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
2568 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
2569 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
2570 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
2571 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
2572 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
2573 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
2574 /* We don't expect to see the following being assisted. */
2575 case Ijk_Ret:
2576 case Ijk_Call:
2577 /* fallthrough */
2578 default:
2579 ppIRJumpKind(i->Xin.XAssisted.jk);
2580 vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
2582 vassert(trcval != 0);
2583 *p++ = 0xBD;
2584 p = emit32(p, trcval);
2586 /* movl $disp_indir, %edx */
2587 *p++ = 0xBA;
2588 p = emit32(p, (UInt)(Addr)disp_cp_xassisted);
2589 /* jmp *%edx */
2590 *p++ = 0xFF;
2591 *p++ = 0xE2;
2593 /* Fix up the conditional jump, if there was one. */
2594 if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
2595 Int delta = p - ptmp;
2596 vassert(delta > 0 && delta < 40);
2597 *ptmp = toUChar(delta-1);
2599 goto done;
2602 case Xin_CMov32:
2603 vassert(i->Xin.CMov32.cond != Xcc_ALWAYS);
2605 /* This generates cmov, which is illegal on P54/P55. */
2607 *p++ = 0x0F;
2608 *p++ = toUChar(0x40 + (0xF & i->Xin.CMov32.cond));
2609 if (i->Xin.CMov32.src->tag == Xrm_Reg) {
2610 p = doAMode_R(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Reg.reg);
2611 goto done;
2613 if (i->Xin.CMov32.src->tag == Xrm_Mem) {
2614 p = doAMode_M(p, i->Xin.CMov32.dst, i->Xin.CMov32.src->Xrm.Mem.am);
2615 goto done;
2619 /* Alternative version which works on any x86 variant. */
2620 /* jmp fwds if !condition */
2621 *p++ = toUChar(0x70 + (i->Xin.CMov32.cond ^ 1));
2622 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2623 ptmp = p;
2625 switch (i->Xin.CMov32.src->tag) {
2626 case Xrm_Reg:
2627 /* Big sigh. This is movl E -> G ... */
2628 *p++ = 0x89;
2629 p = doAMode_R(p, i->Xin.CMov32.src->Xrm.Reg.reg,
2630 i->Xin.CMov32.dst);
2632 break;
2633 case Xrm_Mem:
2634 /* ... whereas this is movl G -> E. That's why the args
2635 to doAMode_R appear to be the wrong way round in the
2636 Xrm_Reg case. */
2637 *p++ = 0x8B;
2638 p = doAMode_M(p, i->Xin.CMov32.dst,
2639 i->Xin.CMov32.src->Xrm.Mem.am);
2640 break;
2641 default:
2642 goto bad;
2644 /* Fill in the jump offset. */
2645 *(ptmp-1) = toUChar(p - ptmp);
2646 goto done;
2648 break;
2650 case Xin_LoadEX:
2651 if (i->Xin.LoadEX.szSmall == 1 && !i->Xin.LoadEX.syned) {
2652 /* movzbl */
2653 *p++ = 0x0F;
2654 *p++ = 0xB6;
2655 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2656 goto done;
2658 if (i->Xin.LoadEX.szSmall == 2 && !i->Xin.LoadEX.syned) {
2659 /* movzwl */
2660 *p++ = 0x0F;
2661 *p++ = 0xB7;
2662 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2663 goto done;
2665 if (i->Xin.LoadEX.szSmall == 1 && i->Xin.LoadEX.syned) {
2666 /* movsbl */
2667 *p++ = 0x0F;
2668 *p++ = 0xBE;
2669 p = doAMode_M(p, i->Xin.LoadEX.dst, i->Xin.LoadEX.src);
2670 goto done;
2672 break;
2674 case Xin_Set32:
2675 /* Make the destination register be 1 or 0, depending on whether
2676 the relevant condition holds. We have to dodge and weave
2677 when the destination is %esi or %edi as we cannot directly
2678 emit the native 'setb %reg' for those. Further complication:
2679 the top 24 bits of the destination should be forced to zero,
2680 but doing 'xor %r,%r' kills the flag(s) we are about to read.
2681 Sigh. So start off by moving $0 into the dest. */
2683 /* Do we need to swap in %eax? */
2684 if (iregEnc(i->Xin.Set32.dst) >= 4) {
2685 /* xchg %eax, %dst */
2686 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2687 /* movl $0, %eax */
2688 *p++ =toUChar(0xB8 + iregEnc(hregX86_EAX()));
2689 p = emit32(p, 0);
2690 /* setb lo8(%eax) */
2691 *p++ = 0x0F;
2692 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2693 p = doAMode_R_enc_reg(p, 0, hregX86_EAX());
2694 /* xchg %eax, %dst */
2695 *p++ = toUChar(0x90 + iregEnc(i->Xin.Set32.dst));
2696 } else {
2697 /* movl $0, %dst */
2698 *p++ = toUChar(0xB8 + iregEnc(i->Xin.Set32.dst));
2699 p = emit32(p, 0);
2700 /* setb lo8(%dst) */
2701 *p++ = 0x0F;
2702 *p++ = toUChar(0x90 + (0xF & i->Xin.Set32.cond));
2703 p = doAMode_R_enc_reg(p, 0, i->Xin.Set32.dst);
2705 goto done;
2707 case Xin_Bsfr32:
2708 *p++ = 0x0F;
2709 if (i->Xin.Bsfr32.isFwds) {
2710 *p++ = 0xBC;
2711 } else {
2712 *p++ = 0xBD;
2714 p = doAMode_R(p, i->Xin.Bsfr32.dst, i->Xin.Bsfr32.src);
2715 goto done;
2717 case Xin_MFence:
2718 /* see comment in hdefs.h re this insn */
2719 if (0) vex_printf("EMIT FENCE\n");
2720 if (i->Xin.MFence.hwcaps & (VEX_HWCAPS_X86_SSE3
2721 |VEX_HWCAPS_X86_SSE2)) {
2722 /* mfence */
2723 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
2724 goto done;
2726 if (i->Xin.MFence.hwcaps & VEX_HWCAPS_X86_MMXEXT) {
2727 /* sfence */
2728 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF8;
2729 /* lock addl $0,0(%esp) */
2730 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2731 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2732 goto done;
2734 if (i->Xin.MFence.hwcaps == 0/*baseline, no SSE*/) {
2735 /* lock addl $0,0(%esp) */
2736 *p++ = 0xF0; *p++ = 0x83; *p++ = 0x44;
2737 *p++ = 0x24; *p++ = 0x00; *p++ = 0x00;
2738 goto done;
2740 vpanic("emit_X86Instr:mfence:hwcaps");
2741 /*NOTREACHED*/
2742 break;
2744 case Xin_ACAS:
2745 /* lock */
2746 *p++ = 0xF0;
2747 /* cmpxchg{b,w,l} %ebx,mem. Expected-value in %eax, new value
2748 in %ebx. The new-value register is hardwired to be %ebx
2749 since letting it be any integer register gives the problem
2750 that %sil and %dil are unaddressable on x86 and hence we
2751 would have to resort to the same kind of trickery as with
2752 byte-sized Xin.Store, just below. Given that this isn't
2753 performance critical, it is simpler just to force the
2754 register operand to %ebx (could equally be %ecx or %edx).
2755 (Although %ebx is more consistent with cmpxchg8b.) */
2756 if (i->Xin.ACAS.sz == 2) *p++ = 0x66;
2757 *p++ = 0x0F;
2758 if (i->Xin.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
2759 p = doAMode_M(p, hregX86_EBX(), i->Xin.ACAS.addr);
2760 goto done;
2762 case Xin_DACAS:
2763 /* lock */
2764 *p++ = 0xF0;
2765 /* cmpxchg8b m64. Expected-value in %edx:%eax, new value
2766 in %ecx:%ebx. All 4 regs are hardwired in the ISA, so
2767 aren't encoded in the insn. */
2768 *p++ = 0x0F;
2769 *p++ = 0xC7;
2770 p = doAMode_M_enc(p, 1, i->Xin.DACAS.addr);
2771 goto done;
2773 case Xin_Store:
2774 if (i->Xin.Store.sz == 2) {
2775 /* This case, at least, is simple, given that we can
2776 reference the low 16 bits of any integer register. */
2777 *p++ = 0x66;
2778 *p++ = 0x89;
2779 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2780 goto done;
2783 if (i->Xin.Store.sz == 1) {
2784 /* We have to do complex dodging and weaving if src is not
2785 the low 8 bits of %eax/%ebx/%ecx/%edx. */
2786 if (iregEnc(i->Xin.Store.src) < 4) {
2787 /* we're OK, can do it directly */
2788 *p++ = 0x88;
2789 p = doAMode_M(p, i->Xin.Store.src, i->Xin.Store.dst);
2790 goto done;
2791 } else {
2792 /* Bleh. This means the source is %edi or %esi. Since
2793 the address mode can only mention three registers, at
2794 least one of %eax/%ebx/%ecx/%edx must be available to
2795 temporarily swap the source into, so the store can
2796 happen. So we have to look at the regs mentioned
2797 in the amode. */
2798 HReg swap = INVALID_HREG;
2799 HReg eax = hregX86_EAX(), ebx = hregX86_EBX(),
2800 ecx = hregX86_ECX(), edx = hregX86_EDX();
2801 HRegUsage u;
2802 initHRegUsage(&u);
2803 addRegUsage_X86AMode(&u, i->Xin.Store.dst);
2804 /**/ if (! HRegUsage__contains(&u, eax)) { swap = eax; }
2805 else if (! HRegUsage__contains(&u, ebx)) { swap = ebx; }
2806 else if (! HRegUsage__contains(&u, ecx)) { swap = ecx; }
2807 else if (! HRegUsage__contains(&u, edx)) { swap = edx; }
2808 vassert(! hregIsInvalid(swap));
2809 /* xchgl %source, %swap. Could do better if swap is %eax. */
2810 *p++ = 0x87;
2811 p = doAMode_R(p, i->Xin.Store.src, swap);
2812 /* movb lo8{%swap}, (dst) */
2813 *p++ = 0x88;
2814 p = doAMode_M(p, swap, i->Xin.Store.dst);
2815 /* xchgl %source, %swap. Could do better if swap is %eax. */
2816 *p++ = 0x87;
2817 p = doAMode_R(p, i->Xin.Store.src, swap);
2818 goto done;
2820 } /* if (i->Xin.Store.sz == 1) */
2821 break;
2823 case Xin_FpUnary:
2824 /* gop %src, %dst
2825 --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
2827 p = do_ffree_st7(p);
2828 p = do_fld_st(p, 0+fregEnc(i->Xin.FpUnary.src));
2829 p = do_fop1_st(p, i->Xin.FpUnary.op);
2830 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpUnary.dst));
2831 goto done;
2833 case Xin_FpBinary:
2834 if (i->Xin.FpBinary.op == Xfp_YL2X
2835 || i->Xin.FpBinary.op == Xfp_YL2XP1) {
2836 /* Have to do this specially. */
2837 /* ffree %st7 ; fld %st(srcL) ;
2838 ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
2839 p = do_ffree_st7(p);
2840 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2841 p = do_ffree_st7(p);
2842 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2843 *p++ = 0xD9;
2844 *p++ = toUChar(i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9);
2845 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2846 goto done;
2848 if (i->Xin.FpBinary.op == Xfp_ATAN) {
2849 /* Have to do this specially. */
2850 /* ffree %st7 ; fld %st(srcL) ;
2851 ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
2852 p = do_ffree_st7(p);
2853 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2854 p = do_ffree_st7(p);
2855 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcR));
2856 *p++ = 0xD9; *p++ = 0xF3;
2857 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2858 goto done;
2860 if (i->Xin.FpBinary.op == Xfp_PREM
2861 || i->Xin.FpBinary.op == Xfp_PREM1
2862 || i->Xin.FpBinary.op == Xfp_SCALE) {
2863 /* Have to do this specially. */
2864 /* ffree %st7 ; fld %st(srcR) ;
2865 ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
2866 fincstp ; ffree %st7 */
2867 p = do_ffree_st7(p);
2868 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcR));
2869 p = do_ffree_st7(p);
2870 p = do_fld_st(p, 1+fregEnc(i->Xin.FpBinary.srcL));
2871 *p++ = 0xD9;
2872 switch (i->Xin.FpBinary.op) {
2873 case Xfp_PREM: *p++ = 0xF8; break;
2874 case Xfp_PREM1: *p++ = 0xF5; break;
2875 case Xfp_SCALE: *p++ = 0xFD; break;
2876 default: vpanic("emitX86Instr(FpBinary,PREM/PREM1/SCALE)");
2878 p = do_fstp_st(p, 2+fregEnc(i->Xin.FpBinary.dst));
2879 *p++ = 0xD9; *p++ = 0xF7;
2880 p = do_ffree_st7(p);
2881 goto done;
2883 /* General case */
2884 /* gop %srcL, %srcR, %dst
2885 --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
2887 p = do_ffree_st7(p);
2888 p = do_fld_st(p, 0+fregEnc(i->Xin.FpBinary.srcL));
2889 p = do_fop2_st(p, i->Xin.FpBinary.op,
2890 1+fregEnc(i->Xin.FpBinary.srcR));
2891 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpBinary.dst));
2892 goto done;
2894 case Xin_FpLdSt:
2895 if (i->Xin.FpLdSt.isLoad) {
2896 /* Load from memory into %fakeN.
2897 --> ffree %st(7) ; fld{s/l/t} amode ; fstp st(N+1)
2899 p = do_ffree_st7(p);
2900 switch (i->Xin.FpLdSt.sz) {
2901 case 4:
2902 *p++ = 0xD9;
2903 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2904 break;
2905 case 8:
2906 *p++ = 0xDD;
2907 p = doAMode_M_enc(p, 0/*subopcode*/, i->Xin.FpLdSt.addr);
2908 break;
2909 case 10:
2910 *p++ = 0xDB;
2911 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdSt.addr);
2912 break;
2913 default:
2914 vpanic("emitX86Instr(FpLdSt,load)");
2916 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdSt.reg));
2917 goto done;
2918 } else {
2919 /* Store from %fakeN into memory.
2920 --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
2922 p = do_ffree_st7(p);
2923 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdSt.reg));
2924 switch (i->Xin.FpLdSt.sz) {
2925 case 4:
2926 *p++ = 0xD9;
2927 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2928 break;
2929 case 8:
2930 *p++ = 0xDD;
2931 p = doAMode_M_enc(p, 3/*subopcode*/, i->Xin.FpLdSt.addr);
2932 break;
2933 case 10:
2934 *p++ = 0xDB;
2935 p = doAMode_M_enc(p, 7/*subopcode*/, i->Xin.FpLdSt.addr);
2936 break;
2937 default:
2938 vpanic("emitX86Instr(FpLdSt,store)");
2940 goto done;
2942 break;
2944 case Xin_FpLdStI:
2945 if (i->Xin.FpLdStI.isLoad) {
2946 /* Load from memory into %fakeN, converting from an int.
2947 --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
2949 switch (i->Xin.FpLdStI.sz) {
2950 case 8: opc = 0xDF; subopc_imm = 5; break;
2951 case 4: opc = 0xDB; subopc_imm = 0; break;
2952 case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
2953 default: vpanic("emitX86Instr(Xin_FpLdStI-load)");
2955 p = do_ffree_st7(p);
2956 *p++ = toUChar(opc);
2957 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2958 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpLdStI.reg));
2959 goto done;
2960 } else {
2961 /* Store from %fakeN into memory, converting to an int.
2962 --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
2964 switch (i->Xin.FpLdStI.sz) {
2965 case 8: opc = 0xDF; subopc_imm = 7; break;
2966 case 4: opc = 0xDB; subopc_imm = 3; break;
2967 case 2: opc = 0xDF; subopc_imm = 3; break;
2968 default: vpanic("emitX86Instr(Xin_FpLdStI-store)");
2970 p = do_ffree_st7(p);
2971 p = do_fld_st(p, 0+fregEnc(i->Xin.FpLdStI.reg));
2972 *p++ = toUChar(opc);
2973 p = doAMode_M_enc(p, subopc_imm/*subopcode*/, i->Xin.FpLdStI.addr);
2974 goto done;
2976 break;
2978 case Xin_Fp64to32:
2979 /* ffree %st7 ; fld %st(src) */
2980 p = do_ffree_st7(p);
2981 p = do_fld_st(p, 0+fregEnc(i->Xin.Fp64to32.src));
2982 /* subl $4, %esp */
2983 *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
2984 /* fstps (%esp) */
2985 *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
2986 /* flds (%esp) */
2987 *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
2988 /* addl $4, %esp */
2989 *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
2990 /* fstp %st(1+dst) */
2991 p = do_fstp_st(p, 1+fregEnc(i->Xin.Fp64to32.dst));
2992 goto done;
2994 case Xin_FpCMov:
2995 /* jmp fwds if !condition */
2996 *p++ = toUChar(0x70 + (i->Xin.FpCMov.cond ^ 1));
2997 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
2998 ptmp = p;
3000 /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
3001 p = do_ffree_st7(p);
3002 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCMov.src));
3003 p = do_fstp_st(p, 1+fregEnc(i->Xin.FpCMov.dst));
3005 /* Fill in the jump offset. */
3006 *(ptmp-1) = toUChar(p - ptmp);
3007 goto done;
3009 case Xin_FpLdCW:
3010 *p++ = 0xD9;
3011 p = doAMode_M_enc(p, 5/*subopcode*/, i->Xin.FpLdCW.addr);
3012 goto done;
3014 case Xin_FpStSW_AX:
3015 /* note, this emits fnstsw %ax, not fstsw %ax */
3016 *p++ = 0xDF;
3017 *p++ = 0xE0;
3018 goto done;
3020 case Xin_FpCmp:
3021 /* gcmp %fL, %fR, %dst
3022 -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
3023 fnstsw %ax ; movl %eax, %dst
3025 /* ffree %st7 */
3026 p = do_ffree_st7(p);
3027 /* fpush %fL */
3028 p = do_fld_st(p, 0+fregEnc(i->Xin.FpCmp.srcL));
3029 /* fucomp %(fR+1) */
3030 *p++ = 0xDD;
3031 *p++ = toUChar(0xE8 + (7 & (1+fregEnc(i->Xin.FpCmp.srcR))));
3032 /* fnstsw %ax */
3033 *p++ = 0xDF;
3034 *p++ = 0xE0;
3035 /* movl %eax, %dst */
3036 *p++ = 0x89;
3037 p = doAMode_R(p, hregX86_EAX(), i->Xin.FpCmp.dst);
3038 goto done;
3040 case Xin_SseConst: {
3041 UShort con = i->Xin.SseConst.con;
3042 p = push_word_from_tags(p, toUShort((con >> 12) & 0xF));
3043 p = push_word_from_tags(p, toUShort((con >> 8) & 0xF));
3044 p = push_word_from_tags(p, toUShort((con >> 4) & 0xF));
3045 p = push_word_from_tags(p, toUShort(con & 0xF));
3046 /* movups (%esp), %xmm-dst */
3047 *p++ = 0x0F;
3048 *p++ = 0x10;
3049 *p++ = toUChar(0x04 + 8 * (7 & vregEnc(i->Xin.SseConst.dst)));
3050 *p++ = 0x24;
3051 /* addl $16, %esp */
3052 *p++ = 0x83;
3053 *p++ = 0xC4;
3054 *p++ = 0x10;
3055 goto done;
3058 case Xin_SseLdSt:
3059 *p++ = 0x0F;
3060 *p++ = toUChar(i->Xin.SseLdSt.isLoad ? 0x10 : 0x11);
3061 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdSt.reg), i->Xin.SseLdSt.addr);
3062 goto done;
3064 case Xin_SseLdzLO:
3065 vassert(i->Xin.SseLdzLO.sz == 4 || i->Xin.SseLdzLO.sz == 8);
3066 /* movs[sd] amode, %xmm-dst */
3067 *p++ = toUChar(i->Xin.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3068 *p++ = 0x0F;
3069 *p++ = 0x10;
3070 p = doAMode_M_enc(p, vregEnc(i->Xin.SseLdzLO.reg), i->Xin.SseLdzLO.addr);
3071 goto done;
3073 case Xin_Sse32Fx4:
3074 xtra = 0;
3075 *p++ = 0x0F;
3076 switch (i->Xin.Sse32Fx4.op) {
3077 case Xsse_ADDF: *p++ = 0x58; break;
3078 case Xsse_DIVF: *p++ = 0x5E; break;
3079 case Xsse_MAXF: *p++ = 0x5F; break;
3080 case Xsse_MINF: *p++ = 0x5D; break;
3081 case Xsse_MULF: *p++ = 0x59; break;
3082 case Xsse_RCPF: *p++ = 0x53; break;
3083 case Xsse_RSQRTF: *p++ = 0x52; break;
3084 case Xsse_SQRTF: *p++ = 0x51; break;
3085 case Xsse_SUBF: *p++ = 0x5C; break;
3086 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3087 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3088 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3089 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3090 default: goto bad;
3092 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32Fx4.dst),
3093 vregEnc(i->Xin.Sse32Fx4.src) );
3094 if (xtra & 0x100)
3095 *p++ = toUChar(xtra & 0xFF);
3096 goto done;
3098 case Xin_Sse64Fx2:
3099 xtra = 0;
3100 *p++ = 0x66;
3101 *p++ = 0x0F;
3102 switch (i->Xin.Sse64Fx2.op) {
3103 case Xsse_ADDF: *p++ = 0x58; break;
3104 case Xsse_DIVF: *p++ = 0x5E; break;
3105 case Xsse_MAXF: *p++ = 0x5F; break;
3106 case Xsse_MINF: *p++ = 0x5D; break;
3107 case Xsse_MULF: *p++ = 0x59; break;
3108 case Xsse_RCPF: *p++ = 0x53; break;
3109 case Xsse_RSQRTF: *p++ = 0x52; break;
3110 case Xsse_SQRTF: *p++ = 0x51; break;
3111 case Xsse_SUBF: *p++ = 0x5C; break;
3112 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3113 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3114 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3115 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3116 default: goto bad;
3118 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64Fx2.dst),
3119 vregEnc(i->Xin.Sse64Fx2.src) );
3120 if (xtra & 0x100)
3121 *p++ = toUChar(xtra & 0xFF);
3122 goto done;
3124 case Xin_Sse32FLo:
3125 xtra = 0;
3126 *p++ = 0xF3;
3127 *p++ = 0x0F;
3128 switch (i->Xin.Sse32FLo.op) {
3129 case Xsse_ADDF: *p++ = 0x58; break;
3130 case Xsse_DIVF: *p++ = 0x5E; break;
3131 case Xsse_MAXF: *p++ = 0x5F; break;
3132 case Xsse_MINF: *p++ = 0x5D; break;
3133 case Xsse_MULF: *p++ = 0x59; break;
3134 case Xsse_RCPF: *p++ = 0x53; break;
3135 case Xsse_RSQRTF: *p++ = 0x52; break;
3136 case Xsse_SQRTF: *p++ = 0x51; break;
3137 case Xsse_SUBF: *p++ = 0x5C; break;
3138 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3139 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3140 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3141 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3142 default: goto bad;
3144 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse32FLo.dst),
3145 vregEnc(i->Xin.Sse32FLo.src) );
3146 if (xtra & 0x100)
3147 *p++ = toUChar(xtra & 0xFF);
3148 goto done;
3150 case Xin_Sse64FLo:
3151 xtra = 0;
3152 *p++ = 0xF2;
3153 *p++ = 0x0F;
3154 switch (i->Xin.Sse64FLo.op) {
3155 case Xsse_ADDF: *p++ = 0x58; break;
3156 case Xsse_DIVF: *p++ = 0x5E; break;
3157 case Xsse_MAXF: *p++ = 0x5F; break;
3158 case Xsse_MINF: *p++ = 0x5D; break;
3159 case Xsse_MULF: *p++ = 0x59; break;
3160 case Xsse_RCPF: *p++ = 0x53; break;
3161 case Xsse_RSQRTF: *p++ = 0x52; break;
3162 case Xsse_SQRTF: *p++ = 0x51; break;
3163 case Xsse_SUBF: *p++ = 0x5C; break;
3164 case Xsse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3165 case Xsse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3166 case Xsse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3167 case Xsse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3168 default: goto bad;
3170 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.Sse64FLo.dst),
3171 vregEnc(i->Xin.Sse64FLo.src) );
3172 if (xtra & 0x100)
3173 *p++ = toUChar(xtra & 0xFF);
3174 goto done;
3176 case Xin_SseReRg:
3177 # define XX(_n) *p++ = (_n)
3178 switch (i->Xin.SseReRg.op) {
3179 case Xsse_MOV: /*movups*/ XX(0x0F); XX(0x10); break;
3180 case Xsse_OR: XX(0x0F); XX(0x56); break;
3181 case Xsse_XOR: XX(0x0F); XX(0x57); break;
3182 case Xsse_AND: XX(0x0F); XX(0x54); break;
3183 case Xsse_PACKSSD: XX(0x66); XX(0x0F); XX(0x6B); break;
3184 case Xsse_PACKSSW: XX(0x66); XX(0x0F); XX(0x63); break;
3185 case Xsse_PACKUSW: XX(0x66); XX(0x0F); XX(0x67); break;
3186 case Xsse_ADD8: XX(0x66); XX(0x0F); XX(0xFC); break;
3187 case Xsse_ADD16: XX(0x66); XX(0x0F); XX(0xFD); break;
3188 case Xsse_ADD32: XX(0x66); XX(0x0F); XX(0xFE); break;
3189 case Xsse_ADD64: XX(0x66); XX(0x0F); XX(0xD4); break;
3190 case Xsse_QADD8S: XX(0x66); XX(0x0F); XX(0xEC); break;
3191 case Xsse_QADD16S: XX(0x66); XX(0x0F); XX(0xED); break;
3192 case Xsse_QADD8U: XX(0x66); XX(0x0F); XX(0xDC); break;
3193 case Xsse_QADD16U: XX(0x66); XX(0x0F); XX(0xDD); break;
3194 case Xsse_AVG8U: XX(0x66); XX(0x0F); XX(0xE0); break;
3195 case Xsse_AVG16U: XX(0x66); XX(0x0F); XX(0xE3); break;
3196 case Xsse_CMPEQ8: XX(0x66); XX(0x0F); XX(0x74); break;
3197 case Xsse_CMPEQ16: XX(0x66); XX(0x0F); XX(0x75); break;
3198 case Xsse_CMPEQ32: XX(0x66); XX(0x0F); XX(0x76); break;
3199 case Xsse_CMPGT8S: XX(0x66); XX(0x0F); XX(0x64); break;
3200 case Xsse_CMPGT16S: XX(0x66); XX(0x0F); XX(0x65); break;
3201 case Xsse_CMPGT32S: XX(0x66); XX(0x0F); XX(0x66); break;
3202 case Xsse_MAX16S: XX(0x66); XX(0x0F); XX(0xEE); break;
3203 case Xsse_MAX8U: XX(0x66); XX(0x0F); XX(0xDE); break;
3204 case Xsse_MIN16S: XX(0x66); XX(0x0F); XX(0xEA); break;
3205 case Xsse_MIN8U: XX(0x66); XX(0x0F); XX(0xDA); break;
3206 case Xsse_MULHI16U: XX(0x66); XX(0x0F); XX(0xE4); break;
3207 case Xsse_MULHI16S: XX(0x66); XX(0x0F); XX(0xE5); break;
3208 case Xsse_MUL16: XX(0x66); XX(0x0F); XX(0xD5); break;
3209 case Xsse_SHL16: XX(0x66); XX(0x0F); XX(0xF1); break;
3210 case Xsse_SHL32: XX(0x66); XX(0x0F); XX(0xF2); break;
3211 case Xsse_SHL64: XX(0x66); XX(0x0F); XX(0xF3); break;
3212 case Xsse_SAR16: XX(0x66); XX(0x0F); XX(0xE1); break;
3213 case Xsse_SAR32: XX(0x66); XX(0x0F); XX(0xE2); break;
3214 case Xsse_SHR16: XX(0x66); XX(0x0F); XX(0xD1); break;
3215 case Xsse_SHR32: XX(0x66); XX(0x0F); XX(0xD2); break;
3216 case Xsse_SHR64: XX(0x66); XX(0x0F); XX(0xD3); break;
3217 case Xsse_SUB8: XX(0x66); XX(0x0F); XX(0xF8); break;
3218 case Xsse_SUB16: XX(0x66); XX(0x0F); XX(0xF9); break;
3219 case Xsse_SUB32: XX(0x66); XX(0x0F); XX(0xFA); break;
3220 case Xsse_SUB64: XX(0x66); XX(0x0F); XX(0xFB); break;
3221 case Xsse_QSUB8S: XX(0x66); XX(0x0F); XX(0xE8); break;
3222 case Xsse_QSUB16S: XX(0x66); XX(0x0F); XX(0xE9); break;
3223 case Xsse_QSUB8U: XX(0x66); XX(0x0F); XX(0xD8); break;
3224 case Xsse_QSUB16U: XX(0x66); XX(0x0F); XX(0xD9); break;
3225 case Xsse_UNPCKHB: XX(0x66); XX(0x0F); XX(0x68); break;
3226 case Xsse_UNPCKHW: XX(0x66); XX(0x0F); XX(0x69); break;
3227 case Xsse_UNPCKHD: XX(0x66); XX(0x0F); XX(0x6A); break;
3228 case Xsse_UNPCKHQ: XX(0x66); XX(0x0F); XX(0x6D); break;
3229 case Xsse_UNPCKLB: XX(0x66); XX(0x0F); XX(0x60); break;
3230 case Xsse_UNPCKLW: XX(0x66); XX(0x0F); XX(0x61); break;
3231 case Xsse_UNPCKLD: XX(0x66); XX(0x0F); XX(0x62); break;
3232 case Xsse_UNPCKLQ: XX(0x66); XX(0x0F); XX(0x6C); break;
3233 default: goto bad;
3235 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseReRg.dst),
3236 vregEnc(i->Xin.SseReRg.src) );
3237 # undef XX
3238 goto done;
3240 case Xin_SseCMov:
3241 /* jmp fwds if !condition */
3242 *p++ = toUChar(0x70 + (i->Xin.SseCMov.cond ^ 1));
3243 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3244 ptmp = p;
3246 /* movaps %src, %dst */
3247 *p++ = 0x0F;
3248 *p++ = 0x28;
3249 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseCMov.dst),
3250 vregEnc(i->Xin.SseCMov.src) );
3252 /* Fill in the jump offset. */
3253 *(ptmp-1) = toUChar(p - ptmp);
3254 goto done;
3256 case Xin_SseShuf:
3257 *p++ = 0x66;
3258 *p++ = 0x0F;
3259 *p++ = 0x70;
3260 p = doAMode_R_enc_enc(p, vregEnc(i->Xin.SseShuf.dst),
3261 vregEnc(i->Xin.SseShuf.src) );
3262 *p++ = (UChar)(i->Xin.SseShuf.order);
3263 goto done;
3265 case Xin_EvCheck: {
3266 /* We generate:
3267 (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
3268 (2 bytes) jns nofail expected taken
3269 (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
3270 nofail:
3272 /* This is heavily asserted re instruction lengths. It needs to
3273 be. If we get given unexpected forms of .amCounter or
3274 .amFailAddr -- basically, anything that's not of the form
3275 uimm7(%ebp) -- they are likely to fail. */
3276 /* Note also that after the decl we must be very careful not to
3277 read the carry flag, else we get a partial flags stall.
3278 js/jns avoids that, though. */
3279 UChar* p0 = p;
3280 /* --- decl 8(%ebp) --- */
3281 /* "1" because + there's no register in this encoding;
3282 instead the register + field is used as a sub opcode. The
3283 encoding for "decl r/m32" + is FF /1, hence the "1". */
3284 *p++ = 0xFF;
3285 p = doAMode_M_enc(p, 1, i->Xin.EvCheck.amCounter);
3286 vassert(p - p0 == 3);
3287 /* --- jns nofail --- */
3288 *p++ = 0x79;
3289 *p++ = 0x03; /* need to check this 0x03 after the next insn */
3290 vassert(p - p0 == 5);
3291 /* --- jmp* 0(%ebp) --- */
3292 /* The encoding is FF /4. */
3293 *p++ = 0xFF;
3294 p = doAMode_M_enc(p, 4, i->Xin.EvCheck.amFailAddr);
3295 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
3296 /* And crosscheck .. */
3297 vassert(evCheckSzB_X86() == 8);
3298 goto done;
3301 case Xin_ProfInc: {
3302 /* We generate addl $1,NotKnownYet
3303 adcl $0,NotKnownYet+4
3304 in the expectation that a later call to LibVEX_patchProfCtr
3305 will be used to fill in the immediate fields once the right
3306 value is known.
3307 83 05 00 00 00 00 01
3308 83 15 00 00 00 00 00
3310 *p++ = 0x83; *p++ = 0x05;
3311 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3312 *p++ = 0x01;
3313 *p++ = 0x83; *p++ = 0x15;
3314 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
3315 *p++ = 0x00;
3316 /* Tell the caller .. */
3317 vassert(!(*is_profInc));
3318 *is_profInc = True;
3319 goto done;
3322 default:
3323 goto bad;
3326 bad:
3327 ppX86Instr(i, mode64);
3328 vpanic("emit_X86Instr");
3329 /*NOTREACHED*/
3331 done:
3332 vassert(p - &buf[0] <= 32);
3333 return p - &buf[0];
3337 /* How big is an event check? See case for Xin_EvCheck in
3338 emit_X86Instr just above. That crosschecks what this returns, so
3339 we can tell if we're inconsistent. */
3340 Int evCheckSzB_X86 (void)
3342 return 8;
3346 /* NB: what goes on here has to be very closely coordinated with the
3347 emitInstr case for XDirect, above. */
3348 VexInvalRange chainXDirect_X86 ( VexEndness endness_host,
3349 void* place_to_chain,
3350 const void* disp_cp_chain_me_EXPECTED,
3351 const void* place_to_jump_to )
3353 vassert(endness_host == VexEndnessLE);
3355 /* What we're expecting to see is:
3356 movl $disp_cp_chain_me_EXPECTED, %edx
3357 call *%edx
3359 BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3360 FF D2
3362 UChar* p = (UChar*)place_to_chain;
3363 vassert(p[0] == 0xBA);
3364 vassert(read_misaligned_UInt_LE(&p[1])
3365 == (UInt)(Addr)disp_cp_chain_me_EXPECTED);
3366 vassert(p[5] == 0xFF);
3367 vassert(p[6] == 0xD2);
3368 /* And what we want to change it to is:
3369 jmp disp32 where disp32 is relative to the next insn
3370 ud2;
3372 E9 <4 bytes == disp32>
3373 0F 0B
3374 The replacement has the same length as the original.
3376 /* This is the delta we need to put into a JMP d32 insn. It's
3377 relative to the start of the next insn, hence the -5. */
3378 Long delta = (Long)((const UChar *)place_to_jump_to - p) - 5;
3380 /* And make the modifications. */
3381 p[0] = 0xE9;
3382 write_misaligned_UInt_LE(&p[1], (UInt)(ULong)delta);
3383 p[5] = 0x0F; p[6] = 0x0B;
3384 /* sanity check on the delta -- top 32 are all 0 or all 1 */
3385 delta >>= 32;
3386 vassert(delta == 0LL || delta == -1LL);
3387 VexInvalRange vir = { (HWord)place_to_chain, 7 };
3388 return vir;
3392 /* NB: what goes on here has to be very closely coordinated with the
3393 emitInstr case for XDirect, above. */
3394 VexInvalRange unchainXDirect_X86 ( VexEndness endness_host,
3395 void* place_to_unchain,
3396 const void* place_to_jump_to_EXPECTED,
3397 const void* disp_cp_chain_me )
3399 vassert(endness_host == VexEndnessLE);
3401 /* What we're expecting to see is:
3402 jmp d32
3403 ud2;
3405 E9 <4 bytes == disp32>
3406 0F 0B
3408 UChar* p = (UChar*)place_to_unchain;
3409 Bool valid = False;
3410 if (p[0] == 0xE9
3411 && p[5] == 0x0F && p[6] == 0x0B) {
3412 /* Check the offset is right. */
3413 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
3414 if ((UChar*)p + 5 + s32 == place_to_jump_to_EXPECTED) {
3415 valid = True;
3416 if (0)
3417 vex_printf("QQQ unchainXDirect_X86: found valid\n");
3420 vassert(valid);
3421 /* And what we want to change it to is:
3422 movl $disp_cp_chain_me, %edx
3423 call *%edx
3425 BA <4 bytes value == disp_cp_chain_me_EXPECTED>
3426 FF D2
3427 So it's the same length (convenient, huh).
3429 p[0] = 0xBA;
3430 write_misaligned_UInt_LE(&p[1], (UInt)(Addr)disp_cp_chain_me);
3431 p[5] = 0xFF;
3432 p[6] = 0xD2;
3433 VexInvalRange vir = { (HWord)place_to_unchain, 7 };
3434 return vir;
3438 /* Patch the counter address into a profile inc point, as previously
3439 created by the Xin_ProfInc case for emit_X86Instr. */
3440 VexInvalRange patchProfInc_X86 ( VexEndness endness_host,
3441 void* place_to_patch,
3442 const ULong* location_of_counter )
3444 vassert(endness_host == VexEndnessLE);
3445 vassert(sizeof(ULong*) == 4);
3446 UChar* p = (UChar*)place_to_patch;
3447 vassert(p[0] == 0x83);
3448 vassert(p[1] == 0x05);
3449 vassert(p[2] == 0x00);
3450 vassert(p[3] == 0x00);
3451 vassert(p[4] == 0x00);
3452 vassert(p[5] == 0x00);
3453 vassert(p[6] == 0x01);
3454 vassert(p[7] == 0x83);
3455 vassert(p[8] == 0x15);
3456 vassert(p[9] == 0x00);
3457 vassert(p[10] == 0x00);
3458 vassert(p[11] == 0x00);
3459 vassert(p[12] == 0x00);
3460 vassert(p[13] == 0x00);
3461 UInt imm32 = (UInt)(Addr)location_of_counter;
3462 p[2] = imm32 & 0xFF; imm32 >>= 8;
3463 p[3] = imm32 & 0xFF; imm32 >>= 8;
3464 p[4] = imm32 & 0xFF; imm32 >>= 8;
3465 p[5] = imm32 & 0xFF;
3466 imm32 = 4 + (UInt)(Addr)location_of_counter;
3467 p[9] = imm32 & 0xFF; imm32 >>= 8;
3468 p[10] = imm32 & 0xFF; imm32 >>= 8;
3469 p[11] = imm32 & 0xFF; imm32 >>= 8;
3470 p[12] = imm32 & 0xFF;
3471 VexInvalRange vir = { (HWord)place_to_patch, 14 };
3472 return vir;
3476 /*---------------------------------------------------------------*/
3477 /*--- end host_x86_defs.c ---*/
3478 /*---------------------------------------------------------------*/