[valgrind.git] / VEX / priv / host_amd64_defs.c
blob 253ed65150e5365a3b23f4006ca537150d77b4a7
2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex.h"
36 #include "libvex_trc_values.h"
38 #include "main_util.h"
39 #include "host_generic_regs.h"
40 #include "host_amd64_defs.h"
43 /* --------- Registers. --------- */
45 const RRegUniverse* getRRegUniverse_AMD64 ( void )
47 /* The real-register universe is a big constant, so we just want to
48 initialise it once. */
49 static RRegUniverse rRegUniverse_AMD64;
50 static Bool rRegUniverse_AMD64_initted = False;
52 /* Handy shorthand, nothing more */
53 RRegUniverse* ru = &rRegUniverse_AMD64;
55 /* This isn't thread-safe. Sigh. */
56 if (LIKELY(rRegUniverse_AMD64_initted))
57 return ru;
59 RRegUniverse__init(ru);
61 /* Add the registers. The initial segment of this array must be
62 those available for allocation by reg-alloc, and those that
63 follow are not available for allocation. */
64 ru->allocable_start[HRcInt64] = ru->size;
65 ru->regs[ru->size++] = hregAMD64_R12();
66 ru->regs[ru->size++] = hregAMD64_R13();
67 ru->regs[ru->size++] = hregAMD64_R14();
68 ru->regs[ru->size++] = hregAMD64_R15();
69 ru->regs[ru->size++] = hregAMD64_RBX();
70 ru->regs[ru->size++] = hregAMD64_RSI();
71 ru->regs[ru->size++] = hregAMD64_RDI();
72 ru->regs[ru->size++] = hregAMD64_R8();
73 ru->regs[ru->size++] = hregAMD64_R9();
74 ru->regs[ru->size++] = hregAMD64_R10();
75 ru->allocable_end[HRcInt64] = ru->size - 1;
77 ru->allocable_start[HRcVec128] = ru->size;
78 ru->regs[ru->size++] = hregAMD64_XMM3();
79 ru->regs[ru->size++] = hregAMD64_XMM4();
80 ru->regs[ru->size++] = hregAMD64_XMM5();
81 ru->regs[ru->size++] = hregAMD64_XMM6();
82 ru->regs[ru->size++] = hregAMD64_XMM7();
83 ru->regs[ru->size++] = hregAMD64_XMM8();
84 ru->regs[ru->size++] = hregAMD64_XMM9();
85 ru->regs[ru->size++] = hregAMD64_XMM10();
86 ru->regs[ru->size++] = hregAMD64_XMM11();
87 ru->regs[ru->size++] = hregAMD64_XMM12();
88 ru->allocable_end[HRcVec128] = ru->size - 1;
89 ru->allocable = ru->size;
91 /* And other regs, not available to the allocator. */
92 ru->regs[ru->size++] = hregAMD64_RAX();
93 ru->regs[ru->size++] = hregAMD64_RCX();
94 ru->regs[ru->size++] = hregAMD64_RDX();
95 ru->regs[ru->size++] = hregAMD64_RSP();
96 ru->regs[ru->size++] = hregAMD64_RBP();
97 ru->regs[ru->size++] = hregAMD64_R11();
98 ru->regs[ru->size++] = hregAMD64_XMM0();
99 ru->regs[ru->size++] = hregAMD64_XMM1();
101 rRegUniverse_AMD64_initted = True;
103 RRegUniverse__check_is_sane(ru);
104 return ru;
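/* Editor's illustration (not part of VEX): a minimal sketch of how the
   layout built above could be consumed.  The allocatable registers of
   each class form one contiguous run bounded by allocable_start[] and
   allocable_end[]; entries at index 'allocable' and beyond are visible
   to the backend but not to the register allocator.  The helper name
   is hypothetical; the fields and functions it uses all appear in this
   file or its headers. */
static void ppAllocatableInt64Regs_EXAMPLE ( void )
{
   const RRegUniverse* univ = getRRegUniverse_AMD64();
   UInt k;
   /* Walk the Int64 run: r12 r13 r14 r15 rbx rsi rdi r8 r9 r10. */
   for (k = univ->allocable_start[HRcInt64];
        k <= univ->allocable_end[HRcInt64]; k++) {
      ppHRegAMD64(univ->regs[k]);
      vex_printf(" ");
   }
   vex_printf("\n");
}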
108 UInt ppHRegAMD64 ( HReg reg )
110 Int r;
111 static const HChar* ireg64_names[16]
112 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
113 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
114 /* Be generic for all virtual regs. */
115 if (hregIsVirtual(reg)) {
116 return ppHReg(reg);
118 /* But specific for real regs. */
119 switch (hregClass(reg)) {
120 case HRcInt64:
121 r = hregEncoding(reg);
122 vassert(r >= 0 && r < 16);
123 return vex_printf("%s", ireg64_names[r]);
124 case HRcVec128:
125 r = hregEncoding(reg);
126 vassert(r >= 0 && r < 16);
127 return vex_printf("%%xmm%d", r);
128 default:
129 vpanic("ppHRegAMD64");
133 static UInt ppHRegAMD64_lo32 ( HReg reg )
135 Int r;
136 static const HChar* ireg32_names[16]
137 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
138 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
139 /* Be generic for all virtual regs. */
140 if (hregIsVirtual(reg)) {
141 UInt written = ppHReg(reg);
142 written += vex_printf("d");
143 return written;
145 /* But specific for real regs. */
146 switch (hregClass(reg)) {
147 case HRcInt64:
148 r = hregEncoding(reg);
149 vassert(r >= 0 && r < 16);
150 return vex_printf("%s", ireg32_names[r]);
151 default:
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
157 /* --------- Condition codes, Intel encoding. --------- */
159 const HChar* showAMD64CondCode ( AMD64CondCode cond )
161 switch (cond) {
162 case Acc_O: return "o";
163 case Acc_NO: return "no";
164 case Acc_B: return "b";
165 case Acc_NB: return "nb";
166 case Acc_Z: return "z";
167 case Acc_NZ: return "nz";
168 case Acc_BE: return "be";
169 case Acc_NBE: return "nbe";
170 case Acc_S: return "s";
171 case Acc_NS: return "ns";
172 case Acc_P: return "p";
173 case Acc_NP: return "np";
174 case Acc_L: return "l";
175 case Acc_NL: return "nl";
176 case Acc_LE: return "le";
177 case Acc_NLE: return "nle";
178 case Acc_ALWAYS: return "ALWAYS";
179       default: vpanic("showAMD64CondCode");
184 /* --------- AMD64AMode: memory address expressions. --------- */
186 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
187 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
188 am->tag = Aam_IR;
189 am->Aam.IR.imm = imm32;
190 am->Aam.IR.reg = reg;
191 return am;
193 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
194 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
195 am->tag = Aam_IRRS;
196 am->Aam.IRRS.imm = imm32;
197 am->Aam.IRRS.base = base;
198 am->Aam.IRRS.index = indEx;
199 am->Aam.IRRS.shift = shift;
200 vassert(shift >= 0 && shift <= 3);
201 return am;
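/* Editor's illustration (not part of VEX): how a concrete amode maps
   onto the constructors above.  The IRRS form describes
   imm(base,index,1<<shift), so the x86 operand 0x18(%rbp,%r10,8) --
   that is, rbp + r10*8 + 0x18 -- is built with shift == 3.  The helper
   name is hypothetical. */
static AMD64AMode* mk_amode_EXAMPLE ( void )
{
   return AMD64AMode_IRRS( 0x18, hregAMD64_RBP(), hregAMD64_R10(), 3 );
}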
204 void ppAMD64AMode ( AMD64AMode* am ) {
205 switch (am->tag) {
206 case Aam_IR:
207 if (am->Aam.IR.imm == 0)
208 vex_printf("(");
209 else
210 vex_printf("0x%x(", am->Aam.IR.imm);
211 ppHRegAMD64(am->Aam.IR.reg);
212 vex_printf(")");
213 return;
214 case Aam_IRRS:
215 vex_printf("0x%x(", am->Aam.IRRS.imm);
216 ppHRegAMD64(am->Aam.IRRS.base);
217 vex_printf(",");
218 ppHRegAMD64(am->Aam.IRRS.index);
219 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
220 return;
221 default:
222 vpanic("ppAMD64AMode");
226 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
227 switch (am->tag) {
228 case Aam_IR:
229 addHRegUse(u, HRmRead, am->Aam.IR.reg);
230 return;
231 case Aam_IRRS:
232 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
233 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
234 return;
235 default:
236 vpanic("addRegUsage_AMD64AMode");
240 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
241 switch (am->tag) {
242 case Aam_IR:
243 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
244 return;
245 case Aam_IRRS:
246 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
247 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
248 return;
249 default:
250 vpanic("mapRegs_AMD64AMode");
254 /* --------- Operand, which can be reg, immediate or memory. --------- */
256 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
257 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
258 op->tag = Armi_Imm;
259 op->Armi.Imm.imm32 = imm32;
260 return op;
262 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
263 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
264 op->tag = Armi_Reg;
265 op->Armi.Reg.reg = reg;
266 return op;
268 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
269 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
270 op->tag = Armi_Mem;
271 op->Armi.Mem.am = am;
272 return op;
275 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
276 switch (op->tag) {
277 case Armi_Imm:
278 vex_printf("$0x%x", op->Armi.Imm.imm32);
279 return;
280 case Armi_Reg:
281 if (lo32)
282 ppHRegAMD64_lo32(op->Armi.Reg.reg);
283 else
284 ppHRegAMD64(op->Armi.Reg.reg);
285 return;
286 case Armi_Mem:
287 ppAMD64AMode(op->Armi.Mem.am);
288 return;
289 default:
290 vpanic("ppAMD64RMI");
293 void ppAMD64RMI ( AMD64RMI* op ) {
294 ppAMD64RMI_wrk(op, False/*!lo32*/);
296 void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
297 ppAMD64RMI_wrk(op, True/*lo32*/);
300 /* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
302 accordingly. */
303 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
304 switch (op->tag) {
305 case Armi_Imm:
306 return;
307 case Armi_Reg:
308 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
309 return;
310 case Armi_Mem:
311 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
312 return;
313 default:
314 vpanic("addRegUsage_AMD64RMI");
318 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
319 switch (op->tag) {
320 case Armi_Imm:
321 return;
322 case Armi_Reg:
323 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
324 return;
325 case Armi_Mem:
326 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
327 return;
328 default:
329 vpanic("mapRegs_AMD64RMI");
334 /* --------- Operand, which can be reg or immediate only. --------- */
336 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
337 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
338 op->tag = Ari_Imm;
339 op->Ari.Imm.imm32 = imm32;
340 return op;
342 AMD64RI* AMD64RI_Reg ( HReg reg ) {
343 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
344 op->tag = Ari_Reg;
345 op->Ari.Reg.reg = reg;
346 return op;
349 void ppAMD64RI ( AMD64RI* op ) {
350 switch (op->tag) {
351 case Ari_Imm:
352 vex_printf("$0x%x", op->Ari.Imm.imm32);
353 return;
354 case Ari_Reg:
355 ppHRegAMD64(op->Ari.Reg.reg);
356 return;
357 default:
358 vpanic("ppAMD64RI");
362 /* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
364 accordingly. */
365 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
366 switch (op->tag) {
367 case Ari_Imm:
368 return;
369 case Ari_Reg:
370 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
371 return;
372 default:
373 vpanic("addRegUsage_AMD64RI");
377 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
378 switch (op->tag) {
379 case Ari_Imm:
380 return;
381 case Ari_Reg:
382 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
383 return;
384 default:
385 vpanic("mapRegs_AMD64RI");
390 /* --------- Operand, which can be reg or memory only. --------- */
392 AMD64RM* AMD64RM_Reg ( HReg reg ) {
393 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
394 op->tag = Arm_Reg;
395 op->Arm.Reg.reg = reg;
396 return op;
398 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
399 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
400 op->tag = Arm_Mem;
401 op->Arm.Mem.am = am;
402 return op;
405 void ppAMD64RM ( AMD64RM* op ) {
406 switch (op->tag) {
407 case Arm_Mem:
408 ppAMD64AMode(op->Arm.Mem.am);
409 return;
410 case Arm_Reg:
411 ppHRegAMD64(op->Arm.Reg.reg);
412 return;
413 default:
414 vpanic("ppAMD64RM");
418 /* Because an AMD64RM can be both a source or destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
422 switch (op->tag) {
423 case Arm_Mem:
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
427 return;
428 case Arm_Reg:
429 /* reg is read, written or modified. Add it in the
430 appropriate way. */
431 addHRegUse(u, mode, op->Arm.Reg.reg);
432 return;
433 default:
434 vpanic("addRegUsage_AMD64RM");
438 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
440 switch (op->tag) {
441 case Arm_Mem:
442 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
443 return;
444 case Arm_Reg:
445 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
446 return;
447 default:
448 vpanic("mapRegs_AMD64RM");
453 /* --------- Instructions. --------- */
455 static const HChar* showAMD64ScalarSz ( Int sz ) {
456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
464 const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
472 const HChar* showAMD64AluOp ( AMD64AluOp op ) {
473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
483 case Aalu_MUL: return "imul";
484 default: vpanic("showAMD64AluOp");
488 const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
497 const HChar* showA87FpOp ( A87FpOp op ) {
498 switch (op) {
499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
502 case Afp_YL2XP1: return "yl2xp1";
503 case Afp_PREM: return "prem";
504 case Afp_PREM1: return "prem1";
505 case Afp_SQRT: return "sqrt";
506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
508 case Afp_TAN: return "tan";
509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
515 const HChar* showAMD64SseOp ( AMD64SseOp op ) {
516 switch (op) {
517 case Asse_MOV: return "movups";
518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
530 case Asse_SQRTF: return "sqrt";
531 case Asse_I2F: return "cvtdq2ps.";
532 case Asse_F2I: return "cvtps2dq.";
533 case Asse_AND: return "and";
534 case Asse_OR: return "or";
535 case Asse_XOR: return "xor";
536 case Asse_ANDN: return "andn";
537 case Asse_ADD8: return "paddb";
538 case Asse_ADD16: return "paddw";
539 case Asse_ADD32: return "paddd";
540 case Asse_ADD64: return "paddq";
541 case Asse_QADD8U: return "paddusb";
542 case Asse_QADD16U: return "paddusw";
543 case Asse_QADD8S: return "paddsb";
544 case Asse_QADD16S: return "paddsw";
545 case Asse_SUB8: return "psubb";
546 case Asse_SUB16: return "psubw";
547 case Asse_SUB32: return "psubd";
548 case Asse_SUB64: return "psubq";
549 case Asse_QSUB8U: return "psubusb";
550 case Asse_QSUB16U: return "psubusw";
551 case Asse_QSUB8S: return "psubsb";
552 case Asse_QSUB16S: return "psubsw";
553 case Asse_MUL16: return "pmullw";
554 case Asse_MULHI16U: return "pmulhuw";
555 case Asse_MULHI16S: return "pmulhw";
556 case Asse_AVG8U: return "pavgb";
557 case Asse_AVG16U: return "pavgw";
558 case Asse_MAX16S: return "pmaxw";
559 case Asse_MAX8U: return "pmaxub";
560 case Asse_MIN16S: return "pminw";
561 case Asse_MIN8U: return "pminub";
562 case Asse_CMPEQ8: return "pcmpeqb";
563 case Asse_CMPEQ16: return "pcmpeqw";
564 case Asse_CMPEQ32: return "pcmpeqd";
565 case Asse_CMPGT8S: return "pcmpgtb";
566 case Asse_CMPGT16S: return "pcmpgtw";
567 case Asse_CMPGT32S: return "pcmpgtd";
568 case Asse_SHL16: return "psllw";
569 case Asse_SHL32: return "pslld";
570 case Asse_SHL64: return "psllq";
571 case Asse_SHL128: return "pslldq";
572 case Asse_SHR16: return "psrlw";
573 case Asse_SHR32: return "psrld";
574 case Asse_SHR64: return "psrlq";
575 case Asse_SHR128: return "psrldq";
576 case Asse_SAR16: return "psraw";
577 case Asse_SAR32: return "psrad";
578 case Asse_PACKSSD: return "packssdw";
579 case Asse_PACKSSW: return "packsswb";
580 case Asse_PACKUSW: return "packuswb";
581 case Asse_UNPCKHB: return "punpckhb";
582 case Asse_UNPCKHW: return "punpckhw";
583 case Asse_UNPCKHD: return "punpckhd";
584 case Asse_UNPCKHQ: return "punpckhq";
585 case Asse_UNPCKLB: return "punpcklb";
586 case Asse_UNPCKLW: return "punpcklw";
587 case Asse_UNPCKLD: return "punpckld";
588 case Asse_UNPCKLQ: return "punpcklq";
589 case Asse_PSHUFB: return "pshufb";
590 case Asse_PMADDUBSW: return "pmaddubsw";
591 case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4).";
592 case Asse_F16toF32: return "vcvtph2ps.";
593 case Asse_VFMADD213: return "vfmadd213";
594 default: vpanic("showAMD64SseOp");
598 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
599 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
600 i->tag = Ain_Imm64;
601 i->Ain.Imm64.imm64 = imm64;
602 i->Ain.Imm64.dst = dst;
603 return i;
605 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
606 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
607 i->tag = Ain_Alu64R;
608 i->Ain.Alu64R.op = op;
609 i->Ain.Alu64R.src = src;
610 i->Ain.Alu64R.dst = dst;
611 return i;
613 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
614 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
615 i->tag = Ain_Alu64M;
616 i->Ain.Alu64M.op = op;
617 i->Ain.Alu64M.src = src;
618 i->Ain.Alu64M.dst = dst;
619 vassert(op != Aalu_MUL);
620 return i;
622 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
623 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
624 i->tag = Ain_Sh64;
625 i->Ain.Sh64.op = op;
626 i->Ain.Sh64.src = src;
627 i->Ain.Sh64.dst = dst;
628 return i;
630 AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
631 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
632 i->tag = Ain_Sh32;
633 i->Ain.Sh32.op = op;
634 i->Ain.Sh32.src = src;
635 i->Ain.Sh32.dst = dst;
636 return i;
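/* Editor's note (illustration only): for both Sh64 and Sh32 the 'src'
   field is the immediate shift amount, with the special value 0
   meaning "shift by %cl" -- see the handling of Sh64.src/Sh32.src in
   ppAMD64Instr and getRegUsage_AMD64Instr below, where src == 0 prints
   as %cl and adds a read of %rcx.  A hypothetical sketch: */
static void mk_shift_EXAMPLES ( HReg dst,
                                AMD64Instr** byImm, AMD64Instr** byCL )
{
   *byImm = AMD64Instr_Sh64( Ash_SHL, 13, dst ); /* shlq $13, dst */
   *byCL  = AMD64Instr_Sh64( Ash_SHL, 0,  dst ); /* shlq %cl, dst */
}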
638 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
639 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
640 i->tag = Ain_Test64;
641 i->Ain.Test64.imm32 = imm32;
642 i->Ain.Test64.dst = dst;
643 return i;
645 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
646 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
647 i->tag = Ain_Unary64;
648 i->Ain.Unary64.op = op;
649 i->Ain.Unary64.dst = dst;
650 return i;
652 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
653 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
654 i->tag = Ain_Lea64;
655 i->Ain.Lea64.am = am;
656 i->Ain.Lea64.dst = dst;
657 return i;
659 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
660 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
661 i->tag = Ain_Alu32R;
662 i->Ain.Alu32R.op = op;
663 i->Ain.Alu32R.src = src;
664 i->Ain.Alu32R.dst = dst;
665 switch (op) {
666 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
667 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
668 default: vassert(0);
670 return i;
672 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
673 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
674 i->tag = Ain_MulL;
675 i->Ain.MulL.syned = syned;
676 i->Ain.MulL.src = src;
677 return i;
679 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
680 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
681 i->tag = Ain_Div;
682 i->Ain.Div.syned = syned;
683 i->Ain.Div.sz = sz;
684 i->Ain.Div.src = src;
685 vassert(sz == 4 || sz == 8);
686 return i;
688 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
689 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
690 i->tag = Ain_Push;
691 i->Ain.Push.src = src;
692 return i;
694 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
695 RetLoc rloc ) {
696 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
697 i->tag = Ain_Call;
698 i->Ain.Call.cond = cond;
699 i->Ain.Call.target = target;
700 i->Ain.Call.regparms = regparms;
701 i->Ain.Call.rloc = rloc;
702 vassert(regparms >= 0 && regparms <= 6);
703 vassert(is_sane_RetLoc(rloc));
704 return i;
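/* Editor's illustration (not part of VEX): 'regparms' counts how many
   of the System V integer argument registers (%rdi, %rsi, %rdx, %rcx,
   %r8, %r9, in that order) carry live arguments -- see the Ain_Call
   case of getRegUsage_AMD64Instr below.  A hypothetical unconditional
   call to a two-argument helper returning an integer result, assuming
   mk_RetLoc_simple/RLPri_Int from host_generic_regs.h: */
static AMD64Instr* mk_call_EXAMPLE ( Addr64 helper_addr )
{
   return AMD64Instr_Call( Acc_ALWAYS, helper_addr, 2/*regparms*/,
                           mk_RetLoc_simple(RLPri_Int) );
}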
707 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
708 AMD64CondCode cond, Bool toFastEP ) {
709 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
710 i->tag = Ain_XDirect;
711 i->Ain.XDirect.dstGA = dstGA;
712 i->Ain.XDirect.amRIP = amRIP;
713 i->Ain.XDirect.cond = cond;
714 i->Ain.XDirect.toFastEP = toFastEP;
715 return i;
717 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
718 AMD64CondCode cond ) {
719 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
720 i->tag = Ain_XIndir;
721 i->Ain.XIndir.dstGA = dstGA;
722 i->Ain.XIndir.amRIP = amRIP;
723 i->Ain.XIndir.cond = cond;
724 return i;
726 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
727 AMD64CondCode cond, IRJumpKind jk ) {
728 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
729 i->tag = Ain_XAssisted;
730 i->Ain.XAssisted.dstGA = dstGA;
731 i->Ain.XAssisted.amRIP = amRIP;
732 i->Ain.XAssisted.cond = cond;
733 i->Ain.XAssisted.jk = jk;
734 return i;
737 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
738 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
739 i->tag = Ain_CMov64;
740 i->Ain.CMov64.cond = cond;
741 i->Ain.CMov64.src = src;
742 i->Ain.CMov64.dst = dst;
743 vassert(cond != Acc_ALWAYS);
744 return i;
746 AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
747 AMD64AMode* addr, HReg dst ) {
748 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
749 i->tag = Ain_CLoad;
750 i->Ain.CLoad.cond = cond;
751 i->Ain.CLoad.szB = szB;
752 i->Ain.CLoad.addr = addr;
753 i->Ain.CLoad.dst = dst;
754 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
755 return i;
757 AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
758 HReg src, AMD64AMode* addr ) {
759 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
760 i->tag = Ain_CStore;
761 i->Ain.CStore.cond = cond;
762 i->Ain.CStore.szB = szB;
763 i->Ain.CStore.src = src;
764 i->Ain.CStore.addr = addr;
765 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
766 return i;
768 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
769 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
770 i->tag = Ain_MovxLQ;
771 i->Ain.MovxLQ.syned = syned;
772 i->Ain.MovxLQ.src = src;
773 i->Ain.MovxLQ.dst = dst;
774 return i;
776 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
777 AMD64AMode* src, HReg dst ) {
778 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
779 i->tag = Ain_LoadEX;
780 i->Ain.LoadEX.szSmall = szSmall;
781 i->Ain.LoadEX.syned = syned;
782 i->Ain.LoadEX.src = src;
783 i->Ain.LoadEX.dst = dst;
784 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
785 return i;
787 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
788 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
789 i->tag = Ain_Store;
790 i->Ain.Store.sz = sz;
791 i->Ain.Store.src = src;
792 i->Ain.Store.dst = dst;
793 vassert(sz == 1 || sz == 2 || sz == 4);
794 return i;
796 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
797 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
798 i->tag = Ain_Set64;
799 i->Ain.Set64.cond = cond;
800 i->Ain.Set64.dst = dst;
801 return i;
803 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
804 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
805 i->tag = Ain_Bsfr64;
806 i->Ain.Bsfr64.isFwds = isFwds;
807 i->Ain.Bsfr64.src = src;
808 i->Ain.Bsfr64.dst = dst;
809 return i;
811 AMD64Instr* AMD64Instr_MFence ( void ) {
812 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
813 i->tag = Ain_MFence;
814 return i;
816 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
817 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
818 i->tag = Ain_ACAS;
819 i->Ain.ACAS.addr = addr;
820 i->Ain.ACAS.sz = sz;
821 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
822 return i;
824 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
825 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
826 i->tag = Ain_DACAS;
827 i->Ain.DACAS.addr = addr;
828 i->Ain.DACAS.sz = sz;
829 vassert(sz == 8 || sz == 4);
830 return i;
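/* Editor's note (illustration only): ACAS and DACAS carry no explicit
   register operands; they follow the hardware convention that the
   expected value sits in %rax (DACAS: %rdx:%rax) and the proposed new
   value in %rbx (DACAS: %rcx:%rbx), which is why the Ain_ACAS and
   Ain_DACAS cases of getRegUsage_AMD64Instr below claim those
   registers implicitly.  A hypothetical 8-byte CAS on the word at
   0x30(%rbp): */
static AMD64Instr* mk_cas_EXAMPLE ( void )
{
   return AMD64Instr_ACAS( AMD64AMode_IR( 0x30, hregAMD64_RBP() ), 8 );
}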
833 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
835 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
836 i->tag = Ain_A87Free;
837 i->Ain.A87Free.nregs = nregs;
838 vassert(nregs >= 1 && nregs <= 7);
839 return i;
841 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
843 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
844 i->tag = Ain_A87PushPop;
845 i->Ain.A87PushPop.addr = addr;
846 i->Ain.A87PushPop.isPush = isPush;
847 i->Ain.A87PushPop.szB = szB;
848 vassert(szB == 8 || szB == 4);
849 return i;
851 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
853 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
854 i->tag = Ain_A87FpOp;
855 i->Ain.A87FpOp.op = op;
856 return i;
858 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
860 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
861 i->tag = Ain_A87LdCW;
862 i->Ain.A87LdCW.addr = addr;
863 return i;
865 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
867 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
868 i->tag = Ain_A87StSW;
869 i->Ain.A87StSW.addr = addr;
870 return i;
872 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
873 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
874 i->tag = Ain_LdMXCSR;
875 i->Ain.LdMXCSR.addr = addr;
876 return i;
878 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
879 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
880 i->tag = Ain_SseUComIS;
881 i->Ain.SseUComIS.sz = toUChar(sz);
882 i->Ain.SseUComIS.srcL = srcL;
883 i->Ain.SseUComIS.srcR = srcR;
884 i->Ain.SseUComIS.dst = dst;
885 vassert(sz == 4 || sz == 8);
886 return i;
888 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
889 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
890 i->tag = Ain_SseSI2SF;
891 i->Ain.SseSI2SF.szS = toUChar(szS);
892 i->Ain.SseSI2SF.szD = toUChar(szD);
893 i->Ain.SseSI2SF.src = src;
894 i->Ain.SseSI2SF.dst = dst;
895 vassert(szS == 4 || szS == 8);
896 vassert(szD == 4 || szD == 8);
897 return i;
899 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
900 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
901 i->tag = Ain_SseSF2SI;
902 i->Ain.SseSF2SI.szS = toUChar(szS);
903 i->Ain.SseSF2SI.szD = toUChar(szD);
904 i->Ain.SseSF2SI.src = src;
905 i->Ain.SseSF2SI.dst = dst;
906 vassert(szS == 4 || szS == 8);
907 vassert(szD == 4 || szD == 8);
908 return i;
910 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
912 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
913 i->tag = Ain_SseSDSS;
914 i->Ain.SseSDSS.from64 = from64;
915 i->Ain.SseSDSS.src = src;
916 i->Ain.SseSDSS.dst = dst;
917 return i;
919 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
920 HReg reg, AMD64AMode* addr ) {
921 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
922 i->tag = Ain_SseLdSt;
923 i->Ain.SseLdSt.isLoad = isLoad;
924 i->Ain.SseLdSt.sz = toUChar(sz);
925 i->Ain.SseLdSt.reg = reg;
926 i->Ain.SseLdSt.addr = addr;
927 vassert(sz == 4 || sz == 8 || sz == 16);
928 return i;
930 AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
931 HReg src, AMD64AMode* addr )
933 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
934 i->tag = Ain_SseCStore;
935 i->Ain.SseCStore.cond = cond;
936 i->Ain.SseCStore.src = src;
937 i->Ain.SseCStore.addr = addr;
938 vassert(cond != Acc_ALWAYS);
939 return i;
941 AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
942 AMD64AMode* addr, HReg dst )
944 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
945 i->tag = Ain_SseCLoad;
946 i->Ain.SseCLoad.cond = cond;
947 i->Ain.SseCLoad.addr = addr;
948 i->Ain.SseCLoad.dst = dst;
949 vassert(cond != Acc_ALWAYS);
950 return i;
952 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
954 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
955 i->tag = Ain_SseLdzLO;
956 i->Ain.SseLdzLO.sz = sz;
957 i->Ain.SseLdzLO.reg = reg;
958 i->Ain.SseLdzLO.addr = addr;
959 vassert(sz == 4 || sz == 8);
960 return i;
962 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
963 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
964 i->tag = Ain_Sse32Fx4;
965 i->Ain.Sse32Fx4.op = op;
966 i->Ain.Sse32Fx4.src = src;
967 i->Ain.Sse32Fx4.dst = dst;
968 vassert(op != Asse_MOV);
969 return i;
971 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
972 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
973 i->tag = Ain_Sse32FLo;
974 i->Ain.Sse32FLo.op = op;
975 i->Ain.Sse32FLo.src = src;
976 i->Ain.Sse32FLo.dst = dst;
977 vassert(op != Asse_MOV);
978 return i;
980 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
981 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
982 i->tag = Ain_Sse64Fx2;
983 i->Ain.Sse64Fx2.op = op;
984 i->Ain.Sse64Fx2.src = src;
985 i->Ain.Sse64Fx2.dst = dst;
986 vassert(op != Asse_MOV);
987 return i;
989 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
990 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
991 i->tag = Ain_Sse64FLo;
992 i->Ain.Sse64FLo.op = op;
993 i->Ain.Sse64FLo.src = src;
994 i->Ain.Sse64FLo.dst = dst;
995 vassert(op != Asse_MOV);
996 return i;
998 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
999 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1000 i->tag = Ain_SseReRg;
1001 i->Ain.SseReRg.op = op;
1002 i->Ain.SseReRg.src = re;
1003 i->Ain.SseReRg.dst = rg;
1004 return i;
1006 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
1007 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1008 i->tag = Ain_SseCMov;
1009 i->Ain.SseCMov.cond = cond;
1010 i->Ain.SseCMov.src = src;
1011 i->Ain.SseCMov.dst = dst;
1012 vassert(cond != Acc_ALWAYS);
1013 return i;
1015 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
1016 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1017 i->tag = Ain_SseShuf;
1018 i->Ain.SseShuf.order = order;
1019 i->Ain.SseShuf.src = src;
1020 i->Ain.SseShuf.dst = dst;
1021 vassert(order >= 0 && order <= 0xFF);
1022 return i;
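/* Editor's illustration (not part of VEX): 'order' is the pshufd
   immediate, where each 2-bit field selects a source dword.  With the
   usual encoding, order == 0x4E (binary 01 00 11 10) selects dwords
   2,3,0,1 and so swaps the upper and lower 64-bit halves of the
   vector.  The helper name is hypothetical. */
static AMD64Instr* mk_swap_halves_EXAMPLE ( HReg src, HReg dst )
{
   return AMD64Instr_SseShuf( 0x4E, src, dst );
}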
1024 AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp op,
1025 UInt shiftBits, HReg dst ) {
1026 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1027 i->tag = Ain_SseShiftN;
1028 i->Ain.SseShiftN.op = op;
1029 i->Ain.SseShiftN.shiftBits = shiftBits;
1030 i->Ain.SseShiftN.dst = dst;
1031 return i;
1033 AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) {
1034 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1035 i->tag = Ain_SseMOVQ;
1036 i->Ain.SseMOVQ.gpr = gpr;
1037 i->Ain.SseMOVQ.xmm = xmm;
1038 i->Ain.SseMOVQ.toXMM = toXMM;
1039 vassert(hregClass(gpr) == HRcInt64);
1040 vassert(hregClass(xmm) == HRcVec128);
1041 return i;
1043 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1044 //uu HReg reg, AMD64AMode* addr ) {
1045 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1046 //uu i->tag = Ain_AvxLdSt;
1047 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1048 //uu i->Ain.AvxLdSt.reg = reg;
1049 //uu i->Ain.AvxLdSt.addr = addr;
1050 //uu return i;
1051 //uu }
1052 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1053 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1054 //uu i->tag = Ain_AvxReRg;
1055 //uu i->Ain.AvxReRg.op = op;
1056 //uu i->Ain.AvxReRg.src = re;
1057 //uu i->Ain.AvxReRg.dst = rg;
1058 //uu return i;
1059 //uu }
1060 AMD64Instr* AMD64Instr_Avx32FLo ( AMD64SseOp op, HReg src1, HReg src2, HReg dst ) {
1061 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1062 i->tag = Ain_Avx32FLo;
1063 i->Ain.Avx32FLo.op = op;
1064 i->Ain.Avx32FLo.src1 = src1;
1065 i->Ain.Avx32FLo.src2 = src2;
1066 i->Ain.Avx32FLo.dst = dst;
1067 vassert(op != Asse_MOV);
1068 return i;
1071 AMD64Instr* AMD64Instr_Avx64FLo ( AMD64SseOp op, HReg src1, HReg src2, HReg dst ) {
1072 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1073 i->tag = Ain_Avx64FLo;
1074 i->Ain.Avx64FLo.op = op;
1075 i->Ain.Avx64FLo.src1 = src1;
1076 i->Ain.Avx64FLo.src2 = src2;
1077 i->Ain.Avx64FLo.dst = dst;
1078 vassert(op != Asse_MOV);
1079 return i;
1082 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1083 AMD64AMode* amFailAddr ) {
1084 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1085 i->tag = Ain_EvCheck;
1086 i->Ain.EvCheck.amCounter = amCounter;
1087 i->Ain.EvCheck.amFailAddr = amFailAddr;
1088 return i;
1090 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1091 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1092 i->tag = Ain_ProfInc;
1093 return i;
1096 void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
1098 vassert(mode64 == True);
1099 switch (i->tag) {
1100 case Ain_Imm64:
1101 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1102 ppHRegAMD64(i->Ain.Imm64.dst);
1103 return;
1104 case Ain_Alu64R:
1105 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1106 ppAMD64RMI(i->Ain.Alu64R.src);
1107 vex_printf(",");
1108 ppHRegAMD64(i->Ain.Alu64R.dst);
1109 return;
1110 case Ain_Alu64M:
1111 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1112 ppAMD64RI(i->Ain.Alu64M.src);
1113 vex_printf(",");
1114 ppAMD64AMode(i->Ain.Alu64M.dst);
1115 return;
1116 case Ain_Sh64:
1117 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1118 if (i->Ain.Sh64.src == 0)
1119 vex_printf("%%cl,");
1120 else
1121 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1122 ppHRegAMD64(i->Ain.Sh64.dst);
1123 return;
1124 case Ain_Sh32:
1125 vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
1126 if (i->Ain.Sh32.src == 0)
1127 vex_printf("%%cl,");
1128 else
1129 vex_printf("$%d,", (Int)i->Ain.Sh32.src);
1130 ppHRegAMD64_lo32(i->Ain.Sh32.dst);
1131 return;
1132 case Ain_Test64:
1133 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1134 ppHRegAMD64(i->Ain.Test64.dst);
1135 return;
1136 case Ain_Unary64:
1137 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1138 ppHRegAMD64(i->Ain.Unary64.dst);
1139 return;
1140 case Ain_Lea64:
1141 vex_printf("leaq ");
1142 ppAMD64AMode(i->Ain.Lea64.am);
1143 vex_printf(",");
1144 ppHRegAMD64(i->Ain.Lea64.dst);
1145 return;
1146 case Ain_Alu32R:
1147 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1148 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1149 vex_printf(",");
1150 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1151 return;
1152 case Ain_MulL:
1153 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1154 ppAMD64RM(i->Ain.MulL.src);
1155 return;
1156 case Ain_Div:
1157 vex_printf("%cdiv%s ",
1158 i->Ain.Div.syned ? 's' : 'u',
1159 showAMD64ScalarSz(i->Ain.Div.sz));
1160 ppAMD64RM(i->Ain.Div.src);
1161 return;
1162 case Ain_Push:
1163 vex_printf("pushq ");
1164 ppAMD64RMI(i->Ain.Push.src);
1165 return;
1166 case Ain_Call:
1167 vex_printf("call%s[%d,",
1168 i->Ain.Call.cond==Acc_ALWAYS
1169 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1170 i->Ain.Call.regparms );
1171 ppRetLoc(i->Ain.Call.rloc);
1172 vex_printf("] 0x%llx", i->Ain.Call.target);
1173 break;
1175 case Ain_XDirect:
1176 vex_printf("(xDirect) ");
1177 vex_printf("if (%%rflags.%s) { ",
1178 showAMD64CondCode(i->Ain.XDirect.cond));
1179 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1180 vex_printf("movq %%r11,");
1181 ppAMD64AMode(i->Ain.XDirect.amRIP);
1182 vex_printf("; ");
1183 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1184 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1185 return;
1186 case Ain_XIndir:
1187 vex_printf("(xIndir) ");
1188 vex_printf("if (%%rflags.%s) { ",
1189 showAMD64CondCode(i->Ain.XIndir.cond));
1190 vex_printf("movq ");
1191 ppHRegAMD64(i->Ain.XIndir.dstGA);
1192 vex_printf(",");
1193 ppAMD64AMode(i->Ain.XIndir.amRIP);
1194 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1195 return;
1196 case Ain_XAssisted:
1197 vex_printf("(xAssisted) ");
1198 vex_printf("if (%%rflags.%s) { ",
1199 showAMD64CondCode(i->Ain.XAssisted.cond));
1200 vex_printf("movq ");
1201 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1202 vex_printf(",");
1203 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1204 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1205 (Int)i->Ain.XAssisted.jk);
1206 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1207 return;
1209 case Ain_CMov64:
1210 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1211 ppHRegAMD64(i->Ain.CMov64.src);
1212 vex_printf(",");
1213 ppHRegAMD64(i->Ain.CMov64.dst);
1214 return;
1215 case Ain_CLoad:
1216 vex_printf("if (%%rflags.%s) { ",
1217 showAMD64CondCode(i->Ain.CLoad.cond));
1218 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1219 ppAMD64AMode(i->Ain.CLoad.addr);
1220 vex_printf(", ");
1221 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1222 (i->Ain.CLoad.dst);
1223 vex_printf(" }");
1224 return;
1225 case Ain_CStore:
1226 vex_printf("if (%%rflags.%s) { ",
1227 showAMD64CondCode(i->Ain.CStore.cond));
1228 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1229 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1230 (i->Ain.CStore.src);
1231 vex_printf(", ");
1232 ppAMD64AMode(i->Ain.CStore.addr);
1233 vex_printf(" }");
1234 return;
1236 case Ain_MovxLQ:
1237 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1238 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1239 vex_printf(",");
1240 ppHRegAMD64(i->Ain.MovxLQ.dst);
1241 return;
1242 case Ain_LoadEX:
1243 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1244 vex_printf("movl ");
1245 ppAMD64AMode(i->Ain.LoadEX.src);
1246 vex_printf(",");
1247 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1248 } else {
1249 vex_printf("mov%c%cq ",
1250 i->Ain.LoadEX.syned ? 's' : 'z',
1251 i->Ain.LoadEX.szSmall==1
1252 ? 'b'
1253 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1254 ppAMD64AMode(i->Ain.LoadEX.src);
1255 vex_printf(",");
1256 ppHRegAMD64(i->Ain.LoadEX.dst);
1258 return;
1259 case Ain_Store:
1260 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1261 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1262 ppHRegAMD64(i->Ain.Store.src);
1263 vex_printf(",");
1264 ppAMD64AMode(i->Ain.Store.dst);
1265 return;
1266 case Ain_Set64:
1267 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1268 ppHRegAMD64(i->Ain.Set64.dst);
1269 return;
1270 case Ain_Bsfr64:
1271 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1272 ppHRegAMD64(i->Ain.Bsfr64.src);
1273 vex_printf(",");
1274 ppHRegAMD64(i->Ain.Bsfr64.dst);
1275 return;
1276 case Ain_MFence:
1277 vex_printf("mfence" );
1278 return;
1279 case Ain_ACAS:
1280 vex_printf("lock cmpxchg%c ",
1281 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1282 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1283 vex_printf("{%%rax->%%rbx},");
1284 ppAMD64AMode(i->Ain.ACAS.addr);
1285 return;
1286 case Ain_DACAS:
1287 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1288 (Int)(2 * i->Ain.DACAS.sz));
1289 ppAMD64AMode(i->Ain.DACAS.addr);
1290 return;
1291 case Ain_A87Free:
1292 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1293 break;
1294 case Ain_A87PushPop:
1295 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1296 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1297 ppAMD64AMode(i->Ain.A87PushPop.addr);
1298 break;
1299 case Ain_A87FpOp:
1300 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1301 break;
1302 case Ain_A87LdCW:
1303 vex_printf("fldcw ");
1304 ppAMD64AMode(i->Ain.A87LdCW.addr);
1305 break;
1306 case Ain_A87StSW:
1307 vex_printf("fstsw ");
1308 ppAMD64AMode(i->Ain.A87StSW.addr);
1309 break;
1310 case Ain_LdMXCSR:
1311 vex_printf("ldmxcsr ");
1312 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1313 break;
1314 case Ain_SseUComIS:
1315 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1316 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1317 vex_printf(",");
1318 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1319 vex_printf(" ; pushfq ; popq ");
1320 ppHRegAMD64(i->Ain.SseUComIS.dst);
1321 break;
1322 case Ain_SseSI2SF:
1323 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1324 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1325 (i->Ain.SseSI2SF.src);
1326 vex_printf(",");
1327 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1328 break;
1329 case Ain_SseSF2SI:
1330 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1331 ppHRegAMD64(i->Ain.SseSF2SI.src);
1332 vex_printf(",");
1333 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1334 (i->Ain.SseSF2SI.dst);
1335 break;
1336 case Ain_SseSDSS:
1337 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1338 ppHRegAMD64(i->Ain.SseSDSS.src);
1339 vex_printf(",");
1340 ppHRegAMD64(i->Ain.SseSDSS.dst);
1341 break;
1342 case Ain_SseLdSt:
1343 switch (i->Ain.SseLdSt.sz) {
1344 case 4: vex_printf("movss "); break;
1345 case 8: vex_printf("movsd "); break;
1346 case 16: vex_printf("movups "); break;
1347 default: vassert(0);
1349 if (i->Ain.SseLdSt.isLoad) {
1350 ppAMD64AMode(i->Ain.SseLdSt.addr);
1351 vex_printf(",");
1352 ppHRegAMD64(i->Ain.SseLdSt.reg);
1353 } else {
1354 ppHRegAMD64(i->Ain.SseLdSt.reg);
1355 vex_printf(",");
1356 ppAMD64AMode(i->Ain.SseLdSt.addr);
1358 return;
1359 case Ain_SseCStore:
1360 vex_printf("if (%%rflags.%s) { ",
1361 showAMD64CondCode(i->Ain.SseCStore.cond));
1362 vex_printf("movups ");
1363 ppHRegAMD64(i->Ain.SseCStore.src);
1364 vex_printf(", ");
1365 ppAMD64AMode(i->Ain.SseCStore.addr);
1366 vex_printf(" }");
1367 return;
1368 case Ain_SseCLoad:
1369 vex_printf("if (%%rflags.%s) { ",
1370 showAMD64CondCode(i->Ain.SseCLoad.cond));
1371 vex_printf("movups ");
1372 ppAMD64AMode(i->Ain.SseCLoad.addr);
1373 vex_printf(", ");
1374 ppHRegAMD64(i->Ain.SseCLoad.dst);
1375 vex_printf(" }");
1376 return;
1377 case Ain_SseLdzLO:
1378 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1379 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1380 vex_printf(",");
1381 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1382 return;
1383 case Ain_Sse32Fx4:
1384 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1385 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1386 vex_printf(",");
1387 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1388 return;
1389 case Ain_Sse32FLo:
1390 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1391 ppHRegAMD64(i->Ain.Sse32FLo.src);
1392 vex_printf(",");
1393 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1394 return;
1395 case Ain_Sse64Fx2:
1396 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1397 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1398 vex_printf(",");
1399 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1400 return;
1401 case Ain_Sse64FLo:
1402 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1403 ppHRegAMD64(i->Ain.Sse64FLo.src);
1404 vex_printf(",");
1405 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1406 return;
1407 case Ain_SseReRg:
1408 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1409 ppHRegAMD64(i->Ain.SseReRg.src);
1410 vex_printf(",");
1411 ppHRegAMD64(i->Ain.SseReRg.dst);
1412 return;
1413 case Ain_SseCMov:
1414 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1415 ppHRegAMD64(i->Ain.SseCMov.src);
1416 vex_printf(",");
1417 ppHRegAMD64(i->Ain.SseCMov.dst);
1418 return;
1419 case Ain_SseShuf:
1420 vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
1421 ppHRegAMD64(i->Ain.SseShuf.src);
1422 vex_printf(",");
1423 ppHRegAMD64(i->Ain.SseShuf.dst);
1424 return;
1425 case Ain_SseShiftN:
1426 vex_printf("%s $%u, ", showAMD64SseOp(i->Ain.SseShiftN.op),
1427 i->Ain.SseShiftN.shiftBits);
1428 ppHRegAMD64(i->Ain.SseShiftN.dst);
1429 return;
1430 case Ain_SseMOVQ:
1431 vex_printf("movq ");
1432 if (i->Ain.SseMOVQ.toXMM) {
1433 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1434 vex_printf(",");
1435 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1436 } else {
1437 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1438 vex_printf(",");
1439 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1441 return;
1442 //uu case Ain_AvxLdSt:
1443 //uu vex_printf("vmovups ");
1444 //uu if (i->Ain.AvxLdSt.isLoad) {
1445 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1446 //uu vex_printf(",");
1447 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1448 //uu } else {
1449 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1450 //uu vex_printf(",");
1451 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1452 //uu }
1453 //uu return;
1454 //uu case Ain_AvxReRg:
1455 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1456 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1457 //uu vex_printf(",");
1458 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1459 //uu return;
1460 case Ain_Avx32FLo:
1461 vex_printf("%sss ", showAMD64SseOp(i->Ain.Avx32FLo.op));
1462 ppHRegAMD64(i->Ain.Avx32FLo.src2);
1463 vex_printf(",");
1464 ppHRegAMD64(i->Ain.Avx32FLo.src1);
1465 vex_printf(",");
1466 ppHRegAMD64(i->Ain.Avx32FLo.dst);
1467 return;
1468 case Ain_Avx64FLo:
1469 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Avx64FLo.op));
1470 ppHRegAMD64(i->Ain.Avx64FLo.src2);
1471 vex_printf(",");
1472 ppHRegAMD64(i->Ain.Avx64FLo.src1);
1473 vex_printf(",");
1474 ppHRegAMD64(i->Ain.Avx64FLo.dst);
1475 return;
1476 case Ain_EvCheck:
1477 vex_printf("(evCheck) decl ");
1478 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1479 vex_printf("; jns nofail; jmp *");
1480 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1481 vex_printf("; nofail:");
1482 return;
1483 case Ain_ProfInc:
1484 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1485 return;
1486 default:
1487 vpanic("ppAMD64Instr");
1491 /* --------- Helpers for register allocation. --------- */
1493 void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
1495 Bool unary;
1496 vassert(mode64 == True);
1497 initHRegUsage(u);
1498 switch (i->tag) {
1499 case Ain_Imm64:
1500 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1501 return;
1502 case Ain_Alu64R:
1503 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1504 if (i->Ain.Alu64R.op == Aalu_MOV) {
1505 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1507 if (i->Ain.Alu64R.src->tag == Armi_Reg) {
1508 u->isRegRegMove = True;
1509 u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg;
1510 u->regMoveDst = i->Ain.Alu64R.dst;
1512 return;
1514 if (i->Ain.Alu64R.op == Aalu_CMP) {
1515 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1516 return;
1518 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1519 return;
1520 case Ain_Alu64M:
1521 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1522 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1523 return;
1524 case Ain_Sh64:
1525 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1526 if (i->Ain.Sh64.src == 0)
1527 addHRegUse(u, HRmRead, hregAMD64_RCX());
1528 return;
1529 case Ain_Sh32:
1530 addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
1531 if (i->Ain.Sh32.src == 0)
1532 addHRegUse(u, HRmRead, hregAMD64_RCX());
1533 return;
1534 case Ain_Test64:
1535 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1536 return;
1537 case Ain_Unary64:
1538 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1539 return;
1540 case Ain_Lea64:
1541 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1542 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1543 return;
1544 case Ain_Alu32R:
1545 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1546 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1547 if (i->Ain.Alu32R.op == Aalu_CMP) {
1548 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1549 return;
1551 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1552 return;
1553 case Ain_MulL:
1554 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1555 addHRegUse(u, HRmModify, hregAMD64_RAX());
1556 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1557 return;
1558 case Ain_Div:
1559 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1560 addHRegUse(u, HRmModify, hregAMD64_RAX());
1561 addHRegUse(u, HRmModify, hregAMD64_RDX());
1562 return;
1563 case Ain_Push:
1564 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1565 addHRegUse(u, HRmModify, hregAMD64_RSP());
1566 return;
1567 case Ain_Call:
1568 /* This is a bit subtle. */
1569 /* First off, claim it trashes all the caller-saved regs
1570 which fall within the register allocator's jurisdiction.
1571 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1572 and all the xmm registers. */
1573 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1574 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1575 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1576 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1577 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1578 addHRegUse(u, HRmWrite, hregAMD64_R8());
1579 addHRegUse(u, HRmWrite, hregAMD64_R9());
1580 addHRegUse(u, HRmWrite, hregAMD64_R10());
1581 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1582 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1583 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1584 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1585 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1586 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1587 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1588 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1589 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1590 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1591 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1592 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1594 /* Now we have to state any parameter-carrying registers
1595 which might be read. This depends on the regparmness. */
1596 switch (i->Ain.Call.regparms) {
1597 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1598 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1599 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1600 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1601 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1602 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1603 case 0: break;
1604 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1606 /* Finally, there is the issue that the insn trashes a
1607 register because the literal target address has to be
1608 loaded into a register. Fortunately, r11 is stated in the
1609 ABI as a scratch register, and so seems a suitable victim. */
1610 addHRegUse(u, HRmWrite, hregAMD64_R11());
1611 /* Upshot of this is that the assembler really must use r11,
1612 and no other, as a destination temporary. */
1613 return;
1614 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1615 conditionally exit the block. Hence we only need to list (1)
1616 the registers that they read, and (2) the registers that they
1617 write in the case where the block is not exited. (2) is
1618 empty, hence only (1) is relevant here. */
1619 case Ain_XDirect:
1620 /* Don't bother to mention the write to %r11, since it is not
1621 available to the allocator. */
1622 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1623 return;
1624 case Ain_XIndir:
1625 /* Ditto re %r11 */
1626 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1627 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1628 return;
1629 case Ain_XAssisted:
1630 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1631 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1632 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1633 return;
1634 case Ain_CMov64:
1635 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1636 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1637 return;
1638 case Ain_CLoad:
1639 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1640 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1641 return;
1642 case Ain_CStore:
1643 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1644 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1645 return;
1646 case Ain_MovxLQ:
1647 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1648 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1649 return;
1650 case Ain_LoadEX:
1651 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1652 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1653 return;
1654 case Ain_Store:
1655 addHRegUse(u, HRmRead, i->Ain.Store.src);
1656 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1657 return;
1658 case Ain_Set64:
1659 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1660 return;
1661 case Ain_Bsfr64:
1662 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1663 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1664 return;
1665 case Ain_MFence:
1666 return;
1667 case Ain_ACAS:
1668 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1669 addHRegUse(u, HRmRead, hregAMD64_RBX());
1670 addHRegUse(u, HRmModify, hregAMD64_RAX());
1671 return;
1672 case Ain_DACAS:
1673 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1674 addHRegUse(u, HRmRead, hregAMD64_RCX());
1675 addHRegUse(u, HRmRead, hregAMD64_RBX());
1676 addHRegUse(u, HRmModify, hregAMD64_RDX());
1677 addHRegUse(u, HRmModify, hregAMD64_RAX());
1678 return;
1679 case Ain_A87Free:
1680 return;
1681 case Ain_A87PushPop:
1682 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1683 return;
1684 case Ain_A87FpOp:
1685 return;
1686 case Ain_A87LdCW:
1687 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1688 return;
1689 case Ain_A87StSW:
1690 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1691 return;
1692 case Ain_LdMXCSR:
1693 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1694 return;
1695 case Ain_SseUComIS:
1696 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1697 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1698 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1699 return;
1700 case Ain_SseSI2SF:
1701 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1702 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1703 return;
1704 case Ain_SseSF2SI:
1705 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1706 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1707 return;
1708 case Ain_SseSDSS:
1709 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1710 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1711 return;
1712 case Ain_SseLdSt:
1713 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1714 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1715 i->Ain.SseLdSt.reg);
1716 return;
1717 case Ain_SseCStore:
1718 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1719 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1720 return;
1721 case Ain_SseCLoad:
1722 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1723 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1724 return;
1725 case Ain_SseLdzLO:
1726 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1727 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1728 return;
1729 case Ain_Sse32Fx4:
1730 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1731 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1732 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1733 || i->Ain.Sse32Fx4.op == Asse_SQRTF
1734 || i->Ain.Sse32Fx4.op == Asse_I2F
1735 || i->Ain.Sse32Fx4.op == Asse_F2I
1736 || i->Ain.Sse32Fx4.op == Asse_F32toF16
1737 || i->Ain.Sse32Fx4.op == Asse_F16toF32 );
1738 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1739 addHRegUse(u, unary ? HRmWrite : HRmModify,
1740 i->Ain.Sse32Fx4.dst);
1741 return;
1742 case Ain_Sse32FLo:
1743 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1744 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1745 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1746 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1747 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1748 addHRegUse(u, unary ? HRmWrite : HRmModify,
1749 i->Ain.Sse32FLo.dst);
1750 return;
1751 case Ain_Sse64Fx2:
1752 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1753 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1754 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1755 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1756 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1757 addHRegUse(u, unary ? HRmWrite : HRmModify,
1758 i->Ain.Sse64Fx2.dst);
1759 return;
1760 case Ain_Sse64FLo:
1761 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1762 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1763 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1764 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1765 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1766 addHRegUse(u, unary ? HRmWrite : HRmModify,
1767 i->Ain.Sse64FLo.dst);
1768 return;
1769 case Ain_SseReRg:
1770 if ( (i->Ain.SseReRg.op == Asse_XOR
1771 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1772 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1773 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1774 r,r' as a write of a value to r, and independent of any
1775 previous value in r */
1776 /* (as opposed to a rite of passage :-) */
1777 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1778 } else {
1779 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1780 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1781 ? HRmWrite : HRmModify,
1782 i->Ain.SseReRg.dst);
1784 if (i->Ain.SseReRg.op == Asse_MOV) {
1785 u->isRegRegMove = True;
1786 u->regMoveSrc = i->Ain.SseReRg.src;
1787 u->regMoveDst = i->Ain.SseReRg.dst;
1790 return;
1791 case Ain_SseCMov:
1792 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1793 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1794 return;
1795 case Ain_SseShuf:
1796 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1797 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1798 return;
1799 case Ain_SseShiftN:
1800 addHRegUse(u, HRmModify, i->Ain.SseShiftN.dst);
1801 return;
1802 case Ain_SseMOVQ:
1803 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmRead : HRmWrite,
1804 i->Ain.SseMOVQ.gpr);
1805 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmWrite : HRmRead,
1806 i->Ain.SseMOVQ.xmm);
1807 return;
1808 //uu case Ain_AvxLdSt:
1809 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1810 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1811 //uu i->Ain.AvxLdSt.reg);
1812 //uu return;
1813 //uu case Ain_AvxReRg:
1814 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1815 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1816 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1817 //uu /* See comments on the case for Ain_SseReRg. */
1818 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1819 //uu } else {
1820 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1821 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1822 //uu ? HRmWrite : HRmModify,
1823 //uu i->Ain.AvxReRg.dst);
1824 //uu
1825 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1826 //uu u->isRegRegMove = True;
1827 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1828 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1829 //uu }
1830 //uu }
1831 //uu return;
1832 case Ain_Avx32FLo:
1833 vassert(i->Ain.Avx32FLo.op != Asse_MOV);
1834 addHRegUse(u, HRmRead, i->Ain.Avx32FLo.src1);
1835 addHRegUse(u, HRmRead, i->Ain.Avx32FLo.src2);
1836 addHRegUse(u, HRmModify, i->Ain.Avx32FLo.dst);
1837 return;
1838 case Ain_Avx64FLo:
1839 vassert(i->Ain.Avx64FLo.op != Asse_MOV);
1840 addHRegUse(u, HRmRead, i->Ain.Avx64FLo.src1);
1841 addHRegUse(u, HRmRead, i->Ain.Avx64FLo.src2);
1842 addHRegUse(u, HRmModify, i->Ain.Avx64FLo.dst);
1843 return;
1844 case Ain_EvCheck:
1845 /* We expect both amodes only to mention %rbp, so this is in
1846 fact pointless, since %rbp isn't allocatable, but anyway.. */
1847 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1848 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1849 return;
1850 case Ain_ProfInc:
1851 addHRegUse(u, HRmWrite, hregAMD64_R11());
1852 return;
1853 default:
1854 ppAMD64Instr(i, mode64);
1855 vpanic("getRegUsage_AMD64Instr");
1859 /* local helper */
1860 static inline void mapReg(HRegRemap* m, HReg* r)
1862 *r = lookupHRegRemap(m, *r);
1865 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1867 vassert(mode64 == True);
1868 switch (i->tag) {
1869 case Ain_Imm64:
1870 mapReg(m, &i->Ain.Imm64.dst);
1871 return;
1872 case Ain_Alu64R:
1873 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1874 mapReg(m, &i->Ain.Alu64R.dst);
1875 return;
1876 case Ain_Alu64M:
1877 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1878 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1879 return;
1880 case Ain_Sh64:
1881 mapReg(m, &i->Ain.Sh64.dst);
1882 return;
1883 case Ain_Sh32:
1884 mapReg(m, &i->Ain.Sh32.dst);
1885 return;
1886 case Ain_Test64:
1887 mapReg(m, &i->Ain.Test64.dst);
1888 return;
1889 case Ain_Unary64:
1890 mapReg(m, &i->Ain.Unary64.dst);
1891 return;
1892 case Ain_Lea64:
1893 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1894 mapReg(m, &i->Ain.Lea64.dst);
1895 return;
1896 case Ain_Alu32R:
1897 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1898 mapReg(m, &i->Ain.Alu32R.dst);
1899 return;
1900 case Ain_MulL:
1901 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1902 return;
1903 case Ain_Div:
1904 mapRegs_AMD64RM(m, i->Ain.Div.src);
1905 return;
1906 case Ain_Push:
1907 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1908 return;
1909 case Ain_Call:
1910 return;
1911 case Ain_XDirect:
1912 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1913 return;
1914 case Ain_XIndir:
1915 mapReg(m, &i->Ain.XIndir.dstGA);
1916 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1917 return;
1918 case Ain_XAssisted:
1919 mapReg(m, &i->Ain.XAssisted.dstGA);
1920 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1921 return;
1922 case Ain_CMov64:
1923 mapReg(m, &i->Ain.CMov64.src);
1924 mapReg(m, &i->Ain.CMov64.dst);
1925 return;
1926 case Ain_CLoad:
1927 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1928 mapReg(m, &i->Ain.CLoad.dst);
1929 return;
1930 case Ain_CStore:
1931 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1932 mapReg(m, &i->Ain.CStore.src);
1933 return;
1934 case Ain_MovxLQ:
1935 mapReg(m, &i->Ain.MovxLQ.src);
1936 mapReg(m, &i->Ain.MovxLQ.dst);
1937 return;
1938 case Ain_LoadEX:
1939 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1940 mapReg(m, &i->Ain.LoadEX.dst);
1941 return;
1942 case Ain_Store:
1943 mapReg(m, &i->Ain.Store.src);
1944 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1945 return;
1946 case Ain_Set64:
1947 mapReg(m, &i->Ain.Set64.dst);
1948 return;
1949 case Ain_Bsfr64:
1950 mapReg(m, &i->Ain.Bsfr64.src);
1951 mapReg(m, &i->Ain.Bsfr64.dst);
1952 return;
1953 case Ain_MFence:
1954 return;
1955 case Ain_ACAS:
1956 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1957 return;
1958 case Ain_DACAS:
1959 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1960 return;
1961 case Ain_A87Free:
1962 return;
1963 case Ain_A87PushPop:
1964 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1965 return;
1966 case Ain_A87FpOp:
1967 return;
1968 case Ain_A87LdCW:
1969 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1970 return;
1971 case Ain_A87StSW:
1972 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1973 return;
1974 case Ain_LdMXCSR:
1975 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1976 return;
1977 case Ain_SseUComIS:
1978 mapReg(m, &i->Ain.SseUComIS.srcL);
1979 mapReg(m, &i->Ain.SseUComIS.srcR);
1980 mapReg(m, &i->Ain.SseUComIS.dst);
1981 return;
1982 case Ain_SseSI2SF:
1983 mapReg(m, &i->Ain.SseSI2SF.src);
1984 mapReg(m, &i->Ain.SseSI2SF.dst);
1985 return;
1986 case Ain_SseSF2SI:
1987 mapReg(m, &i->Ain.SseSF2SI.src);
1988 mapReg(m, &i->Ain.SseSF2SI.dst);
1989 return;
1990 case Ain_SseSDSS:
1991 mapReg(m, &i->Ain.SseSDSS.src);
1992 mapReg(m, &i->Ain.SseSDSS.dst);
1993 return;
1994 case Ain_SseLdSt:
1995 mapReg(m, &i->Ain.SseLdSt.reg);
1996 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1997 break;
1998 case Ain_SseCStore:
1999 mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
2000 mapReg(m, &i->Ain.SseCStore.src);
2001 return;
2002 case Ain_SseCLoad:
2003 mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
2004 mapReg(m, &i->Ain.SseCLoad.dst);
2005 return;
2006 case Ain_SseLdzLO:
2007 mapReg(m, &i->Ain.SseLdzLO.reg);
2008 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
2009 break;
2010 case Ain_Sse32Fx4:
2011 mapReg(m, &i->Ain.Sse32Fx4.src);
2012 mapReg(m, &i->Ain.Sse32Fx4.dst);
2013 return;
2014 case Ain_Sse32FLo:
2015 mapReg(m, &i->Ain.Sse32FLo.src);
2016 mapReg(m, &i->Ain.Sse32FLo.dst);
2017 return;
2018 case Ain_Sse64Fx2:
2019 mapReg(m, &i->Ain.Sse64Fx2.src);
2020 mapReg(m, &i->Ain.Sse64Fx2.dst);
2021 return;
2022 case Ain_Sse64FLo:
2023 mapReg(m, &i->Ain.Sse64FLo.src);
2024 mapReg(m, &i->Ain.Sse64FLo.dst);
2025 return;
2026 case Ain_SseReRg:
2027 mapReg(m, &i->Ain.SseReRg.src);
2028 mapReg(m, &i->Ain.SseReRg.dst);
2029 return;
2030 case Ain_SseCMov:
2031 mapReg(m, &i->Ain.SseCMov.src);
2032 mapReg(m, &i->Ain.SseCMov.dst);
2033 return;
2034 case Ain_SseShuf:
2035 mapReg(m, &i->Ain.SseShuf.src);
2036 mapReg(m, &i->Ain.SseShuf.dst);
2037 return;
2038 case Ain_SseShiftN:
2039 mapReg(m, &i->Ain.SseShiftN.dst);
2040 return;
2041 case Ain_SseMOVQ:
2042 mapReg(m, &i->Ain.SseMOVQ.gpr);
2043 mapReg(m, &i->Ain.SseMOVQ.xmm);
2044 return;
2045 //uu case Ain_AvxLdSt:
2046 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
2047 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
2048 //uu break;
2049 //uu case Ain_AvxReRg:
2050 //uu mapReg(m, &i->Ain.AvxReRg.src);
2051 //uu mapReg(m, &i->Ain.AvxReRg.dst);
2052 //uu return;
2053 case Ain_Avx32FLo:
2054 mapReg(m, &i->Ain.Avx32FLo.src1);
2055 mapReg(m, &i->Ain.Avx32FLo.src2);
2056 mapReg(m, &i->Ain.Avx32FLo.dst);
2057 return;
2058 case Ain_Avx64FLo:
2059 mapReg(m, &i->Ain.Avx64FLo.src1);
2060 mapReg(m, &i->Ain.Avx64FLo.src2);
2061 mapReg(m, &i->Ain.Avx64FLo.dst);
2062 return;
2063 case Ain_EvCheck:
2064 /* We expect both amodes only to mention %rbp, so this is in
2065 fact pointless, since %rbp isn't allocatable, but anyway.. */
2066 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
2067 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
2068 return;
2069 case Ain_ProfInc:
2070 /* hardwires r11 -- nothing to modify. */
2071 return;
2072 default:
2073 ppAMD64Instr(i, mode64);
2074 vpanic("mapRegs_AMD64Instr");
2078 /* Generate amd64 spill/reload instructions under the direction of the
2079 register allocator. Note it's critical these don't write the
2080 condition codes. */
2082 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2083 HReg rreg, Int offsetB, Bool mode64 )
2085 AMD64AMode* am;
2086 vassert(offsetB >= 0);
2087 vassert(!hregIsVirtual(rreg));
2088 vassert(mode64 == True);
2089 *i1 = *i2 = NULL;
2090 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2091 switch (hregClass(rreg)) {
2092 case HRcInt64:
2093 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
2094 return;
2095 case HRcVec128:
2096 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
2097 return;
2098 default:
2099 ppHRegClass(hregClass(rreg));
2100 vpanic("genSpill_AMD64: unimplemented regclass");
2104 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2105 HReg rreg, Int offsetB, Bool mode64 )
2107 AMD64AMode* am;
2108 vassert(offsetB >= 0);
2109 vassert(!hregIsVirtual(rreg));
2110 vassert(mode64 == True);
2111 *i1 = *i2 = NULL;
2112 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2113 switch (hregClass(rreg)) {
2114 case HRcInt64:
2115 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
2116 return;
2117 case HRcVec128:
2118 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
2119 return;
2120 default:
2121 ppHRegClass(hregClass(rreg));
2122 vpanic("genReload_AMD64: unimplemented regclass");
2126 AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
2128 switch (hregClass(from)) {
2129 case HRcInt64:
2130 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
2131 case HRcVec128:
2132 return AMD64Instr_SseReRg(Asse_MOV, from, to);
2133 default:
2134 ppHRegClass(hregClass(from));
2135 vpanic("genMove_AMD64: unimplemented regclass");
2139 AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
2141 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
2143 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2144 Convert to: src=RMI_Mem, dst=Reg
2146 if (i->tag == Ain_Alu64R
2147 && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
2148 || i->Ain.Alu64R.op == Aalu_XOR)
2149 && i->Ain.Alu64R.src->tag == Armi_Reg
2150 && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
2151 vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
2152 return AMD64Instr_Alu64R(
2153 i->Ain.Alu64R.op,
2154 AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
2155 i->Ain.Alu64R.dst
2159 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2160 Convert to: src=RI_Imm, dst=Mem
2162 if (i->tag == Ain_Alu64R
2163 && (i->Ain.Alu64R.op == Aalu_CMP)
2164 && i->Ain.Alu64R.src->tag == Armi_Imm
2165 && sameHReg(i->Ain.Alu64R.dst, vreg)) {
2166 return AMD64Instr_Alu64M(
2167 i->Ain.Alu64R.op,
2168 AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
2169 AMD64AMode_IR( spill_off, hregAMD64_RBP())
2173 return NULL;
2177 /* --------- The amd64 assembler (bleh.) --------- */
2179 /* Produce the low three bits of an integer register number. */
2180 inline static UInt iregEnc210 ( HReg r )
2182 UInt n;
2183 vassert(hregClass(r) == HRcInt64);
2184 vassert(!hregIsVirtual(r));
2185 n = hregEncoding(r);
2186 vassert(n <= 15);
2187 return n & 7;
2190 /* Produce bit 3 of an integer register number. */
2191 inline static UInt iregEnc3 ( HReg r )
2193 UInt n;
2194 vassert(hregClass(r) == HRcInt64);
2195 vassert(!hregIsVirtual(r));
2196 n = hregEncoding(r);
2197 vassert(n <= 15);
2198 return (n >> 3) & 1;
2201 /* Produce a complete 4-bit integer register number. */
2202 inline static UInt iregEnc3210 ( HReg r )
2204 UInt n;
2205 vassert(hregClass(r) == HRcInt64);
2206 vassert(!hregIsVirtual(r));
2207 n = hregEncoding(r);
2208 vassert(n <= 15);
2209 return n;
2212 /* Produce a complete 4-bit integer register number. */
2213 inline static UInt vregEnc3210 ( HReg r )
2215 UInt n;
2216 vassert(hregClass(r) == HRcVec128);
2217 vassert(!hregIsVirtual(r));
2218 n = hregEncoding(r);
2219 vassert(n <= 15);
2220 return n;
2223 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
2225 vassert(mod < 4);
2226 vassert((reg|regmem) < 8);
2227 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
2230 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
2232 vassert(shift < 4);
2233 vassert((regindex|regbase) < 8);
2234 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
2237 static UChar* emit32 ( UChar* p, UInt w32 )
2239 *p++ = toUChar((w32) & 0x000000FF);
2240 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2241 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2242 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2243 return p;
2246 static UChar* emit64 ( UChar* p, ULong w64 )
2248 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2249 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2250 return p;
2253 /* Does a sign-extend of the lowest 8 bits give
2254 the original number? */
2255 static Bool fits8bits ( UInt w32 )
2257 Int i32 = (Int)w32;
2258 return toBool(i32 == ((Int)(w32 << 24) >> 24));
2260 /* Can the lower 32 bits be signedly widened to produce the whole
2261 64-bit value? In other words, are the top 33 bits either all 0 or
2262 all 1 ? */
2263 static Bool fitsIn32Bits ( ULong x )
2265 Long y1;
2266 y1 = x << 32;
2267 y1 >>=/*s*/ 32;
2268 return toBool(x == y1);
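/* Editor's illustration (not in the original): a few sanity examples
   for the two range predicates above. */
static inline void example_immediate_range_checks ( void )
{
   vassert(   fits8bits(0x7Fu) );   /* 0x7F sign-extends to itself */
   vassert( ! fits8bits(0x80u) );   /* 0x80 sign-extends to 0xFFFFFF80 */
   vassert(   fitsIn32Bits(0xFFFFFFFF80000000ULL) );  /* top 33 bits all 1 */
   vassert( ! fitsIn32Bits(0x0000000080000000ULL) );  /* top 33 bits mixed */
}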
2272 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2274 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2275 = 00 greg ereg
2277 greg, d8(ereg) | ereg is neither of: RSP R12
2278 = 01 greg ereg, d8
2280 greg, d32(ereg) | ereg is neither of: RSP R12
2281 = 10 greg ereg, d32
2283 greg, d8(ereg) | ereg is either: RSP R12
2284 = 01 greg 100, 0x24, d8
2285 (lowest bit of rex distinguishes R12/RSP)
2287 greg, d32(ereg) | ereg is either: RSP R12
2288 = 10 greg 100, 0x24, d32
2289 (lowest bit of rex distinguishes R12/RSP)
2291 -----------------------------------------------
2293 greg, d8(base,index,scale)
2294 | index != RSP
2295 = 01 greg 100, scale index base, d8
2297 greg, d32(base,index,scale)
2298 | index != RSP
2299 = 10 greg 100, scale index base, d32
2301 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2303 UInt gregEnc210 = gregEnc3210 & 7;
2304 if (am->tag == Aam_IR) {
2305 if (am->Aam.IR.imm == 0
2306 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2307 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2308 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2309 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2311 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2312 return p;
2314 if (fits8bits(am->Aam.IR.imm)
2315 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2316 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2318 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2319 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2320 return p;
2322 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2323 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2325 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2326 p = emit32(p, am->Aam.IR.imm);
2327 return p;
2329 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2330 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2331 && fits8bits(am->Aam.IR.imm)) {
2332 *p++ = mkModRegRM(1, gregEnc210, 4);
2333 *p++ = 0x24;
2334 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2335 return p;
2337 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2338 || wait for test case for RSP case */
2339 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2340 *p++ = mkModRegRM(2, gregEnc210, 4);
2341 *p++ = 0x24;
2342 p = emit32(p, am->Aam.IR.imm);
2343 return p;
2345 ppAMD64AMode(am);
2346 vpanic("doAMode_M: can't emit amode IR");
2347 /*NOTREACHED*/
2349 if (am->tag == Aam_IRRS) {
2350 if (fits8bits(am->Aam.IRRS.imm)
2351 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2352 *p++ = mkModRegRM(1, gregEnc210, 4);
2353 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2354 iregEnc210(am->Aam.IRRS.base));
2355 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2356 return p;
2358 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2359 *p++ = mkModRegRM(2, gregEnc210, 4);
2360 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2361 iregEnc210(am->Aam.IRRS.base));
2362 p = emit32(p, am->Aam.IRRS.imm);
2363 return p;
2365 ppAMD64AMode(am);
2366 vpanic("doAMode_M: can't emit amode IRRS");
2367 /*NOTREACHED*/
2369 vpanic("doAMode_M: unknown amode");
2370 /*NOTREACHED*/
2373 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2375 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2378 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2380 vassert(gregEnc3210 < 16);
2381 return doAMode_M__wrk(p, gregEnc3210, am);
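/* Editor's worked example (not in the original): with greg = %rax
   (encoding 0) and am = 8(%rdi), the "01 greg ereg, d8" row of the
   table above applies and doAMode_M emits 0x47 0x08.  With am =
   8(%rsp) the SIB escape is needed instead, giving 0x44 0x24 0x08. */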
2385 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2386 inline
2387 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2389 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2390 return p;
2393 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2395 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2398 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2400 vassert(gregEnc3210 < 16);
2401 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2404 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2406 vassert(eregEnc3210 < 16);
2407 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2410 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2412 vassert( (gregEnc3210|eregEnc3210) < 16);
2413 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2417 /* Clear the W bit on a REX byte, thereby changing the operand size
2418 back to whatever that instruction's default operand size is. */
2419 static inline UChar clearWBit ( UChar rex )
2421 return rex & ~(1<<3);
2424 static inline UChar setWBit ( UChar rex )
2426 return rex | (1<<3);
2430 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2431 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2433 if (am->tag == Aam_IR) {
2434 UChar W = 1; /* we want 64-bit mode */
2435 UChar R = (gregEnc3210 >> 3) & 1;
2436 UChar X = 0; /* not relevant */
2437 UChar B = iregEnc3(am->Aam.IR.reg);
2438 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2440 if (am->tag == Aam_IRRS) {
2441 UChar W = 1; /* we want 64-bit mode */
2442 UChar R = (gregEnc3210 >> 3) & 1;
2443 UChar X = iregEnc3(am->Aam.IRRS.index);
2444 UChar B = iregEnc3(am->Aam.IRRS.base);
2445 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2447 vassert(0);
2448 return 0; /*NOTREACHED*/
2451 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2453 return rexAMode_M__wrk(iregEnc3210(greg), am);
2456 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2458 vassert(gregEnc3210 < 16);
2459 return rexAMode_M__wrk(gregEnc3210, am);
2463 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2464 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2466 UChar W = 1; /* we want 64-bit mode */
2467 UChar R = (gregEnc3210 >> 3) & 1;
2468 UChar X = 0; /* not relevant */
2469 UChar B = (eregEnc3210 >> 3) & 1;
2470 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2473 static UChar rexAMode_R ( HReg greg, HReg ereg )
2475 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2478 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2480 vassert(gregEnc3210 < 16);
2481 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2484 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2486 vassert(eregEnc3210 < 16);
2487 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2490 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2492 vassert((gregEnc3210|eregEnc3210) < 16);
2493 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
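/* Editor's sketch (not part of the original file): combining the REX
   and ModRM helpers above.  Encoding "addq %rsi, %rdi" (opcode 0x01,
   ADD r/m64,r64) should produce the bytes 48 01 F7. */
static inline UChar* example_encode_addq_rsi_rdi ( UChar* p )
{
   *p++ = rexAMode_R( hregAMD64_RSI(), hregAMD64_RDI() );  /* 0x48 */
   *p++ = 0x01;                                            /* ADD r/m64, r64 */
   p = doAMode_R( p, hregAMD64_RSI(), hregAMD64_RDI() );   /* ModRM 0xF7 */
   return p;
}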
2497 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
2498 //uu been verified correct (I reckon). Certainly it has been known to
2499 //uu produce correct VEX prefixes during testing. */
2500 //uu
2501 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
2502 //uu notVvvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2503 //uu in verbatim. There's no range checking on the bits. */
2504 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2505 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2506 //uu UInt L, UInt pp )
2507 //uu {
2508 //uu UChar byte0 = 0;
2509 //uu UChar byte1 = 0;
2510 //uu UChar byte2 = 0;
2511 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2512 //uu /* 2 byte encoding is possible. */
2513 //uu byte0 = 0xC5;
2514 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2515 //uu | (L << 2) | pp;
2516 //uu } else {
2517 //uu /* 3 byte encoding is needed. */
2518 //uu byte0 = 0xC4;
2519 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2520 //uu | ((rexB ^ 1) << 5) | mmmmm;
2521 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2522 //uu }
2523 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2524 //uu }
2525 //uu
2526 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2527 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2528 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2529 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2530 //uu vvvv=1111 (unused 3rd reg). */
2531 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2532 //uu {
2533 //uu UChar L = 1; /* size = 256 */
2534 //uu UChar pp = 0; /* no SIMD prefix */
2535 //uu UChar mmmmm = 1; /* 0F */
2536 //uu UChar notVvvv = 0; /* unused */
2537 //uu UChar rexW = 0;
2538 //uu UChar rexR = 0;
2539 //uu UChar rexX = 0;
2540 //uu UChar rexB = 0;
2541 //uu /* Same logic as in rexAMode_M. */
2542 //uu if (am->tag == Aam_IR) {
2543 //uu rexR = iregEnc3(greg);
2544 //uu rexX = 0; /* not relevant */
2545 //uu rexB = iregEnc3(am->Aam.IR.reg);
2546 //uu }
2547 //uu else if (am->tag == Aam_IRRS) {
2548 //uu rexR = iregEnc3(greg);
2549 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2550 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2551 //uu } else {
2552 //uu vassert(0);
2553 //uu }
2554 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2555 //uu }
2556 //uu
2557 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2558 //uu {
2559 //uu switch (vex & 0xFF) {
2560 //uu case 0xC5:
2561 //uu *p++ = 0xC5;
2562 //uu *p++ = (vex >> 8) & 0xFF;
2563 //uu vassert(0 == (vex >> 16));
2564 //uu break;
2565 //uu case 0xC4:
2566 //uu *p++ = 0xC4;
2567 //uu *p++ = (vex >> 8) & 0xFF;
2568 //uu *p++ = (vex >> 16) & 0xFF;
2569 //uu vassert(0 == (vex >> 24));
2570 //uu break;
2571 //uu default:
2572 //uu vassert(0);
2573 //uu }
2574 //uu return p;
2575 //uu }
2578 /* Emit ffree %st(N) */
2579 static UChar* do_ffree_st ( UChar* p, Int n )
2581 vassert(n >= 0 && n <= 7);
2582 *p++ = 0xDD;
2583 *p++ = toUChar(0xC0 + n);
2584 return p;
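/* Editor's note (not in the original): for example, do_ffree_st(p, 3)
   emits DD C3, i.e. "ffree %st(3)". */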
2587 /* Emit an instruction into buf and return the number of bytes used.
2588 Note that buf is not the insn's final place, and therefore it is
2589 imperative to emit position-independent code. If the emitted
2590 instruction was a profiler inc, set *is_profInc to True, else
2591 leave it unchanged. */
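/* Editor's usage sketch (not in the original): a caller typically does

      UChar buf[64];
      Bool  isProfInc = False;
      Int   n = emit_AMD64Instr( &isProfInc, buf, sizeof(buf), insn,
                                 True, endness_host,
                                 chain_slow, chain_fast,
                                 xindir, xassisted );

   and then copies buf[0 .. n-1] to the instruction's final location.
   'insn', 'endness_host' and the four dispatcher pointers here are
   placeholders supplied by the caller. */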
2593 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2594 UChar* buf, Int nbuf, const AMD64Instr* i,
2595 Bool mode64, VexEndness endness_host,
2596 const void* disp_cp_chain_me_to_slowEP,
2597 const void* disp_cp_chain_me_to_fastEP,
2598 const void* disp_cp_xindir,
2599 const void* disp_cp_xassisted )
2601 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2602 UInt xtra;
2603 UInt reg;
2604 UChar rex;
2605 UChar* p = &buf[0];
2606 UChar* ptmp;
2607 Int j;
2608 vassert(nbuf >= 64);
2609 vassert(mode64 == True);
2611 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2613 switch (i->tag) {
2615 case Ain_Imm64:
2616 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2617 /* Use the short form (load into 32 bit reg, + default
2618 widening rule) for constants under 1 million. We could
2619 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2620 limit it to a smaller range for verifiability purposes. */
2621 if (1 & iregEnc3(i->Ain.Imm64.dst))
2622 *p++ = 0x41;
2623 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2624 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2625 } else {
2626 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2627 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2628 p = emit64(p, i->Ain.Imm64.imm64);
2630 goto done;
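/* Editor's worked example (not in the original): an Imm64 of 0x1234
   into %rcx takes the short form above and emits B9 34 12 00 00
   ("movl $0x1234, %ecx"), relying on the implicit zero-extension of
   32-bit writes; a constant needing all 64 bits would instead emit
   48 B9 followed by the eight immediate bytes ("movabsq"). */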
2632 case Ain_Alu64R:
2633 /* Deal specially with MOV */
2634 if (i->Ain.Alu64R.op == Aalu_MOV) {
2635 switch (i->Ain.Alu64R.src->tag) {
2636 case Armi_Imm:
2637 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2638 /* Actually we could use this form for constants in
2639 the range 0 through 0x7FFFFFFF inclusive, but
2640 limit it to a small range for verifiability
2641 purposes. */
2642 /* Generate "movl $imm32, 32-bit-register" and let
2643 the default zero-extend rule cause the upper half
2644 of the dst to be zeroed out too. This saves 1
2645 and sometimes 2 bytes compared to the more
2646 obvious encoding in the 'else' branch. */
2647 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2648 *p++ = 0x41;
2649 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2650 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2651 } else {
2652 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2653 *p++ = 0xC7;
2654 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2655 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2657 goto done;
2658 case Armi_Reg:
2659 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2660 i->Ain.Alu64R.dst );
2661 *p++ = 0x89;
2662 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2663 i->Ain.Alu64R.dst);
2664 goto done;
2665 case Armi_Mem:
2666 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2667 i->Ain.Alu64R.src->Armi.Mem.am);
2668 *p++ = 0x8B;
2669 p = doAMode_M(p, i->Ain.Alu64R.dst,
2670 i->Ain.Alu64R.src->Armi.Mem.am);
2671 goto done;
2672 default:
2673 goto bad;
2676 /* MUL */
2677 if (i->Ain.Alu64R.op == Aalu_MUL) {
2678 switch (i->Ain.Alu64R.src->tag) {
2679 case Armi_Reg:
2680 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2681 i->Ain.Alu64R.src->Armi.Reg.reg);
2682 *p++ = 0x0F;
2683 *p++ = 0xAF;
2684 p = doAMode_R(p, i->Ain.Alu64R.dst,
2685 i->Ain.Alu64R.src->Armi.Reg.reg);
2686 goto done;
2687 case Armi_Mem:
2688 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2689 i->Ain.Alu64R.src->Armi.Mem.am);
2690 *p++ = 0x0F;
2691 *p++ = 0xAF;
2692 p = doAMode_M(p, i->Ain.Alu64R.dst,
2693 i->Ain.Alu64R.src->Armi.Mem.am);
2694 goto done;
2695 case Armi_Imm:
2696 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2697 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2698 *p++ = 0x6B;
2699 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2700 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2701 } else {
2702 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2703 *p++ = 0x69;
2704 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2705 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2707 goto done;
2708 default:
2709 goto bad;
2712 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2713 opc = opc_rr = subopc_imm = opc_imma = 0;
2714 switch (i->Ain.Alu64R.op) {
2715 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2716 subopc_imm = 2; opc_imma = 0x15; break;
2717 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2718 subopc_imm = 0; opc_imma = 0x05; break;
2719 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2720 subopc_imm = 5; opc_imma = 0x2D; break;
2721 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2722 subopc_imm = 3; opc_imma = 0x1D; break;
2723 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2724 subopc_imm = 4; opc_imma = 0x25; break;
2725 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2726 subopc_imm = 6; opc_imma = 0x35; break;
2727 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2728 subopc_imm = 1; opc_imma = 0x0D; break;
2729 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2730 subopc_imm = 7; opc_imma = 0x3D; break;
2731 default: goto bad;
2733 switch (i->Ain.Alu64R.src->tag) {
2734 case Armi_Imm:
2735 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2736 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2737 goto bad; /* FIXME: awaiting test case */
2738 *p++ = toUChar(opc_imma);
2739 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2740 } else
2741 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2742 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2743 *p++ = 0x83;
2744 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2745 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2746 } else {
2747 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2748 *p++ = 0x81;
2749 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2750 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2752 goto done;
2753 case Armi_Reg:
2754 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2755 i->Ain.Alu64R.dst);
2756 *p++ = toUChar(opc_rr);
2757 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2758 i->Ain.Alu64R.dst);
2759 goto done;
2760 case Armi_Mem:
2761 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2762 i->Ain.Alu64R.src->Armi.Mem.am);
2763 *p++ = toUChar(opc);
2764 p = doAMode_M(p, i->Ain.Alu64R.dst,
2765 i->Ain.Alu64R.src->Armi.Mem.am);
2766 goto done;
2767 default:
2768 goto bad;
2770 break;
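/* Editor's worked examples (not in the original) for the table above:
   "addq $1, %rbx" takes the fits8bits path and emits 48 83 C3 01,
   while "xorq %rax, %rbx" takes the Armi_Reg path (opc_rr 0x31) and
   emits 48 31 C3. */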
2772 case Ain_Alu64M:
2773 /* Deal specially with MOV */
2774 if (i->Ain.Alu64M.op == Aalu_MOV) {
2775 switch (i->Ain.Alu64M.src->tag) {
2776 case Ari_Reg:
2777 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2778 i->Ain.Alu64M.dst);
2779 *p++ = 0x89;
2780 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2781 i->Ain.Alu64M.dst);
2782 goto done;
2783 case Ari_Imm:
2784 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2785 *p++ = 0xC7;
2786 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2787 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2788 goto done;
2789 default:
2790 goto bad;
2793 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2794 allowed here. (This is derived from the x86 version of same). */
2795 opc = subopc_imm = opc_imma = 0;
2796 switch (i->Ain.Alu64M.op) {
2797 case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
2798 default: goto bad;
2800 switch (i->Ain.Alu64M.src->tag) {
2801 /*FIXME
2802 case Xri_Reg:
2803 *p++ = toUChar(opc);
2804 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
2805 i->Xin.Alu32M.dst);
2806 goto done;
2807 */
2808 case Ari_Imm:
2809 if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
2810 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2811 *p++ = 0x83;
2812 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2813 *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
2814 goto done;
2815 } else {
2816 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2817 *p++ = 0x81;
2818 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2819 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2820 goto done;
2822 default:
2823 goto bad;
2826 break;
2828 case Ain_Sh64:
2829 opc_cl = opc_imm = subopc = 0;
2830 switch (i->Ain.Sh64.op) {
2831 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2832 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2833 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2834 default: goto bad;
2836 if (i->Ain.Sh64.src == 0) {
2837 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2838 *p++ = toUChar(opc_cl);
2839 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2840 goto done;
2841 } else {
2842 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2843 *p++ = toUChar(opc_imm);
2844 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2845 *p++ = (UChar)(i->Ain.Sh64.src);
2846 goto done;
2848 break;
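/* Editor's worked example (not in the original): "shlq $3, %rcx" uses
   the immediate form above (subopc 4) and emits 48 C1 E1 03, while
   the %cl-count form "shlq %cl, %rcx" emits 48 D3 E1. */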
2850 case Ain_Sh32:
2851 opc_cl = opc_imm = subopc = 0;
2852 switch (i->Ain.Sh32.op) {
2853 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2854 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2855 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2856 default: goto bad;
2858 if (i->Ain.Sh32.src == 0) {
2859 rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
2860 if (rex != 0x40) *p++ = rex;
2861 *p++ = toUChar(opc_cl);
2862 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
2863 goto done;
2864 } else {
2865 rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
2866 if (rex != 0x40) *p++ = rex;
2867 *p++ = toUChar(opc_imm);
2868 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
2869 *p++ = (UChar)(i->Ain.Sh32.src);
2870 goto done;
2872 break;
2874 case Ain_Test64:
2875 /* testq sign-extend($imm32), %reg */
2876 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2877 *p++ = 0xF7;
2878 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2879 p = emit32(p, i->Ain.Test64.imm32);
2880 goto done;
2882 case Ain_Unary64:
2883 if (i->Ain.Unary64.op == Aun_NOT) {
2884 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2885 *p++ = 0xF7;
2886 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2887 goto done;
2889 if (i->Ain.Unary64.op == Aun_NEG) {
2890 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2891 *p++ = 0xF7;
2892 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2893 goto done;
2895 break;
2897 case Ain_Lea64:
2898 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2899 *p++ = 0x8D;
2900 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2901 goto done;
2903 case Ain_Alu32R:
2904 /* ADD/SUB/AND/OR/XOR/CMP */
2905 opc = opc_rr = subopc_imm = opc_imma = 0;
2906 switch (i->Ain.Alu32R.op) {
2907 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2908 subopc_imm = 0; opc_imma = 0x05; break;
2909 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2910 subopc_imm = 5; opc_imma = 0x2D; break;
2911 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2912 subopc_imm = 4; opc_imma = 0x25; break;
2913 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2914 subopc_imm = 6; opc_imma = 0x35; break;
2915 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2916 subopc_imm = 1; opc_imma = 0x0D; break;
2917 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2918 subopc_imm = 7; opc_imma = 0x3D; break;
2919 default: goto bad;
2921 switch (i->Ain.Alu32R.src->tag) {
2922 case Armi_Imm:
2923 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2924 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2925 goto bad; /* FIXME: awaiting test case */
2926 *p++ = toUChar(opc_imma);
2927 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2928 } else
2929 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2930 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2931 if (rex != 0x40) *p++ = rex;
2932 *p++ = 0x83;
2933 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2934 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2935 } else {
2936 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2937 if (rex != 0x40) *p++ = rex;
2938 *p++ = 0x81;
2939 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2940 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2942 goto done;
2943 case Armi_Reg:
2944 rex = clearWBit(
2945 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2946 i->Ain.Alu32R.dst) );
2947 if (rex != 0x40) *p++ = rex;
2948 *p++ = toUChar(opc_rr);
2949 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2950 i->Ain.Alu32R.dst);
2951 goto done;
2952 case Armi_Mem:
2953 rex = clearWBit(
2954 rexAMode_M( i->Ain.Alu32R.dst,
2955 i->Ain.Alu32R.src->Armi.Mem.am) );
2956 if (rex != 0x40) *p++ = rex;
2957 *p++ = toUChar(opc);
2958 p = doAMode_M(p, i->Ain.Alu32R.dst,
2959 i->Ain.Alu32R.src->Armi.Mem.am);
2960 goto done;
2961 default:
2962 goto bad;
2964 break;
2966 case Ain_MulL:
2967 subopc = i->Ain.MulL.syned ? 5 : 4;
2968 switch (i->Ain.MulL.src->tag) {
2969 case Arm_Mem:
2970 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2971 *p++ = 0xF7;
2972 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2973 goto done;
2974 case Arm_Reg:
2975 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2976 *p++ = 0xF7;
2977 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2978 goto done;
2979 default:
2980 goto bad;
2982 break;
2984 case Ain_Div:
2985 subopc = i->Ain.Div.syned ? 7 : 6;
2986 if (i->Ain.Div.sz == 4) {
2987 switch (i->Ain.Div.src->tag) {
2988 case Arm_Mem:
2989 goto bad;
2990 /*FIXME*/
2991 *p++ = 0xF7;
2992 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2993 goto done;
2994 case Arm_Reg:
2995 *p++ = clearWBit(
2996 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2997 *p++ = 0xF7;
2998 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2999 goto done;
3000 default:
3001 goto bad;
3004 if (i->Ain.Div.sz == 8) {
3005 switch (i->Ain.Div.src->tag) {
3006 case Arm_Mem:
3007 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
3008 *p++ = 0xF7;
3009 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
3010 goto done;
3011 case Arm_Reg:
3012 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
3013 *p++ = 0xF7;
3014 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
3015 goto done;
3016 default:
3017 goto bad;
3020 break;
3022 case Ain_Push:
3023 switch (i->Ain.Push.src->tag) {
3024 case Armi_Mem:
3025 *p++ = clearWBit(
3026 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
3027 *p++ = 0xFF;
3028 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
3029 goto done;
3030 case Armi_Imm:
3031 *p++ = 0x68;
3032 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
3033 goto done;
3034 case Armi_Reg:
3035 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
3036 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
3037 goto done;
3038 default:
3039 goto bad;
3042 case Ain_Call: {
3043 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
3044 above, %r11 is used as an address temporary. */
3045 /* If we don't need to do any fixup actions in the case that the
3046 call doesn't happen, just do the simple thing and emit
3047 straight-line code. This is usually the case. */
3048 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
3049 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3050 /* jump over the following two insns if the condition does
3051 not hold */
3052 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
3053 if (i->Ain.Call.cond != Acc_ALWAYS) {
3054 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
3055 *p++ = shortImm ? 10 : 13;
3056 /* 10 or 13 bytes in the next two insns */
3058 if (shortImm) {
3059 /* 7 bytes: movl sign-extend(imm32), %r11 */
3060 *p++ = 0x49;
3061 *p++ = 0xC7;
3062 *p++ = 0xC3;
3063 p = emit32(p, (UInt)i->Ain.Call.target);
3064 } else {
3065 /* 10 bytes: movabsq $target, %r11 */
3066 *p++ = 0x49;
3067 *p++ = 0xBB;
3068 p = emit64(p, i->Ain.Call.target);
3070 /* 3 bytes: call *%r11 */
3071 *p++ = 0x41;
3072 *p++ = 0xFF;
3073 *p++ = 0xD3;
3074 } else {
3075 Int delta;
3076 /* Complex case. We have to generate an if-then-else diamond. */
3077 // before:
3078 // j{!cond} else:
3079 // movabsq $target, %r11
3080 // call* %r11
3081 // preElse:
3082 // jmp after:
3083 // else:
3084 // movabsq $0x5555555555555555, %rax // possibly
3085 // movq %rax, %rdx // possibly
3086 // after:
3088 // before:
3089 UChar* pBefore = p;
3091 // j{!cond} else:
3092 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
3093 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3095 // movabsq $target, %r11
3096 *p++ = 0x49;
3097 *p++ = 0xBB;
3098 p = emit64(p, i->Ain.Call.target);
3100 // call* %r11
3101 *p++ = 0x41;
3102 *p++ = 0xFF;
3103 *p++ = 0xD3;
3105 // preElse:
3106 UChar* pPreElse = p;
3108 // jmp after:
3109 *p++ = 0xEB;
3110 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3112 // else:
3113 UChar* pElse = p;
3115 /* Do the 'else' actions */
3116 switch (i->Ain.Call.rloc.pri) {
3117 case RLPri_Int:
3118 // movabsq $0x5555555555555555, %rax
3119 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3120 break;
3121 case RLPri_2Int:
3122 goto bad; //ATC
3123 // movabsq $0x5555555555555555, %rax
3124 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3125 // movq %rax, %rdx
3126 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
3127 break;
3128 case RLPri_V128SpRel:
3129 if (i->Ain.Call.rloc.spOff == 0) {
3130 // We could accept any |spOff| here, but that's more
3131 // hassle and the only value we're ever going to get
3132 // is zero (I believe.) Hence take the easy path :)
3133 // We need a scratch register -- r11 can be it.
3134 // movabsq $0x5555555555555555, %r11
3135 *p++ = 0x49; *p++ = 0xBB;
3136 p = emit64(p, 0x5555555555555555ULL);
3137 // movq %r11, 0(%rsp)
3138 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
3139 // movq %r11, 8(%rsp)
3140 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
3141 *p++ = 0x08;
3142 break;
3144 goto bad; //ATC for all other spOff values
3145 case RLPri_V256SpRel:
3146 goto bad; //ATC
3147 case RLPri_None: case RLPri_INVALID: default:
3148 vassert(0); // should never get here
3151 // after:
3152 UChar* pAfter = p;
3154 // Fix up the branch offsets. The +2s in the offset
3155 // calculations are there because x86 requires conditional
3156 // branches to have their offset stated relative to the
3157 // instruction immediately following the branch insn. And in
3158 // both cases the branch insns are 2 bytes long.
3160 // First, the "j{!cond} else:" at pBefore.
3161 delta = (Int)(Long)(pElse - (pBefore + 2));
3162 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3163 *(pBefore+1) = (UChar)delta;
3165 // And secondly, the "jmp after:" at pPreElse.
3166 delta = (Int)(Long)(pAfter - (pPreElse + 2));
3167 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3168 *(pPreElse+1) = (UChar)delta;
3170 goto done;
3173 case Ain_XDirect: {
3174 /* NB: what goes on here has to be very closely coordinated with the
3175 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3176 /* We're generating chain-me requests here, so we need to be
3177 sure this is actually allowed -- no-redir translations can't
3178 use chain-me's. Hence: */
3179 vassert(disp_cp_chain_me_to_slowEP != NULL);
3180 vassert(disp_cp_chain_me_to_fastEP != NULL);
3182 HReg r11 = hregAMD64_R11();
3184 /* Use ptmp for backpatching conditional jumps. */
3185 ptmp = NULL;
3187 /* First off, if this is conditional, create a conditional
3188 jump over the rest of it. */
3189 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3190 /* jmp fwds if !condition */
3191 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
3192 ptmp = p; /* fill in this bit later */
3193 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3196 /* Update the guest RIP. */
3197 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
3198 /* use a shorter encoding */
3199 /* movl sign-extend(dstGA), %r11 */
3200 *p++ = 0x49;
3201 *p++ = 0xC7;
3202 *p++ = 0xC3;
3203 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
3204 } else {
3205 /* movabsq $dstGA, %r11 */
3206 *p++ = 0x49;
3207 *p++ = 0xBB;
3208 p = emit64(p, i->Ain.XDirect.dstGA);
3211 /* movq %r11, amRIP */
3212 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
3213 *p++ = 0x89;
3214 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
3216 /* --- FIRST PATCHABLE BYTE follows --- */
3217 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3218 to) backs up the return address, so as to find the address of
3219 the first patchable byte. So: don't change the length of the
3220 two instructions below. */
3221 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3222 *p++ = 0x49;
3223 *p++ = 0xBB;
3224 const void* disp_cp_chain_me
3225 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3226 : disp_cp_chain_me_to_slowEP;
3227 p = emit64(p, (Addr)disp_cp_chain_me);
3228 /* call *%r11 */
3229 *p++ = 0x41;
3230 *p++ = 0xFF;
3231 *p++ = 0xD3;
3232 /* --- END of PATCHABLE BYTES --- */
3234 /* Fix up the conditional jump, if there was one. */
3235 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3236 Int delta = p - ptmp;
3237 vassert(delta > 0 && delta < 40);
3238 *ptmp = toUChar(delta-1);
3240 goto done;
3243 case Ain_XIndir: {
3244 /* We're generating transfers that could lead indirectly to a
3245 chain-me, so we need to be sure this is actually allowed --
3246 no-redir translations are not allowed to reach normal
3247 translations without going through the scheduler. That means
3248 no XDirects or XIndirs out from no-redir translations.
3249 Hence: */
3250 vassert(disp_cp_xindir != NULL);
3252 /* Use ptmp for backpatching conditional jumps. */
3253 ptmp = NULL;
3255 /* First off, if this is conditional, create a conditional
3256 jump over the rest of it. */
3257 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3258 /* jmp fwds if !condition */
3259 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3260 ptmp = p; /* fill in this bit later */
3261 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3264 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3265 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3266 *p++ = 0x89;
3267 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3269 /* get $disp_cp_xindir into %r11 */
3270 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
3271 /* use a shorter encoding */
3272 /* movl sign-extend(disp_cp_xindir), %r11 */
3273 *p++ = 0x49;
3274 *p++ = 0xC7;
3275 *p++ = 0xC3;
3276 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
3277 } else {
3278 /* movabsq $disp_cp_xindir, %r11 */
3279 *p++ = 0x49;
3280 *p++ = 0xBB;
3281 p = emit64(p, (Addr)disp_cp_xindir);
3284 /* jmp *%r11 */
3285 *p++ = 0x41;
3286 *p++ = 0xFF;
3287 *p++ = 0xE3;
3289 /* Fix up the conditional jump, if there was one. */
3290 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3291 Int delta = p - ptmp;
3292 vassert(delta > 0 && delta < 40);
3293 *ptmp = toUChar(delta-1);
3295 goto done;
3298 case Ain_XAssisted: {
3299 /* Use ptmp for backpatching conditional jumps. */
3300 ptmp = NULL;
3302 /* First off, if this is conditional, create a conditional
3303 jump over the rest of it. */
3304 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3305 /* jmp fwds if !condition */
3306 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3307 ptmp = p; /* fill in this bit later */
3308 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3311 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3312 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3313 *p++ = 0x89;
3314 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3315 /* movl $magic_number, %ebp. Since these numbers are all small positive
3316 integers, we can get away with "movl $N, %ebp" rather than
3317 the longer "movq $N, %rbp". */
3318 UInt trcval = 0;
3319 switch (i->Ain.XAssisted.jk) {
3320 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3321 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3322 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3323 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
3324 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3325 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3326 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3327 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3328 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3329 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3330 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3331 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3332 case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break;
3333 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3334 /* We don't expect to see the following being assisted. */
3335 case Ijk_Ret:
3336 case Ijk_Call:
3337 /* fallthrough */
3338 default:
3339 ppIRJumpKind(i->Ain.XAssisted.jk);
3340 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3342 vassert(trcval != 0);
3343 *p++ = 0xBD;
3344 p = emit32(p, trcval);
3345 /* movabsq $disp_assisted, %r11 */
3346 *p++ = 0x49;
3347 *p++ = 0xBB;
3348 p = emit64(p, (Addr)disp_cp_xassisted);
3349 /* jmp *%r11 */
3350 *p++ = 0x41;
3351 *p++ = 0xFF;
3352 *p++ = 0xE3;
3354 /* Fix up the conditional jump, if there was one. */
3355 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3356 Int delta = p - ptmp;
3357 vassert(delta > 0 && delta < 40);
3358 *ptmp = toUChar(delta-1);
3360 goto done;
3363 case Ain_CMov64:
3364 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3365 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3366 *p++ = 0x0F;
3367 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3368 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3369 goto done;
3371 case Ain_CLoad: {
3372 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3374 /* Only 32- or 64-bit variants are allowed. */
3375 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3377 /* Use ptmp for backpatching conditional jumps. */
3378 ptmp = NULL;
3380 /* jmp fwds if !condition */
3381 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3382 ptmp = p; /* fill in this bit later */
3383 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3385 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3386 load, which, by the default zero-extension rule, zeroes out
3387 the upper half of the destination, as required. */
3388 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3389 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3390 *p++ = 0x8B;
3391 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3393 /* Fix up the conditional branch */
3394 Int delta = p - ptmp;
3395 vassert(delta > 0 && delta < 40);
3396 *ptmp = toUChar(delta-1);
3397 goto done;
3400 case Ain_CStore: {
3401 /* AFAICS this is identical to Ain_CLoad except that the opcode
3402 is 0x89 instead of 0x8B. */
3403 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3405 /* Only 32- or 64-bit variants are allowed. */
3406 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3408 /* Use ptmp for backpatching conditional jumps. */
3409 ptmp = NULL;
3411 /* jmp fwds if !condition */
3412 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3413 ptmp = p; /* fill in this bit later */
3414 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3416 /* Now the store. */
3417 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3418 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3419 *p++ = 0x89;
3420 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3422 /* Fix up the conditional branch */
3423 Int delta = p - ptmp;
3424 vassert(delta > 0 && delta < 40);
3425 *ptmp = toUChar(delta-1);
3426 goto done;
3429 case Ain_MovxLQ:
3430 /* No, _don't_ ask me why the sense of the args has to be
3431 different in the S vs Z case. I don't know. */
3432 if (i->Ain.MovxLQ.syned) {
3433 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3434 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3435 *p++ = 0x63;
3436 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3437 } else {
3438 /* Produce a 32-bit reg-reg move, since the implicit
3439 zero-extend does what we want. */
3440 *p++ = clearWBit (
3441 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3442 *p++ = 0x89;
3443 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3445 goto done;
3447 case Ain_LoadEX:
3448 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3449 /* movzbq */
3450 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3451 *p++ = 0x0F;
3452 *p++ = 0xB6;
3453 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3454 goto done;
3456 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3457 /* movzwq */
3458 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3459 *p++ = 0x0F;
3460 *p++ = 0xB7;
3461 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3462 goto done;
3464 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3465 /* movzlq */
3466 /* This isn't really an existing AMD64 instruction per se.
3467 Rather, we have to do a 32-bit load. Because a 32-bit
3468 write implicitly clears the upper 32 bits of the target
3469 register, we get what we want. */
3470 *p++ = clearWBit(
3471 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3472 *p++ = 0x8B;
3473 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3474 goto done;
3476 break;
3478 case Ain_Set64:
3479 /* Make the destination register be 1 or 0, depending on whether
3480 the relevant condition holds. Complication: the top 56 bits
3481 of the destination should be forced to zero, but doing 'xorq
3482 %r,%r' kills the flag(s) we are about to read. Sigh. So
3483 start off my moving $0 into the dest. */
3484 reg = iregEnc3210(i->Ain.Set64.dst);
3485 vassert(reg < 16);
3487 /* movq $0, %dst */
3488 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3489 *p++ = 0xC7;
3490 *p++ = toUChar(0xC0 + (reg & 7));
3491 p = emit32(p, 0);
3493 /* setb lo8(%dst) */
3494 /* note, 8-bit register rex trickiness. Be careful here. */
3495 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3496 *p++ = 0x0F;
3497 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3498 *p++ = toUChar(0xC0 + (reg & 7));
3499 goto done;
3501 case Ain_Bsfr64:
3502 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3503 *p++ = 0x0F;
3504 if (i->Ain.Bsfr64.isFwds) {
3505 *p++ = 0xBC;
3506 } else {
3507 *p++ = 0xBD;
3509 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3510 goto done;
3512 case Ain_MFence:
3513 /* mfence */
3514 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3515 goto done;
3517 case Ain_ACAS:
3518 /* lock */
3519 *p++ = 0xF0;
3520 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3521 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3522 in %rbx. The new-value register is hardwired to be %rbx
3523 since dealing with byte integer registers is too much hassle,
3524 so we force the register operand to %rbx (could equally be
3525 %rcx or %rdx). */
3526 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3527 if (i->Ain.ACAS.sz != 8)
3528 rex = clearWBit(rex);
3530 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3531 *p++ = 0x0F;
3532 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3533 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3534 goto done;
3536 case Ain_DACAS:
3537 /* lock */
3538 *p++ = 0xF0;
3539 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3540 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3541 aren't encoded in the insn. */
3542 rex = rexAMode_M_enc(1, i->Ain.ACAS.addr );
3543 if (i->Ain.ACAS.sz != 8)
3544 rex = clearWBit(rex);
3545 *p++ = rex;
3546 *p++ = 0x0F;
3547 *p++ = 0xC7;
3548 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3549 goto done;
3551 case Ain_A87Free:
3552 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3553 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3554 p = do_ffree_st(p, 7-j);
3556 goto done;
3558 case Ain_A87PushPop:
3559 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3560 if (i->Ain.A87PushPop.isPush) {
3561 /* Load from memory into %st(0): flds/fldl amode */
3562 *p++ = clearWBit(
3563 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3564 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3565 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3566 } else {
3567 /* Dump %st(0) to memory: fstps/fstpl amode */
3568 *p++ = clearWBit(
3569 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3570 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3571 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3572 goto done;
3574 goto done;
3576 case Ain_A87FpOp:
3577 switch (i->Ain.A87FpOp.op) {
3578 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3579 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3580 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3581 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3582 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3583 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3584 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3585 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3586 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3587 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3588 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3589 case Afp_TAN:
3590 /* fptan pushes 1.0 on the FP stack, except when the
3591 argument is out of range. Hence we have to do the
3592 instruction, then inspect C2 to see if there is an out
3593 of range condition. If there is, we skip the fincstp
3594 that is used by the in-range case to get rid of this
3595 extra 1.0 value. */
3596 *p++ = 0xD9; *p++ = 0xF2; // fptan
3597 *p++ = 0x50; // pushq %rax
3598 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3599 *p++ = 0x66; *p++ = 0xA9;
3600 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3601 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3602 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3603 *p++ = 0x58; // after_fincstp: popq %rax
3604 break;
3605 default:
3606               goto bad;
3607         }
3608 goto done;
3610 case Ain_A87LdCW:
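         /* For reference: this is "fldcw amode", encoded as D9 /5. */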
3611 *p++ = clearWBit(
3612 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3613 *p++ = 0xD9;
3614 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3615 goto done;
3617 case Ain_A87StSW:
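         /* For reference: this is "fnstsw amode", encoded as DD /7. */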
3618 *p++ = clearWBit(
3619 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3620 *p++ = 0xDD;
3621 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3622 goto done;
3624 case Ain_Store:
3625 if (i->Ain.Store.sz == 2) {
3626         /* This just goes to show the craziness of the instruction
3627 set encoding. We have to insert two prefix bytes, but be
3628 careful to avoid a conflict in what the size should be, by
3629 ensuring that REX.W = 0. */
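         /* Worked example (illustrative, assuming dst is (%rdi)): storing
            %si emits roughly 66 40 89 37, i.e. movw %si,(%rdi); the 0x40
            REX left by clearWBit is redundant but harmless. */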
3630 *p++ = 0x66; /* override to 16-bits */
3631 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3632 *p++ = 0x89;
3633 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3634            goto done;
3635         }
3636 if (i->Ain.Store.sz == 4) {
3637 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3638 *p++ = 0x89;
3639 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3640            goto done;
3641         }
3642 if (i->Ain.Store.sz == 1) {
3643 /* This is one place where it would be wrong to skip emitting
3644 a rex byte of 0x40, since the mere presence of rex changes
3645 the meaning of the byte register access. Be careful. */
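         /* Illustrative: with ModRM reg field 6, "40 88 37" means
            movb %sil,(%rdi), whereas a bare "88 37" would mean
            movb %dh,(%rdi) -- the mere presence of a REX prefix switches
            the byte register set from AH/CH/DH/BH to SPL/BPL/SIL/DIL. */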
3646 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3647 *p++ = 0x88;
3648 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3649            goto done;
3650         }
3651 break;
3653 case Ain_LdMXCSR:
3654 *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
3655 *p++ = 0x0F;
3656 *p++ = 0xAE;
3657 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3658 goto done;
3660 case Ain_SseUComIS:
3661 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3662 /* ucomi[sd] %srcL, %srcR */
3663 if (i->Ain.SseUComIS.sz == 8) {
3664 *p++ = 0x66;
3665 } else {
3666 goto bad;
3667            vassert(i->Ain.SseUComIS.sz == 4);
3668         }
3669 *p++ = clearWBit (
3670 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3671 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3672 *p++ = 0x0F;
3673 *p++ = 0x2E;
3674 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3675 vregEnc3210(i->Ain.SseUComIS.srcR) );
3676 /* pushfq */
3677 *p++ = 0x9C;
3678 /* popq %dst */
3679 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3680 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
3681 goto done;
3683 case Ain_SseSI2SF:
3684         /* cvtsi2s[sd] %src, %dst */
3685 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3686 i->Ain.SseSI2SF.src );
3687 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3688 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3689 *p++ = 0x0F;
3690 *p++ = 0x2A;
3691 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3692 i->Ain.SseSI2SF.src );
3693 goto done;
3695 case Ain_SseSF2SI:
3696         /* cvts[sd]2si %src, %dst */
3697 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3698 vregEnc3210(i->Ain.SseSF2SI.src) );
3699 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3700 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3701 *p++ = 0x0F;
3702 *p++ = 0x2D;
3703 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3704 vregEnc3210(i->Ain.SseSF2SI.src) );
3705 goto done;
3707 case Ain_SseSDSS:
3708 /* cvtsd2ss/cvtss2sd %src, %dst */
3709 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3710 *p++ = clearWBit(
3711 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3712 vregEnc3210(i->Ain.SseSDSS.src) ));
3713 *p++ = 0x0F;
3714 *p++ = 0x5A;
3715 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3716 vregEnc3210(i->Ain.SseSDSS.src) );
3717 goto done;
3719 case Ain_SseLdSt:
3720 if (i->Ain.SseLdSt.sz == 8) {
3721 *p++ = 0xF2;
3722 } else
3723 if (i->Ain.SseLdSt.sz == 4) {
3724 *p++ = 0xF3;
3725 } else
3726 if (i->Ain.SseLdSt.sz != 16) {
3727            vassert(0);
3728         }
3729 *p++ = clearWBit(
3730 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3731 i->Ain.SseLdSt.addr));
3732 *p++ = 0x0F;
3733 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3734 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3735 i->Ain.SseLdSt.addr);
3736 goto done;
3738 case Ain_SseCStore: {
3739 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3741 /* Use ptmp for backpatching conditional jumps. */
3742 ptmp = NULL;
3744 /* jmp fwds if !condition */
3745 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3746 ptmp = p; /* fill in this bit later */
3747 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3749 /* Now the store. */
3750 *p++ = clearWBit(
3751 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3752 i->Ain.SseCStore.addr));
3753 *p++ = 0x0F;
3754 *p++ = toUChar(0x11);
3755 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3756 i->Ain.SseCStore.addr);
3758 /* Fix up the conditional branch */
3759 Int delta = p - ptmp;
3760 vassert(delta > 0 && delta < 40);
3761 *ptmp = toUChar(delta-1);
3762         goto done;
3763      }
3765 case Ain_SseCLoad: {
3766 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3768 /* Use ptmp for backpatching conditional jumps. */
3769 ptmp = NULL;
3771 /* jmp fwds if !condition */
3772 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3773 ptmp = p; /* fill in this bit later */
3774 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3776 /* Now the load. */
3777 *p++ = clearWBit(
3778 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3779 i->Ain.SseCLoad.addr));
3780 *p++ = 0x0F;
3781 *p++ = toUChar(0x10);
3782 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3783 i->Ain.SseCLoad.addr);
3785 /* Fix up the conditional branch */
3786 Int delta = p - ptmp;
3787 vassert(delta > 0 && delta < 40);
3788 *ptmp = toUChar(delta-1);
3789         goto done;
3790      }
3792 case Ain_SseLdzLO:
3793 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3794 /* movs[sd] amode, %xmm-dst */
3795 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3796 *p++ = clearWBit(
3797 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3798 i->Ain.SseLdzLO.addr));
3799 *p++ = 0x0F;
3800 *p++ = 0x10;
3801 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3802 i->Ain.SseLdzLO.addr);
3803 goto done;
3805 case Ain_Sse32Fx4: {
3806 UInt srcRegNo = vregEnc3210(i->Ain.Sse32Fx4.src);
3807 UInt dstRegNo = vregEnc3210(i->Ain.Sse32Fx4.dst);
3808 // VEX encoded cases
3809 switch (i->Ain.Sse32Fx4.op) {
3810 case Asse_F16toF32: { // vcvtph2ps %xmmS, %xmmD
3811 UInt s = srcRegNo;
3812 UInt d = dstRegNo;
3813 // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
3814 // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
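            // Worked example (illustrative): for s=1, d=2 the bytes below
            // come out as C4 E2 79 13 D1, i.e. vcvtph2ps %xmm1, %xmm2.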
3815 UInt byte2 = ((((~d)>>3)&1)<<7) | (1<<6)
3816 | ((((~s)>>3)&1)<<5) | (1<<1);
3817 UInt byte5 = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
3818 *p++ = 0xC4;
3819 *p++ = byte2;
3820 *p++ = 0x79;
3821 *p++ = 0x13;
3822 *p++ = byte5;
3823            goto done;
3824         }
3825 case Asse_F32toF16: { // vcvtps2ph $4, %xmmS, %xmmD
3826 UInt s = srcRegNo;
3827 UInt d = dstRegNo;
3828 // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
3829 // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
3830 // : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
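            // Worked example (illustrative): for s=2, d=1 this emits
            // C4 E3 79 1D D1 04, i.e. vcvtps2ph $4, %xmm2, %xmm1.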
3831 UInt byte2 = ((((~s)>>3)&1)<<7) | (1<<6)
3832 | ((((~d)>>3)&1)<<5) | (1<<1) | (1 << 0);
3833 UInt byte5 = (1<<7) | (1<<6) | ((s&7) << 3) | ((d&7) << 0);
3834 *p++ = 0xC4;
3835 *p++ = byte2;
3836 *p++ = 0x79;
3837 *p++ = 0x1D;
3838 *p++ = byte5;
3839 *p++ = 0x04;
3840            goto done;
3841         }
3842            default: break;
3843         }
3844 // After this point, REX encoded cases only
3845 xtra = 0;
3846 switch (i->Ain.Sse32Fx4.op) {
3847 case Asse_F2I: *p++ = 0x66; break;
3848            default: break;
3849         }
3850 *p++ = clearWBit(rexAMode_R_enc_enc(dstRegNo, srcRegNo));
3851 *p++ = 0x0F;
3852 switch (i->Ain.Sse32Fx4.op) {
3853 case Asse_ADDF: *p++ = 0x58; break;
3854 case Asse_DIVF: *p++ = 0x5E; break;
3855 case Asse_MAXF: *p++ = 0x5F; break;
3856 case Asse_MINF: *p++ = 0x5D; break;
3857 case Asse_MULF: *p++ = 0x59; break;
3858 case Asse_RCPF: *p++ = 0x53; break;
3859 case Asse_RSQRTF: *p++ = 0x52; break;
3860 case Asse_SQRTF: *p++ = 0x51; break;
3861 case Asse_I2F: *p++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
3862 case Asse_F2I: *p++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
3863 case Asse_SUBF: *p++ = 0x5C; break;
3864 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3865 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3866 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3867 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3868            default: goto bad;
3869         }
3870 p = doAMode_R_enc_enc(p, dstRegNo, srcRegNo);
3871 if (xtra & 0x100)
3872 *p++ = toUChar(xtra & 0xFF);
3873         goto done;
3874      }
3876 case Ain_Sse64Fx2:
3877 xtra = 0;
3878 *p++ = 0x66;
3879 *p++ = clearWBit(
3880 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3881 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3882 *p++ = 0x0F;
3883 switch (i->Ain.Sse64Fx2.op) {
3884 case Asse_ADDF: *p++ = 0x58; break;
3885 case Asse_DIVF: *p++ = 0x5E; break;
3886 case Asse_MAXF: *p++ = 0x5F; break;
3887 case Asse_MINF: *p++ = 0x5D; break;
3888 case Asse_MULF: *p++ = 0x59; break;
3889 case Asse_SQRTF: *p++ = 0x51; break;
3890 case Asse_SUBF: *p++ = 0x5C; break;
3891 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3892 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3893 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3894 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3895            default: goto bad;
3896         }
3897 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3898 vregEnc3210(i->Ain.Sse64Fx2.src) );
3899 if (xtra & 0x100)
3900 *p++ = toUChar(xtra & 0xFF);
3901 goto done;
3903 case Ain_Sse32FLo:
3904 xtra = 0;
3905 *p++ = 0xF3;
3906 *p++ = clearWBit(
3907 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3908 vregEnc3210(i->Ain.Sse32FLo.src) ));
3909 *p++ = 0x0F;
3910 switch (i->Ain.Sse32FLo.op) {
3911 case Asse_ADDF: *p++ = 0x58; break;
3912 case Asse_DIVF: *p++ = 0x5E; break;
3913 case Asse_MAXF: *p++ = 0x5F; break;
3914 case Asse_MINF: *p++ = 0x5D; break;
3915 case Asse_MULF: *p++ = 0x59; break;
3916 case Asse_RCPF: *p++ = 0x53; break;
3917 case Asse_RSQRTF: *p++ = 0x52; break;
3918 case Asse_SQRTF: *p++ = 0x51; break;
3919 case Asse_SUBF: *p++ = 0x5C; break;
3920 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3921 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3922 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3923 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3924            default: goto bad;
3925         }
3926 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3927 vregEnc3210(i->Ain.Sse32FLo.src) );
3928 if (xtra & 0x100)
3929 *p++ = toUChar(xtra & 0xFF);
3930 goto done;
3932 case Ain_Sse64FLo:
3933 xtra = 0;
3934 *p++ = 0xF2;
3935 *p++ = clearWBit(
3936 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3937 vregEnc3210(i->Ain.Sse64FLo.src) ));
3938 *p++ = 0x0F;
3939 switch (i->Ain.Sse64FLo.op) {
3940 case Asse_ADDF: *p++ = 0x58; break;
3941 case Asse_DIVF: *p++ = 0x5E; break;
3942 case Asse_MAXF: *p++ = 0x5F; break;
3943 case Asse_MINF: *p++ = 0x5D; break;
3944 case Asse_MULF: *p++ = 0x59; break;
3945 case Asse_SQRTF: *p++ = 0x51; break;
3946 case Asse_SUBF: *p++ = 0x5C; break;
3947 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3948 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3949 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3950 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3951            default: goto bad;
3952         }
3953 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3954 vregEnc3210(i->Ain.Sse64FLo.src) );
3955 if (xtra & 0x100)
3956 *p++ = toUChar(xtra & 0xFF);
3957 goto done;
3959 case Ain_SseReRg:
3960 # define XX(_n) *p++ = (_n)
3962 rex = clearWBit(
3963 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3964 vregEnc3210(i->Ain.SseReRg.src) ));
3966 switch (i->Ain.SseReRg.op) {
3967 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3968 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3969 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3970 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3971 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3972 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3973 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3974 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3975 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3976 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3977 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3978 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3979 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3980 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3981 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3982 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3983 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3984 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3985 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3986 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3987 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3988 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3989 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3990 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3991 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3992 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3993 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3994 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3995 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3996 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3997 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3998 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3999 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
4000 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
4001 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
4002 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
4003 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
4004 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
4005 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
4006 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
4007 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
4008 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
4009 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
4010 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
4011 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
4012 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
4013 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
4014 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
4015 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
4016 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
4017 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
4018 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
4019 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
4020 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
4021 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
4022 case Asse_PSHUFB: XX(0x66); XX(rex);
4023 XX(0x0F); XX(0x38); XX(0x00); break;
4024 case Asse_PMADDUBSW:XX(0x66); XX(rex);
4025 XX(0x0F); XX(0x38); XX(0x04); break;
4026            default: goto bad;
4027         }
4028 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
4029 vregEnc3210(i->Ain.SseReRg.src) );
4030 # undef XX
4031 goto done;
4033 case Ain_SseCMov:
4034 /* jmp fwds if !condition */
4035 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
4036 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
4037 ptmp = p;
4039 /* movaps %src, %dst */
4040 *p++ = clearWBit(
4041 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
4042 vregEnc3210(i->Ain.SseCMov.src) ));
4043 *p++ = 0x0F;
4044 *p++ = 0x28;
4045 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
4046 vregEnc3210(i->Ain.SseCMov.src) );
4048 /* Fill in the jump offset. */
4049 *(ptmp-1) = toUChar(p - ptmp);
4050 goto done;
4052 case Ain_SseShuf:
4053 *p++ = 0x66;
4054 *p++ = clearWBit(
4055 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
4056 vregEnc3210(i->Ain.SseShuf.src) ));
4057 *p++ = 0x0F;
4058 *p++ = 0x70;
4059 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
4060 vregEnc3210(i->Ain.SseShuf.src) );
4061 *p++ = (UChar)(i->Ain.SseShuf.order);
4062 goto done;
4064 case Ain_SseShiftN: {
4065 UInt limit = 0;
4066 UInt shiftImm = i->Ain.SseShiftN.shiftBits;
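      /* These are the SSE2 "shift xmm by immediate" forms, all encoded as
         66 (REX) 0F <opc> /<subopc> ib.  Illustrative example: Asse_SHL128
         with shiftBits==64 becomes shiftImm==8 and emits roughly
         66 40 0F 73 FA 08, i.e. pslldq $8, %xmm2 (assuming dst is %xmm2;
         the 0x40 REX is redundant but harmless). */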
4067 switch (i->Ain.SseShiftN.op) {
4068 case Asse_SHL16: limit = 15; opc = 0x71; subopc_imm = 6; break;
4069 case Asse_SHL32: limit = 31; opc = 0x72; subopc_imm = 6; break;
4070 case Asse_SHL64: limit = 63; opc = 0x73; subopc_imm = 6; break;
4071 case Asse_SAR16: limit = 15; opc = 0x71; subopc_imm = 4; break;
4072 case Asse_SAR32: limit = 31; opc = 0x72; subopc_imm = 4; break;
4073 case Asse_SHR16: limit = 15; opc = 0x71; subopc_imm = 2; break;
4074 case Asse_SHR32: limit = 31; opc = 0x72; subopc_imm = 2; break;
4075 case Asse_SHR64: limit = 63; opc = 0x73; subopc_imm = 2; break;
4076 case Asse_SHL128:
4077 if ((shiftImm & 7) != 0) goto bad;
4078 shiftImm >>= 3;
4079 limit = 15; opc = 0x73; subopc_imm = 7;
4080 break;
4081 case Asse_SHR128:
4082 if ((shiftImm & 7) != 0) goto bad;
4083 shiftImm >>= 3;
4084 limit = 15; opc = 0x73; subopc_imm = 3;
4085 break;
4086 default:
4087 // This should never happen .. SSE2 only offers the above 10 insns
4088 // for the "shift with immediate" case
4089               goto bad;
4090         }
4091 vassert(limit > 0 && opc > 0 && subopc_imm > 0);
4092 if (shiftImm > limit) goto bad;
4093 *p++ = 0x66;
4094 *p++ = clearWBit(
4095 rexAMode_R_enc_enc( subopc_imm,
4096 vregEnc3210(i->Ain.SseShiftN.dst) ));
4097 *p++ = 0x0F;
4098 *p++ = opc;
4099 p = doAMode_R_enc_enc(p, subopc_imm, vregEnc3210(i->Ain.SseShiftN.dst));
4100 *p++ = shiftImm;
4101         goto done;
4102      }
4104 case Ain_SseMOVQ: {
4105 Bool toXMM = i->Ain.SseMOVQ.toXMM;
4106 HReg gpr = i->Ain.SseMOVQ.gpr;
4107 HReg xmm = i->Ain.SseMOVQ.xmm;
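      /* Illustrative example (assuming gpr=%rax, xmm=%xmm0): toXMM emits
         66 48 0F 6E C0 (movq %rax,%xmm0), otherwise 66 48 0F 7E C0
         (movq %xmm0,%rax); REX.W is forced on by setWBit. */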
4108 *p++ = 0x66;
4109 *p++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm), iregEnc3210(gpr)) );
4110 *p++ = 0x0F;
4111 *p++ = toXMM ? 0x6E : 0x7E;
4112 p = doAMode_R_enc_enc( p, vregEnc3210(xmm), iregEnc3210(gpr) );
4113         goto done;
4114      }
4116 //uu case Ain_AvxLdSt: {
4117 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
4118 //uu i->Ain.AvxLdSt.addr );
4119 //uu p = emitVexPrefix(p, vex);
4120 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
4121 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
4122 //uu goto done;
4123 //uu }
4125 case Ain_Avx32FLo: {
4126 UInt d = vregEnc3210(i->Ain.Avx32FLo.dst);
4127 UInt v = vregEnc3210(i->Ain.Avx32FLo.src1);
4128 UInt s = vregEnc3210(i->Ain.Avx32FLo.src2);
4129 UInt m = 2, pp = 1;
4130 UInt opcode;
4131 switch (i->Ain.Avx32FLo.op) {
4132 case Asse_VFMADD213:
4133 // VFMADD213SS %xmmS2, %xmmS1, %xmmD (xmm regs range 0 .. 15)
4134 opcode = 0xa9;
4135 break;
4136 default:
4137               goto bad;
4138         }
4139 // 0xC4 : ~d3 1 ~s3 o4 o3 o2 o1 o0 : 0 ~v3 ~v2 ~v1 ~v0 0 p1 p0 : opcode_byte
4140 // : 1 1 d2 d1 d0 s2 s1 s0
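      // Worked example (illustrative): d=1, v=2, s=3 gives
      // C4 E2 69 A9 CB, i.e. vfmadd213ss %xmm3, %xmm2, %xmm1.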
4141 *p++ = 0xC4; // 3-byte VEX
4142 *p++ = ((((~d)>>3)&1)<<7) | (1<<6) | ((((~s)>>3)&1)<<5) | m;
4143 *p++ = ((~v&0x0f) << 3) | pp;
4144 *p++ = opcode;
4145 *p++ = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
4146         goto done;
4147      }
4148 case Ain_Avx64FLo: {
4149 UInt d = vregEnc3210(i->Ain.Avx64FLo.dst);
4150 UInt v = vregEnc3210(i->Ain.Avx64FLo.src1);
4151 UInt s = vregEnc3210(i->Ain.Avx64FLo.src2);
4152 UInt m = 2, pp = 1;
4153 UInt opcode;
4154 switch (i->Ain.Avx64FLo.op) {
4155 case Asse_VFMADD213:
4156 // VFMADD213SD %xmmS2, %xmmS1, %xmmD (xmm regs range 0 .. 15)
4157 opcode = 0xa9;
4158 break;
4159 default:
4160               goto bad;
4161         }
4162 // 0xC4 : ~d3 1 ~s3 o4 o3 o2 o1 o0 : 1 ~v3 ~v2 ~v1 ~v0 0 p1 p0 : opcode_byte
4163 // : 1 1 d2 d1 d0 s2 s1 s0
4164 *p++ = 0xC4; // 3-byte VEX
4165 *p++ = ((((~d)>>3)&1)<<7) | (1<<6) | ((((~s)>>3)&1)<<5) | m;
4166 *p++ = (1<<7)|((~v&0x0f) << 3) | pp;
4167 *p++ = opcode;
4168 *p++ = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
4169         goto done;
4170      }
4172 case Ain_EvCheck: {
4173 /* We generate:
4174 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
4175 (2 bytes) jns nofail expected taken
4176 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
4177            nofail:
4178         */
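      /* Illustrative byte sequence, assuming the 8(%rbp)/0(%rbp) amodes
         shown above:  FF 4D 08  79 03  FF 65 00  -- 3+2+3 = 8 bytes,
         matching evCheckSzB_AMD64() below. */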
4179 /* This is heavily asserted re instruction lengths. It needs to
4180 be. If we get given unexpected forms of .amCounter or
4181 .amFailAddr -- basically, anything that's not of the form
4182 uimm7(%rbp) -- they are likely to fail. */
4183 /* Note also that after the decl we must be very careful not to
4184 read the carry flag, else we get a partial flags stall.
4185 js/jns avoids that, though. */
4186 UChar* p0 = p;
4187 /* --- decl 8(%rbp) --- */
4188 /* Need to compute the REX byte for the decl in order to prove
4189            that we don't need it, since this is a 32-bit dec and all
4190 registers involved in the amode are < r8. "1" because
4191 there's no register in this encoding; instead the register
4192 field is used as a sub opcode. The encoding for "decl r/m32"
4193 is FF /1, hence the "1". */
4194 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
4195 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
4196 *p++ = 0xFF;
4197 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
4198 vassert(p - p0 == 3);
4199 /* --- jns nofail --- */
4200 *p++ = 0x79;
4201 *p++ = 0x03; /* need to check this 0x03 after the next insn */
4202 vassert(p - p0 == 5);
4203 /* --- jmp* 0(%rbp) --- */
4204 /* Once again, verify we don't need REX. The encoding is FF /4.
4205 We don't need REX.W since by default FF /4 in 64-bit mode
4206 implies a 64 bit load. */
4207 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
4208 if (rex != 0x40) goto bad;
4209 *p++ = 0xFF;
4210 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
4211 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
4212 /* And crosscheck .. */
4213 vassert(evCheckSzB_AMD64() == 8);
4214         goto done;
4215      }
4217 case Ain_ProfInc: {
4218 /* We generate movabsq $0, %r11
4219 incq (%r11)
4220 in the expectation that a later call to LibVEX_patchProfCtr
4221 will be used to fill in the immediate field once the right
4222 value is known.
4223 49 BB 00 00 00 00 00 00 00 00
4224            49 FF 03
4225         */
4226 *p++ = 0x49; *p++ = 0xBB;
4227 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4228 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4229 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
4230 /* Tell the caller .. */
4231 vassert(!(*is_profInc));
4232 *is_profInc = True;
4233         goto done;
4234      }
4236 default:
4237         goto bad;
4238      }
4240 bad:
4241 ppAMD64Instr(i, mode64);
4242 vpanic("emit_AMD64Instr");
4243 /*NOTREACHED*/
4245 done:
4246 vassert(p - &buf[0] <= 64);
4247    return p - &buf[0];
4248 }
4251 /* How big is an event check? See case for Ain_EvCheck in
4252 emit_AMD64Instr just above. That crosschecks what this returns, so
4253 we can tell if we're inconsistent. */
4254 Int evCheckSzB_AMD64 (void)
4255 {
4256    return 8;
4257 }
4260 /* NB: what goes on here has to be very closely coordinated with the
4261 emitInstr case for XDirect, above. */
4262 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
4263 void* place_to_chain,
4264 const void* disp_cp_chain_me_EXPECTED,
4265                                    const void* place_to_jump_to )
4266 {
4267 vassert(endness_host == VexEndnessLE);
4269 /* What we're expecting to see is:
4270 movabsq $disp_cp_chain_me_EXPECTED, %r11
4271 call *%r11
4273 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
4274         41 FF D3
4275    */
4276 UChar* p = (UChar*)place_to_chain;
4277 vassert(p[0] == 0x49);
4278 vassert(p[1] == 0xBB);
4279 vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
4280 vassert(p[10] == 0x41);
4281 vassert(p[11] == 0xFF);
4282 vassert(p[12] == 0xD3);
4283 /* And what we want to change it to is either:
4284 (general case):
4285 movabsq $place_to_jump_to, %r11
4286 jmpq *%r11
4288 49 BB <8 bytes value == place_to_jump_to>
4289 41 FF E3
4290 So it's the same length (convenient, huh) and we don't
4291 need to change all the bits.
4292 ---OR---
4293 in the case where the displacement falls within 32 bits
4294 jmpq disp32 where disp32 is relative to the next insn
4295 ud2; ud2; ud2; ud2
4297 E9 <4 bytes == disp32>
4298 0F 0B 0F 0B 0F 0B 0F 0B
4300 In both cases the replacement has the same length as the original.
4301 To remain sane & verifiable,
4302 (1) limit the displacement for the short form to
4303 (say) +/- one billion, so as to avoid wraparound
4304 off-by-ones
4305 (2) even if the short form is applicable, once every (say)
4306 1024 times use the long form anyway, so as to maintain
4307         verifiability
4308    */
4309 /* This is the delta we need to put into a JMP d32 insn. It's
4310 relative to the start of the next insn, hence the -5. */
4311 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
4312 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
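   /* Illustrative: if place_to_jump_to were 0x1000 bytes beyond p, then
      delta would be 0xFFB and the short form below would be
      E9 FB 0F 00 00 followed by four ud2 instructions (0F 0B x 4). */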
4314 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4315 if (shortOK) {
4316 shortCTR++; // thread safety bleh
4317 if (0 == (shortCTR & 0x3FF)) {
4318 shortOK = False;
4319 if (0)
4320 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
4321                    "using long jmp\n", shortCTR);
4322       }
4323    }
4325 /* And make the modifications. */
4326 if (shortOK) {
4327 p[0] = 0xE9;
4328 write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
4329 p[5] = 0x0F; p[6] = 0x0B;
4330 p[7] = 0x0F; p[8] = 0x0B;
4331 p[9] = 0x0F; p[10] = 0x0B;
4332 p[11] = 0x0F; p[12] = 0x0B;
4333 /* sanity check on the delta -- top 32 are all 0 or all 1 */
4334 delta >>= 32;
4335 vassert(delta == 0LL || delta == -1LL);
4336 } else {
4337 /* Minimal modifications from the starting sequence. */
4338 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
4339       p[12] = 0xE3;
4340    }
4341 VexInvalRange vir = { (HWord)place_to_chain, 13 };
4342    return vir;
4343 }
4346 /* NB: what goes on here has to be very closely coordinated with the
4347 emitInstr case for XDirect, above. */
4348 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
4349 void* place_to_unchain,
4350 const void* place_to_jump_to_EXPECTED,
4351                                      const void* disp_cp_chain_me )
4352 {
4353 vassert(endness_host == VexEndnessLE);
4355 /* What we're expecting to see is either:
4356 (general case)
4357 movabsq $place_to_jump_to_EXPECTED, %r11
4358 jmpq *%r11
4360 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
4361 41 FF E3
4362 ---OR---
4363 in the case where the displacement falls within 32 bits
4364 jmpq d32
4365 ud2; ud2; ud2; ud2
4367 E9 <4 bytes == disp32>
4368         0F 0B 0F 0B 0F 0B 0F 0B
4369    */
4370 UChar* p = (UChar*)place_to_unchain;
4371 Bool valid = False;
4372 if (p[0] == 0x49 && p[1] == 0xBB
4373 && read_misaligned_ULong_LE(&p[2])
4374 == (ULong)(Addr)place_to_jump_to_EXPECTED
4375 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
4376 /* it's the long form */
4377       valid = True;
4378    }
4379 else
4380 if (p[0] == 0xE9
4381 && p[5] == 0x0F && p[6] == 0x0B
4382 && p[7] == 0x0F && p[8] == 0x0B
4383 && p[9] == 0x0F && p[10] == 0x0B
4384 && p[11] == 0x0F && p[12] == 0x0B) {
4385 /* It's the short form. Check the offset is right. */
4386 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
4387 Long s64 = (Long)s32;
4388 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
4389 valid = True;
4390 if (0)
4391            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4392       }
4393    }
4394 vassert(valid);
4395 /* And what we want to change it to is:
4396 movabsq $disp_cp_chain_me, %r11
4397 call *%r11
4399 49 BB <8 bytes value == disp_cp_chain_me>
4400 41 FF D3
4401      So it's the same length (convenient, huh).
4402    */
4403 p[0] = 0x49;
4404 p[1] = 0xBB;
4405 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
4406 p[10] = 0x41;
4407 p[11] = 0xFF;
4408 p[12] = 0xD3;
4409 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
4410    return vir;
4411 }
4414 /* Patch the counter address into a profile inc point, as previously
4415 created by the Ain_ProfInc case for emit_AMD64Instr. */
4416 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4417 void* place_to_patch,
4418                                    const ULong* location_of_counter )
4419 {
4420 vassert(endness_host == VexEndnessLE);
4421 vassert(sizeof(ULong*) == 8);
4422 UChar* p = (UChar*)place_to_patch;
4423 vassert(p[0] == 0x49);
4424 vassert(p[1] == 0xBB);
4425 vassert(p[2] == 0x00);
4426 vassert(p[3] == 0x00);
4427 vassert(p[4] == 0x00);
4428 vassert(p[5] == 0x00);
4429 vassert(p[6] == 0x00);
4430 vassert(p[7] == 0x00);
4431 vassert(p[8] == 0x00);
4432 vassert(p[9] == 0x00);
4433 vassert(p[10] == 0x49);
4434 vassert(p[11] == 0xFF);
4435 vassert(p[12] == 0x03);
4436 ULong imm64 = (ULong)(Addr)location_of_counter;
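   /* Illustrative: if location_of_counter were (hypothetically)
      0x1122334455667788, the byte stores below would write p[2..9] as
      88 77 66 55 44 33 22 11 (little-endian). */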
4437 p[2] = imm64 & 0xFF; imm64 >>= 8;
4438 p[3] = imm64 & 0xFF; imm64 >>= 8;
4439 p[4] = imm64 & 0xFF; imm64 >>= 8;
4440 p[5] = imm64 & 0xFF; imm64 >>= 8;
4441 p[6] = imm64 & 0xFF; imm64 >>= 8;
4442 p[7] = imm64 & 0xFF; imm64 >>= 8;
4443 p[8] = imm64 & 0xFF; imm64 >>= 8;
4444 p[9] = imm64 & 0xFF; imm64 >>= 8;
4445 VexInvalRange vir = { (HWord)place_to_patch, 13 };
4446    return vir;
4447 }
4450 /*---------------------------------------------------------------*/
4451 /*--- end host_amd64_defs.c ---*/
4452 /*---------------------------------------------------------------*/