host_amd64_defs.c: don't initialize opc and subopc_imm in emit_AMD64Instr.
2 /*---------------------------------------------------------------*/
3 /*--- begin host_amd64_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex.h"
36 #include "libvex_trc_values.h"
38 #include "main_util.h"
39 #include "host_generic_regs.h"
40 #include "host_amd64_defs.h"
43 /* --------- Registers. --------- */
45 const RRegUniverse* getRRegUniverse_AMD64 ( void )
47 /* The real-register universe is a big constant, so we just want to
48 initialise it once. */
49 static RRegUniverse rRegUniverse_AMD64;
50 static Bool rRegUniverse_AMD64_initted = False;
52 /* Handy shorthand, nothing more */
53 RRegUniverse* ru = &rRegUniverse_AMD64;
55 /* This isn't thread-safe. Sigh. */
56 if (LIKELY(rRegUniverse_AMD64_initted))
57 return ru;
59 RRegUniverse__init(ru);
61 /* Add the registers. The initial segment of this array must be
62 those available for allocation by reg-alloc, and those that
63 follow are not available for allocation. */
64 ru->allocable_start[HRcInt64] = ru->size;
65 ru->regs[ru->size++] = hregAMD64_R12();
66 ru->regs[ru->size++] = hregAMD64_R13();
67 ru->regs[ru->size++] = hregAMD64_R14();
68 ru->regs[ru->size++] = hregAMD64_R15();
69 ru->regs[ru->size++] = hregAMD64_RBX();
70 ru->regs[ru->size++] = hregAMD64_RSI();
71 ru->regs[ru->size++] = hregAMD64_RDI();
72 ru->regs[ru->size++] = hregAMD64_R8();
73 ru->regs[ru->size++] = hregAMD64_R9();
74 ru->regs[ru->size++] = hregAMD64_R10();
75 ru->allocable_end[HRcInt64] = ru->size - 1;
77 ru->allocable_start[HRcVec128] = ru->size;
78 ru->regs[ru->size++] = hregAMD64_XMM3();
79 ru->regs[ru->size++] = hregAMD64_XMM4();
80 ru->regs[ru->size++] = hregAMD64_XMM5();
81 ru->regs[ru->size++] = hregAMD64_XMM6();
82 ru->regs[ru->size++] = hregAMD64_XMM7();
83 ru->regs[ru->size++] = hregAMD64_XMM8();
84 ru->regs[ru->size++] = hregAMD64_XMM9();
85 ru->regs[ru->size++] = hregAMD64_XMM10();
86 ru->regs[ru->size++] = hregAMD64_XMM11();
87 ru->regs[ru->size++] = hregAMD64_XMM12();
88 ru->allocable_end[HRcVec128] = ru->size - 1;
89 ru->allocable = ru->size;
91 /* And other regs, not available to the allocator. */
92 ru->regs[ru->size++] = hregAMD64_RAX();
93 ru->regs[ru->size++] = hregAMD64_RCX();
94 ru->regs[ru->size++] = hregAMD64_RDX();
95 ru->regs[ru->size++] = hregAMD64_RSP();
96 ru->regs[ru->size++] = hregAMD64_RBP();
97 ru->regs[ru->size++] = hregAMD64_R11();
98 ru->regs[ru->size++] = hregAMD64_XMM0();
99 ru->regs[ru->size++] = hregAMD64_XMM1();
101 rRegUniverse_AMD64_initted = True;
103 RRegUniverse__check_is_sane(ru);
104 return ru;
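/* The trailing, non-allocatable registers have fixed roles elsewhere in
   this file: %rsp and %rbp hold the stack and baseblock pointers,
   %rax/%rdx/%rcx are used implicitly by MulL, Div and Sh64, and %r11 is
   the scratch register for calls and chaining; see
   getRegUsage_AMD64Instr below. */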
108 UInt ppHRegAMD64 ( HReg reg )
110 Int r;
111 static const HChar* ireg64_names[16]
112 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
113 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
114 /* Be generic for all virtual regs. */
115 if (hregIsVirtual(reg)) {
116 return ppHReg(reg);
118 /* But specific for real regs. */
119 switch (hregClass(reg)) {
120 case HRcInt64:
121 r = hregEncoding(reg);
122 vassert(r >= 0 && r < 16);
123 return vex_printf("%s", ireg64_names[r]);
124 case HRcVec128:
125 r = hregEncoding(reg);
126 vassert(r >= 0 && r < 16);
127 return vex_printf("%%xmm%d", r);
128 default:
129 vpanic("ppHRegAMD64");
133 static UInt ppHRegAMD64_lo32 ( HReg reg )
135 Int r;
136 static const HChar* ireg32_names[16]
137 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
138 "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
139 /* Be generic for all virtual regs. */
140 if (hregIsVirtual(reg)) {
141 UInt written = ppHReg(reg);
142 written += vex_printf("d");
143 return written;
145 /* But specific for real regs. */
146 switch (hregClass(reg)) {
147 case HRcInt64:
148 r = hregEncoding(reg);
149 vassert(r >= 0 && r < 16);
150 return vex_printf("%s", ireg32_names[r]);
151 default:
152 vpanic("ppHRegAMD64_lo32: invalid regclass");
157 /* --------- Condition codes, Intel encoding. --------- */
159 const HChar* showAMD64CondCode ( AMD64CondCode cond )
161 switch (cond) {
162 case Acc_O: return "o";
163 case Acc_NO: return "no";
164 case Acc_B: return "b";
165 case Acc_NB: return "nb";
166 case Acc_Z: return "z";
167 case Acc_NZ: return "nz";
168 case Acc_BE: return "be";
169 case Acc_NBE: return "nbe";
170 case Acc_S: return "s";
171 case Acc_NS: return "ns";
172 case Acc_P: return "p";
173 case Acc_NP: return "np";
174 case Acc_L: return "l";
175 case Acc_NL: return "nl";
176 case Acc_LE: return "le";
177 case Acc_NLE: return "nle";
178 case Acc_ALWAYS: return "ALWAYS";
179 default: vpanic("ppAMD64CondCode");
184 /* --------- AMD64AMode: memory address expressions. --------- */
186 AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
187 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
188 am->tag = Aam_IR;
189 am->Aam.IR.imm = imm32;
190 am->Aam.IR.reg = reg;
191 return am;
193 AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
194 AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
195 am->tag = Aam_IRRS;
196 am->Aam.IRRS.imm = imm32;
197 am->Aam.IRRS.base = base;
198 am->Aam.IRRS.index = indEx;
199 am->Aam.IRRS.shift = shift;
200 vassert(shift >= 0 && shift <= 3);
201 return am;
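/* Illustration (hypothetical registers): an IRRS amode denotes
   imm + base + index*(1 << shift), so AMD64AMode_IRRS(0x10, rBase, rIndex, 3)
   is shown by ppAMD64AMode as "0x10(rBase,rIndex,8)". */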
204 void ppAMD64AMode ( AMD64AMode* am ) {
205 switch (am->tag) {
206 case Aam_IR:
207 if (am->Aam.IR.imm == 0)
208 vex_printf("(");
209 else
210 vex_printf("0x%x(", am->Aam.IR.imm);
211 ppHRegAMD64(am->Aam.IR.reg);
212 vex_printf(")");
213 return;
214 case Aam_IRRS:
215 vex_printf("0x%x(", am->Aam.IRRS.imm);
216 ppHRegAMD64(am->Aam.IRRS.base);
217 vex_printf(",");
218 ppHRegAMD64(am->Aam.IRRS.index);
219 vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
220 return;
221 default:
222 vpanic("ppAMD64AMode");
226 static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
227 switch (am->tag) {
228 case Aam_IR:
229 addHRegUse(u, HRmRead, am->Aam.IR.reg);
230 return;
231 case Aam_IRRS:
232 addHRegUse(u, HRmRead, am->Aam.IRRS.base);
233 addHRegUse(u, HRmRead, am->Aam.IRRS.index);
234 return;
235 default:
236 vpanic("addRegUsage_AMD64AMode");
240 static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
241 switch (am->tag) {
242 case Aam_IR:
243 am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
244 return;
245 case Aam_IRRS:
246 am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
247 am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
248 return;
249 default:
250 vpanic("mapRegs_AMD64AMode");
254 /* --------- Operand, which can be reg, immediate or memory. --------- */
256 AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
257 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
258 op->tag = Armi_Imm;
259 op->Armi.Imm.imm32 = imm32;
260 return op;
262 AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
263 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
264 op->tag = Armi_Reg;
265 op->Armi.Reg.reg = reg;
266 return op;
268 AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
269 AMD64RMI* op = LibVEX_Alloc_inline(sizeof(AMD64RMI));
270 op->tag = Armi_Mem;
271 op->Armi.Mem.am = am;
272 return op;
275 static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
276 switch (op->tag) {
277 case Armi_Imm:
278 vex_printf("$0x%x", op->Armi.Imm.imm32);
279 return;
280 case Armi_Reg:
281 if (lo32)
282 ppHRegAMD64_lo32(op->Armi.Reg.reg);
283 else
284 ppHRegAMD64(op->Armi.Reg.reg);
285 return;
286 case Armi_Mem:
287 ppAMD64AMode(op->Armi.Mem.am);
288 return;
289 default:
290 vpanic("ppAMD64RMI");
293 void ppAMD64RMI ( AMD64RMI* op ) {
294 ppAMD64RMI_wrk(op, False/*!lo32*/);
296 void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
297 ppAMD64RMI_wrk(op, True/*lo32*/);
300 /* An AMD64RMI can only be used in a "read" context (what would it mean
301 to write or modify a literal?) and so we enumerate its registers
302 accordingly. */
303 static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
304 switch (op->tag) {
305 case Armi_Imm:
306 return;
307 case Armi_Reg:
308 addHRegUse(u, HRmRead, op->Armi.Reg.reg);
309 return;
310 case Armi_Mem:
311 addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
312 return;
313 default:
314 vpanic("addRegUsage_AMD64RMI");
318 static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
319 switch (op->tag) {
320 case Armi_Imm:
321 return;
322 case Armi_Reg:
323 op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
324 return;
325 case Armi_Mem:
326 mapRegs_AMD64AMode(m, op->Armi.Mem.am);
327 return;
328 default:
329 vpanic("mapRegs_AMD64RMI");
334 /* --------- Operand, which can be reg or immediate only. --------- */
336 AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
337 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
338 op->tag = Ari_Imm;
339 op->Ari.Imm.imm32 = imm32;
340 return op;
342 AMD64RI* AMD64RI_Reg ( HReg reg ) {
343 AMD64RI* op = LibVEX_Alloc_inline(sizeof(AMD64RI));
344 op->tag = Ari_Reg;
345 op->Ari.Reg.reg = reg;
346 return op;
349 void ppAMD64RI ( AMD64RI* op ) {
350 switch (op->tag) {
351 case Ari_Imm:
352 vex_printf("$0x%x", op->Ari.Imm.imm32);
353 return;
354 case Ari_Reg:
355 ppHRegAMD64(op->Ari.Reg.reg);
356 return;
357 default:
358 vpanic("ppAMD64RI");
362 /* An AMD64RI can only be used in a "read" context (what would it mean
363 to write or modify a literal?) and so we enumerate its registers
364 accordingly. */
365 static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
366 switch (op->tag) {
367 case Ari_Imm:
368 return;
369 case Ari_Reg:
370 addHRegUse(u, HRmRead, op->Ari.Reg.reg);
371 return;
372 default:
373 vpanic("addRegUsage_AMD64RI");
377 static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
378 switch (op->tag) {
379 case Ari_Imm:
380 return;
381 case Ari_Reg:
382 op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
383 return;
384 default:
385 vpanic("mapRegs_AMD64RI");
390 /* --------- Operand, which can be reg or memory only. --------- */
392 AMD64RM* AMD64RM_Reg ( HReg reg ) {
393 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
394 op->tag = Arm_Reg;
395 op->Arm.Reg.reg = reg;
396 return op;
398 AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
399 AMD64RM* op = LibVEX_Alloc_inline(sizeof(AMD64RM));
400 op->tag = Arm_Mem;
401 op->Arm.Mem.am = am;
402 return op;
405 void ppAMD64RM ( AMD64RM* op ) {
406 switch (op->tag) {
407 case Arm_Mem:
408 ppAMD64AMode(op->Arm.Mem.am);
409 return;
410 case Arm_Reg:
411 ppHRegAMD64(op->Arm.Reg.reg);
412 return;
413 default:
414 vpanic("ppAMD64RM");
418 /* Because an AMD64RM can be used as either a source or a destination operand, we
419 have to supply a mode -- pertaining to the operand as a whole --
420 indicating how it's being used. */
421 static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
422 switch (op->tag) {
423 case Arm_Mem:
424 /* Memory is read, written or modified. So we just want to
425 know the regs read by the amode. */
426 addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
427 return;
428 case Arm_Reg:
429 /* reg is read, written or modified. Add it in the
430 appropriate way. */
431 addHRegUse(u, mode, op->Arm.Reg.reg);
432 return;
433 default:
434 vpanic("addRegUsage_AMD64RM");
438 static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
440 switch (op->tag) {
441 case Arm_Mem:
442 mapRegs_AMD64AMode(m, op->Arm.Mem.am);
443 return;
444 case Arm_Reg:
445 op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
446 return;
447 default:
448 vpanic("mapRegs_AMD64RM");
453 /* --------- Instructions. --------- */
455 static const HChar* showAMD64ScalarSz ( Int sz ) {
456 switch (sz) {
457 case 2: return "w";
458 case 4: return "l";
459 case 8: return "q";
460 default: vpanic("showAMD64ScalarSz");
464 const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
465 switch (op) {
466 case Aun_NOT: return "not";
467 case Aun_NEG: return "neg";
468 default: vpanic("showAMD64UnaryOp");
472 const HChar* showAMD64AluOp ( AMD64AluOp op ) {
473 switch (op) {
474 case Aalu_MOV: return "mov";
475 case Aalu_CMP: return "cmp";
476 case Aalu_ADD: return "add";
477 case Aalu_SUB: return "sub";
478 case Aalu_ADC: return "adc";
479 case Aalu_SBB: return "sbb";
480 case Aalu_AND: return "and";
481 case Aalu_OR: return "or";
482 case Aalu_XOR: return "xor";
483 case Aalu_MUL: return "imul";
484 default: vpanic("showAMD64AluOp");
488 const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
489 switch (op) {
490 case Ash_SHL: return "shl";
491 case Ash_SHR: return "shr";
492 case Ash_SAR: return "sar";
493 default: vpanic("showAMD64ShiftOp");
497 const HChar* showA87FpOp ( A87FpOp op ) {
498 switch (op) {
499 case Afp_SCALE: return "scale";
500 case Afp_ATAN: return "atan";
501 case Afp_YL2X: return "yl2x";
502 case Afp_YL2XP1: return "yl2xp1";
503 case Afp_PREM: return "prem";
504 case Afp_PREM1: return "prem1";
505 case Afp_SQRT: return "sqrt";
506 case Afp_SIN: return "sin";
507 case Afp_COS: return "cos";
508 case Afp_TAN: return "tan";
509 case Afp_ROUND: return "round";
510 case Afp_2XM1: return "2xm1";
511 default: vpanic("showA87FpOp");
515 const HChar* showAMD64SseOp ( AMD64SseOp op ) {
516 switch (op) {
517 case Asse_MOV: return "movups";
518 case Asse_ADDF: return "add";
519 case Asse_SUBF: return "sub";
520 case Asse_MULF: return "mul";
521 case Asse_DIVF: return "div";
522 case Asse_MAXF: return "max";
523 case Asse_MINF: return "min";
524 case Asse_CMPEQF: return "cmpFeq";
525 case Asse_CMPLTF: return "cmpFlt";
526 case Asse_CMPLEF: return "cmpFle";
527 case Asse_CMPUNF: return "cmpFun";
528 case Asse_RCPF: return "rcp";
529 case Asse_RSQRTF: return "rsqrt";
530 case Asse_SQRTF: return "sqrt";
531 case Asse_I2F: return "cvtdq2ps.";
532 case Asse_F2I: return "cvtps2dq.";
533 case Asse_AND: return "and";
534 case Asse_OR: return "or";
535 case Asse_XOR: return "xor";
536 case Asse_ANDN: return "andn";
537 case Asse_ADD8: return "paddb";
538 case Asse_ADD16: return "paddw";
539 case Asse_ADD32: return "paddd";
540 case Asse_ADD64: return "paddq";
541 case Asse_QADD8U: return "paddusb";
542 case Asse_QADD16U: return "paddusw";
543 case Asse_QADD8S: return "paddsb";
544 case Asse_QADD16S: return "paddsw";
545 case Asse_SUB8: return "psubb";
546 case Asse_SUB16: return "psubw";
547 case Asse_SUB32: return "psubd";
548 case Asse_SUB64: return "psubq";
549 case Asse_QSUB8U: return "psubusb";
550 case Asse_QSUB16U: return "psubusw";
551 case Asse_QSUB8S: return "psubsb";
552 case Asse_QSUB16S: return "psubsw";
553 case Asse_MUL16: return "pmullw";
554 case Asse_MULHI16U: return "pmulhuw";
555 case Asse_MULHI16S: return "pmulhw";
556 case Asse_AVG8U: return "pavgb";
557 case Asse_AVG16U: return "pavgw";
558 case Asse_MAX16S: return "pmaxw";
559 case Asse_MAX8U: return "pmaxub";
560 case Asse_MIN16S: return "pminw";
561 case Asse_MIN8U: return "pminub";
562 case Asse_CMPEQ8: return "pcmpeqb";
563 case Asse_CMPEQ16: return "pcmpeqw";
564 case Asse_CMPEQ32: return "pcmpeqd";
565 case Asse_CMPGT8S: return "pcmpgtb";
566 case Asse_CMPGT16S: return "pcmpgtw";
567 case Asse_CMPGT32S: return "pcmpgtd";
568 case Asse_SHL16: return "psllw";
569 case Asse_SHL32: return "pslld";
570 case Asse_SHL64: return "psllq";
571 case Asse_SHL128: return "pslldq";
572 case Asse_SHR16: return "psrlw";
573 case Asse_SHR32: return "psrld";
574 case Asse_SHR64: return "psrlq";
575 case Asse_SHR128: return "psrldq";
576 case Asse_SAR16: return "psraw";
577 case Asse_SAR32: return "psrad";
578 case Asse_PACKSSD: return "packssdw";
579 case Asse_PACKSSW: return "packsswb";
580 case Asse_PACKUSW: return "packuswb";
581 case Asse_UNPCKHB: return "punpckhb";
582 case Asse_UNPCKHW: return "punpckhw";
583 case Asse_UNPCKHD: return "punpckhd";
584 case Asse_UNPCKHQ: return "punpckhq";
585 case Asse_UNPCKLB: return "punpcklb";
586 case Asse_UNPCKLW: return "punpcklw";
587 case Asse_UNPCKLD: return "punpckld";
588 case Asse_UNPCKLQ: return "punpcklq";
589 case Asse_PSHUFB: return "pshufb";
590 case Asse_PMADDUBSW: return "pmaddubsw";
591 case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4).";
592 case Asse_F16toF32: return "vcvtph2ps.";
593 default: vpanic("showAMD64SseOp");
597 AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
598 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
599 i->tag = Ain_Imm64;
600 i->Ain.Imm64.imm64 = imm64;
601 i->Ain.Imm64.dst = dst;
602 return i;
604 AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
605 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
606 i->tag = Ain_Alu64R;
607 i->Ain.Alu64R.op = op;
608 i->Ain.Alu64R.src = src;
609 i->Ain.Alu64R.dst = dst;
610 return i;
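/* Illustration (hypothetical operands): AMD64Instr_Alu64R(Aalu_ADD,
   AMD64RMI_Imm(4), hregAMD64_R12()) describes "addq $0x4,%r12", as
   rendered by ppAMD64Instr below. */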
612 AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
613 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
614 i->tag = Ain_Alu64M;
615 i->Ain.Alu64M.op = op;
616 i->Ain.Alu64M.src = src;
617 i->Ain.Alu64M.dst = dst;
618 vassert(op != Aalu_MUL);
619 return i;
621 AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
622 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
623 i->tag = Ain_Sh64;
624 i->Ain.Sh64.op = op;
625 i->Ain.Sh64.src = src;
626 i->Ain.Sh64.dst = dst;
627 return i;
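/* Note that a 'src' value of 0 encodes a shift by %cl rather than by an
   immediate; see ppAMD64Instr and getRegUsage_AMD64Instr below. */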
629 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
630 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
631 i->tag = Ain_Test64;
632 i->Ain.Test64.imm32 = imm32;
633 i->Ain.Test64.dst = dst;
634 return i;
636 AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
637 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
638 i->tag = Ain_Unary64;
639 i->Ain.Unary64.op = op;
640 i->Ain.Unary64.dst = dst;
641 return i;
643 AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
644 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
645 i->tag = Ain_Lea64;
646 i->Ain.Lea64.am = am;
647 i->Ain.Lea64.dst = dst;
648 return i;
650 AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
651 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
652 i->tag = Ain_Alu32R;
653 i->Ain.Alu32R.op = op;
654 i->Ain.Alu32R.src = src;
655 i->Ain.Alu32R.dst = dst;
656 switch (op) {
657 case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
658 case Aalu_AND: case Aalu_OR: case Aalu_XOR: break;
659 default: vassert(0);
661 return i;
663 AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
664 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
665 i->tag = Ain_MulL;
666 i->Ain.MulL.syned = syned;
667 i->Ain.MulL.src = src;
668 return i;
670 AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
671 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
672 i->tag = Ain_Div;
673 i->Ain.Div.syned = syned;
674 i->Ain.Div.sz = sz;
675 i->Ain.Div.src = src;
676 vassert(sz == 4 || sz == 8);
677 return i;
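/* The generated div/idiv implicitly operates on %rdx:%rax (or %edx:%eax
   when sz == 4); getRegUsage_AMD64Instr below marks both as modified. */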
679 AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
680 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
681 i->tag = Ain_Push;
682 i->Ain.Push.src = src;
683 return i;
685 AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
686 RetLoc rloc ) {
687 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
688 i->tag = Ain_Call;
689 i->Ain.Call.cond = cond;
690 i->Ain.Call.target = target;
691 i->Ain.Call.regparms = regparms;
692 i->Ain.Call.rloc = rloc;
693 vassert(regparms >= 0 && regparms <= 6);
694 vassert(is_sane_RetLoc(rloc));
695 return i;
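/* 'regparms' is the number of integer arguments passed in registers;
   getRegUsage_AMD64Instr below marks the first 'regparms' of
   %rdi, %rsi, %rdx, %rcx, %r8, %r9 as read, matching the SysV AMD64
   calling convention. */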
698 AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
699 AMD64CondCode cond, Bool toFastEP ) {
700 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
701 i->tag = Ain_XDirect;
702 i->Ain.XDirect.dstGA = dstGA;
703 i->Ain.XDirect.amRIP = amRIP;
704 i->Ain.XDirect.cond = cond;
705 i->Ain.XDirect.toFastEP = toFastEP;
706 return i;
708 AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
709 AMD64CondCode cond ) {
710 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
711 i->tag = Ain_XIndir;
712 i->Ain.XIndir.dstGA = dstGA;
713 i->Ain.XIndir.amRIP = amRIP;
714 i->Ain.XIndir.cond = cond;
715 return i;
717 AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
718 AMD64CondCode cond, IRJumpKind jk ) {
719 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
720 i->tag = Ain_XAssisted;
721 i->Ain.XAssisted.dstGA = dstGA;
722 i->Ain.XAssisted.amRIP = amRIP;
723 i->Ain.XAssisted.cond = cond;
724 i->Ain.XAssisted.jk = jk;
725 return i;
728 AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
729 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
730 i->tag = Ain_CMov64;
731 i->Ain.CMov64.cond = cond;
732 i->Ain.CMov64.src = src;
733 i->Ain.CMov64.dst = dst;
734 vassert(cond != Acc_ALWAYS);
735 return i;
737 AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
738 AMD64AMode* addr, HReg dst ) {
739 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
740 i->tag = Ain_CLoad;
741 i->Ain.CLoad.cond = cond;
742 i->Ain.CLoad.szB = szB;
743 i->Ain.CLoad.addr = addr;
744 i->Ain.CLoad.dst = dst;
745 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
746 return i;
748 AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
749 HReg src, AMD64AMode* addr ) {
750 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
751 i->tag = Ain_CStore;
752 i->Ain.CStore.cond = cond;
753 i->Ain.CStore.szB = szB;
754 i->Ain.CStore.src = src;
755 i->Ain.CStore.addr = addr;
756 vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
757 return i;
759 AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
760 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
761 i->tag = Ain_MovxLQ;
762 i->Ain.MovxLQ.syned = syned;
763 i->Ain.MovxLQ.src = src;
764 i->Ain.MovxLQ.dst = dst;
765 return i;
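/* MovxLQ widens the low 32 bits of 'src' into 'dst', sign- or
   zero-extending according to 'syned' (printed as movslq / movzlq). */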
767 AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
768 AMD64AMode* src, HReg dst ) {
769 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
770 i->tag = Ain_LoadEX;
771 i->Ain.LoadEX.szSmall = szSmall;
772 i->Ain.LoadEX.syned = syned;
773 i->Ain.LoadEX.src = src;
774 i->Ain.LoadEX.dst = dst;
775 vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
776 return i;
778 AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
779 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
780 i->tag = Ain_Store;
781 i->Ain.Store.sz = sz;
782 i->Ain.Store.src = src;
783 i->Ain.Store.dst = dst;
784 vassert(sz == 1 || sz == 2 || sz == 4);
785 return i;
787 AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
788 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
789 i->tag = Ain_Set64;
790 i->Ain.Set64.cond = cond;
791 i->Ain.Set64.dst = dst;
792 return i;
794 AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
795 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
796 i->tag = Ain_Bsfr64;
797 i->Ain.Bsfr64.isFwds = isFwds;
798 i->Ain.Bsfr64.src = src;
799 i->Ain.Bsfr64.dst = dst;
800 return i;
802 AMD64Instr* AMD64Instr_MFence ( void ) {
803 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
804 i->tag = Ain_MFence;
805 return i;
807 AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
808 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
809 i->tag = Ain_ACAS;
810 i->Ain.ACAS.addr = addr;
811 i->Ain.ACAS.sz = sz;
812 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
813 return i;
815 AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
816 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
817 i->tag = Ain_DACAS;
818 i->Ain.DACAS.addr = addr;
819 i->Ain.DACAS.sz = sz;
820 vassert(sz == 8 || sz == 4);
821 return i;
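/* ACAS/DACAS are the (double-word) compare-and-swap primitives: the
   expected old value is in %rax (%rdx:%rax for DACAS) and the intended
   new value in %rbx (%rcx:%rbx), as shown by ppAMD64Instr and recorded
   in getRegUsage_AMD64Instr below. */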
824 AMD64Instr* AMD64Instr_A87Free ( Int nregs )
826 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
827 i->tag = Ain_A87Free;
828 i->Ain.A87Free.nregs = nregs;
829 vassert(nregs >= 1 && nregs <= 7);
830 return i;
832 AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
834 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
835 i->tag = Ain_A87PushPop;
836 i->Ain.A87PushPop.addr = addr;
837 i->Ain.A87PushPop.isPush = isPush;
838 i->Ain.A87PushPop.szB = szB;
839 vassert(szB == 8 || szB == 4);
840 return i;
842 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
844 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
845 i->tag = Ain_A87FpOp;
846 i->Ain.A87FpOp.op = op;
847 return i;
849 AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
851 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
852 i->tag = Ain_A87LdCW;
853 i->Ain.A87LdCW.addr = addr;
854 return i;
856 AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
858 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
859 i->tag = Ain_A87StSW;
860 i->Ain.A87StSW.addr = addr;
861 return i;
863 AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
864 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
865 i->tag = Ain_LdMXCSR;
866 i->Ain.LdMXCSR.addr = addr;
867 return i;
869 AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
870 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
871 i->tag = Ain_SseUComIS;
872 i->Ain.SseUComIS.sz = toUChar(sz);
873 i->Ain.SseUComIS.srcL = srcL;
874 i->Ain.SseUComIS.srcR = srcR;
875 i->Ain.SseUComIS.dst = dst;
876 vassert(sz == 4 || sz == 8);
877 return i;
879 AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
880 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
881 i->tag = Ain_SseSI2SF;
882 i->Ain.SseSI2SF.szS = toUChar(szS);
883 i->Ain.SseSI2SF.szD = toUChar(szD);
884 i->Ain.SseSI2SF.src = src;
885 i->Ain.SseSI2SF.dst = dst;
886 vassert(szS == 4 || szS == 8);
887 vassert(szD == 4 || szD == 8);
888 return i;
890 AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
891 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
892 i->tag = Ain_SseSF2SI;
893 i->Ain.SseSF2SI.szS = toUChar(szS);
894 i->Ain.SseSF2SI.szD = toUChar(szD);
895 i->Ain.SseSF2SI.src = src;
896 i->Ain.SseSF2SI.dst = dst;
897 vassert(szS == 4 || szS == 8);
898 vassert(szD == 4 || szD == 8);
899 return i;
901 AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
903 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
904 i->tag = Ain_SseSDSS;
905 i->Ain.SseSDSS.from64 = from64;
906 i->Ain.SseSDSS.src = src;
907 i->Ain.SseSDSS.dst = dst;
908 return i;
910 AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
911 HReg reg, AMD64AMode* addr ) {
912 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
913 i->tag = Ain_SseLdSt;
914 i->Ain.SseLdSt.isLoad = isLoad;
915 i->Ain.SseLdSt.sz = toUChar(sz);
916 i->Ain.SseLdSt.reg = reg;
917 i->Ain.SseLdSt.addr = addr;
918 vassert(sz == 4 || sz == 8 || sz == 16);
919 return i;
921 AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
922 HReg src, AMD64AMode* addr )
924 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
925 i->tag = Ain_SseCStore;
926 i->Ain.SseCStore.cond = cond;
927 i->Ain.SseCStore.src = src;
928 i->Ain.SseCStore.addr = addr;
929 vassert(cond != Acc_ALWAYS);
930 return i;
932 AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
933 AMD64AMode* addr, HReg dst )
935 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
936 i->tag = Ain_SseCLoad;
937 i->Ain.SseCLoad.cond = cond;
938 i->Ain.SseCLoad.addr = addr;
939 i->Ain.SseCLoad.dst = dst;
940 vassert(cond != Acc_ALWAYS);
941 return i;
943 AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
945 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
946 i->tag = Ain_SseLdzLO;
947 i->Ain.SseLdzLO.sz = sz;
948 i->Ain.SseLdzLO.reg = reg;
949 i->Ain.SseLdzLO.addr = addr;
950 vassert(sz == 4 || sz == 8);
951 return i;
953 AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
954 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
955 i->tag = Ain_Sse32Fx4;
956 i->Ain.Sse32Fx4.op = op;
957 i->Ain.Sse32Fx4.src = src;
958 i->Ain.Sse32Fx4.dst = dst;
959 vassert(op != Asse_MOV);
960 return i;
962 AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
963 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
964 i->tag = Ain_Sse32FLo;
965 i->Ain.Sse32FLo.op = op;
966 i->Ain.Sse32FLo.src = src;
967 i->Ain.Sse32FLo.dst = dst;
968 vassert(op != Asse_MOV);
969 return i;
971 AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
972 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
973 i->tag = Ain_Sse64Fx2;
974 i->Ain.Sse64Fx2.op = op;
975 i->Ain.Sse64Fx2.src = src;
976 i->Ain.Sse64Fx2.dst = dst;
977 vassert(op != Asse_MOV);
978 return i;
980 AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
981 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
982 i->tag = Ain_Sse64FLo;
983 i->Ain.Sse64FLo.op = op;
984 i->Ain.Sse64FLo.src = src;
985 i->Ain.Sse64FLo.dst = dst;
986 vassert(op != Asse_MOV);
987 return i;
989 AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
990 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
991 i->tag = Ain_SseReRg;
992 i->Ain.SseReRg.op = op;
993 i->Ain.SseReRg.src = re;
994 i->Ain.SseReRg.dst = rg;
995 return i;
997 AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
998 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
999 i->tag = Ain_SseCMov;
1000 i->Ain.SseCMov.cond = cond;
1001 i->Ain.SseCMov.src = src;
1002 i->Ain.SseCMov.dst = dst;
1003 vassert(cond != Acc_ALWAYS);
1004 return i;
1006 AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
1007 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1008 i->tag = Ain_SseShuf;
1009 i->Ain.SseShuf.order = order;
1010 i->Ain.SseShuf.src = src;
1011 i->Ain.SseShuf.dst = dst;
1012 vassert(order >= 0 && order <= 0xFF);
1013 return i;
1015 AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp op,
1016 UInt shiftBits, HReg dst ) {
1017 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1018 i->tag = Ain_SseShiftN;
1019 i->Ain.SseShiftN.op = op;
1020 i->Ain.SseShiftN.shiftBits = shiftBits;
1021 i->Ain.SseShiftN.dst = dst;
1022 return i;
1024 AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) {
1025 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1026 i->tag = Ain_SseMOVQ;
1027 i->Ain.SseMOVQ.gpr = gpr;
1028 i->Ain.SseMOVQ.xmm = xmm;
1029 i->Ain.SseMOVQ.toXMM = toXMM;
1030 vassert(hregClass(gpr) == HRcInt64);
1031 vassert(hregClass(xmm) == HRcVec128);
1032 return i;
1034 //uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
1035 //uu HReg reg, AMD64AMode* addr ) {
1036 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1037 //uu i->tag = Ain_AvxLdSt;
1038 //uu i->Ain.AvxLdSt.isLoad = isLoad;
1039 //uu i->Ain.AvxLdSt.reg = reg;
1040 //uu i->Ain.AvxLdSt.addr = addr;
1041 //uu return i;
1042 //uu }
1043 //uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
1044 //uu AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1045 //uu i->tag = Ain_AvxReRg;
1046 //uu i->Ain.AvxReRg.op = op;
1047 //uu i->Ain.AvxReRg.src = re;
1048 //uu i->Ain.AvxReRg.dst = rg;
1049 //uu return i;
1050 //uu }
1051 AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
1052 AMD64AMode* amFailAddr ) {
1053 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1054 i->tag = Ain_EvCheck;
1055 i->Ain.EvCheck.amCounter = amCounter;
1056 i->Ain.EvCheck.amFailAddr = amFailAddr;
1057 return i;
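/* An event check decrements the counter at amCounter and, if the result
   is negative, jumps to the address read from amFailAddr; see the
   "decl ...; jns nofail; jmp *..." rendering in ppAMD64Instr below. */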
1059 AMD64Instr* AMD64Instr_ProfInc ( void ) {
1060 AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
1061 i->tag = Ain_ProfInc;
1062 return i;
1065 void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
1067 vassert(mode64 == True);
1068 switch (i->tag) {
1069 case Ain_Imm64:
1070 vex_printf("movabsq $0x%llx,", i->Ain.Imm64.imm64);
1071 ppHRegAMD64(i->Ain.Imm64.dst);
1072 return;
1073 case Ain_Alu64R:
1074 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64R.op));
1075 ppAMD64RMI(i->Ain.Alu64R.src);
1076 vex_printf(",");
1077 ppHRegAMD64(i->Ain.Alu64R.dst);
1078 return;
1079 case Ain_Alu64M:
1080 vex_printf("%sq ", showAMD64AluOp(i->Ain.Alu64M.op));
1081 ppAMD64RI(i->Ain.Alu64M.src);
1082 vex_printf(",");
1083 ppAMD64AMode(i->Ain.Alu64M.dst);
1084 return;
1085 case Ain_Sh64:
1086 vex_printf("%sq ", showAMD64ShiftOp(i->Ain.Sh64.op));
1087 if (i->Ain.Sh64.src == 0)
1088 vex_printf("%%cl,");
1089 else
1090 vex_printf("$%d,", (Int)i->Ain.Sh64.src);
1091 ppHRegAMD64(i->Ain.Sh64.dst);
1092 return;
1093 case Ain_Test64:
1094 vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
1095 ppHRegAMD64(i->Ain.Test64.dst);
1096 return;
1097 case Ain_Unary64:
1098 vex_printf("%sq ", showAMD64UnaryOp(i->Ain.Unary64.op));
1099 ppHRegAMD64(i->Ain.Unary64.dst);
1100 return;
1101 case Ain_Lea64:
1102 vex_printf("leaq ");
1103 ppAMD64AMode(i->Ain.Lea64.am);
1104 vex_printf(",");
1105 ppHRegAMD64(i->Ain.Lea64.dst);
1106 return;
1107 case Ain_Alu32R:
1108 vex_printf("%sl ", showAMD64AluOp(i->Ain.Alu32R.op));
1109 ppAMD64RMI_lo32(i->Ain.Alu32R.src);
1110 vex_printf(",");
1111 ppHRegAMD64_lo32(i->Ain.Alu32R.dst);
1112 return;
1113 case Ain_MulL:
1114 vex_printf("%cmulq ", i->Ain.MulL.syned ? 's' : 'u');
1115 ppAMD64RM(i->Ain.MulL.src);
1116 return;
1117 case Ain_Div:
1118 vex_printf("%cdiv%s ",
1119 i->Ain.Div.syned ? 's' : 'u',
1120 showAMD64ScalarSz(i->Ain.Div.sz));
1121 ppAMD64RM(i->Ain.Div.src);
1122 return;
1123 case Ain_Push:
1124 vex_printf("pushq ");
1125 ppAMD64RMI(i->Ain.Push.src);
1126 return;
1127 case Ain_Call:
1128 vex_printf("call%s[%d,",
1129 i->Ain.Call.cond==Acc_ALWAYS
1130 ? "" : showAMD64CondCode(i->Ain.Call.cond),
1131 i->Ain.Call.regparms );
1132 ppRetLoc(i->Ain.Call.rloc);
1133 vex_printf("] 0x%llx", i->Ain.Call.target);
1134 break;
1136 case Ain_XDirect:
1137 vex_printf("(xDirect) ");
1138 vex_printf("if (%%rflags.%s) { ",
1139 showAMD64CondCode(i->Ain.XDirect.cond));
1140 vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
1141 vex_printf("movq %%r11,");
1142 ppAMD64AMode(i->Ain.XDirect.amRIP);
1143 vex_printf("; ");
1144 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1145 i->Ain.XDirect.toFastEP ? "fast" : "slow");
1146 return;
1147 case Ain_XIndir:
1148 vex_printf("(xIndir) ");
1149 vex_printf("if (%%rflags.%s) { ",
1150 showAMD64CondCode(i->Ain.XIndir.cond));
1151 vex_printf("movq ");
1152 ppHRegAMD64(i->Ain.XIndir.dstGA);
1153 vex_printf(",");
1154 ppAMD64AMode(i->Ain.XIndir.amRIP);
1155 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1156 return;
1157 case Ain_XAssisted:
1158 vex_printf("(xAssisted) ");
1159 vex_printf("if (%%rflags.%s) { ",
1160 showAMD64CondCode(i->Ain.XAssisted.cond));
1161 vex_printf("movq ");
1162 ppHRegAMD64(i->Ain.XAssisted.dstGA);
1163 vex_printf(",");
1164 ppAMD64AMode(i->Ain.XAssisted.amRIP);
1165 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1166 (Int)i->Ain.XAssisted.jk);
1167 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1168 return;
1170 case Ain_CMov64:
1171 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
1172 ppHRegAMD64(i->Ain.CMov64.src);
1173 vex_printf(",");
1174 ppHRegAMD64(i->Ain.CMov64.dst);
1175 return;
1176 case Ain_CLoad:
1177 vex_printf("if (%%rflags.%s) { ",
1178 showAMD64CondCode(i->Ain.CLoad.cond));
1179 vex_printf("mov%c ", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
1180 ppAMD64AMode(i->Ain.CLoad.addr);
1181 vex_printf(", ");
1182 (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1183 (i->Ain.CLoad.dst);
1184 vex_printf(" }");
1185 return;
1186 case Ain_CStore:
1187 vex_printf("if (%%rflags.%s) { ",
1188 showAMD64CondCode(i->Ain.CStore.cond));
1189 vex_printf("mov%c ", i->Ain.CStore.szB == 4 ? 'l' : 'q');
1190 (i->Ain.CStore.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1191 (i->Ain.CStore.src);
1192 vex_printf(", ");
1193 ppAMD64AMode(i->Ain.CStore.addr);
1194 vex_printf(" }");
1195 return;
1197 case Ain_MovxLQ:
1198 vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
1199 ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
1200 vex_printf(",");
1201 ppHRegAMD64(i->Ain.MovxLQ.dst);
1202 return;
1203 case Ain_LoadEX:
1204 if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
1205 vex_printf("movl ");
1206 ppAMD64AMode(i->Ain.LoadEX.src);
1207 vex_printf(",");
1208 ppHRegAMD64_lo32(i->Ain.LoadEX.dst);
1209 } else {
1210 vex_printf("mov%c%cq ",
1211 i->Ain.LoadEX.syned ? 's' : 'z',
1212 i->Ain.LoadEX.szSmall==1
1213 ? 'b'
1214 : (i->Ain.LoadEX.szSmall==2 ? 'w' : 'l'));
1215 ppAMD64AMode(i->Ain.LoadEX.src);
1216 vex_printf(",");
1217 ppHRegAMD64(i->Ain.LoadEX.dst);
1219 return;
1220 case Ain_Store:
1221 vex_printf("mov%c ", i->Ain.Store.sz==1 ? 'b'
1222 : (i->Ain.Store.sz==2 ? 'w' : 'l'));
1223 ppHRegAMD64(i->Ain.Store.src);
1224 vex_printf(",");
1225 ppAMD64AMode(i->Ain.Store.dst);
1226 return;
1227 case Ain_Set64:
1228 vex_printf("setq%s ", showAMD64CondCode(i->Ain.Set64.cond));
1229 ppHRegAMD64(i->Ain.Set64.dst);
1230 return;
1231 case Ain_Bsfr64:
1232 vex_printf("bs%cq ", i->Ain.Bsfr64.isFwds ? 'f' : 'r');
1233 ppHRegAMD64(i->Ain.Bsfr64.src);
1234 vex_printf(",");
1235 ppHRegAMD64(i->Ain.Bsfr64.dst);
1236 return;
1237 case Ain_MFence:
1238 vex_printf("mfence" );
1239 return;
1240 case Ain_ACAS:
1241 vex_printf("lock cmpxchg%c ",
1242 i->Ain.ACAS.sz==1 ? 'b' : i->Ain.ACAS.sz==2 ? 'w'
1243 : i->Ain.ACAS.sz==4 ? 'l' : 'q' );
1244 vex_printf("{%%rax->%%rbx},");
1245 ppAMD64AMode(i->Ain.ACAS.addr);
1246 return;
1247 case Ain_DACAS:
1248 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1249 (Int)(2 * i->Ain.DACAS.sz));
1250 ppAMD64AMode(i->Ain.DACAS.addr);
1251 return;
1252 case Ain_A87Free:
1253 vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
1254 break;
1255 case Ain_A87PushPop:
1256 vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
1257 i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
1258 ppAMD64AMode(i->Ain.A87PushPop.addr);
1259 break;
1260 case Ain_A87FpOp:
1261 vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
1262 break;
1263 case Ain_A87LdCW:
1264 vex_printf("fldcw ");
1265 ppAMD64AMode(i->Ain.A87LdCW.addr);
1266 break;
1267 case Ain_A87StSW:
1268 vex_printf("fstsw ");
1269 ppAMD64AMode(i->Ain.A87StSW.addr);
1270 break;
1271 case Ain_LdMXCSR:
1272 vex_printf("ldmxcsr ");
1273 ppAMD64AMode(i->Ain.LdMXCSR.addr);
1274 break;
1275 case Ain_SseUComIS:
1276 vex_printf("ucomis%s ", i->Ain.SseUComIS.sz==4 ? "s" : "d");
1277 ppHRegAMD64(i->Ain.SseUComIS.srcL);
1278 vex_printf(",");
1279 ppHRegAMD64(i->Ain.SseUComIS.srcR);
1280 vex_printf(" ; pushfq ; popq ");
1281 ppHRegAMD64(i->Ain.SseUComIS.dst);
1282 break;
1283 case Ain_SseSI2SF:
1284 vex_printf("cvtsi2s%s ", i->Ain.SseSI2SF.szD==4 ? "s" : "d");
1285 (i->Ain.SseSI2SF.szS==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1286 (i->Ain.SseSI2SF.src);
1287 vex_printf(",");
1288 ppHRegAMD64(i->Ain.SseSI2SF.dst);
1289 break;
1290 case Ain_SseSF2SI:
1291 vex_printf("cvts%s2si ", i->Ain.SseSF2SI.szS==4 ? "s" : "d");
1292 ppHRegAMD64(i->Ain.SseSF2SI.src);
1293 vex_printf(",");
1294 (i->Ain.SseSF2SI.szD==4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
1295 (i->Ain.SseSF2SI.dst);
1296 break;
1297 case Ain_SseSDSS:
1298 vex_printf(i->Ain.SseSDSS.from64 ? "cvtsd2ss " : "cvtss2sd ");
1299 ppHRegAMD64(i->Ain.SseSDSS.src);
1300 vex_printf(",");
1301 ppHRegAMD64(i->Ain.SseSDSS.dst);
1302 break;
1303 case Ain_SseLdSt:
1304 switch (i->Ain.SseLdSt.sz) {
1305 case 4: vex_printf("movss "); break;
1306 case 8: vex_printf("movsd "); break;
1307 case 16: vex_printf("movups "); break;
1308 default: vassert(0);
1310 if (i->Ain.SseLdSt.isLoad) {
1311 ppAMD64AMode(i->Ain.SseLdSt.addr);
1312 vex_printf(",");
1313 ppHRegAMD64(i->Ain.SseLdSt.reg);
1314 } else {
1315 ppHRegAMD64(i->Ain.SseLdSt.reg);
1316 vex_printf(",");
1317 ppAMD64AMode(i->Ain.SseLdSt.addr);
1319 return;
1320 case Ain_SseCStore:
1321 vex_printf("if (%%rflags.%s) { ",
1322 showAMD64CondCode(i->Ain.SseCStore.cond));
1323 vex_printf("movups ");
1324 ppHRegAMD64(i->Ain.SseCStore.src);
1325 vex_printf(", ");
1326 ppAMD64AMode(i->Ain.SseCStore.addr);
1327 vex_printf(" }");
1328 return;
1329 case Ain_SseCLoad:
1330 vex_printf("if (%%rflags.%s) { ",
1331 showAMD64CondCode(i->Ain.SseCLoad.cond));
1332 vex_printf("movups ");
1333 ppAMD64AMode(i->Ain.SseCLoad.addr);
1334 vex_printf(", ");
1335 ppHRegAMD64(i->Ain.SseCLoad.dst);
1336 vex_printf(" }");
1337 return;
1338 case Ain_SseLdzLO:
1339 vex_printf("movs%s ", i->Ain.SseLdzLO.sz==4 ? "s" : "d");
1340 ppAMD64AMode(i->Ain.SseLdzLO.addr);
1341 vex_printf(",");
1342 ppHRegAMD64(i->Ain.SseLdzLO.reg);
1343 return;
1344 case Ain_Sse32Fx4:
1345 vex_printf("%sps ", showAMD64SseOp(i->Ain.Sse32Fx4.op));
1346 ppHRegAMD64(i->Ain.Sse32Fx4.src);
1347 vex_printf(",");
1348 ppHRegAMD64(i->Ain.Sse32Fx4.dst);
1349 return;
1350 case Ain_Sse32FLo:
1351 vex_printf("%sss ", showAMD64SseOp(i->Ain.Sse32FLo.op));
1352 ppHRegAMD64(i->Ain.Sse32FLo.src);
1353 vex_printf(",");
1354 ppHRegAMD64(i->Ain.Sse32FLo.dst);
1355 return;
1356 case Ain_Sse64Fx2:
1357 vex_printf("%spd ", showAMD64SseOp(i->Ain.Sse64Fx2.op));
1358 ppHRegAMD64(i->Ain.Sse64Fx2.src);
1359 vex_printf(",");
1360 ppHRegAMD64(i->Ain.Sse64Fx2.dst);
1361 return;
1362 case Ain_Sse64FLo:
1363 vex_printf("%ssd ", showAMD64SseOp(i->Ain.Sse64FLo.op));
1364 ppHRegAMD64(i->Ain.Sse64FLo.src);
1365 vex_printf(",");
1366 ppHRegAMD64(i->Ain.Sse64FLo.dst);
1367 return;
1368 case Ain_SseReRg:
1369 vex_printf("%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1370 ppHRegAMD64(i->Ain.SseReRg.src);
1371 vex_printf(",");
1372 ppHRegAMD64(i->Ain.SseReRg.dst);
1373 return;
1374 case Ain_SseCMov:
1375 vex_printf("cmov%s ", showAMD64CondCode(i->Ain.SseCMov.cond));
1376 ppHRegAMD64(i->Ain.SseCMov.src);
1377 vex_printf(",");
1378 ppHRegAMD64(i->Ain.SseCMov.dst);
1379 return;
1380 case Ain_SseShuf:
1381 vex_printf("pshufd $0x%x,", (UInt)i->Ain.SseShuf.order);
1382 ppHRegAMD64(i->Ain.SseShuf.src);
1383 vex_printf(",");
1384 ppHRegAMD64(i->Ain.SseShuf.dst);
1385 return;
1386 case Ain_SseShiftN:
1387 vex_printf("%s $%u, ", showAMD64SseOp(i->Ain.SseShiftN.op),
1388 i->Ain.SseShiftN.shiftBits);
1389 ppHRegAMD64(i->Ain.SseShiftN.dst);
1390 return;
1391 case Ain_SseMOVQ:
1392 vex_printf("movq ");
1393 if (i->Ain.SseMOVQ.toXMM) {
1394 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1395 vex_printf(",");
1396 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1397 } else {
1398 ppHRegAMD64(i->Ain.SseMOVQ.xmm);
1399 vex_printf(",");
1400 ppHRegAMD64(i->Ain.SseMOVQ.gpr);
1402 return;
1403 //uu case Ain_AvxLdSt:
1404 //uu vex_printf("vmovups ");
1405 //uu if (i->Ain.AvxLdSt.isLoad) {
1406 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1407 //uu vex_printf(",");
1408 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1409 //uu } else {
1410 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1411 //uu vex_printf(",");
1412 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1413 //uu }
1414 //uu return;
1415 //uu case Ain_AvxReRg:
1416 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1417 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1418 //uu vex_printf(",");
1419 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1420 //uu return;
1421 case Ain_EvCheck:
1422 vex_printf("(evCheck) decl ");
1423 ppAMD64AMode(i->Ain.EvCheck.amCounter);
1424 vex_printf("; jns nofail; jmp *");
1425 ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
1426 vex_printf("; nofail:");
1427 return;
1428 case Ain_ProfInc:
1429 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1430 return;
1431 default:
1432 vpanic("ppAMD64Instr");
1436 /* --------- Helpers for register allocation. --------- */
1438 void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
1440 Bool unary;
1441 vassert(mode64 == True);
1442 initHRegUsage(u);
1443 switch (i->tag) {
1444 case Ain_Imm64:
1445 addHRegUse(u, HRmWrite, i->Ain.Imm64.dst);
1446 return;
1447 case Ain_Alu64R:
1448 addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
1449 if (i->Ain.Alu64R.op == Aalu_MOV) {
1450 addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
1452 if (i->Ain.Alu64R.src->tag == Armi_Reg) {
1453 u->isRegRegMove = True;
1454 u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg;
1455 u->regMoveDst = i->Ain.Alu64R.dst;
1457 return;
1459 if (i->Ain.Alu64R.op == Aalu_CMP) {
1460 addHRegUse(u, HRmRead, i->Ain.Alu64R.dst);
1461 return;
1463 addHRegUse(u, HRmModify, i->Ain.Alu64R.dst);
1464 return;
1465 case Ain_Alu64M:
1466 addRegUsage_AMD64RI(u, i->Ain.Alu64M.src);
1467 addRegUsage_AMD64AMode(u, i->Ain.Alu64M.dst);
1468 return;
1469 case Ain_Sh64:
1470 addHRegUse(u, HRmModify, i->Ain.Sh64.dst);
1471 if (i->Ain.Sh64.src == 0)
1472 addHRegUse(u, HRmRead, hregAMD64_RCX());
1473 return;
1474 case Ain_Test64:
1475 addHRegUse(u, HRmRead, i->Ain.Test64.dst);
1476 return;
1477 case Ain_Unary64:
1478 addHRegUse(u, HRmModify, i->Ain.Unary64.dst);
1479 return;
1480 case Ain_Lea64:
1481 addRegUsage_AMD64AMode(u, i->Ain.Lea64.am);
1482 addHRegUse(u, HRmWrite, i->Ain.Lea64.dst);
1483 return;
1484 case Ain_Alu32R:
1485 vassert(i->Ain.Alu32R.op != Aalu_MOV);
1486 addRegUsage_AMD64RMI(u, i->Ain.Alu32R.src);
1487 if (i->Ain.Alu32R.op == Aalu_CMP) {
1488 addHRegUse(u, HRmRead, i->Ain.Alu32R.dst);
1489 return;
1491 addHRegUse(u, HRmModify, i->Ain.Alu32R.dst);
1492 return;
1493 case Ain_MulL:
1494 addRegUsage_AMD64RM(u, i->Ain.MulL.src, HRmRead);
1495 addHRegUse(u, HRmModify, hregAMD64_RAX());
1496 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1497 return;
1498 case Ain_Div:
1499 addRegUsage_AMD64RM(u, i->Ain.Div.src, HRmRead);
1500 addHRegUse(u, HRmModify, hregAMD64_RAX());
1501 addHRegUse(u, HRmModify, hregAMD64_RDX());
1502 return;
1503 case Ain_Push:
1504 addRegUsage_AMD64RMI(u, i->Ain.Push.src);
1505 addHRegUse(u, HRmModify, hregAMD64_RSP());
1506 return;
1507 case Ain_Call:
1508 /* This is a bit subtle. */
1509 /* First off, claim it trashes all the caller-saved regs
1510 which fall within the register allocator's jurisdiction.
1511 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1512 and all the xmm registers. */
1513 addHRegUse(u, HRmWrite, hregAMD64_RAX());
1514 addHRegUse(u, HRmWrite, hregAMD64_RCX());
1515 addHRegUse(u, HRmWrite, hregAMD64_RDX());
1516 addHRegUse(u, HRmWrite, hregAMD64_RDI());
1517 addHRegUse(u, HRmWrite, hregAMD64_RSI());
1518 addHRegUse(u, HRmWrite, hregAMD64_R8());
1519 addHRegUse(u, HRmWrite, hregAMD64_R9());
1520 addHRegUse(u, HRmWrite, hregAMD64_R10());
1521 addHRegUse(u, HRmWrite, hregAMD64_XMM0());
1522 addHRegUse(u, HRmWrite, hregAMD64_XMM1());
1523 addHRegUse(u, HRmWrite, hregAMD64_XMM3());
1524 addHRegUse(u, HRmWrite, hregAMD64_XMM4());
1525 addHRegUse(u, HRmWrite, hregAMD64_XMM5());
1526 addHRegUse(u, HRmWrite, hregAMD64_XMM6());
1527 addHRegUse(u, HRmWrite, hregAMD64_XMM7());
1528 addHRegUse(u, HRmWrite, hregAMD64_XMM8());
1529 addHRegUse(u, HRmWrite, hregAMD64_XMM9());
1530 addHRegUse(u, HRmWrite, hregAMD64_XMM10());
1531 addHRegUse(u, HRmWrite, hregAMD64_XMM11());
1532 addHRegUse(u, HRmWrite, hregAMD64_XMM12());
1534 /* Now we have to state any parameter-carrying registers
1535 which might be read. This depends on the regparmness. */
1536 switch (i->Ain.Call.regparms) {
1537 case 6: addHRegUse(u, HRmRead, hregAMD64_R9()); /*fallthru*/
1538 case 5: addHRegUse(u, HRmRead, hregAMD64_R8()); /*fallthru*/
1539 case 4: addHRegUse(u, HRmRead, hregAMD64_RCX()); /*fallthru*/
1540 case 3: addHRegUse(u, HRmRead, hregAMD64_RDX()); /*fallthru*/
1541 case 2: addHRegUse(u, HRmRead, hregAMD64_RSI()); /*fallthru*/
1542 case 1: addHRegUse(u, HRmRead, hregAMD64_RDI()); break;
1543 case 0: break;
1544 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1546 /* Finally, there is the issue that the insn trashes a
1547 register because the literal target address has to be
1548 loaded into a register. Fortunately, r11 is stated in the
1549 ABI as a scratch register, and so seems a suitable victim. */
1550 addHRegUse(u, HRmWrite, hregAMD64_R11());
1551 /* Upshot of this is that the assembler really must use r11,
1552 and no other, as a destination temporary. */
1553 return;
1554 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1555 conditionally exit the block. Hence we only need to list (1)
1556 the registers that they read, and (2) the registers that they
1557 write in the case where the block is not exited. (2) is
1558 empty, hence only (1) is relevant here. */
1559 case Ain_XDirect:
1560 /* Don't bother to mention the write to %r11, since it is not
1561 available to the allocator. */
1562 addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
1563 return;
1564 case Ain_XIndir:
1565 /* Ditto re %r11 */
1566 addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
1567 addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
1568 return;
1569 case Ain_XAssisted:
1570 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1571 addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
1572 addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
1573 return;
1574 case Ain_CMov64:
1575 addHRegUse(u, HRmRead, i->Ain.CMov64.src);
1576 addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
1577 return;
1578 case Ain_CLoad:
1579 addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
1580 addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
1581 return;
1582 case Ain_CStore:
1583 addRegUsage_AMD64AMode(u, i->Ain.CStore.addr);
1584 addHRegUse(u, HRmRead, i->Ain.CStore.src);
1585 return;
1586 case Ain_MovxLQ:
1587 addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
1588 addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
1589 return;
1590 case Ain_LoadEX:
1591 addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
1592 addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
1593 return;
1594 case Ain_Store:
1595 addHRegUse(u, HRmRead, i->Ain.Store.src);
1596 addRegUsage_AMD64AMode(u, i->Ain.Store.dst);
1597 return;
1598 case Ain_Set64:
1599 addHRegUse(u, HRmWrite, i->Ain.Set64.dst);
1600 return;
1601 case Ain_Bsfr64:
1602 addHRegUse(u, HRmRead, i->Ain.Bsfr64.src);
1603 addHRegUse(u, HRmWrite, i->Ain.Bsfr64.dst);
1604 return;
1605 case Ain_MFence:
1606 return;
1607 case Ain_ACAS:
1608 addRegUsage_AMD64AMode(u, i->Ain.ACAS.addr);
1609 addHRegUse(u, HRmRead, hregAMD64_RBX());
1610 addHRegUse(u, HRmModify, hregAMD64_RAX());
1611 return;
1612 case Ain_DACAS:
1613 addRegUsage_AMD64AMode(u, i->Ain.DACAS.addr);
1614 addHRegUse(u, HRmRead, hregAMD64_RCX());
1615 addHRegUse(u, HRmRead, hregAMD64_RBX());
1616 addHRegUse(u, HRmModify, hregAMD64_RDX());
1617 addHRegUse(u, HRmModify, hregAMD64_RAX());
1618 return;
1619 case Ain_A87Free:
1620 return;
1621 case Ain_A87PushPop:
1622 addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
1623 return;
1624 case Ain_A87FpOp:
1625 return;
1626 case Ain_A87LdCW:
1627 addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
1628 return;
1629 case Ain_A87StSW:
1630 addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
1631 return;
1632 case Ain_LdMXCSR:
1633 addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
1634 return;
1635 case Ain_SseUComIS:
1636 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
1637 addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
1638 addHRegUse(u, HRmWrite, i->Ain.SseUComIS.dst);
1639 return;
1640 case Ain_SseSI2SF:
1641 addHRegUse(u, HRmRead, i->Ain.SseSI2SF.src);
1642 addHRegUse(u, HRmWrite, i->Ain.SseSI2SF.dst);
1643 return;
1644 case Ain_SseSF2SI:
1645 addHRegUse(u, HRmRead, i->Ain.SseSF2SI.src);
1646 addHRegUse(u, HRmWrite, i->Ain.SseSF2SI.dst);
1647 return;
1648 case Ain_SseSDSS:
1649 addHRegUse(u, HRmRead, i->Ain.SseSDSS.src);
1650 addHRegUse(u, HRmWrite, i->Ain.SseSDSS.dst);
1651 return;
1652 case Ain_SseLdSt:
1653 addRegUsage_AMD64AMode(u, i->Ain.SseLdSt.addr);
1654 addHRegUse(u, i->Ain.SseLdSt.isLoad ? HRmWrite : HRmRead,
1655 i->Ain.SseLdSt.reg);
1656 return;
1657 case Ain_SseCStore:
1658 addRegUsage_AMD64AMode(u, i->Ain.SseCStore.addr);
1659 addHRegUse(u, HRmRead, i->Ain.SseCStore.src);
1660 return;
1661 case Ain_SseCLoad:
1662 addRegUsage_AMD64AMode(u, i->Ain.SseCLoad.addr);
1663 addHRegUse(u, HRmModify, i->Ain.SseCLoad.dst);
1664 return;
1665 case Ain_SseLdzLO:
1666 addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
1667 addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
1668 return;
1669 case Ain_Sse32Fx4:
1670 vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
1671 unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
1672 || i->Ain.Sse32Fx4.op == Asse_RSQRTF
1673 || i->Ain.Sse32Fx4.op == Asse_SQRTF
1674 || i->Ain.Sse32Fx4.op == Asse_I2F
1675 || i->Ain.Sse32Fx4.op == Asse_F2I
1676 || i->Ain.Sse32Fx4.op == Asse_F32toF16
1677 || i->Ain.Sse32Fx4.op == Asse_F16toF32 );
1678 addHRegUse(u, HRmRead, i->Ain.Sse32Fx4.src);
1679 addHRegUse(u, unary ? HRmWrite : HRmModify,
1680 i->Ain.Sse32Fx4.dst);
1681 return;
1682 case Ain_Sse32FLo:
1683 vassert(i->Ain.Sse32FLo.op != Asse_MOV);
1684 unary = toBool( i->Ain.Sse32FLo.op == Asse_RCPF
1685 || i->Ain.Sse32FLo.op == Asse_RSQRTF
1686 || i->Ain.Sse32FLo.op == Asse_SQRTF );
1687 addHRegUse(u, HRmRead, i->Ain.Sse32FLo.src);
1688 addHRegUse(u, unary ? HRmWrite : HRmModify,
1689 i->Ain.Sse32FLo.dst);
1690 return;
1691 case Ain_Sse64Fx2:
1692 vassert(i->Ain.Sse64Fx2.op != Asse_MOV);
1693 unary = toBool( i->Ain.Sse64Fx2.op == Asse_RCPF
1694 || i->Ain.Sse64Fx2.op == Asse_RSQRTF
1695 || i->Ain.Sse64Fx2.op == Asse_SQRTF );
1696 addHRegUse(u, HRmRead, i->Ain.Sse64Fx2.src);
1697 addHRegUse(u, unary ? HRmWrite : HRmModify,
1698 i->Ain.Sse64Fx2.dst);
1699 return;
1700 case Ain_Sse64FLo:
1701 vassert(i->Ain.Sse64FLo.op != Asse_MOV);
1702 unary = toBool( i->Ain.Sse64FLo.op == Asse_RCPF
1703 || i->Ain.Sse64FLo.op == Asse_RSQRTF
1704 || i->Ain.Sse64FLo.op == Asse_SQRTF );
1705 addHRegUse(u, HRmRead, i->Ain.Sse64FLo.src);
1706 addHRegUse(u, unary ? HRmWrite : HRmModify,
1707 i->Ain.Sse64FLo.dst);
1708 return;
1709 case Ain_SseReRg:
1710 if ( (i->Ain.SseReRg.op == Asse_XOR
1711 || i->Ain.SseReRg.op == Asse_CMPEQ32)
1712 && sameHReg(i->Ain.SseReRg.src, i->Ain.SseReRg.dst)) {
1713 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1714 r,r' as a write of a value to r, and independent of any
1715 previous value in r */
1716 /* (as opposed to a rite of passage :-) */
1717 addHRegUse(u, HRmWrite, i->Ain.SseReRg.dst);
1718 } else {
1719 addHRegUse(u, HRmRead, i->Ain.SseReRg.src);
1720 addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
1721 ? HRmWrite : HRmModify,
1722 i->Ain.SseReRg.dst);
1724 if (i->Ain.SseReRg.op == Asse_MOV) {
1725 u->isRegRegMove = True;
1726 u->regMoveSrc = i->Ain.SseReRg.src;
1727 u->regMoveDst = i->Ain.SseReRg.dst;
1730 return;
1731 case Ain_SseCMov:
1732 addHRegUse(u, HRmRead, i->Ain.SseCMov.src);
1733 addHRegUse(u, HRmModify, i->Ain.SseCMov.dst);
1734 return;
1735 case Ain_SseShuf:
1736 addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
1737 addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
1738 return;
1739 case Ain_SseShiftN:
1740 addHRegUse(u, HRmModify, i->Ain.SseShiftN.dst);
1741 return;
1742 case Ain_SseMOVQ:
1743 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmRead : HRmWrite,
1744 i->Ain.SseMOVQ.gpr);
1745 addHRegUse(u, i->Ain.SseMOVQ.toXMM ? HRmWrite : HRmRead,
1746 i->Ain.SseMOVQ.xmm);
1747 return;
1748 //uu case Ain_AvxLdSt:
1749 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1750 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1751 //uu i->Ain.AvxLdSt.reg);
1752 //uu return;
1753 //uu case Ain_AvxReRg:
1754 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1755 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1756 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1757 //uu /* See comments on the case for Ain_SseReRg. */
1758 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1759 //uu } else {
1760 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1761 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1762 //uu ? HRmWrite : HRmModify,
1763 //uu i->Ain.AvxReRg.dst);
1764 //uu
1765 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1766 //uu u->isRegRegMove = True;
1767 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1768 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1769 //uu }
1770 //uu }
1771 //uu return;
1772 case Ain_EvCheck:
1773 /* We expect both amodes only to mention %rbp, so this is in
1774 fact pointless, since %rbp isn't allocatable, but anyway.. */
1775 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
1776 addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
1777 return;
1778 case Ain_ProfInc:
1779 addHRegUse(u, HRmWrite, hregAMD64_R11());
1780 return;
1781 default:
1782 ppAMD64Instr(i, mode64);
1783 vpanic("getRegUsage_AMD64Instr");
1787 /* local helper */
1788 static inline void mapReg(HRegRemap* m, HReg* r)
1790 *r = lookupHRegRemap(m, *r);
1793 void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
1795 vassert(mode64 == True);
1796 switch (i->tag) {
1797 case Ain_Imm64:
1798 mapReg(m, &i->Ain.Imm64.dst);
1799 return;
1800 case Ain_Alu64R:
1801 mapRegs_AMD64RMI(m, i->Ain.Alu64R.src);
1802 mapReg(m, &i->Ain.Alu64R.dst);
1803 return;
1804 case Ain_Alu64M:
1805 mapRegs_AMD64RI(m, i->Ain.Alu64M.src);
1806 mapRegs_AMD64AMode(m, i->Ain.Alu64M.dst);
1807 return;
1808 case Ain_Sh64:
1809 mapReg(m, &i->Ain.Sh64.dst);
1810 return;
1811 case Ain_Test64:
1812 mapReg(m, &i->Ain.Test64.dst);
1813 return;
1814 case Ain_Unary64:
1815 mapReg(m, &i->Ain.Unary64.dst);
1816 return;
1817 case Ain_Lea64:
1818 mapRegs_AMD64AMode(m, i->Ain.Lea64.am);
1819 mapReg(m, &i->Ain.Lea64.dst);
1820 return;
1821 case Ain_Alu32R:
1822 mapRegs_AMD64RMI(m, i->Ain.Alu32R.src);
1823 mapReg(m, &i->Ain.Alu32R.dst);
1824 return;
1825 case Ain_MulL:
1826 mapRegs_AMD64RM(m, i->Ain.MulL.src);
1827 return;
1828 case Ain_Div:
1829 mapRegs_AMD64RM(m, i->Ain.Div.src);
1830 return;
1831 case Ain_Push:
1832 mapRegs_AMD64RMI(m, i->Ain.Push.src);
1833 return;
1834 case Ain_Call:
1835 return;
1836 case Ain_XDirect:
1837 mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
1838 return;
1839 case Ain_XIndir:
1840 mapReg(m, &i->Ain.XIndir.dstGA);
1841 mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
1842 return;
1843 case Ain_XAssisted:
1844 mapReg(m, &i->Ain.XAssisted.dstGA);
1845 mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
1846 return;
1847 case Ain_CMov64:
1848 mapReg(m, &i->Ain.CMov64.src);
1849 mapReg(m, &i->Ain.CMov64.dst);
1850 return;
1851 case Ain_CLoad:
1852 mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
1853 mapReg(m, &i->Ain.CLoad.dst);
1854 return;
1855 case Ain_CStore:
1856 mapRegs_AMD64AMode(m, i->Ain.CStore.addr);
1857 mapReg(m, &i->Ain.CStore.src);
1858 return;
1859 case Ain_MovxLQ:
1860 mapReg(m, &i->Ain.MovxLQ.src);
1861 mapReg(m, &i->Ain.MovxLQ.dst);
1862 return;
1863 case Ain_LoadEX:
1864 mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
1865 mapReg(m, &i->Ain.LoadEX.dst);
1866 return;
1867 case Ain_Store:
1868 mapReg(m, &i->Ain.Store.src);
1869 mapRegs_AMD64AMode(m, i->Ain.Store.dst);
1870 return;
1871 case Ain_Set64:
1872 mapReg(m, &i->Ain.Set64.dst);
1873 return;
1874 case Ain_Bsfr64:
1875 mapReg(m, &i->Ain.Bsfr64.src);
1876 mapReg(m, &i->Ain.Bsfr64.dst);
1877 return;
1878 case Ain_MFence:
1879 return;
1880 case Ain_ACAS:
1881 mapRegs_AMD64AMode(m, i->Ain.ACAS.addr);
1882 return;
1883 case Ain_DACAS:
1884 mapRegs_AMD64AMode(m, i->Ain.DACAS.addr);
1885 return;
1886 case Ain_A87Free:
1887 return;
1888 case Ain_A87PushPop:
1889 mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
1890 return;
1891 case Ain_A87FpOp:
1892 return;
1893 case Ain_A87LdCW:
1894 mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
1895 return;
1896 case Ain_A87StSW:
1897 mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
1898 return;
1899 case Ain_LdMXCSR:
1900 mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
1901 return;
1902 case Ain_SseUComIS:
1903 mapReg(m, &i->Ain.SseUComIS.srcL);
1904 mapReg(m, &i->Ain.SseUComIS.srcR);
1905 mapReg(m, &i->Ain.SseUComIS.dst);
1906 return;
1907 case Ain_SseSI2SF:
1908 mapReg(m, &i->Ain.SseSI2SF.src);
1909 mapReg(m, &i->Ain.SseSI2SF.dst);
1910 return;
1911 case Ain_SseSF2SI:
1912 mapReg(m, &i->Ain.SseSF2SI.src);
1913 mapReg(m, &i->Ain.SseSF2SI.dst);
1914 return;
1915 case Ain_SseSDSS:
1916 mapReg(m, &i->Ain.SseSDSS.src);
1917 mapReg(m, &i->Ain.SseSDSS.dst);
1918 return;
1919 case Ain_SseLdSt:
1920 mapReg(m, &i->Ain.SseLdSt.reg);
1921 mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
1922 break;
1923 case Ain_SseCStore:
1924 mapRegs_AMD64AMode(m, i->Ain.SseCStore.addr);
1925 mapReg(m, &i->Ain.SseCStore.src);
1926 return;
1927 case Ain_SseCLoad:
1928 mapRegs_AMD64AMode(m, i->Ain.SseCLoad.addr);
1929 mapReg(m, &i->Ain.SseCLoad.dst);
1930 return;
1931 case Ain_SseLdzLO:
1932 mapReg(m, &i->Ain.SseLdzLO.reg);
1933 mapRegs_AMD64AMode(m, i->Ain.SseLdzLO.addr);
1934 break;
1935 case Ain_Sse32Fx4:
1936 mapReg(m, &i->Ain.Sse32Fx4.src);
1937 mapReg(m, &i->Ain.Sse32Fx4.dst);
1938 return;
1939 case Ain_Sse32FLo:
1940 mapReg(m, &i->Ain.Sse32FLo.src);
1941 mapReg(m, &i->Ain.Sse32FLo.dst);
1942 return;
1943 case Ain_Sse64Fx2:
1944 mapReg(m, &i->Ain.Sse64Fx2.src);
1945 mapReg(m, &i->Ain.Sse64Fx2.dst);
1946 return;
1947 case Ain_Sse64FLo:
1948 mapReg(m, &i->Ain.Sse64FLo.src);
1949 mapReg(m, &i->Ain.Sse64FLo.dst);
1950 return;
1951 case Ain_SseReRg:
1952 mapReg(m, &i->Ain.SseReRg.src);
1953 mapReg(m, &i->Ain.SseReRg.dst);
1954 return;
1955 case Ain_SseCMov:
1956 mapReg(m, &i->Ain.SseCMov.src);
1957 mapReg(m, &i->Ain.SseCMov.dst);
1958 return;
1959 case Ain_SseShuf:
1960 mapReg(m, &i->Ain.SseShuf.src);
1961 mapReg(m, &i->Ain.SseShuf.dst);
1962 return;
1963 case Ain_SseShiftN:
1964 mapReg(m, &i->Ain.SseShiftN.dst);
1965 return;
1966 case Ain_SseMOVQ:
1967 mapReg(m, &i->Ain.SseMOVQ.gpr);
1968 mapReg(m, &i->Ain.SseMOVQ.xmm);
1969 return;
1970 //uu case Ain_AvxLdSt:
1971 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1972 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1973 //uu break;
1974 //uu case Ain_AvxReRg:
1975 //uu mapReg(m, &i->Ain.AvxReRg.src);
1976 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1977 //uu return;
1978 case Ain_EvCheck:
1979 /* We expect both amodes only to mention %rbp, so this is in
1980 fact pointless, since %rbp isn't allocatable, but anyway.. */
1981 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
1982 mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
1983 return;
1984 case Ain_ProfInc:
1985 /* hardwires r11 -- nothing to modify. */
1986 return;
1987 default:
1988 ppAMD64Instr(i, mode64);
1989 vpanic("mapRegs_AMD64Instr");
1993 /* Generate amd64 spill/reload instructions under the direction of the
1994 register allocator. Note it's critical these don't write the
1995 condition codes. */
1997 void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
1998 HReg rreg, Int offsetB, Bool mode64 )
2000 AMD64AMode* am;
2001 vassert(offsetB >= 0);
2002 vassert(!hregIsVirtual(rreg));
2003 vassert(mode64 == True);
2004 *i1 = *i2 = NULL;
2005 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2006 switch (hregClass(rreg)) {
2007 case HRcInt64:
2008 *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
2009 return;
2010 case HRcVec128:
2011 *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
2012 return;
2013 default:
2014 ppHRegClass(hregClass(rreg));
2015 vpanic("genSpill_AMD64: unimplemented regclass");
2019 void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2020 HReg rreg, Int offsetB, Bool mode64 )
2022 AMD64AMode* am;
2023 vassert(offsetB >= 0);
2024 vassert(!hregIsVirtual(rreg));
2025 vassert(mode64 == True);
2026 *i1 = *i2 = NULL;
2027 am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
2028 switch (hregClass(rreg)) {
2029 case HRcInt64:
2030 *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
2031 return;
2032 case HRcVec128:
2033 *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
2034 return;
2035 default:
2036 ppHRegClass(hregClass(rreg));
2037 vpanic("genReload_AMD64: unimplemented regclass");
2041 AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
2043 switch (hregClass(from)) {
2044 case HRcInt64:
2045 return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
2046 case HRcVec128:
2047 return AMD64Instr_SseReRg(Asse_MOV, from, to);
2048 default:
2049 ppHRegClass(hregClass(from));
2050 vpanic("genMove_AMD64: unimplemented regclass");
2054 AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
2056 vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
2058 /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
2059 Convert to: src=RMI_Mem, dst=Reg
2061 if (i->tag == Ain_Alu64R
2062 && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
2063 || i->Ain.Alu64R.op == Aalu_XOR)
2064 && i->Ain.Alu64R.src->tag == Armi_Reg
2065 && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
2066 vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
2067 return AMD64Instr_Alu64R(
2068 i->Ain.Alu64R.op,
2069 AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),
2070 i->Ain.Alu64R.dst
2074 /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
2075 Convert to: src=RI_Imm, dst=Mem
2077 if (i->tag == Ain_Alu64R
2078 && (i->Ain.Alu64R.op == Aalu_CMP)
2079 && i->Ain.Alu64R.src->tag == Armi_Imm
2080 && sameHReg(i->Ain.Alu64R.dst, vreg)) {
2081 return AMD64Instr_Alu64M(
2082 i->Ain.Alu64R.op,
2083 AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
2084 AMD64AMode_IR( spill_off, hregAMD64_RBP())
2088 return NULL;
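/* Illustrative example (the offset and registers are invented for the
   sake of the example): if vreg v is spilled at offset 24, then
   "xorq %v, %rdi" -- an Alu64R whose src is Armi_Reg(v) -- can be
   rewritten in place as "xorq 24(%rbp), %rdi", and "cmpq $7, %v" as
   "cmpq $7, 24(%rbp)", so no scratch register or separate reload
   instruction is needed for these two shapes. */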
2092 /* --------- The amd64 assembler (bleh.) --------- */
2094 /* Produce the low three bits of an integer register number. */
2095 inline static UInt iregEnc210 ( HReg r )
2097 UInt n;
2098 vassert(hregClass(r) == HRcInt64);
2099 vassert(!hregIsVirtual(r));
2100 n = hregEncoding(r);
2101 vassert(n <= 15);
2102 return n & 7;
2105 /* Produce bit 3 of an integer register number. */
2106 inline static UInt iregEnc3 ( HReg r )
2108 UInt n;
2109 vassert(hregClass(r) == HRcInt64);
2110 vassert(!hregIsVirtual(r));
2111 n = hregEncoding(r);
2112 vassert(n <= 15);
2113 return (n >> 3) & 1;
2116 /* Produce a complete 4-bit integer register number. */
2117 inline static UInt iregEnc3210 ( HReg r )
2119 UInt n;
2120 vassert(hregClass(r) == HRcInt64);
2121 vassert(!hregIsVirtual(r));
2122 n = hregEncoding(r);
2123 vassert(n <= 15);
2124 return n;
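/* Example, purely for illustration: %r12 has hardware encoding
   12 == 0b1100, so iregEnc3210 gives 12, iregEnc3 gives 1 (the bit that
   ends up in the REX prefix) and iregEnc210 gives 4 (the bits that end
   up in the ModRM/SIB byte).  %rsp encodes as 4 == 0b0100 and so shares
   the same low three bits; only the REX bit tells the two apart, which
   is why the amode logic below has to treat RSP and R12 together. */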
2127 /* Produce a complete 4-bit integer register number. */
2128 inline static UInt vregEnc3210 ( HReg r )
2130 UInt n;
2131 vassert(hregClass(r) == HRcVec128);
2132 vassert(!hregIsVirtual(r));
2133 n = hregEncoding(r);
2134 vassert(n <= 15);
2135 return n;
2138 inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
2140 vassert(mod < 4);
2141 vassert((reg|regmem) < 8);
2142 return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );
2145 inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
2147 vassert(shift < 4);
2148 vassert((regindex|regbase) < 8);
2149 return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
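/* Worked example (values chosen only for illustration):
   mkModRegRM(3, 0, 3) packs mod=11, reg=000, rm=011 into 0xC3 -- the
   ModRM byte of the reg-reg form "movq %rax, %rbx" (48 89 C3).
   mkSIB(2, 1, 0) packs scale-shift=2 (*4), index=%rcx, base=%rax into
   0x88, the SIB byte for an address of the form d(%rax,%rcx,4). */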
2152 static UChar* emit32 ( UChar* p, UInt w32 )
2154 *p++ = toUChar((w32) & 0x000000FF);
2155 *p++ = toUChar((w32 >> 8) & 0x000000FF);
2156 *p++ = toUChar((w32 >> 16) & 0x000000FF);
2157 *p++ = toUChar((w32 >> 24) & 0x000000FF);
2158 return p;
2161 static UChar* emit64 ( UChar* p, ULong w64 )
2163 p = emit32(p, toUInt(w64 & 0xFFFFFFFF));
2164 p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
2165 return p;
2168 /* Does a sign-extend of the lowest 8 bits give
2169 the original number? */
2170 static Bool fits8bits ( UInt w32 )
2172 Int i32 = (Int)w32;
2173 return toBool(i32 == ((Int)(w32 << 24) >> 24));
2175 /* Can the lower 32 bits be signedly widened to produce the whole
2176 64-bit value? In other words, are the top 33 bits either all 0 or
2177 all 1 ? */
2178 static Bool fitsIn32Bits ( ULong x )
2180 Long y1;
2181 y1 = x << 32;
2182 y1 >>=/*s*/ 32;
 2183 return toBool(x == y1);
 2184 }
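/* Illustrative sketch only: a hypothetical helper, not used anywhere,
   that spells out a few concrete cases of the two predicates above. */
static inline void exampleFitsChecks ( void )
{
   vassert(  fits8bits(0x0000007F) );  /* 127 sign-extends back to itself */
   vassert( !fits8bits(0x00000080) );  /* low byte sign-extends to -128 */
   vassert(  fits8bits(0xFFFFFFFF) );  /* -1 survives the round trip */
   vassert(  fitsIn32Bits(0x7FFFFFFFULL) );         /* top 33 bits all 0 */
   vassert( !fitsIn32Bits(0x80000000ULL) );         /* top 33 bits are 0..01 */
   vassert(  fitsIn32Bits(0xFFFFFFFF80000000ULL) ); /* top 33 bits all 1 */
}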
2187 /* Forming mod-reg-rm bytes and scale-index-base bytes.
2189 greg, 0(ereg) | ereg is not any of: RSP RBP R12 R13
2190 = 00 greg ereg
2192 greg, d8(ereg) | ereg is neither of: RSP R12
2193 = 01 greg ereg, d8
2195 greg, d32(ereg) | ereg is neither of: RSP R12
2196 = 10 greg ereg, d32
2198 greg, d8(ereg) | ereg is either: RSP R12
2199 = 01 greg 100, 0x24, d8
2200 (lowest bit of rex distinguishes R12/RSP)
2202 greg, d32(ereg) | ereg is either: RSP R12
2203 = 10 greg 100, 0x24, d32
2204 (lowest bit of rex distinguishes R12/RSP)
2206 -----------------------------------------------
2208 greg, d8(base,index,scale)
2209 | index != RSP
2210 = 01 greg 100, scale index base, d8
2212 greg, d32(base,index,scale)
2213 | index != RSP
 2214 = 10 greg 100, scale index base, d32
 2215 */
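/* Worked example (illustrative only): for greg = %rax and the IR amode
   16(%rbp), the d8 form applies (RBP is neither RSP nor R12, and 16 fits
   in 8 bits), so doAMode_M emits ModRM = 01 000 101 = 0x45 followed by
   0x10; with a REX.W prefix and opcode 0x8B that is exactly
   "movq 16(%rbp), %rax" = 48 8B 45 10.  For an IRRS amode such as
   8(%rdx,%rdi,4) with greg = %rcx, the SIB form applies:
   ModRM = 01 001 100 = 0x4C, SIB = 10 111 010 = 0xBA, then the
   displacement 0x08, giving "movq 8(%rdx,%rdi,4), %rcx" = 48 8B 4C BA 08. */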
2216 static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2218 UInt gregEnc210 = gregEnc3210 & 7;
2219 if (am->tag == Aam_IR) {
2220 if (am->Aam.IR.imm == 0
2221 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2222 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
2223 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2224 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
2226 *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
2227 return p;
2229 if (fits8bits(am->Aam.IR.imm)
2230 && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2231 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2233 *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
2234 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2235 return p;
2237 if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2238 && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
2240 *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
2241 p = emit32(p, am->Aam.IR.imm);
2242 return p;
2244 if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2245 || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
2246 && fits8bits(am->Aam.IR.imm)) {
2247 *p++ = mkModRegRM(1, gregEnc210, 4);
2248 *p++ = 0x24;
2249 *p++ = toUChar(am->Aam.IR.imm & 0xFF);
2250 return p;
2252 if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
2253 || wait for test case for RSP case */
2254 sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
2255 *p++ = mkModRegRM(2, gregEnc210, 4);
2256 *p++ = 0x24;
2257 p = emit32(p, am->Aam.IR.imm);
2258 return p;
2260 ppAMD64AMode(am);
2261 vpanic("doAMode_M: can't emit amode IR");
2262 /*NOTREACHED*/
2264 if (am->tag == Aam_IRRS) {
2265 if (fits8bits(am->Aam.IRRS.imm)
2266 && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2267 *p++ = mkModRegRM(1, gregEnc210, 4);
2268 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2269 iregEnc210(am->Aam.IRRS.base));
2270 *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
2271 return p;
2273 if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
2274 *p++ = mkModRegRM(2, gregEnc210, 4);
2275 *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
2276 iregEnc210(am->Aam.IRRS.base));
2277 p = emit32(p, am->Aam.IRRS.imm);
2278 return p;
2280 ppAMD64AMode(am);
2281 vpanic("doAMode_M: can't emit amode IRRS");
2282 /*NOTREACHED*/
2284 vpanic("doAMode_M: unknown amode");
2285 /*NOTREACHED*/
2288 static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
2290 return doAMode_M__wrk(p, iregEnc3210(greg), am);
2293 static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
2295 vassert(gregEnc3210 < 16);
2296 return doAMode_M__wrk(p, gregEnc3210, am);
2300 /* Emit a mod-reg-rm byte when the rm bit denotes a reg. */
2301 inline
2302 static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2304 *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);
2305 return p;
2308 static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
2310 return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));
2313 static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
2315 vassert(gregEnc3210 < 16);
2316 return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));
2319 static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
2321 vassert(eregEnc3210 < 16);
2322 return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);
2325 static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
2327 vassert( (gregEnc3210|eregEnc3210) < 16);
2328 return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
2332 /* Clear the W bit on a REX byte, thereby changing the operand size
2333 back to whatever that instruction's default operand size is. */
2334 static inline UChar clearWBit ( UChar rex )
2336 return rex & ~(1<<3);
2339 static inline UChar setWBit ( UChar rex )
2341 return rex | (1<<3);
2345 /* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
2346 inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
2348 if (am->tag == Aam_IR) {
2349 UChar W = 1; /* we want 64-bit mode */
2350 UChar R = (gregEnc3210 >> 3) & 1;
2351 UChar X = 0; /* not relevant */
2352 UChar B = iregEnc3(am->Aam.IR.reg);
2353 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2355 if (am->tag == Aam_IRRS) {
2356 UChar W = 1; /* we want 64-bit mode */
2357 UChar R = (gregEnc3210 >> 3) & 1;
2358 UChar X = iregEnc3(am->Aam.IRRS.index);
2359 UChar B = iregEnc3(am->Aam.IRRS.base);
2360 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
2362 vassert(0);
2363 return 0; /*NOTREACHED*/
2366 static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
2368 return rexAMode_M__wrk(iregEnc3210(greg), am);
2371 static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
2373 vassert(gregEnc3210 < 16);
2374 return rexAMode_M__wrk(gregEnc3210, am);
2378 /* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
2379 inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
2381 UChar W = 1; /* we want 64-bit mode */
2382 UChar R = (gregEnc3210 >> 3) & 1;
2383 UChar X = 0; /* not relevant */
2384 UChar B = (eregEnc3210 >> 3) & 1;
2385 return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
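/* Worked example (registers picked only for illustration): for
   greg = %r8 (encoding 8) and ereg = %rcx (encoding 1) this gives
   W=1, R=1, X=0, B=0, i.e. the REX byte 0x4C; combined with opcode 0x03
   and ModRM 0xC1 that encodes "addq %rcx, %r8" as 4C 03 C1.  Clearing
   the W bit (clearWBit above) turns the same prefix into the 32-bit
   form, which the 32-bit cases in the emitter below rely on. */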
2388 static UChar rexAMode_R ( HReg greg, HReg ereg )
2390 return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));
2393 static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
2395 vassert(gregEnc3210 < 16);
2396 return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));
2399 static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
2401 vassert(eregEnc3210 < 16);
2402 return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);
2405 static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
2407 vassert((gregEnc3210|eregEnc3210) < 16);
2408 return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
2412 //uu /* May 2012: this VEX prefix stuff is currently unused, but has
 2413 //uu been verified correct (I reckon). Certainly it has been known to
2414 //uu produce correct VEX prefixes during testing. */
2415 //uu
2416 //uu /* Assemble a 2 or 3 byte VEX prefix from parts. rexR, rexX, rexB and
 2417 //uu notVvvv need to be not-ed before packing. mmmmm, rexW, L and pp go
2418 //uu in verbatim. There's no range checking on the bits. */
2419 //uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
2420 //uu UInt mmmmm, UInt rexW, UInt notVvvv,
2421 //uu UInt L, UInt pp )
2422 //uu {
2423 //uu UChar byte0 = 0;
2424 //uu UChar byte1 = 0;
2425 //uu UChar byte2 = 0;
2426 //uu if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
2427 //uu /* 2 byte encoding is possible. */
2428 //uu byte0 = 0xC5;
2429 //uu byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
2430 //uu | (L << 2) | pp;
2431 //uu } else {
2432 //uu /* 3 byte encoding is needed. */
2433 //uu byte0 = 0xC4;
2434 //uu byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
2435 //uu | ((rexB ^ 1) << 5) | mmmmm;
2436 //uu byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
2437 //uu }
2438 //uu return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);
2439 //uu }
2440 //uu
2441 //uu /* Make up a VEX prefix for a (greg,amode) pair. First byte in bits
2442 //uu 7:0 of result, second in 15:8, third (for a 3 byte prefix) in
2443 //uu 23:16. Has m-mmmm set to indicate a prefix of 0F, pp set to
2444 //uu indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
2445 //uu vvvv=1111 (unused 3rd reg). */
2446 //uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
2447 //uu {
2448 //uu UChar L = 1; /* size = 256 */
2449 //uu UChar pp = 0; /* no SIMD prefix */
2450 //uu UChar mmmmm = 1; /* 0F */
2451 //uu UChar notVvvv = 0; /* unused */
2452 //uu UChar rexW = 0;
2453 //uu UChar rexR = 0;
2454 //uu UChar rexX = 0;
2455 //uu UChar rexB = 0;
2456 //uu /* Same logic as in rexAMode_M. */
2457 //uu if (am->tag == Aam_IR) {
2458 //uu rexR = iregEnc3(greg);
2459 //uu rexX = 0; /* not relevant */
2460 //uu rexB = iregEnc3(am->Aam.IR.reg);
2461 //uu }
2462 //uu else if (am->tag == Aam_IRRS) {
2463 //uu rexR = iregEnc3(greg);
2464 //uu rexX = iregEnc3(am->Aam.IRRS.index);
2465 //uu rexB = iregEnc3(am->Aam.IRRS.base);
2466 //uu } else {
2467 //uu vassert(0);
2468 //uu }
2469 //uu return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );
2470 //uu }
2471 //uu
2472 //uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
2473 //uu {
2474 //uu switch (vex & 0xFF) {
2475 //uu case 0xC5:
2476 //uu *p++ = 0xC5;
2477 //uu *p++ = (vex >> 8) & 0xFF;
2478 //uu vassert(0 == (vex >> 16));
2479 //uu break;
2480 //uu case 0xC4:
2481 //uu *p++ = 0xC4;
2482 //uu *p++ = (vex >> 8) & 0xFF;
2483 //uu *p++ = (vex >> 16) & 0xFF;
2484 //uu vassert(0 == (vex >> 24));
2485 //uu break;
2486 //uu default:
2487 //uu vassert(0);
2488 //uu }
2489 //uu return p;
2490 //uu }
2493 /* Emit ffree %st(N) */
2494 static UChar* do_ffree_st ( UChar* p, Int n )
2496 vassert(n >= 0 && n <= 7);
2497 *p++ = 0xDD;
2498 *p++ = toUChar(0xC0 + n);
2499 return p;
2502 /* Emit an instruction into buf and return the number of bytes used.
2503 Note that buf is not the insn's final place, and therefore it is
2504 imperative to emit position-independent code. If the emitted
2505 instruction was a profiler inc, set *is_profInc to True, else
2506 leave it unchanged. */
2508 Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
2509 UChar* buf, Int nbuf, const AMD64Instr* i,
2510 Bool mode64, VexEndness endness_host,
2511 const void* disp_cp_chain_me_to_slowEP,
2512 const void* disp_cp_chain_me_to_fastEP,
2513 const void* disp_cp_xindir,
2514 const void* disp_cp_xassisted )
2516 UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
2517 UInt xtra;
2518 UInt reg;
2519 UChar rex;
2520 UChar* p = &buf[0];
2521 UChar* ptmp;
2522 Int j;
2523 vassert(nbuf >= 64);
2524 vassert(mode64 == True);
2526 /* vex_printf("asm "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
2528 switch (i->tag) {
2530 case Ain_Imm64:
2531 if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
2532 /* Use the short form (load into 32 bit reg, + default
2533 widening rule) for constants under 1 million. We could
2534 use this form for the range 0 to 0x7FFFFFFF inclusive, but
2535 limit it to a smaller range for verifiability purposes. */
2536 if (1 & iregEnc3(i->Ain.Imm64.dst))
2537 *p++ = 0x41;
2538 *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
2539 p = emit32(p, (UInt)i->Ain.Imm64.imm64);
2540 } else {
2541 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
2542 *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
2543 p = emit64(p, i->Ain.Imm64.imm64);
2545 goto done;
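/* Example (illustrative): loading 0x1234 into %rdx takes the short form
   "movl $0x1234, %edx" = BA 34 12 00 00, relying on the 32-bit write
   zeroing the upper half of %rdx; the same constant into %r9 needs a
   REX.B prefix first, 41 B9 34 12 00 00.  A constant that doesn't
   qualify, say 0x123456789A, falls through to the movabsq form:
   48 BA followed by the eight immediate bytes. */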
2547 case Ain_Alu64R:
2548 /* Deal specially with MOV */
2549 if (i->Ain.Alu64R.op == Aalu_MOV) {
2550 switch (i->Ain.Alu64R.src->tag) {
2551 case Armi_Imm:
2552 if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
2553 /* Actually we could use this form for constants in
2554 the range 0 through 0x7FFFFFFF inclusive, but
2555 limit it to a small range for verifiability
2556 purposes. */
2557 /* Generate "movl $imm32, 32-bit-register" and let
2558 the default zero-extend rule cause the upper half
2559 of the dst to be zeroed out too. This saves 1
2560 and sometimes 2 bytes compared to the more
2561 obvious encoding in the 'else' branch. */
2562 if (1 & iregEnc3(i->Ain.Alu64R.dst))
2563 *p++ = 0x41;
2564 *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
2565 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2566 } else {
2567 *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
2568 *p++ = 0xC7;
2569 *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
2570 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2572 goto done;
2573 case Armi_Reg:
2574 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2575 i->Ain.Alu64R.dst );
2576 *p++ = 0x89;
2577 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2578 i->Ain.Alu64R.dst);
2579 goto done;
2580 case Armi_Mem:
2581 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2582 i->Ain.Alu64R.src->Armi.Mem.am);
2583 *p++ = 0x8B;
2584 p = doAMode_M(p, i->Ain.Alu64R.dst,
2585 i->Ain.Alu64R.src->Armi.Mem.am);
2586 goto done;
2587 default:
2588 goto bad;
2591 /* MUL */
2592 if (i->Ain.Alu64R.op == Aalu_MUL) {
2593 switch (i->Ain.Alu64R.src->tag) {
2594 case Armi_Reg:
2595 *p++ = rexAMode_R( i->Ain.Alu64R.dst,
2596 i->Ain.Alu64R.src->Armi.Reg.reg);
2597 *p++ = 0x0F;
2598 *p++ = 0xAF;
2599 p = doAMode_R(p, i->Ain.Alu64R.dst,
2600 i->Ain.Alu64R.src->Armi.Reg.reg);
2601 goto done;
2602 case Armi_Mem:
2603 *p++ = rexAMode_M(i->Ain.Alu64R.dst,
2604 i->Ain.Alu64R.src->Armi.Mem.am);
2605 *p++ = 0x0F;
2606 *p++ = 0xAF;
2607 p = doAMode_M(p, i->Ain.Alu64R.dst,
2608 i->Ain.Alu64R.src->Armi.Mem.am);
2609 goto done;
2610 case Armi_Imm:
2611 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2612 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2613 *p++ = 0x6B;
2614 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2615 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2616 } else {
2617 *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2618 *p++ = 0x69;
2619 p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
2620 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2622 goto done;
2623 default:
2624 goto bad;
2627 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
2628 opc = opc_rr = subopc_imm = opc_imma = 0;
2629 switch (i->Ain.Alu64R.op) {
2630 case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
2631 subopc_imm = 2; opc_imma = 0x15; break;
2632 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2633 subopc_imm = 0; opc_imma = 0x05; break;
2634 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2635 subopc_imm = 5; opc_imma = 0x2D; break;
2636 case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
2637 subopc_imm = 3; opc_imma = 0x1D; break;
2638 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2639 subopc_imm = 4; opc_imma = 0x25; break;
2640 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2641 subopc_imm = 6; opc_imma = 0x35; break;
2642 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2643 subopc_imm = 1; opc_imma = 0x0D; break;
2644 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2645 subopc_imm = 7; opc_imma = 0x3D; break;
2646 default: goto bad;
2648 switch (i->Ain.Alu64R.src->tag) {
2649 case Armi_Imm:
2650 if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
2651 && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2652 goto bad; /* FIXME: awaiting test case */
2653 *p++ = toUChar(opc_imma);
2654 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2655 } else
2656 if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
2657 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
2658 *p++ = 0x83;
2659 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2660 *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
2661 } else {
2662 *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
2663 *p++ = 0x81;
2664 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
2665 p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
2667 goto done;
2668 case Armi_Reg:
2669 *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
2670 i->Ain.Alu64R.dst);
2671 *p++ = toUChar(opc_rr);
2672 p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
2673 i->Ain.Alu64R.dst);
2674 goto done;
2675 case Armi_Mem:
2676 *p++ = rexAMode_M( i->Ain.Alu64R.dst,
2677 i->Ain.Alu64R.src->Armi.Mem.am);
2678 *p++ = toUChar(opc);
2679 p = doAMode_M(p, i->Ain.Alu64R.dst,
2680 i->Ain.Alu64R.src->Armi.Mem.am);
2681 goto done;
2682 default:
2683 goto bad;
2685 break;
2687 case Ain_Alu64M:
2688 /* Deal specially with MOV */
2689 if (i->Ain.Alu64M.op == Aalu_MOV) {
2690 switch (i->Ain.Alu64M.src->tag) {
2691 case Ari_Reg:
2692 *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
2693 i->Ain.Alu64M.dst);
2694 *p++ = 0x89;
2695 p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
2696 i->Ain.Alu64M.dst);
2697 goto done;
2698 case Ari_Imm:
2699 *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
2700 *p++ = 0xC7;
2701 p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
2702 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2703 goto done;
2704 default:
2705 goto bad;
2708 /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
2709 allowed here. (This is derived from the x86 version of same). */
2710 opc = subopc_imm = opc_imma = 0;
2711 switch (i->Ain.Alu64M.op) {
2712 case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
2713 default: goto bad;
2715 switch (i->Ain.Alu64M.src->tag) {
 2716 /*
 2717 case Xri_Reg:
 2718 *p++ = toUChar(opc);
 2719 p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
 2720 i->Xin.Alu32M.dst);
 2721 goto done;
 2722 */
2723 case Ari_Imm:
2724 if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
2725 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2726 *p++ = 0x83;
2727 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2728 *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
2729 goto done;
2730 } else {
2731 *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
2732 *p++ = 0x81;
2733 p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
2734 p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
2735 goto done;
2737 default:
2738 goto bad;
2741 break;
2743 case Ain_Sh64:
2744 opc_cl = opc_imm = subopc = 0;
2745 switch (i->Ain.Sh64.op) {
2746 case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
2747 case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
2748 case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
2749 default: goto bad;
2751 if (i->Ain.Sh64.src == 0) {
2752 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2753 *p++ = toUChar(opc_cl);
2754 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2755 goto done;
2756 } else {
2757 *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
2758 *p++ = toUChar(opc_imm);
2759 p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
2760 *p++ = (UChar)(i->Ain.Sh64.src);
2761 goto done;
2763 break;
2765 case Ain_Test64:
2766 /* testq sign-extend($imm32), %reg */
2767 *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
2768 *p++ = 0xF7;
2769 p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
2770 p = emit32(p, i->Ain.Test64.imm32);
2771 goto done;
2773 case Ain_Unary64:
2774 if (i->Ain.Unary64.op == Aun_NOT) {
2775 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2776 *p++ = 0xF7;
2777 p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
2778 goto done;
2780 if (i->Ain.Unary64.op == Aun_NEG) {
2781 *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
2782 *p++ = 0xF7;
2783 p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
2784 goto done;
2786 break;
2788 case Ain_Lea64:
2789 *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
2790 *p++ = 0x8D;
2791 p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
2792 goto done;
2794 case Ain_Alu32R:
2795 /* ADD/SUB/AND/OR/XOR/CMP */
2796 opc = opc_rr = subopc_imm = opc_imma = 0;
2797 switch (i->Ain.Alu32R.op) {
2798 case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
2799 subopc_imm = 0; opc_imma = 0x05; break;
2800 case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
2801 subopc_imm = 5; opc_imma = 0x2D; break;
2802 case Aalu_AND: opc = 0x23; opc_rr = 0x21;
2803 subopc_imm = 4; opc_imma = 0x25; break;
2804 case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
2805 subopc_imm = 6; opc_imma = 0x35; break;
2806 case Aalu_OR: opc = 0x0B; opc_rr = 0x09;
2807 subopc_imm = 1; opc_imma = 0x0D; break;
2808 case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
2809 subopc_imm = 7; opc_imma = 0x3D; break;
2810 default: goto bad;
2812 switch (i->Ain.Alu32R.src->tag) {
2813 case Armi_Imm:
2814 if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
2815 && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2816 goto bad; /* FIXME: awaiting test case */
2817 *p++ = toUChar(opc_imma);
2818 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2819 } else
2820 if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
2821 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
2822 if (rex != 0x40) *p++ = rex;
2823 *p++ = 0x83;
2824 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2825 *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
2826 } else {
2827 rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
2828 if (rex != 0x40) *p++ = rex;
2829 *p++ = 0x81;
2830 p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
2831 p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
2833 goto done;
2834 case Armi_Reg:
2835 rex = clearWBit(
2836 rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
2837 i->Ain.Alu32R.dst) );
2838 if (rex != 0x40) *p++ = rex;
2839 *p++ = toUChar(opc_rr);
2840 p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
2841 i->Ain.Alu32R.dst);
2842 goto done;
2843 case Armi_Mem:
2844 rex = clearWBit(
2845 rexAMode_M( i->Ain.Alu32R.dst,
2846 i->Ain.Alu32R.src->Armi.Mem.am) );
2847 if (rex != 0x40) *p++ = rex;
2848 *p++ = toUChar(opc);
2849 p = doAMode_M(p, i->Ain.Alu32R.dst,
2850 i->Ain.Alu32R.src->Armi.Mem.am);
2851 goto done;
2852 default:
2853 goto bad;
2855 break;
2857 case Ain_MulL:
2858 subopc = i->Ain.MulL.syned ? 5 : 4;
2859 switch (i->Ain.MulL.src->tag) {
2860 case Arm_Mem:
2861 *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
2862 *p++ = 0xF7;
2863 p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
2864 goto done;
2865 case Arm_Reg:
2866 *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
2867 *p++ = 0xF7;
2868 p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
2869 goto done;
2870 default:
2871 goto bad;
2873 break;
2875 case Ain_Div:
2876 subopc = i->Ain.Div.syned ? 7 : 6;
2877 if (i->Ain.Div.sz == 4) {
2878 switch (i->Ain.Div.src->tag) {
2879 case Arm_Mem:
2880 goto bad;
2881 /*FIXME*/
2882 *p++ = 0xF7;
2883 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2884 goto done;
2885 case Arm_Reg:
2886 *p++ = clearWBit(
2887 rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
2888 *p++ = 0xF7;
2889 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2890 goto done;
2891 default:
2892 goto bad;
2895 if (i->Ain.Div.sz == 8) {
2896 switch (i->Ain.Div.src->tag) {
2897 case Arm_Mem:
2898 *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
2899 *p++ = 0xF7;
2900 p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
2901 goto done;
2902 case Arm_Reg:
2903 *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
2904 *p++ = 0xF7;
2905 p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
2906 goto done;
2907 default:
2908 goto bad;
2911 break;
2913 case Ain_Push:
2914 switch (i->Ain.Push.src->tag) {
2915 case Armi_Mem:
2916 *p++ = clearWBit(
2917 rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
2918 *p++ = 0xFF;
2919 p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
2920 goto done;
2921 case Armi_Imm:
2922 *p++ = 0x68;
2923 p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
2924 goto done;
2925 case Armi_Reg:
2926 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
2927 *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
2928 goto done;
2929 default:
2930 goto bad;
2933 case Ain_Call: {
2934 /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
2935 above, %r11 is used as an address temporary. */
2936 /* If we don't need to do any fixup actions in the case that the
2937 call doesn't happen, just do the simple thing and emit
2938 straight-line code. This is usually the case. */
2939 if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
2940 || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
2941 /* jump over the following two insns if the condition does
2942 not hold */
2943 Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
2944 if (i->Ain.Call.cond != Acc_ALWAYS) {
2945 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2946 *p++ = shortImm ? 10 : 13;
2947 /* 10 or 13 bytes in the next two insns */
2949 if (shortImm) {
2950 /* 7 bytes: movl sign-extend(imm32), %r11 */
2951 *p++ = 0x49;
2952 *p++ = 0xC7;
2953 *p++ = 0xC3;
2954 p = emit32(p, (UInt)i->Ain.Call.target);
2955 } else {
2956 /* 10 bytes: movabsq $target, %r11 */
2957 *p++ = 0x49;
2958 *p++ = 0xBB;
2959 p = emit64(p, i->Ain.Call.target);
2961 /* 3 bytes: call *%r11 */
2962 *p++ = 0x41;
2963 *p++ = 0xFF;
2964 *p++ = 0xD3;
2965 } else {
2966 Int delta;
2967 /* Complex case. We have to generate an if-then-else diamond. */
2968 // before:
2969 // j{!cond} else:
2970 // movabsq $target, %r11
2971 // call* %r11
2972 // preElse:
2973 // jmp after:
2974 // else:
2975 // movabsq $0x5555555555555555, %rax // possibly
2976 // movq %rax, %rdx // possibly
2977 // after:
2979 // before:
2980 UChar* pBefore = p;
2982 // j{!cond} else:
2983 *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
2984 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
2986 // movabsq $target, %r11
2987 *p++ = 0x49;
2988 *p++ = 0xBB;
2989 p = emit64(p, i->Ain.Call.target);
2991 // call* %r11
2992 *p++ = 0x41;
2993 *p++ = 0xFF;
2994 *p++ = 0xD3;
2996 // preElse:
2997 UChar* pPreElse = p;
2999 // jmp after:
3000 *p++ = 0xEB;
3001 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3003 // else:
3004 UChar* pElse = p;
3006 /* Do the 'else' actions */
3007 switch (i->Ain.Call.rloc.pri) {
3008 case RLPri_Int:
3009 // movabsq $0x5555555555555555, %rax
3010 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3011 break;
3012 case RLPri_2Int:
3013 goto bad; //ATC
3014 // movabsq $0x5555555555555555, %rax
3015 *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
3016 // movq %rax, %rdx
3017 *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
3018 break;
3019 case RLPri_V128SpRel:
3020 if (i->Ain.Call.rloc.spOff == 0) {
3021 // We could accept any |spOff| here, but that's more
3022 // hassle and the only value we're ever going to get
3023 // is zero (I believe.) Hence take the easy path :)
3024 // We need a scag register -- r11 can be it.
3025 // movabsq $0x5555555555555555, %r11
3026 *p++ = 0x49; *p++ = 0xBB;
3027 p = emit64(p, 0x5555555555555555ULL);
3028 // movq %r11, 0(%rsp)
3029 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
3030 // movq %r11, 8(%rsp)
3031 *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
3032 *p++ = 0x08;
3033 break;
3035 goto bad; //ATC for all other spOff values
3036 case RLPri_V256SpRel:
3037 goto bad; //ATC
3038 case RLPri_None: case RLPri_INVALID: default:
3039 vassert(0); // should never get here
3042 // after:
3043 UChar* pAfter = p;
3045 // Fix up the branch offsets. The +2s in the offset
3046 // calculations are there because x86 requires conditional
3047 // branches to have their offset stated relative to the
3048 // instruction immediately following the branch insn. And in
3049 // both cases the branch insns are 2 bytes long.
3051 // First, the "j{!cond} else:" at pBefore.
3052 delta = (Int)(Long)(pElse - (pBefore + 2));
3053 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3054 *(pBefore+1) = (UChar)delta;
3056 // And secondly, the "jmp after:" at pPreElse.
3057 delta = (Int)(Long)(pAfter - (pPreElse + 2));
3058 vassert(delta >= 0 && delta < 100/*arbitrary*/);
3059 *(pPreElse+1) = (UChar)delta;
3061 goto done;
3064 case Ain_XDirect: {
3065 /* NB: what goes on here has to be very closely coordinated with the
3066 chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
3067 /* We're generating chain-me requests here, so we need to be
3068 sure this is actually allowed -- no-redir translations can't
3069 use chain-me's. Hence: */
3070 vassert(disp_cp_chain_me_to_slowEP != NULL);
3071 vassert(disp_cp_chain_me_to_fastEP != NULL);
3073 HReg r11 = hregAMD64_R11();
3075 /* Use ptmp for backpatching conditional jumps. */
3076 ptmp = NULL;
3078 /* First off, if this is conditional, create a conditional
3079 jump over the rest of it. */
3080 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3081 /* jmp fwds if !condition */
3082 *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
3083 ptmp = p; /* fill in this bit later */
3084 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3087 /* Update the guest RIP. */
3088 if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
3089 /* use a shorter encoding */
3090 /* movl sign-extend(dstGA), %r11 */
3091 *p++ = 0x49;
3092 *p++ = 0xC7;
3093 *p++ = 0xC3;
3094 p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
3095 } else {
3096 /* movabsq $dstGA, %r11 */
3097 *p++ = 0x49;
3098 *p++ = 0xBB;
3099 p = emit64(p, i->Ain.XDirect.dstGA);
3102 /* movq %r11, amRIP */
3103 *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
3104 *p++ = 0x89;
3105 p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
3107 /* --- FIRST PATCHABLE BYTE follows --- */
3108 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
3109 to) backs up the return address, so as to find the address of
3110 the first patchable byte. So: don't change the length of the
3111 two instructions below. */
3112 /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
3113 *p++ = 0x49;
3114 *p++ = 0xBB;
3115 const void* disp_cp_chain_me
3116 = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3117 : disp_cp_chain_me_to_slowEP;
3118 p = emit64(p, (Addr)disp_cp_chain_me);
3119 /* call *%r11 */
3120 *p++ = 0x41;
3121 *p++ = 0xFF;
3122 *p++ = 0xD3;
3123 /* --- END of PATCHABLE BYTES --- */
3125 /* Fix up the conditional jump, if there was one. */
3126 if (i->Ain.XDirect.cond != Acc_ALWAYS) {
3127 Int delta = p - ptmp;
3128 vassert(delta > 0 && delta < 40);
3129 *ptmp = toUChar(delta-1);
3131 goto done;
3134 case Ain_XIndir: {
3135 /* We're generating transfers that could lead indirectly to a
3136 chain-me, so we need to be sure this is actually allowed --
3137 no-redir translations are not allowed to reach normal
3138 translations without going through the scheduler. That means
3139 no XDirects or XIndirs out from no-redir translations.
3140 Hence: */
3141 vassert(disp_cp_xindir != NULL);
3143 /* Use ptmp for backpatching conditional jumps. */
3144 ptmp = NULL;
3146 /* First off, if this is conditional, create a conditional
3147 jump over the rest of it. */
3148 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3149 /* jmp fwds if !condition */
3150 *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
3151 ptmp = p; /* fill in this bit later */
3152 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3155 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3156 *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3157 *p++ = 0x89;
3158 p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
3160 /* get $disp_cp_xindir into %r11 */
3161 if (fitsIn32Bits((Addr)disp_cp_xindir)) {
3162 /* use a shorter encoding */
3163 /* movl sign-extend(disp_cp_xindir), %r11 */
3164 *p++ = 0x49;
3165 *p++ = 0xC7;
3166 *p++ = 0xC3;
3167 p = emit32(p, (UInt)(Addr)disp_cp_xindir);
3168 } else {
3169 /* movabsq $disp_cp_xindir, %r11 */
3170 *p++ = 0x49;
3171 *p++ = 0xBB;
3172 p = emit64(p, (Addr)disp_cp_xindir);
3175 /* jmp *%r11 */
3176 *p++ = 0x41;
3177 *p++ = 0xFF;
3178 *p++ = 0xE3;
3180 /* Fix up the conditional jump, if there was one. */
3181 if (i->Ain.XIndir.cond != Acc_ALWAYS) {
3182 Int delta = p - ptmp;
3183 vassert(delta > 0 && delta < 40);
3184 *ptmp = toUChar(delta-1);
3186 goto done;
3189 case Ain_XAssisted: {
3190 /* Use ptmp for backpatching conditional jumps. */
3191 ptmp = NULL;
3193 /* First off, if this is conditional, create a conditional
3194 jump over the rest of it. */
3195 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3196 /* jmp fwds if !condition */
3197 *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
3198 ptmp = p; /* fill in this bit later */
3199 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3202 /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
3203 *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3204 *p++ = 0x89;
3205 p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
3206 /* movl $magic_number, %ebp. Since these numbers are all small positive
3207 integers, we can get away with "movl $N, %ebp" rather than
3208 the longer "movq $N, %rbp". */
3209 UInt trcval = 0;
3210 switch (i->Ain.XAssisted.jk) {
3211 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3212 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3213 case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
3214 case Ijk_Sys_int210: trcval = VEX_TRC_JMP_SYS_INT210; break;
3215 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3216 case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3217 case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3218 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3219 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3220 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3221 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3222 case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3223 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3224 /* We don't expect to see the following being assisted. */
3225 case Ijk_Ret:
3226 case Ijk_Call:
3227 /* fallthrough */
3228 default:
3229 ppIRJumpKind(i->Ain.XAssisted.jk);
3230 vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
3232 vassert(trcval != 0);
3233 *p++ = 0xBD;
3234 p = emit32(p, trcval);
3235 /* movabsq $disp_assisted, %r11 */
3236 *p++ = 0x49;
3237 *p++ = 0xBB;
3238 p = emit64(p, (Addr)disp_cp_xassisted);
3239 /* jmp *%r11 */
3240 *p++ = 0x41;
3241 *p++ = 0xFF;
3242 *p++ = 0xE3;
3244 /* Fix up the conditional jump, if there was one. */
3245 if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
3246 Int delta = p - ptmp;
3247 vassert(delta > 0 && delta < 40);
3248 *ptmp = toUChar(delta-1);
3250 goto done;
3253 case Ain_CMov64:
3254 vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
3255 *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
3256 *p++ = 0x0F;
3257 *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
3258 p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
3259 goto done;
3261 case Ain_CLoad: {
3262 vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
3264 /* Only 32- or 64-bit variants are allowed. */
3265 vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
3267 /* Use ptmp for backpatching conditional jumps. */
3268 ptmp = NULL;
3270 /* jmp fwds if !condition */
3271 *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
3272 ptmp = p; /* fill in this bit later */
3273 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3275 /* Now the load. Either a normal 64 bit load or a normal 32 bit
3276 load, which, by the default zero-extension rule, zeroes out
3277 the upper half of the destination, as required. */
3278 rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3279 *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
3280 *p++ = 0x8B;
3281 p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
3283 /* Fix up the conditional branch */
3284 Int delta = p - ptmp;
3285 vassert(delta > 0 && delta < 40);
3286 *ptmp = toUChar(delta-1);
3287 goto done;
3290 case Ain_CStore: {
3291 /* AFAICS this is identical to Ain_CLoad except that the opcode
3292 is 0x89 instead of 0x8B. */
3293 vassert(i->Ain.CStore.cond != Acc_ALWAYS);
3295 /* Only 32- or 64-bit variants are allowed. */
3296 vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
3298 /* Use ptmp for backpatching conditional jumps. */
3299 ptmp = NULL;
3301 /* jmp fwds if !condition */
3302 *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
3303 ptmp = p; /* fill in this bit later */
3304 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3306 /* Now the store. */
3307 rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
3308 *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
3309 *p++ = 0x89;
3310 p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
3312 /* Fix up the conditional branch */
3313 Int delta = p - ptmp;
3314 vassert(delta > 0 && delta < 40);
3315 *ptmp = toUChar(delta-1);
3316 goto done;
3319 case Ain_MovxLQ:
3320 /* No, _don't_ ask me why the sense of the args has to be
3321 different in the S vs Z case. I don't know. */
3322 if (i->Ain.MovxLQ.syned) {
3323 /* Need REX.W = 1 here, but rexAMode_R does that for us. */
3324 *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3325 *p++ = 0x63;
3326 p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
3327 } else {
3328 /* Produce a 32-bit reg-reg move, since the implicit
3329 zero-extend does what we want. */
3330 *p++ = clearWBit (
3331 rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
3332 *p++ = 0x89;
3333 p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
3335 goto done;
3337 case Ain_LoadEX:
3338 if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
3339 /* movzbq */
3340 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3341 *p++ = 0x0F;
3342 *p++ = 0xB6;
3343 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3344 goto done;
3346 if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
3347 /* movzwq */
3348 *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3349 *p++ = 0x0F;
3350 *p++ = 0xB7;
3351 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3352 goto done;
3354 if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
3355 /* movzlq */
3356 /* This isn't really an existing AMD64 instruction per se.
3357 Rather, we have to do a 32-bit load. Because a 32-bit
3358 write implicitly clears the upper 32 bits of the target
3359 register, we get what we want. */
3360 *p++ = clearWBit(
3361 rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
3362 *p++ = 0x8B;
3363 p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
3364 goto done;
3366 break;
3368 case Ain_Set64:
3369 /* Make the destination register be 1 or 0, depending on whether
3370 the relevant condition holds. Complication: the top 56 bits
3371 of the destination should be forced to zero, but doing 'xorq
3372 %r,%r' kills the flag(s) we are about to read. Sigh. So
 3373 start off by moving $0 into the dest. */
3374 reg = iregEnc3210(i->Ain.Set64.dst);
3375 vassert(reg < 16);
3377 /* movq $0, %dst */
3378 *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
3379 *p++ = 0xC7;
3380 *p++ = toUChar(0xC0 + (reg & 7));
3381 p = emit32(p, 0);
3383 /* setb lo8(%dst) */
 3384 /* note, 8-bit register rex trickiness. Be careful here. */
3385 *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
3386 *p++ = 0x0F;
3387 *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
3388 *p++ = toUChar(0xC0 + (reg & 7));
3389 goto done;
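/* Example (illustrative, and assuming the Acc_* values mirror the
   hardware condition-code numbering, as the "0x90 + cond" computation
   above requires): Set64 with cond = Acc_Z and dst = %rax emits
   48 C7 C0 00 00 00 00 (movq $0, %rax) followed by 40 0F 94 C0
   (setz %al, with a do-nothing 0x40 REX), leaving %rax holding 0 or 1
   without disturbing the flags that the setcc reads. */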
3391 case Ain_Bsfr64:
3392 *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3393 *p++ = 0x0F;
3394 if (i->Ain.Bsfr64.isFwds) {
3395 *p++ = 0xBC;
3396 } else {
3397 *p++ = 0xBD;
3399 p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
3400 goto done;
3402 case Ain_MFence:
3403 /* mfence */
3404 *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
3405 goto done;
3407 case Ain_ACAS:
3408 /* lock */
3409 *p++ = 0xF0;
3410 if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
3411 /* cmpxchg{b,w,l,q} %rbx,mem. Expected-value in %rax, new value
3412 in %rbx. The new-value register is hardwired to be %rbx
3413 since dealing with byte integer registers is too much hassle,
3414 so we force the register operand to %rbx (could equally be
3415 %rcx or %rdx). */
3416 rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
3417 if (i->Ain.ACAS.sz != 8)
3418 rex = clearWBit(rex);
3420 *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
3421 *p++ = 0x0F;
3422 if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
3423 p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
3424 goto done;
3426 case Ain_DACAS:
3427 /* lock */
3428 *p++ = 0xF0;
3429 /* cmpxchg{8,16}b m{64,128}. Expected-value in %rdx:%rax, new
3430 value in %rcx:%rbx. All 4 regs are hardwired in the ISA, so
3431 aren't encoded in the insn. */
3432 rex = rexAMode_M_enc(1, i->Ain.ACAS.addr );
3433 if (i->Ain.ACAS.sz != 8)
3434 rex = clearWBit(rex);
3435 *p++ = rex;
3436 *p++ = 0x0F;
3437 *p++ = 0xC7;
3438 p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
3439 goto done;
3441 case Ain_A87Free:
3442 vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
3443 for (j = 0; j < i->Ain.A87Free.nregs; j++) {
3444 p = do_ffree_st(p, 7-j);
3446 goto done;
3448 case Ain_A87PushPop:
3449 vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
3450 if (i->Ain.A87PushPop.isPush) {
3451 /* Load from memory into %st(0): flds/fldl amode */
3452 *p++ = clearWBit(
3453 rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
3454 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3455 p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
3456 } else {
3457 /* Dump %st(0) to memory: fstps/fstpl amode */
3458 *p++ = clearWBit(
3459 rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
3460 *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
3461 p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
3462 goto done;
3464 goto done;
3466 case Ain_A87FpOp:
3467 switch (i->Ain.A87FpOp.op) {
3468 case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
3469 case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
3470 case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
3471 case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
3472 case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
3473 case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
3474 case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
3475 case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
3476 case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
3477 case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break;
3478 case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break;
3479 case Afp_TAN:
3480 /* fptan pushes 1.0 on the FP stack, except when the
3481 argument is out of range. Hence we have to do the
3482 instruction, then inspect C2 to see if there is an out
3483 of range condition. If there is, we skip the fincstp
3484 that is used by the in-range case to get rid of this
3485 extra 1.0 value. */
3486 *p++ = 0xD9; *p++ = 0xF2; // fptan
3487 *p++ = 0x50; // pushq %rax
3488 *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
3489 *p++ = 0x66; *p++ = 0xA9;
3490 *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
3491 *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
3492 *p++ = 0xD9; *p++ = 0xF7; // fincstp
3493 *p++ = 0x58; // after_fincstp: popq %rax
3494 break;
3495 default:
3496 goto bad;
3498 goto done;
3500 case Ain_A87LdCW:
3501 *p++ = clearWBit(
3502 rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
3503 *p++ = 0xD9;
3504 p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
3505 goto done;
3507 case Ain_A87StSW:
3508 *p++ = clearWBit(
3509 rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
3510 *p++ = 0xDD;
3511 p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
3512 goto done;
3514 case Ain_Store:
3515 if (i->Ain.Store.sz == 2) {
 3516 /* This just goes to show the craziness of the instruction
3517 set encoding. We have to insert two prefix bytes, but be
3518 careful to avoid a conflict in what the size should be, by
3519 ensuring that REX.W = 0. */
3520 *p++ = 0x66; /* override to 16-bits */
3521 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3522 *p++ = 0x89;
3523 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3524 goto done;
3526 if (i->Ain.Store.sz == 4) {
3527 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3528 *p++ = 0x89;
3529 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3530 goto done;
3532 if (i->Ain.Store.sz == 1) {
3533 /* This is one place where it would be wrong to skip emitting
3534 a rex byte of 0x40, since the mere presence of rex changes
3535 the meaning of the byte register access. Be careful. */
3536 *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
3537 *p++ = 0x88;
3538 p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
3539 goto done;
3541 break;
3543 case Ain_LdMXCSR:
3544 *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
3545 *p++ = 0x0F;
3546 *p++ = 0xAE;
3547 p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
3548 goto done;
3550 case Ain_SseUComIS:
3551 /* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
3552 /* ucomi[sd] %srcL, %srcR */
3553 if (i->Ain.SseUComIS.sz == 8) {
3554 *p++ = 0x66;
3555 } else {
3556 goto bad;
3557 vassert(i->Ain.SseUComIS.sz == 4);
3559 *p++ = clearWBit (
3560 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
3561 vregEnc3210(i->Ain.SseUComIS.srcR) ));
3562 *p++ = 0x0F;
3563 *p++ = 0x2E;
3564 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
3565 vregEnc3210(i->Ain.SseUComIS.srcR) );
3566 /* pushfq */
3567 *p++ = 0x9C;
3568 /* popq %dst */
3569 *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
3570 *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
3571 goto done;
3573 case Ain_SseSI2SF:
 3574 /* cvtsi2s[sd] %src, %dst */
3575 rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
3576 i->Ain.SseSI2SF.src );
3577 *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
3578 *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
3579 *p++ = 0x0F;
3580 *p++ = 0x2A;
3581 p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
3582 i->Ain.SseSI2SF.src );
3583 goto done;
3585 case Ain_SseSF2SI:
 3586 /* cvts[sd]2si %src, %dst */
3587 rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
3588 vregEnc3210(i->Ain.SseSF2SI.src) );
3589 *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
3590 *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
3591 *p++ = 0x0F;
3592 *p++ = 0x2D;
3593 p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
3594 vregEnc3210(i->Ain.SseSF2SI.src) );
3595 goto done;
3597 case Ain_SseSDSS:
3598 /* cvtsd2ss/cvtss2sd %src, %dst */
3599 *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
3600 *p++ = clearWBit(
3601 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
3602 vregEnc3210(i->Ain.SseSDSS.src) ));
3603 *p++ = 0x0F;
3604 *p++ = 0x5A;
3605 p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
3606 vregEnc3210(i->Ain.SseSDSS.src) );
3607 goto done;
3609 case Ain_SseLdSt:
3610 if (i->Ain.SseLdSt.sz == 8) {
3611 *p++ = 0xF2;
3612 } else
3613 if (i->Ain.SseLdSt.sz == 4) {
3614 *p++ = 0xF3;
3615 } else
3616 if (i->Ain.SseLdSt.sz != 16) {
3617 vassert(0);
3618 }
3619 *p++ = clearWBit(
3620 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
3621 i->Ain.SseLdSt.addr));
3622 *p++ = 0x0F;
3623 *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
3624 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
3625 i->Ain.SseLdSt.addr);
3626 goto done;
3628 case Ain_SseCStore: {
3629 vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
3631 /* Use ptmp for backpatching conditional jumps. */
3632 ptmp = NULL;
3634 /* jmp fwds if !condition */
3635 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
3636 ptmp = p; /* fill in this bit later */
3637 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3639 /* Now the store. */
3640 *p++ = clearWBit(
3641 rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
3642 i->Ain.SseCStore.addr));
3643 *p++ = 0x0F;
3644 *p++ = toUChar(0x11);
3645 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
3646 i->Ain.SseCStore.addr);
3648 /* Fix up the conditional branch */
3649 Int delta = p - ptmp;
3650 vassert(delta > 0 && delta < 40);
3651 *ptmp = toUChar(delta-1);
3652 goto done;
3653 }
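/* How the backpatch above works, for both SseCStore and SseCLoad: the Jcc
rel8 opcode is 0x70+cc, and cc^1 is the negated condition, since the AMD64
condition codes come in complementary pairs differing only in bit 0
(e.g. Z=4 / NZ=5). ptmp points at the rel8 placeholder itself, so
delta = p - ptmp counts that byte plus the store/load bytes; the branch
displacement is relative to the end of the Jcc, hence delta-1. E.g. if the
guarded access encodes to 5 bytes, delta is 6 and the rel8 written is 5. */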
3655 case Ain_SseCLoad: {
3656 vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
3658 /* Use ptmp for backpatching conditional jumps. */
3659 ptmp = NULL;
3661 /* jmp fwds if !condition */
3662 *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
3663 ptmp = p; /* fill in this bit later */
3664 *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
3666 /* Now the load. */
3667 *p++ = clearWBit(
3668 rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
3669 i->Ain.SseCLoad.addr));
3670 *p++ = 0x0F;
3671 *p++ = toUChar(0x10);
3672 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
3673 i->Ain.SseCLoad.addr);
3675 /* Fix up the conditional branch */
3676 Int delta = p - ptmp;
3677 vassert(delta > 0 && delta < 40);
3678 *ptmp = toUChar(delta-1);
3679 goto done;
3680 }
3682 case Ain_SseLdzLO:
3683 vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
3684 /* movs[sd] amode, %xmm-dst */
3685 *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
3686 *p++ = clearWBit(
3687 rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
3688 i->Ain.SseLdzLO.addr));
3689 *p++ = 0x0F;
3690 *p++ = 0x10;
3691 p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
3692 i->Ain.SseLdzLO.addr);
3693 goto done;
3695 case Ain_Sse32Fx4: {
3696 UInt srcRegNo = vregEnc3210(i->Ain.Sse32Fx4.src);
3697 UInt dstRegNo = vregEnc3210(i->Ain.Sse32Fx4.dst);
3698 // VEX encoded cases
3699 switch (i->Ain.Sse32Fx4.op) {
3700 case Asse_F16toF32: { // vcvtph2ps %xmmS, %xmmD
3701 UInt s = srcRegNo;
3702 UInt d = dstRegNo;
3703 // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
3704 // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
3705 UInt byte2 = ((((~d)>>3)&1)<<7) | (1<<6)
3706 | ((((~s)>>3)&1)<<5) | (1<<1);
3707 UInt byte5 = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
3708 *p++ = 0xC4;
3709 *p++ = byte2;
3710 *p++ = 0x79;
3711 *p++ = 0x13;
3712 *p++ = byte5;
3713 goto done;
3714 }
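/* Worked example of the byte layout above, for hypothetical s = 2 (%xmm2)
and d = 9 (%xmm9): byte2 = 0x62, byte5 = 0xCA, so the emitted sequence is
C4 62 79 13 CA, i.e. vcvtph2ps %xmm2, %xmm9. */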
3715 case Asse_F32toF16: { // vcvtps2ph $4, %xmmS, %xmmD
3716 UInt s = srcRegNo;
3717 UInt d = dstRegNo;
3718 // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
3719 // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
3720 // : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
3721 UInt byte2 = ((((~s)>>3)&1)<<7) | (1<<6)
3722 | ((((~d)>>3)&1)<<5) | (1<<1) | (1 << 0);
3723 UInt byte5 = (1<<7) | (1<<6) | ((s&7) << 3) | ((d&7) << 0);
3724 *p++ = 0xC4;
3725 *p++ = byte2;
3726 *p++ = 0x79;
3727 *p++ = 0x1D;
3728 *p++ = byte5;
3729 *p++ = 0x04;
3730 goto done;
3731 }
3732 default: break;
3733 }
3734 // After this point, REX encoded cases only
3735 xtra = 0;
3736 switch (i->Ain.Sse32Fx4.op) {
3737 case Asse_F2I: *p++ = 0x66; break;
3738 default: break;
3739 }
3740 *p++ = clearWBit(rexAMode_R_enc_enc(dstRegNo, srcRegNo));
3741 *p++ = 0x0F;
3742 switch (i->Ain.Sse32Fx4.op) {
3743 case Asse_ADDF: *p++ = 0x58; break;
3744 case Asse_DIVF: *p++ = 0x5E; break;
3745 case Asse_MAXF: *p++ = 0x5F; break;
3746 case Asse_MINF: *p++ = 0x5D; break;
3747 case Asse_MULF: *p++ = 0x59; break;
3748 case Asse_RCPF: *p++ = 0x53; break;
3749 case Asse_RSQRTF: *p++ = 0x52; break;
3750 case Asse_SQRTF: *p++ = 0x51; break;
3751 case Asse_I2F: *p++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
3752 case Asse_F2I: *p++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
3753 case Asse_SUBF: *p++ = 0x5C; break;
3754 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3755 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3756 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3757 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3758 default: goto bad;
3759 }
3760 p = doAMode_R_enc_enc(p, dstRegNo, srcRegNo);
3761 if (xtra & 0x100)
3762 *p++ = toUChar(xtra & 0xFF);
3763 goto done;
3764 }
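/* Note on the xtra scheme used by the Sse32Fx4/Sse64Fx2/Sse32FLo/Sse64FLo
cases: 0F C2 is the SSE compare opcode, whose trailing imm8 selects the
predicate (0=EQ, 1=LT, 2=LE, 3=UNORD). Setting bit 8 of xtra records that
an immediate is needed, and its low byte is appended after the ModRM byte;
e.g. Asse_CMPLTF yields 0F C2 /r 01, i.e. cmplt{ps,pd,ss,sd} depending on
the prefix. */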
3766 case Ain_Sse64Fx2:
3767 xtra = 0;
3768 *p++ = 0x66;
3769 *p++ = clearWBit(
3770 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
3771 vregEnc3210(i->Ain.Sse64Fx2.src) ));
3772 *p++ = 0x0F;
3773 switch (i->Ain.Sse64Fx2.op) {
3774 case Asse_ADDF: *p++ = 0x58; break;
3775 case Asse_DIVF: *p++ = 0x5E; break;
3776 case Asse_MAXF: *p++ = 0x5F; break;
3777 case Asse_MINF: *p++ = 0x5D; break;
3778 case Asse_MULF: *p++ = 0x59; break;
3779 case Asse_SQRTF: *p++ = 0x51; break;
3780 case Asse_SUBF: *p++ = 0x5C; break;
3781 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3782 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3783 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3784 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3785 default: goto bad;
3786 }
3787 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
3788 vregEnc3210(i->Ain.Sse64Fx2.src) );
3789 if (xtra & 0x100)
3790 *p++ = toUChar(xtra & 0xFF);
3791 goto done;
3793 case Ain_Sse32FLo:
3794 xtra = 0;
3795 *p++ = 0xF3;
3796 *p++ = clearWBit(
3797 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
3798 vregEnc3210(i->Ain.Sse32FLo.src) ));
3799 *p++ = 0x0F;
3800 switch (i->Ain.Sse32FLo.op) {
3801 case Asse_ADDF: *p++ = 0x58; break;
3802 case Asse_DIVF: *p++ = 0x5E; break;
3803 case Asse_MAXF: *p++ = 0x5F; break;
3804 case Asse_MINF: *p++ = 0x5D; break;
3805 case Asse_MULF: *p++ = 0x59; break;
3806 case Asse_RCPF: *p++ = 0x53; break;
3807 case Asse_RSQRTF: *p++ = 0x52; break;
3808 case Asse_SQRTF: *p++ = 0x51; break;
3809 case Asse_SUBF: *p++ = 0x5C; break;
3810 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3811 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3812 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3813 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3814 default: goto bad;
3815 }
3816 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
3817 vregEnc3210(i->Ain.Sse32FLo.src) );
3818 if (xtra & 0x100)
3819 *p++ = toUChar(xtra & 0xFF);
3820 goto done;
3822 case Ain_Sse64FLo:
3823 xtra = 0;
3824 *p++ = 0xF2;
3825 *p++ = clearWBit(
3826 rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
3827 vregEnc3210(i->Ain.Sse64FLo.src) ));
3828 *p++ = 0x0F;
3829 switch (i->Ain.Sse64FLo.op) {
3830 case Asse_ADDF: *p++ = 0x58; break;
3831 case Asse_DIVF: *p++ = 0x5E; break;
3832 case Asse_MAXF: *p++ = 0x5F; break;
3833 case Asse_MINF: *p++ = 0x5D; break;
3834 case Asse_MULF: *p++ = 0x59; break;
3835 case Asse_SQRTF: *p++ = 0x51; break;
3836 case Asse_SUBF: *p++ = 0x5C; break;
3837 case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
3838 case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
3839 case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
3840 case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
3841 default: goto bad;
3842 }
3843 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
3844 vregEnc3210(i->Ain.Sse64FLo.src) );
3845 if (xtra & 0x100)
3846 *p++ = toUChar(xtra & 0xFF);
3847 goto done;
3849 case Ain_SseReRg:
3850 # define XX(_n) *p++ = (_n)
3852 rex = clearWBit(
3853 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
3854 vregEnc3210(i->Ain.SseReRg.src) ));
3856 switch (i->Ain.SseReRg.op) {
3857 case Asse_MOV: /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
3858 case Asse_OR: XX(rex); XX(0x0F); XX(0x56); break;
3859 case Asse_XOR: XX(rex); XX(0x0F); XX(0x57); break;
3860 case Asse_AND: XX(rex); XX(0x0F); XX(0x54); break;
3861 case Asse_ANDN: XX(rex); XX(0x0F); XX(0x55); break;
3862 case Asse_PACKSSD: XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
3863 case Asse_PACKSSW: XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
3864 case Asse_PACKUSW: XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
3865 case Asse_ADD8: XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
3866 case Asse_ADD16: XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
3867 case Asse_ADD32: XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
3868 case Asse_ADD64: XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
3869 case Asse_QADD8S: XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
3870 case Asse_QADD16S: XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
3871 case Asse_QADD8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
3872 case Asse_QADD16U: XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
3873 case Asse_AVG8U: XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
3874 case Asse_AVG16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
3875 case Asse_CMPEQ8: XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
3876 case Asse_CMPEQ16: XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
3877 case Asse_CMPEQ32: XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
3878 case Asse_CMPGT8S: XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
3879 case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
3880 case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
3881 case Asse_MAX16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
3882 case Asse_MAX8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
3883 case Asse_MIN16S: XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
3884 case Asse_MIN8U: XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
3885 case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
3886 case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
3887 case Asse_MUL16: XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
3888 case Asse_SHL16: XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
3889 case Asse_SHL32: XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
3890 case Asse_SHL64: XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
3891 case Asse_SAR16: XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
3892 case Asse_SAR32: XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
3893 case Asse_SHR16: XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
3894 case Asse_SHR32: XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
3895 case Asse_SHR64: XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
3896 case Asse_SUB8: XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
3897 case Asse_SUB16: XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
3898 case Asse_SUB32: XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
3899 case Asse_SUB64: XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
3900 case Asse_QSUB8S: XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
3901 case Asse_QSUB16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
3902 case Asse_QSUB8U: XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
3903 case Asse_QSUB16U: XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
3904 case Asse_UNPCKHB: XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
3905 case Asse_UNPCKHW: XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
3906 case Asse_UNPCKHD: XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
3907 case Asse_UNPCKHQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
3908 case Asse_UNPCKLB: XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
3909 case Asse_UNPCKLW: XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
3910 case Asse_UNPCKLD: XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
3911 case Asse_UNPCKLQ: XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
3912 case Asse_PSHUFB: XX(0x66); XX(rex);
3913 XX(0x0F); XX(0x38); XX(0x00); break;
3914 case Asse_PMADDUBSW:XX(0x66); XX(rex);
3915 XX(0x0F); XX(0x38); XX(0x04); break;
3916 default: goto bad;
3917 }
3918 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
3919 vregEnc3210(i->Ain.SseReRg.src) );
3920 # undef XX
3921 goto done;
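/* Operand order for the SseReRg table above: the destination goes in the
ModRM reg field and the source in r/m, and the (possibly redundant) REX
byte is always emitted. E.g. Asse_ADD32 with hypothetical dst = %xmm1,
src = %xmm2 produces 66 40 0F FE CA, i.e. paddd %xmm2, %xmm1. */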
3923 case Ain_SseCMov:
3924 /* jmp fwds if !condition */
3925 *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
3926 *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
3927 ptmp = p;
3929 /* movaps %src, %dst */
3930 *p++ = clearWBit(
3931 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
3932 vregEnc3210(i->Ain.SseCMov.src) ));
3933 *p++ = 0x0F;
3934 *p++ = 0x28;
3935 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
3936 vregEnc3210(i->Ain.SseCMov.src) );
3938 /* Fill in the jump offset. */
3939 *(ptmp-1) = toUChar(p - ptmp);
3940 goto done;
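/* In SseCMov, unlike SseCStore/SseCLoad above, ptmp is set just past the
rel8 placeholder, so the fixup is simply p - ptmp: that is the length of
the movaps, and the displacement is taken relative to the end of the Jcc,
which is exactly ptmp. */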
3942 case Ain_SseShuf:
3943 *p++ = 0x66;
3944 *p++ = clearWBit(
3945 rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
3946 vregEnc3210(i->Ain.SseShuf.src) ));
3947 *p++ = 0x0F;
3948 *p++ = 0x70;
3949 p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
3950 vregEnc3210(i->Ain.SseShuf.src) );
3951 *p++ = (UChar)(i->Ain.SseShuf.order);
3952 goto done;
3954 case Ain_SseShiftN: {
3955 UInt limit = 0;
3956 UInt shiftImm = i->Ain.SseShiftN.shiftBits;
3957 switch (i->Ain.SseShiftN.op) {
3958 case Asse_SHL16: limit = 15; opc = 0x71; subopc_imm = 6; break;
3959 case Asse_SHL32: limit = 31; opc = 0x72; subopc_imm = 6; break;
3960 case Asse_SHL64: limit = 63; opc = 0x73; subopc_imm = 6; break;
3961 case Asse_SAR16: limit = 15; opc = 0x71; subopc_imm = 4; break;
3962 case Asse_SAR32: limit = 31; opc = 0x72; subopc_imm = 4; break;
3963 case Asse_SHR16: limit = 15; opc = 0x71; subopc_imm = 2; break;
3964 case Asse_SHR32: limit = 31; opc = 0x72; subopc_imm = 2; break;
3965 case Asse_SHR64: limit = 63; opc = 0x73; subopc_imm = 2; break;
3966 case Asse_SHL128:
3967 if ((shiftImm & 7) != 0) goto bad;
3968 shiftImm >>= 3;
3969 limit = 15; opc = 0x73; subopc_imm = 7;
3970 break;
3971 case Asse_SHR128:
3972 if ((shiftImm & 7) != 0) goto bad;
3973 shiftImm >>= 3;
3974 limit = 15; opc = 0x73; subopc_imm = 3;
3975 break;
3976 default:
3977 // This should never happen .. SSE2 only offers the above 10 insns
3978 // for the "shift with immediate" case
3979 goto bad;
3980 }
3981 vassert(limit > 0 && opc > 0 && subopc_imm > 0);
3982 if (shiftImm > limit) goto bad;
3983 *p++ = 0x66;
3984 *p++ = clearWBit(
3985 rexAMode_R_enc_enc( subopc_imm,
3986 vregEnc3210(i->Ain.SseShiftN.dst) ));
3987 *p++ = 0x0F;
3988 *p++ = opc;
3989 p = doAMode_R_enc_enc(p, subopc_imm, vregEnc3210(i->Ain.SseShiftN.dst));
3990 *p++ = shiftImm;
3991 goto done;
3992 }
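/* Example for the whole-register shifts above: the IR-level shift amount
is in bits, while pslldq/psrldq take bytes, hence the >>= 3. A hypothetical
Asse_SHL128 with shiftBits = 32 therefore emits 66 <rex> 0F 73 /7 with
immediate 4, i.e. pslldq $4, %xmm-dst. */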
3994 case Ain_SseMOVQ: {
3995 Bool toXMM = i->Ain.SseMOVQ.toXMM;
3996 HReg gpr = i->Ain.SseMOVQ.gpr;
3997 HReg xmm = i->Ain.SseMOVQ.xmm;
3998 *p++ = 0x66;
3999 *p++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm), iregEnc3210(gpr)) );
4000 *p++ = 0x0F;
4001 *p++ = toXMM ? 0x6E : 0x7E;
4002 p = doAMode_R_enc_enc( p, vregEnc3210(xmm), iregEnc3210(gpr) );
4003 goto done;
4004 }
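/* Sketch of the SseMOVQ encodings above, with hypothetical operands: the
xmm register sits in the ModRM reg field and the gpr in r/m, and setWBit
forces REX.W so the move is 64 bits wide. toXMM with gpr = %rcx,
xmm = %xmm0 gives 66 48 0F 6E C1 (movq %rcx, %xmm0); the other direction
swaps the opcode to 0F 7E. */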
4006 //uu case Ain_AvxLdSt: {
4007 //uu UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
4008 //uu i->Ain.AvxLdSt.addr );
4009 //uu p = emitVexPrefix(p, vex);
4010 //uu *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
4011 //uu p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
4012 //uu goto done;
4013 //uu }
4015 case Ain_EvCheck: {
4016 /* We generate:
4017 (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
4018 (2 bytes) jns nofail expected taken
4019 (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
4020 nofail:
4021 */
4022 /* This is heavily asserted re instruction lengths. It needs to
4023 be. If we get given unexpected forms of .amCounter or
4024 .amFailAddr -- basically, anything that's not of the form
4025 uimm7(%rbp) -- they are likely to fail. */
4026 /* Note also that after the decl we must be very careful not to
4027 read the carry flag, else we get a partial flags stall.
4028 js/jns avoids that, though. */
4029 UChar* p0 = p;
4030 /* --- decl 8(%rbp) --- */
4031 /* Need to compute the REX byte for the decl in order to prove
4032 that we don't need it, since this is a 32-bit dec and all
4033 registers involved in the amode are < r8. "1" because
4034 there's no register in this encoding; instead the register
4035 field is used as a sub opcode. The encoding for "decl r/m32"
4036 is FF /1, hence the "1". */
4037 rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
4038 if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
4039 *p++ = 0xFF;
4040 p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
4041 vassert(p - p0 == 3);
4042 /* --- jns nofail --- */
4043 *p++ = 0x79;
4044 *p++ = 0x03; /* need to check this 0x03 after the next insn */
4045 vassert(p - p0 == 5);
4046 /* --- jmp* 0(%rbp) --- */
4047 /* Once again, verify we don't need REX. The encoding is FF /4.
4048 We don't need REX.W since by default FF /4 in 64-bit mode
4049 implies a 64 bit load. */
4050 rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
4051 if (rex != 0x40) goto bad;
4052 *p++ = 0xFF;
4053 p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
4054 vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
4055 /* And crosscheck .. */
4056 vassert(evCheckSzB_AMD64() == 8);
4057 goto done;
4058 }
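/* Concrete byte sequence for the expected amode forms, i.e.
amCounter = 8(%rbp) and amFailAddr = 0(%rbp):
FF 4D 08    decl 8(%rbp)
79 03       jns  nofail
FF 65 00    jmp* 0(%rbp)
which is the 8 bytes that evCheckSzB_AMD64 promises. */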
4060 case Ain_ProfInc: {
4061 /* We generate movabsq $0, %r11
4062 incq (%r11)
4063 in the expectation that a later call to LibVEX_patchProfCtr
4064 will be used to fill in the immediate field once the right
4065 value is known.
4066 49 BB 00 00 00 00 00 00 00 00
4067 49 FF 03
4068 */
4069 *p++ = 0x49; *p++ = 0xBB;
4070 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4071 *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
4072 *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
4073 /* Tell the caller .. */
4074 vassert(!(*is_profInc));
4075 *is_profInc = True;
4076 goto done;
4077 }
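/* The 13-byte sequence emitted here is matched byte-for-byte by
patchProfInc_AMD64 below, which overwrites bytes 2..9 (the movabsq
immediate) with the real counter address, least significant byte first,
as movabsq's imm64 is little-endian. */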
4079 default:
4080 goto bad;
4081 }
4083 bad:
4084 ppAMD64Instr(i, mode64);
4085 vpanic("emit_AMD64Instr");
4086 /*NOTREACHED*/
4088 done:
4089 vassert(p - &buf[0] <= 64);
4090 return p - &buf[0];
4091 }
4094 /* How big is an event check? See case for Ain_EvCheck in
4095 emit_AMD64Instr just above. That crosschecks what this returns, so
4096 we can tell if we're inconsistent. */
4097 Int evCheckSzB_AMD64 (void)
4098 {
4099 return 8;
4100 }
4103 /* NB: what goes on here has to be very closely coordinated with the
4104 emitInstr case for XDirect, above. */
4105 VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
4106 void* place_to_chain,
4107 const void* disp_cp_chain_me_EXPECTED,
4108 const void* place_to_jump_to )
4109 {
4110 vassert(endness_host == VexEndnessLE);
4112 /* What we're expecting to see is:
4113 movabsq $disp_cp_chain_me_EXPECTED, %r11
4114 call *%r11
4116 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
4117 41 FF D3
4118 */
4119 UChar* p = (UChar*)place_to_chain;
4120 vassert(p[0] == 0x49);
4121 vassert(p[1] == 0xBB);
4122 vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
4123 vassert(p[10] == 0x41);
4124 vassert(p[11] == 0xFF);
4125 vassert(p[12] == 0xD3);
4126 /* And what we want to change it to is either:
4127 (general case):
4128 movabsq $place_to_jump_to, %r11
4129 jmpq *%r11
4131 49 BB <8 bytes value == place_to_jump_to>
4132 41 FF E3
4133 So it's the same length (convenient, huh) and we don't
4134 need to change all the bits.
4135 ---OR---
4136 in the case where the displacement falls within 32 bits
4137 jmpq disp32 where disp32 is relative to the next insn
4138 ud2; ud2; ud2; ud2
4140 E9 <4 bytes == disp32>
4141 0F 0B 0F 0B 0F 0B 0F 0B
4143 In both cases the replacement has the same length as the original.
4144 To remain sane & verifiable,
4145 (1) limit the displacement for the short form to
4146 (say) +/- one billion, so as to avoid wraparound
4147 off-by-ones
4148 (2) even if the short form is applicable, once every (say)
4149 1024 times use the long form anyway, so as to maintain
4150 verifiability
4151 */
4152 /* This is the delta we need to put into a JMP d32 insn. It's
4153 relative to the start of the next insn, hence the -5. */
4154 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 5;
4155 Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
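/* Worked example with hypothetical addresses: if place_to_chain is 0x1000
and place_to_jump_to is 0x2000, then delta = 0x2000 - 0x1000 - 5 = 0xFFB,
well within the +/- 1e9 window, so the short form writes E9 FB 0F 00 00
followed by four ud2s (0F 0B), keeping the overall length at 13 bytes. */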
4157 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4158 if (shortOK) {
4159 shortCTR++; // thread safety bleh
4160 if (0 == (shortCTR & 0x3FF)) {
4161 shortOK = False;
4162 if (0)
4163 vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
4164 "using long jmp\n", shortCTR);
4165 }
4166 }
4168 /* And make the modifications. */
4169 if (shortOK) {
4170 p[0] = 0xE9;
4171 write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
4172 p[5] = 0x0F; p[6] = 0x0B;
4173 p[7] = 0x0F; p[8] = 0x0B;
4174 p[9] = 0x0F; p[10] = 0x0B;
4175 p[11] = 0x0F; p[12] = 0x0B;
4176 /* sanity check on the delta -- top 32 are all 0 or all 1 */
4177 delta >>= 32;
4178 vassert(delta == 0LL || delta == -1LL);
4179 } else {
4180 /* Minimal modifications from the starting sequence. */
4181 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
4182 p[12] = 0xE3;
4183 }
4184 VexInvalRange vir = { (HWord)place_to_chain, 13 };
4185 return vir;
4186 }
4189 /* NB: what goes on here has to be very closely coordinated with the
4190 emitInstr case for XDirect, above. */
4191 VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
4192 void* place_to_unchain,
4193 const void* place_to_jump_to_EXPECTED,
4194 const void* disp_cp_chain_me )
4195 {
4196 vassert(endness_host == VexEndnessLE);
4198 /* What we're expecting to see is either:
4199 (general case)
4200 movabsq $place_to_jump_to_EXPECTED, %r11
4201 jmpq *%r11
4203 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
4204 41 FF E3
4205 ---OR---
4206 in the case where the displacement falls within 32 bits
4207 jmpq d32
4208 ud2; ud2; ud2; ud2
4210 E9 <4 bytes == disp32>
4211 0F 0B 0F 0B 0F 0B 0F 0B
4212 */
4213 UChar* p = (UChar*)place_to_unchain;
4214 Bool valid = False;
4215 if (p[0] == 0x49 && p[1] == 0xBB
4216 && read_misaligned_ULong_LE(&p[2])
4217 == (ULong)(Addr)place_to_jump_to_EXPECTED
4218 && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
4219 /* it's the long form */
4220 valid = True;
4221 }
4222 else
4223 if (p[0] == 0xE9
4224 && p[5] == 0x0F && p[6] == 0x0B
4225 && p[7] == 0x0F && p[8] == 0x0B
4226 && p[9] == 0x0F && p[10] == 0x0B
4227 && p[11] == 0x0F && p[12] == 0x0B) {
4228 /* It's the short form. Check the offset is right. */
4229 Int s32 = (Int)read_misaligned_UInt_LE(&p[1]);
4230 Long s64 = (Long)s32;
4231 if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
4232 valid = True;
4233 if (0)
4234 vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
4235 }
4236 }
4237 vassert(valid);
4238 /* And what we want to change it to is:
4239 movabsq $disp_cp_chain_me, %r11
4240 call *%r11
4242 49 BB <8 bytes value == disp_cp_chain_me>
4243 41 FF D3
4244 So it's the same length (convenient, huh).
4245 */
4246 p[0] = 0x49;
4247 p[1] = 0xBB;
4248 write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
4249 p[10] = 0x41;
4250 p[11] = 0xFF;
4251 p[12] = 0xD3;
4252 VexInvalRange vir = { (HWord)place_to_unchain, 13 };
4253 return vir;
4254 }
4257 /* Patch the counter address into a profile inc point, as previously
4258 created by the Ain_ProfInc case for emit_AMD64Instr. */
4259 VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
4260 void* place_to_patch,
4261 const ULong* location_of_counter )
4262 {
4263 vassert(endness_host == VexEndnessLE);
4264 vassert(sizeof(ULong*) == 8);
4265 UChar* p = (UChar*)place_to_patch;
4266 vassert(p[0] == 0x49);
4267 vassert(p[1] == 0xBB);
4268 vassert(p[2] == 0x00);
4269 vassert(p[3] == 0x00);
4270 vassert(p[4] == 0x00);
4271 vassert(p[5] == 0x00);
4272 vassert(p[6] == 0x00);
4273 vassert(p[7] == 0x00);
4274 vassert(p[8] == 0x00);
4275 vassert(p[9] == 0x00);
4276 vassert(p[10] == 0x49);
4277 vassert(p[11] == 0xFF);
4278 vassert(p[12] == 0x03);
4279 ULong imm64 = (ULong)(Addr)location_of_counter;
4280 p[2] = imm64 & 0xFF; imm64 >>= 8;
4281 p[3] = imm64 & 0xFF; imm64 >>= 8;
4282 p[4] = imm64 & 0xFF; imm64 >>= 8;
4283 p[5] = imm64 & 0xFF; imm64 >>= 8;
4284 p[6] = imm64 & 0xFF; imm64 >>= 8;
4285 p[7] = imm64 & 0xFF; imm64 >>= 8;
4286 p[8] = imm64 & 0xFF; imm64 >>= 8;
4287 p[9] = imm64 & 0xFF; imm64 >>= 8;
4288 VexInvalRange vir = { (HWord)place_to_patch, 13 };
4289 return vir;
4290 }
4293 /*---------------------------------------------------------------*/
4294 /*--- end host_amd64_defs.c ---*/
4295 /*---------------------------------------------------------------*/