/*---------------------------------------------------------------*/
/*--- begin                                 host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_amd64_defs.h"
/* --------- Registers. --------- */

const RRegUniverse* getRRegUniverse_AMD64 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_AMD64;
   static Bool         rRegUniverse_AMD64_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_AMD64;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_AMD64_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt64] = ru->size;
   ru->regs[ru->size++] = hregAMD64_R12();
   ru->regs[ru->size++] = hregAMD64_R13();
   ru->regs[ru->size++] = hregAMD64_R14();
   ru->regs[ru->size++] = hregAMD64_R15();
   ru->regs[ru->size++] = hregAMD64_RBX();
   ru->regs[ru->size++] = hregAMD64_RSI();
   ru->regs[ru->size++] = hregAMD64_RDI();
   ru->regs[ru->size++] = hregAMD64_R8();
   ru->regs[ru->size++] = hregAMD64_R9();
   ru->regs[ru->size++] = hregAMD64_R10();
   ru->allocable_end[HRcInt64] = ru->size - 1;

   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregAMD64_XMM3();
   ru->regs[ru->size++] = hregAMD64_XMM4();
   ru->regs[ru->size++] = hregAMD64_XMM5();
   ru->regs[ru->size++] = hregAMD64_XMM6();
   ru->regs[ru->size++] = hregAMD64_XMM7();
   ru->regs[ru->size++] = hregAMD64_XMM8();
   ru->regs[ru->size++] = hregAMD64_XMM9();
   ru->regs[ru->size++] = hregAMD64_XMM10();
   ru->regs[ru->size++] = hregAMD64_XMM11();
   ru->regs[ru->size++] = hregAMD64_XMM12();
   ru->allocable_end[HRcVec128] = ru->size - 1;
   ru->allocable = ru->size;

   /* And other regs, not available to the allocator. */
   ru->regs[ru->size++] = hregAMD64_RAX();
   ru->regs[ru->size++] = hregAMD64_RCX();
   ru->regs[ru->size++] = hregAMD64_RDX();
   ru->regs[ru->size++] = hregAMD64_RSP();
   ru->regs[ru->size++] = hregAMD64_RBP();
   ru->regs[ru->size++] = hregAMD64_R11();
   ru->regs[ru->size++] = hregAMD64_XMM0();
   ru->regs[ru->size++] = hregAMD64_XMM1();

   rRegUniverse_AMD64_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
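
/* Illustrative sketch, not part of the original file: one way a caller
   might walk the allocatable 64-bit integer registers of the universe
   built above.  Uses only fields and helpers already present in this
   file; kept under "#if 0" so it is not compiled. */
#if 0
static void showAllocableInt64Regs ( void )
{
   UInt k;
   const RRegUniverse* ru = getRRegUniverse_AMD64();
   for (k = ru->allocable_start[HRcInt64];
        k <= ru->allocable_end[HRcInt64]; k++) {
      ppHRegAMD64(ru->regs[k]);
      vex_printf("\n");
   }
}
#endif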

UInt ppHRegAMD64 ( HReg reg )
{
   Int r;
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%s", ireg64_names[r]);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%%xmm%d", r);
      default:
         vpanic("ppHRegAMD64");
   }
}

static UInt ppHRegAMD64_lo32 ( HReg reg )
{
   Int r;
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx",  "%ebx",  "%esp",  "%ebp",  "%esi",  "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      UInt written = ppHReg(reg);
      written += vex_printf("d");
      return written;
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 16);
         return vex_printf("%s", ireg32_names[r]);
      default:
         vpanic("ppHRegAMD64_lo32: invalid regclass");
   }
}

/* --------- Condition codes, Intel encoding. --------- */

const HChar* showAMD64CondCode ( AMD64CondCode cond )
{
   switch (cond) {
      case Acc_O:      return "o";
      case Acc_NO:     return "no";
      case Acc_B:      return "b";
      case Acc_NB:     return "nb";
      case Acc_Z:      return "z";
      case Acc_NZ:     return "nz";
      case Acc_BE:     return "be";
      case Acc_NBE:    return "nbe";
      case Acc_S:      return "s";
      case Acc_NS:     return "ns";
      case Acc_P:      return "p";
      case Acc_NP:     return "np";
      case Acc_L:      return "l";
      case Acc_NL:     return "nl";
      case Acc_LE:     return "le";
      case Acc_NLE:    return "nle";
      case Acc_ALWAYS: return "ALWAYS";
      default: vpanic("ppAMD64CondCode");
   }
}

/* --------- AMD64AMode: memory address expressions. --------- */

AMD64AMode* AMD64AMode_IR ( UInt imm32, HReg reg ) {
   AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
   am->tag        = Aam_IR;
   am->Aam.IR.imm = imm32;
   am->Aam.IR.reg = reg;
   return am;
}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
   AMD64AMode* am = LibVEX_Alloc_inline(sizeof(AMD64AMode));
   am->tag            = Aam_IRRS;
   am->Aam.IRRS.imm   = imm32;
   am->Aam.IRRS.base  = base;
   am->Aam.IRRS.index = indEx;
   am->Aam.IRRS.shift = shift;
   vassert(shift >= 0 && shift <= 3);
   return am;
}

void ppAMD64AMode ( AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         if (am->Aam.IR.imm == 0)
            vex_printf("(");
         else
            vex_printf("0x%x(", am->Aam.IR.imm);
         ppHRegAMD64(am->Aam.IR.reg);
         vex_printf(")");
         return;
      case Aam_IRRS:
         vex_printf("0x%x(", am->Aam.IRRS.imm);
         ppHRegAMD64(am->Aam.IRRS.base);
         vex_printf(",");
         ppHRegAMD64(am->Aam.IRRS.index);
         vex_printf(",%d)", 1 << am->Aam.IRRS.shift);
         return;
      default:
         vpanic("ppAMD64AMode");
   }
}

static void addRegUsage_AMD64AMode ( HRegUsage* u, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         addHRegUse(u, HRmRead, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         addHRegUse(u, HRmRead, am->Aam.IRRS.base);
         addHRegUse(u, HRmRead, am->Aam.IRRS.index);
         return;
      default:
         vpanic("addRegUsage_AMD64AMode");
   }
}

static void mapRegs_AMD64AMode ( HRegRemap* m, AMD64AMode* am ) {
   switch (am->tag) {
      case Aam_IR:
         am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
         return;
      case Aam_IRRS:
         am->Aam.IRRS.base  = lookupHRegRemap(m, am->Aam.IRRS.base);
         am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
         return;
      default:
         vpanic("mapRegs_AMD64AMode");
   }
}
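
/* Illustrative sketch, not part of the original file: constructing and
   printing the two amode forms defined above -- 16(%rbp) and
   8(%rdi,%r8,4) respectively.  Kept under "#if 0" so it is not compiled. */
#if 0
static void exampleAModes ( void )
{
   AMD64AMode* am1 = AMD64AMode_IR(16, hregAMD64_RBP());
   AMD64AMode* am2 = AMD64AMode_IRRS(8, hregAMD64_RDI(), hregAMD64_R8(), 2);
   ppAMD64AMode(am1); vex_printf("\n");   /* prints 0x10(%rbp)       */
   ppAMD64AMode(am2); vex_printf("\n");   /* prints 0x8(%rdi,%r8,4)  */
}
#endif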

/* --------- Operand, which can be reg, immediate or memory. --------- */

AMD64RMI* AMD64RMI_Imm ( UInt imm32 ) {
   AMD64RMI* op       = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag            = Armi_Imm;
   op->Armi.Imm.imm32 = imm32;
   return op;
}
AMD64RMI* AMD64RMI_Reg ( HReg reg ) {
   AMD64RMI* op     = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag          = Armi_Reg;
   op->Armi.Reg.reg = reg;
   return op;
}
AMD64RMI* AMD64RMI_Mem ( AMD64AMode* am ) {
   AMD64RMI* op    = LibVEX_Alloc_inline(sizeof(AMD64RMI));
   op->tag         = Armi_Mem;
   op->Armi.Mem.am = am;
   return op;
}

static void ppAMD64RMI_wrk ( AMD64RMI* op, Bool lo32 ) {
   switch (op->tag) {
      case Armi_Imm:
         vex_printf("$0x%x", op->Armi.Imm.imm32);
         return;
      case Armi_Reg:
         if (lo32)
            ppHRegAMD64_lo32(op->Armi.Reg.reg);
         else
            ppHRegAMD64(op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         ppAMD64AMode(op->Armi.Mem.am);
         return;
      default:
         vpanic("ppAMD64RMI");
   }
}
void ppAMD64RMI ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, False/*!lo32*/);
}
void ppAMD64RMI_lo32 ( AMD64RMI* op ) {
   ppAMD64RMI_wrk(op, True/*lo32*/);
}

/* An AMD64RMI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RMI ( HRegUsage* u, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         addHRegUse(u, HRmRead, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         addRegUsage_AMD64AMode(u, op->Armi.Mem.am);
         return;
      default:
         vpanic("addRegUsage_AMD64RMI");
   }
}

static void mapRegs_AMD64RMI ( HRegRemap* m, AMD64RMI* op ) {
   switch (op->tag) {
      case Armi_Imm:
         return;
      case Armi_Reg:
         op->Armi.Reg.reg = lookupHRegRemap(m, op->Armi.Reg.reg);
         return;
      case Armi_Mem:
         mapRegs_AMD64AMode(m, op->Armi.Mem.am);
         return;
      default:
         vpanic("mapRegs_AMD64RMI");
   }
}
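
/* Illustrative sketch, not part of the original file: the three AMD64RMI
   operand forms, printed with the helpers above.  The HReg and amode are
   assumed to come from the caller.  Kept under "#if 0". */
#if 0
static void exampleRMIs ( HReg someReg, AMD64AMode* someAM )
{
   ppAMD64RMI(AMD64RMI_Imm(0x2A));      /* $0x2a            */
   vex_printf("\n");
   ppAMD64RMI(AMD64RMI_Reg(someReg));   /* e.g. %r12        */
   vex_printf("\n");
   ppAMD64RMI(AMD64RMI_Mem(someAM));    /* e.g. 0x10(%rbp)  */
   vex_printf("\n");
}
#endif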

/* --------- Operand, which can be reg or immediate only. --------- */

AMD64RI* AMD64RI_Imm ( UInt imm32 ) {
   AMD64RI* op       = LibVEX_Alloc_inline(sizeof(AMD64RI));
   op->tag           = Ari_Imm;
   op->Ari.Imm.imm32 = imm32;
   return op;
}
AMD64RI* AMD64RI_Reg ( HReg reg ) {
   AMD64RI* op     = LibVEX_Alloc_inline(sizeof(AMD64RI));
   op->tag         = Ari_Reg;
   op->Ari.Reg.reg = reg;
   return op;
}

void ppAMD64RI ( AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         vex_printf("$0x%x", op->Ari.Imm.imm32);
         return;
      case Ari_Reg:
         ppHRegAMD64(op->Ari.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RI");
   }
}

/* An AMD64RI can only be used in a "read" context (what would it mean
   to write or modify a literal?) and so we enumerate its registers
   accordingly. */
static void addRegUsage_AMD64RI ( HRegUsage* u, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         addHRegUse(u, HRmRead, op->Ari.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RI");
   }
}

static void mapRegs_AMD64RI ( HRegRemap* m, AMD64RI* op ) {
   switch (op->tag) {
      case Ari_Imm:
         return;
      case Ari_Reg:
         op->Ari.Reg.reg = lookupHRegRemap(m, op->Ari.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RI");
   }
}

/* --------- Operand, which can be reg or memory only. --------- */

AMD64RM* AMD64RM_Reg ( HReg reg ) {
   AMD64RM* op     = LibVEX_Alloc_inline(sizeof(AMD64RM));
   op->tag         = Arm_Reg;
   op->Arm.Reg.reg = reg;
   return op;
}
AMD64RM* AMD64RM_Mem ( AMD64AMode* am ) {
   AMD64RM* op    = LibVEX_Alloc_inline(sizeof(AMD64RM));
   op->tag        = Arm_Mem;
   op->Arm.Mem.am = am;
   return op;
}

void ppAMD64RM ( AMD64RM* op ) {
   switch (op->tag) {
      case Arm_Mem:
         ppAMD64AMode(op->Arm.Mem.am);
         return;
      case Arm_Reg:
         ppHRegAMD64(op->Arm.Reg.reg);
         return;
      default:
         vpanic("ppAMD64RM");
   }
}

/* Because an AMD64RM can be both a source or destination operand, we
   have to supply a mode -- pertaining to the operand as a whole --
   indicating how it's being used. */
static void addRegUsage_AMD64RM ( HRegUsage* u, AMD64RM* op, HRegMode mode ) {
   switch (op->tag) {
      case Arm_Mem:
         /* Memory is read, written or modified.  So we just want to
            know the regs read by the amode. */
         addRegUsage_AMD64AMode(u, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         /* reg is read, written or modified.  Add it in the
            appropriate way. */
         addHRegUse(u, mode, op->Arm.Reg.reg);
         return;
      default:
         vpanic("addRegUsage_AMD64RM");
   }
}

static void mapRegs_AMD64RM ( HRegRemap* m, AMD64RM* op )
{
   switch (op->tag) {
      case Arm_Mem:
         mapRegs_AMD64AMode(m, op->Arm.Mem.am);
         return;
      case Arm_Reg:
         op->Arm.Reg.reg = lookupHRegRemap(m, op->Arm.Reg.reg);
         return;
      default:
         vpanic("mapRegs_AMD64RM");
   }
}

/* --------- Instructions. --------- */

static const HChar* showAMD64ScalarSz ( Int sz ) {
   switch (sz) {
      case 2: return "w";
      case 4: return "l";
      case 8: return "q";
      default: vpanic("showAMD64ScalarSz");
   }
}

const HChar* showAMD64UnaryOp ( AMD64UnaryOp op ) {
   switch (op) {
      case Aun_NOT: return "not";
      case Aun_NEG: return "neg";
      default: vpanic("showAMD64UnaryOp");
   }
}

const HChar* showAMD64AluOp ( AMD64AluOp op ) {
   switch (op) {
      case Aalu_MOV:  return "mov";
      case Aalu_CMP:  return "cmp";
      case Aalu_ADD:  return "add";
      case Aalu_SUB:  return "sub";
      case Aalu_ADC:  return "adc";
      case Aalu_SBB:  return "sbb";
      case Aalu_AND:  return "and";
      case Aalu_OR:   return "or";
      case Aalu_XOR:  return "xor";
      case Aalu_MUL:  return "imul";
      default: vpanic("showAMD64AluOp");
   }
}

const HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
   switch (op) {
      case Ash_SHL: return "shl";
      case Ash_SHR: return "shr";
      case Ash_SAR: return "sar";
      default: vpanic("showAMD64ShiftOp");
   }
}

const HChar* showA87FpOp ( A87FpOp op ) {
   switch (op) {
      case Afp_SCALE:  return "scale";
      case Afp_ATAN:   return "atan";
      case Afp_YL2X:   return "yl2x";
      case Afp_YL2XP1: return "yl2xp1";
      case Afp_PREM:   return "prem";
      case Afp_PREM1:  return "prem1";
      case Afp_SQRT:   return "sqrt";
      case Afp_SIN:    return "sin";
      case Afp_COS:    return "cos";
      case Afp_TAN:    return "tan";
      case Afp_ROUND:  return "round";
      case Afp_2XM1:   return "2xm1";
      default: vpanic("showA87FpOp");
   }
}

const HChar* showAMD64SseOp ( AMD64SseOp op ) {
   switch (op) {
      case Asse_MOV:      return "movups";
      case Asse_ADDF:     return "add";
      case Asse_SUBF:     return "sub";
      case Asse_MULF:     return "mul";
      case Asse_DIVF:     return "div";
      case Asse_MAXF:     return "max";
      case Asse_MINF:     return "min";
      case Asse_CMPEQF:   return "cmpFeq";
      case Asse_CMPLTF:   return "cmpFlt";
      case Asse_CMPLEF:   return "cmpFle";
      case Asse_CMPUNF:   return "cmpFun";
      case Asse_RCPF:     return "rcp";
      case Asse_RSQRTF:   return "rsqrt";
      case Asse_SQRTF:    return "sqrt";
      case Asse_I2F:      return "cvtdq2ps.";
      case Asse_F2I:      return "cvtps2dq.";
      case Asse_AND:      return "and";
      case Asse_OR:       return "or";
      case Asse_XOR:      return "xor";
      case Asse_ANDN:     return "andn";
      case Asse_ADD8:     return "paddb";
      case Asse_ADD16:    return "paddw";
      case Asse_ADD32:    return "paddd";
      case Asse_ADD64:    return "paddq";
      case Asse_QADD8U:   return "paddusb";
      case Asse_QADD16U:  return "paddusw";
      case Asse_QADD8S:   return "paddsb";
      case Asse_QADD16S:  return "paddsw";
      case Asse_SUB8:     return "psubb";
      case Asse_SUB16:    return "psubw";
      case Asse_SUB32:    return "psubd";
      case Asse_SUB64:    return "psubq";
      case Asse_QSUB8U:   return "psubusb";
      case Asse_QSUB16U:  return "psubusw";
      case Asse_QSUB8S:   return "psubsb";
      case Asse_QSUB16S:  return "psubsw";
      case Asse_MUL16:    return "pmullw";
      case Asse_MULHI16U: return "pmulhuw";
      case Asse_MULHI16S: return "pmulhw";
      case Asse_AVG8U:    return "pavgb";
      case Asse_AVG16U:   return "pavgw";
      case Asse_MAX16S:   return "pmaxw";
      case Asse_MAX8U:    return "pmaxub";
      case Asse_MIN16S:   return "pminw";
      case Asse_MIN8U:    return "pminub";
      case Asse_CMPEQ8:   return "pcmpeqb";
      case Asse_CMPEQ16:  return "pcmpeqw";
      case Asse_CMPEQ32:  return "pcmpeqd";
      case Asse_CMPGT8S:  return "pcmpgtb";
      case Asse_CMPGT16S: return "pcmpgtw";
      case Asse_CMPGT32S: return "pcmpgtd";
      case Asse_SHL16:    return "psllw";
      case Asse_SHL32:    return "pslld";
      case Asse_SHL64:    return "psllq";
      case Asse_SHL128:   return "pslldq";
      case Asse_SHR16:    return "psrlw";
      case Asse_SHR32:    return "psrld";
      case Asse_SHR64:    return "psrlq";
      case Asse_SHR128:   return "psrldq";
      case Asse_SAR16:    return "psraw";
      case Asse_SAR32:    return "psrad";
      case Asse_PACKSSD:  return "packssdw";
      case Asse_PACKSSW:  return "packsswb";
      case Asse_PACKUSW:  return "packuswb";
      case Asse_UNPCKHB:  return "punpckhb";
      case Asse_UNPCKHW:  return "punpckhw";
      case Asse_UNPCKHD:  return "punpckhd";
      case Asse_UNPCKHQ:  return "punpckhq";
      case Asse_UNPCKLB:  return "punpcklb";
      case Asse_UNPCKLW:  return "punpcklw";
      case Asse_UNPCKLD:  return "punpckld";
      case Asse_UNPCKLQ:  return "punpcklq";
      case Asse_PSHUFB:   return "pshufb";
      case Asse_PMADDUBSW: return "pmaddubsw";
      case Asse_F32toF16: return "vcvtps2ph(rm_field=$0x4).";
      case Asse_F16toF32: return "vcvtph2ps.";
      default: vpanic("showAMD64SseOp");
   }
}

AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Imm64;
   i->Ain.Imm64.imm64 = imm64;
   i->Ain.Imm64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Alu64R;
   i->Ain.Alu64R.op = op;
   i->Ain.Alu64R.src = src;
   i->Ain.Alu64R.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp op, AMD64RI* src, AMD64AMode* dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Alu64M;
   i->Ain.Alu64M.op = op;
   i->Ain.Alu64M.src = src;
   i->Ain.Alu64M.dst = dst;
   vassert(op != Aalu_MUL);
   return i;
}
AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Sh64;
   i->Ain.Sh64.op = op;
   i->Ain.Sh64.src = src;
   i->Ain.Sh64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Test64;
   i->Ain.Test64.imm32 = imm32;
   i->Ain.Test64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Unary64;
   i->Ain.Unary64.op = op;
   i->Ain.Unary64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Lea64;
   i->Ain.Lea64.am = am;
   i->Ain.Lea64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp op, AMD64RMI* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Alu32R;
   i->Ain.Alu32R.op = op;
   i->Ain.Alu32R.src = src;
   i->Ain.Alu32R.dst = dst;
   switch (op) {
      case Aalu_ADD: case Aalu_SUB: case Aalu_CMP:
      case Aalu_AND: case Aalu_OR:  case Aalu_XOR: break;
      default: vassert(0);
   }
   return i;
}
AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* src ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_MulL;
   i->Ain.MulL.syned = syned;
   i->Ain.MulL.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* src ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Div;
   i->Ain.Div.syned = syned;
   i->Ain.Div.sz = sz;
   i->Ain.Div.src = src;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Push;
   i->Ain.Push.src = src;
   return i;
}
AMD64Instr* AMD64Instr_Call ( AMD64CondCode cond, Addr64 target, Int regparms,
                              RetLoc rloc ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Call;
   i->Ain.Call.cond = cond;
   i->Ain.Call.target = target;
   i->Ain.Call.regparms = regparms;
   i->Ain.Call.rloc = rloc;
   vassert(regparms >= 0 && regparms <= 6);
   vassert(is_sane_RetLoc(rloc));
   return i;
}

AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
                                 AMD64CondCode cond, Bool toFastEP ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_XDirect;
   i->Ain.XDirect.dstGA = dstGA;
   i->Ain.XDirect.amRIP = amRIP;
   i->Ain.XDirect.cond = cond;
   i->Ain.XDirect.toFastEP = toFastEP;
   return i;
}
AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
                                AMD64CondCode cond ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_XIndir;
   i->Ain.XIndir.dstGA = dstGA;
   i->Ain.XIndir.amRIP = amRIP;
   i->Ain.XIndir.cond = cond;
   return i;
}
AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
                                   AMD64CondCode cond, IRJumpKind jk ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_XAssisted;
   i->Ain.XAssisted.dstGA = dstGA;
   i->Ain.XAssisted.amRIP = amRIP;
   i->Ain.XAssisted.cond = cond;
   i->Ain.XAssisted.jk = jk;
   return i;
}

AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_CMov64;
   i->Ain.CMov64.cond = cond;
   i->Ain.CMov64.src = src;
   i->Ain.CMov64.dst = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
                               AMD64AMode* addr, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_CLoad;
   i->Ain.CLoad.cond = cond;
   i->Ain.CLoad.szB = szB;
   i->Ain.CLoad.addr = addr;
   i->Ain.CLoad.dst = dst;
   vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
   return i;
}
AMD64Instr* AMD64Instr_CStore ( AMD64CondCode cond, UChar szB,
                                HReg src, AMD64AMode* addr ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_CStore;
   i->Ain.CStore.cond = cond;
   i->Ain.CStore.szB = szB;
   i->Ain.CStore.src = src;
   i->Ain.CStore.addr = addr;
   vassert(cond != Acc_ALWAYS && (szB == 4 || szB == 8));
   return i;
}
AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_MovxLQ;
   i->Ain.MovxLQ.syned = syned;
   i->Ain.MovxLQ.src = src;
   i->Ain.MovxLQ.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
                                AMD64AMode* src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_LoadEX;
   i->Ain.LoadEX.szSmall = szSmall;
   i->Ain.LoadEX.syned = syned;
   i->Ain.LoadEX.src = src;
   i->Ain.LoadEX.dst = dst;
   vassert(szSmall == 1 || szSmall == 2 || szSmall == 4);
   return i;
}
AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Store;
   i->Ain.Store.sz = sz;
   i->Ain.Store.src = src;
   i->Ain.Store.dst = dst;
   vassert(sz == 1 || sz == 2 || sz == 4);
   return i;
}
AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Set64;
   i->Ain.Set64.cond = cond;
   i->Ain.Set64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Bsfr64;
   i->Ain.Bsfr64.isFwds = isFwds;
   i->Ain.Bsfr64.src = src;
   i->Ain.Bsfr64.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_MFence ( void ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_MFence;
   return i;
}
AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_ACAS;
   i->Ain.ACAS.addr = addr;
   i->Ain.ACAS.sz = sz;
   vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
   return i;
}
AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_DACAS;
   i->Ain.DACAS.addr = addr;
   i->Ain.DACAS.sz = sz;
   vassert(sz == 8 || sz == 4);
   return i;
}

AMD64Instr* AMD64Instr_A87Free ( Int nregs )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_A87Free;
   i->Ain.A87Free.nregs = nregs;
   vassert(nregs >= 1 && nregs <= 7);
   return i;
}
AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_A87PushPop;
   i->Ain.A87PushPop.addr = addr;
   i->Ain.A87PushPop.isPush = isPush;
   i->Ain.A87PushPop.szB = szB;
   vassert(szB == 8 || szB == 4);
   return i;
}
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_A87FpOp;
   i->Ain.A87FpOp.op = op;
   return i;
}
AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_A87LdCW;
   i->Ain.A87LdCW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_A87StSW;
   i->Ain.A87StSW.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_LdMXCSR;
   i->Ain.LdMXCSR.addr = addr;
   return i;
}
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseUComIS;
   i->Ain.SseUComIS.sz = toUChar(sz);
   i->Ain.SseUComIS.srcL = srcL;
   i->Ain.SseUComIS.srcR = srcR;
   i->Ain.SseUComIS.dst = dst;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseSI2SF;
   i->Ain.SseSI2SF.szS = toUChar(szS);
   i->Ain.SseSI2SF.szD = toUChar(szD);
   i->Ain.SseSI2SF.src = src;
   i->Ain.SseSI2SF.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseSF2SI;
   i->Ain.SseSF2SI.szS = toUChar(szS);
   i->Ain.SseSF2SI.szD = toUChar(szD);
   i->Ain.SseSF2SI.src = src;
   i->Ain.SseSF2SI.dst = dst;
   vassert(szS == 4 || szS == 8);
   vassert(szD == 4 || szD == 8);
   return i;
}
AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseSDSS;
   i->Ain.SseSDSS.from64 = from64;
   i->Ain.SseSDSS.src = src;
   i->Ain.SseSDSS.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
                                 HReg reg, AMD64AMode* addr ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseLdSt;
   i->Ain.SseLdSt.isLoad = isLoad;
   i->Ain.SseLdSt.sz = toUChar(sz);
   i->Ain.SseLdSt.reg = reg;
   i->Ain.SseLdSt.addr = addr;
   vassert(sz == 4 || sz == 8 || sz == 16);
   return i;
}
AMD64Instr* AMD64Instr_SseCStore ( AMD64CondCode cond,
                                   HReg src, AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseCStore;
   i->Ain.SseCStore.cond = cond;
   i->Ain.SseCStore.src = src;
   i->Ain.SseCStore.addr = addr;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseCLoad ( AMD64CondCode cond,
                                  AMD64AMode* addr, HReg dst )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseCLoad;
   i->Ain.SseCLoad.cond = cond;
   i->Ain.SseCLoad.addr = addr;
   i->Ain.SseCLoad.dst = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg reg, AMD64AMode* addr )
{
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseLdzLO;
   i->Ain.SseLdzLO.sz = sz;
   i->Ain.SseLdzLO.reg = reg;
   i->Ain.SseLdzLO.addr = addr;
   vassert(sz == 4 || sz == 8);
   return i;
}
AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Sse32Fx4;
   i->Ain.Sse32Fx4.op = op;
   i->Ain.Sse32Fx4.src = src;
   i->Ain.Sse32Fx4.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Sse32FLo;
   i->Ain.Sse32FLo.op = op;
   i->Ain.Sse32FLo.src = src;
   i->Ain.Sse32FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Sse64Fx2;
   i->Ain.Sse64Fx2.op = op;
   i->Ain.Sse64Fx2.src = src;
   i->Ain.Sse64Fx2.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp op, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_Sse64FLo;
   i->Ain.Sse64FLo.op = op;
   i->Ain.Sse64FLo.src = src;
   i->Ain.Sse64FLo.dst = dst;
   vassert(op != Asse_MOV);
   return i;
}
AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp op, HReg re, HReg rg ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseReRg;
   i->Ain.SseReRg.op = op;
   i->Ain.SseReRg.src = re;
   i->Ain.SseReRg.dst = rg;
   return i;
}
AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseCMov;
   i->Ain.SseCMov.cond = cond;
   i->Ain.SseCMov.src = src;
   i->Ain.SseCMov.dst = dst;
   vassert(cond != Acc_ALWAYS);
   return i;
}
AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseShuf;
   i->Ain.SseShuf.order = order;
   i->Ain.SseShuf.src = src;
   i->Ain.SseShuf.dst = dst;
   vassert(order >= 0 && order <= 0xFF);
   return i;
}
AMD64Instr* AMD64Instr_SseShiftN ( AMD64SseOp op,
                                   UInt shiftBits, HReg dst ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseShiftN;
   i->Ain.SseShiftN.op = op;
   i->Ain.SseShiftN.shiftBits = shiftBits;
   i->Ain.SseShiftN.dst = dst;
   return i;
}
AMD64Instr* AMD64Instr_SseMOVQ ( HReg gpr, HReg xmm, Bool toXMM ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_SseMOVQ;
   i->Ain.SseMOVQ.gpr = gpr;
   i->Ain.SseMOVQ.xmm = xmm;
   i->Ain.SseMOVQ.toXMM = toXMM;
   vassert(hregClass(gpr) == HRcInt64);
   vassert(hregClass(xmm) == HRcVec128);
   return i;
}
//uu AMD64Instr* AMD64Instr_AvxLdSt ( Bool isLoad,
//uu                                  HReg reg, AMD64AMode* addr ) {
//uu    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
//uu    i->tag = Ain_AvxLdSt;
//uu    i->Ain.AvxLdSt.isLoad = isLoad;
//uu    i->Ain.AvxLdSt.reg = reg;
//uu    i->Ain.AvxLdSt.addr = addr;
//uu    return i;
//uu }
//uu AMD64Instr* AMD64Instr_AvxReRg ( AMD64SseOp op, HReg re, HReg rg ) {
//uu    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
//uu    i->tag = Ain_AvxReRg;
//uu    i->Ain.AvxReRg.op = op;
//uu    i->Ain.AvxReRg.src = re;
//uu    i->Ain.AvxReRg.dst = rg;
//uu    return i;
//uu }
AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
                                 AMD64AMode* amFailAddr ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_EvCheck;
   i->Ain.EvCheck.amCounter = amCounter;
   i->Ain.EvCheck.amFailAddr = amFailAddr;
   return i;
}
AMD64Instr* AMD64Instr_ProfInc ( void ) {
   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
   i->tag = Ain_ProfInc;
   return i;
}
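
/* Illustrative sketch, not part of the original file: how a client of the
   constructors above might build "movabsq $0x1234,dst; addq src,dst".
   The two HRegs are assumed to come from the surrounding instruction
   selector.  Kept under "#if 0" so it is not compiled. */
#if 0
static void exampleInstrs ( HReg src, HReg dst )
{
   AMD64Instr* i1 = AMD64Instr_Imm64(0x1234ULL, dst);
   AMD64Instr* i2 = AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Reg(src), dst);
   ppAMD64Instr(i1, True/*mode64*/); vex_printf("\n");
   ppAMD64Instr(i2, True/*mode64*/); vex_printf("\n");
}
#endif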
1065 void ppAMD64Instr ( const AMD64Instr
* i
, Bool mode64
)
1067 vassert(mode64
== True
);
1070 vex_printf("movabsq $0x%llx,", i
->Ain
.Imm64
.imm64
);
1071 ppHRegAMD64(i
->Ain
.Imm64
.dst
);
1074 vex_printf("%sq ", showAMD64AluOp(i
->Ain
.Alu64R
.op
));
1075 ppAMD64RMI(i
->Ain
.Alu64R
.src
);
1077 ppHRegAMD64(i
->Ain
.Alu64R
.dst
);
1080 vex_printf("%sq ", showAMD64AluOp(i
->Ain
.Alu64M
.op
));
1081 ppAMD64RI(i
->Ain
.Alu64M
.src
);
1083 ppAMD64AMode(i
->Ain
.Alu64M
.dst
);
1086 vex_printf("%sq ", showAMD64ShiftOp(i
->Ain
.Sh64
.op
));
1087 if (i
->Ain
.Sh64
.src
== 0)
1088 vex_printf("%%cl,");
1090 vex_printf("$%d,", (Int
)i
->Ain
.Sh64
.src
);
1091 ppHRegAMD64(i
->Ain
.Sh64
.dst
);
1094 vex_printf("testq $%d,", (Int
)i
->Ain
.Test64
.imm32
);
1095 ppHRegAMD64(i
->Ain
.Test64
.dst
);
1098 vex_printf("%sq ", showAMD64UnaryOp(i
->Ain
.Unary64
.op
));
1099 ppHRegAMD64(i
->Ain
.Unary64
.dst
);
1102 vex_printf("leaq ");
1103 ppAMD64AMode(i
->Ain
.Lea64
.am
);
1105 ppHRegAMD64(i
->Ain
.Lea64
.dst
);
1108 vex_printf("%sl ", showAMD64AluOp(i
->Ain
.Alu32R
.op
));
1109 ppAMD64RMI_lo32(i
->Ain
.Alu32R
.src
);
1111 ppHRegAMD64_lo32(i
->Ain
.Alu32R
.dst
);
1114 vex_printf("%cmulq ", i
->Ain
.MulL
.syned
? 's' : 'u');
1115 ppAMD64RM(i
->Ain
.MulL
.src
);
1118 vex_printf("%cdiv%s ",
1119 i
->Ain
.Div
.syned
? 's' : 'u',
1120 showAMD64ScalarSz(i
->Ain
.Div
.sz
));
1121 ppAMD64RM(i
->Ain
.Div
.src
);
1124 vex_printf("pushq ");
1125 ppAMD64RMI(i
->Ain
.Push
.src
);
1128 vex_printf("call%s[%d,",
1129 i
->Ain
.Call
.cond
==Acc_ALWAYS
1130 ? "" : showAMD64CondCode(i
->Ain
.Call
.cond
),
1131 i
->Ain
.Call
.regparms
);
1132 ppRetLoc(i
->Ain
.Call
.rloc
);
1133 vex_printf("] 0x%llx", i
->Ain
.Call
.target
);
1137 vex_printf("(xDirect) ");
1138 vex_printf("if (%%rflags.%s) { ",
1139 showAMD64CondCode(i
->Ain
.XDirect
.cond
));
1140 vex_printf("movabsq $0x%llx,%%r11; ", i
->Ain
.XDirect
.dstGA
);
1141 vex_printf("movq %%r11,");
1142 ppAMD64AMode(i
->Ain
.XDirect
.amRIP
);
1144 vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
1145 i
->Ain
.XDirect
.toFastEP
? "fast" : "slow");
1148 vex_printf("(xIndir) ");
1149 vex_printf("if (%%rflags.%s) { ",
1150 showAMD64CondCode(i
->Ain
.XIndir
.cond
));
1151 vex_printf("movq ");
1152 ppHRegAMD64(i
->Ain
.XIndir
.dstGA
);
1154 ppAMD64AMode(i
->Ain
.XIndir
.amRIP
);
1155 vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
1158 vex_printf("(xAssisted) ");
1159 vex_printf("if (%%rflags.%s) { ",
1160 showAMD64CondCode(i
->Ain
.XAssisted
.cond
));
1161 vex_printf("movq ");
1162 ppHRegAMD64(i
->Ain
.XAssisted
.dstGA
);
1164 ppAMD64AMode(i
->Ain
.XAssisted
.amRIP
);
1165 vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
1166 (Int
)i
->Ain
.XAssisted
.jk
);
1167 vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
1171 vex_printf("cmov%s ", showAMD64CondCode(i
->Ain
.CMov64
.cond
));
1172 ppHRegAMD64(i
->Ain
.CMov64
.src
);
1174 ppHRegAMD64(i
->Ain
.CMov64
.dst
);
1177 vex_printf("if (%%rflags.%s) { ",
1178 showAMD64CondCode(i
->Ain
.CLoad
.cond
));
1179 vex_printf("mov%c ", i
->Ain
.CLoad
.szB
== 4 ? 'l' : 'q');
1180 ppAMD64AMode(i
->Ain
.CLoad
.addr
);
1182 (i
->Ain
.CLoad
.szB
== 4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1187 vex_printf("if (%%rflags.%s) { ",
1188 showAMD64CondCode(i
->Ain
.CStore
.cond
));
1189 vex_printf("mov%c ", i
->Ain
.CStore
.szB
== 4 ? 'l' : 'q');
1190 (i
->Ain
.CStore
.szB
== 4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1191 (i
->Ain
.CStore
.src
);
1193 ppAMD64AMode(i
->Ain
.CStore
.addr
);
1198 vex_printf("mov%clq ", i
->Ain
.MovxLQ
.syned
? 's' : 'z');
1199 ppHRegAMD64_lo32(i
->Ain
.MovxLQ
.src
);
1201 ppHRegAMD64(i
->Ain
.MovxLQ
.dst
);
1204 if (i
->Ain
.LoadEX
.szSmall
==4 && !i
->Ain
.LoadEX
.syned
) {
1205 vex_printf("movl ");
1206 ppAMD64AMode(i
->Ain
.LoadEX
.src
);
1208 ppHRegAMD64_lo32(i
->Ain
.LoadEX
.dst
);
1210 vex_printf("mov%c%cq ",
1211 i
->Ain
.LoadEX
.syned
? 's' : 'z',
1212 i
->Ain
.LoadEX
.szSmall
==1
1214 : (i
->Ain
.LoadEX
.szSmall
==2 ? 'w' : 'l'));
1215 ppAMD64AMode(i
->Ain
.LoadEX
.src
);
1217 ppHRegAMD64(i
->Ain
.LoadEX
.dst
);
1221 vex_printf("mov%c ", i
->Ain
.Store
.sz
==1 ? 'b'
1222 : (i
->Ain
.Store
.sz
==2 ? 'w' : 'l'));
1223 ppHRegAMD64(i
->Ain
.Store
.src
);
1225 ppAMD64AMode(i
->Ain
.Store
.dst
);
1228 vex_printf("setq%s ", showAMD64CondCode(i
->Ain
.Set64
.cond
));
1229 ppHRegAMD64(i
->Ain
.Set64
.dst
);
1232 vex_printf("bs%cq ", i
->Ain
.Bsfr64
.isFwds
? 'f' : 'r');
1233 ppHRegAMD64(i
->Ain
.Bsfr64
.src
);
1235 ppHRegAMD64(i
->Ain
.Bsfr64
.dst
);
1238 vex_printf("mfence" );
1241 vex_printf("lock cmpxchg%c ",
1242 i
->Ain
.ACAS
.sz
==1 ? 'b' : i
->Ain
.ACAS
.sz
==2 ? 'w'
1243 : i
->Ain
.ACAS
.sz
==4 ? 'l' : 'q' );
1244 vex_printf("{%%rax->%%rbx},");
1245 ppAMD64AMode(i
->Ain
.ACAS
.addr
);
1248 vex_printf("lock cmpxchg%db {%%rdx:%%rax->%%rcx:%%rbx},",
1249 (Int
)(2 * i
->Ain
.DACAS
.sz
));
1250 ppAMD64AMode(i
->Ain
.DACAS
.addr
);
1253 vex_printf("ffree %%st(7..%d)", 8 - i
->Ain
.A87Free
.nregs
);
1255 case Ain_A87PushPop
:
1256 vex_printf(i
->Ain
.A87PushPop
.isPush
? "fld%c " : "fstp%c ",
1257 i
->Ain
.A87PushPop
.szB
== 4 ? 's' : 'l');
1258 ppAMD64AMode(i
->Ain
.A87PushPop
.addr
);
1261 vex_printf("f%s", showA87FpOp(i
->Ain
.A87FpOp
.op
));
1264 vex_printf("fldcw ");
1265 ppAMD64AMode(i
->Ain
.A87LdCW
.addr
);
1268 vex_printf("fstsw ");
1269 ppAMD64AMode(i
->Ain
.A87StSW
.addr
);
1272 vex_printf("ldmxcsr ");
1273 ppAMD64AMode(i
->Ain
.LdMXCSR
.addr
);
1276 vex_printf("ucomis%s ", i
->Ain
.SseUComIS
.sz
==4 ? "s" : "d");
1277 ppHRegAMD64(i
->Ain
.SseUComIS
.srcL
);
1279 ppHRegAMD64(i
->Ain
.SseUComIS
.srcR
);
1280 vex_printf(" ; pushfq ; popq ");
1281 ppHRegAMD64(i
->Ain
.SseUComIS
.dst
);
1284 vex_printf("cvtsi2s%s ", i
->Ain
.SseSI2SF
.szD
==4 ? "s" : "d");
1285 (i
->Ain
.SseSI2SF
.szS
==4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1286 (i
->Ain
.SseSI2SF
.src
);
1288 ppHRegAMD64(i
->Ain
.SseSI2SF
.dst
);
1291 vex_printf("cvts%s2si ", i
->Ain
.SseSF2SI
.szS
==4 ? "s" : "d");
1292 ppHRegAMD64(i
->Ain
.SseSF2SI
.src
);
1294 (i
->Ain
.SseSF2SI
.szD
==4 ? ppHRegAMD64_lo32
: ppHRegAMD64
)
1295 (i
->Ain
.SseSF2SI
.dst
);
1298 vex_printf(i
->Ain
.SseSDSS
.from64
? "cvtsd2ss " : "cvtss2sd ");
1299 ppHRegAMD64(i
->Ain
.SseSDSS
.src
);
1301 ppHRegAMD64(i
->Ain
.SseSDSS
.dst
);
1304 switch (i
->Ain
.SseLdSt
.sz
) {
1305 case 4: vex_printf("movss "); break;
1306 case 8: vex_printf("movsd "); break;
1307 case 16: vex_printf("movups "); break;
1308 default: vassert(0);
1310 if (i
->Ain
.SseLdSt
.isLoad
) {
1311 ppAMD64AMode(i
->Ain
.SseLdSt
.addr
);
1313 ppHRegAMD64(i
->Ain
.SseLdSt
.reg
);
1315 ppHRegAMD64(i
->Ain
.SseLdSt
.reg
);
1317 ppAMD64AMode(i
->Ain
.SseLdSt
.addr
);
1321 vex_printf("if (%%rflags.%s) { ",
1322 showAMD64CondCode(i
->Ain
.SseCStore
.cond
));
1323 vex_printf("movups ");
1324 ppHRegAMD64(i
->Ain
.SseCStore
.src
);
1326 ppAMD64AMode(i
->Ain
.SseCStore
.addr
);
1330 vex_printf("if (%%rflags.%s) { ",
1331 showAMD64CondCode(i
->Ain
.SseCLoad
.cond
));
1332 vex_printf("movups ");
1333 ppAMD64AMode(i
->Ain
.SseCLoad
.addr
);
1335 ppHRegAMD64(i
->Ain
.SseCLoad
.dst
);
1339 vex_printf("movs%s ", i
->Ain
.SseLdzLO
.sz
==4 ? "s" : "d");
1340 ppAMD64AMode(i
->Ain
.SseLdzLO
.addr
);
1342 ppHRegAMD64(i
->Ain
.SseLdzLO
.reg
);
1345 vex_printf("%sps ", showAMD64SseOp(i
->Ain
.Sse32Fx4
.op
));
1346 ppHRegAMD64(i
->Ain
.Sse32Fx4
.src
);
1348 ppHRegAMD64(i
->Ain
.Sse32Fx4
.dst
);
1351 vex_printf("%sss ", showAMD64SseOp(i
->Ain
.Sse32FLo
.op
));
1352 ppHRegAMD64(i
->Ain
.Sse32FLo
.src
);
1354 ppHRegAMD64(i
->Ain
.Sse32FLo
.dst
);
1357 vex_printf("%spd ", showAMD64SseOp(i
->Ain
.Sse64Fx2
.op
));
1358 ppHRegAMD64(i
->Ain
.Sse64Fx2
.src
);
1360 ppHRegAMD64(i
->Ain
.Sse64Fx2
.dst
);
1363 vex_printf("%ssd ", showAMD64SseOp(i
->Ain
.Sse64FLo
.op
));
1364 ppHRegAMD64(i
->Ain
.Sse64FLo
.src
);
1366 ppHRegAMD64(i
->Ain
.Sse64FLo
.dst
);
1369 vex_printf("%s ", showAMD64SseOp(i
->Ain
.SseReRg
.op
));
1370 ppHRegAMD64(i
->Ain
.SseReRg
.src
);
1372 ppHRegAMD64(i
->Ain
.SseReRg
.dst
);
1375 vex_printf("cmov%s ", showAMD64CondCode(i
->Ain
.SseCMov
.cond
));
1376 ppHRegAMD64(i
->Ain
.SseCMov
.src
);
1378 ppHRegAMD64(i
->Ain
.SseCMov
.dst
);
1381 vex_printf("pshufd $0x%x,", (UInt
)i
->Ain
.SseShuf
.order
);
1382 ppHRegAMD64(i
->Ain
.SseShuf
.src
);
1384 ppHRegAMD64(i
->Ain
.SseShuf
.dst
);
1387 vex_printf("%s $%u, ", showAMD64SseOp(i
->Ain
.SseShiftN
.op
),
1388 i
->Ain
.SseShiftN
.shiftBits
);
1389 ppHRegAMD64(i
->Ain
.SseShiftN
.dst
);
1392 vex_printf("movq ");
1393 if (i
->Ain
.SseMOVQ
.toXMM
) {
1394 ppHRegAMD64(i
->Ain
.SseMOVQ
.gpr
);
1396 ppHRegAMD64(i
->Ain
.SseMOVQ
.xmm
);
1398 ppHRegAMD64(i
->Ain
.SseMOVQ
.xmm
);
1400 ppHRegAMD64(i
->Ain
.SseMOVQ
.gpr
);
1403 //uu case Ain_AvxLdSt:
1404 //uu vex_printf("vmovups ");
1405 //uu if (i->Ain.AvxLdSt.isLoad) {
1406 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1407 //uu vex_printf(",");
1408 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1410 //uu ppHRegAMD64(i->Ain.AvxLdSt.reg);
1411 //uu vex_printf(",");
1412 //uu ppAMD64AMode(i->Ain.AvxLdSt.addr);
1415 //uu case Ain_AvxReRg:
1416 //uu vex_printf("v%s ", showAMD64SseOp(i->Ain.SseReRg.op));
1417 //uu ppHRegAMD64(i->Ain.AvxReRg.src);
1418 //uu vex_printf(",");
1419 //uu ppHRegAMD64(i->Ain.AvxReRg.dst);
1422 vex_printf("(evCheck) decl ");
1423 ppAMD64AMode(i
->Ain
.EvCheck
.amCounter
);
1424 vex_printf("; jns nofail; jmp *");
1425 ppAMD64AMode(i
->Ain
.EvCheck
.amFailAddr
);
1426 vex_printf("; nofail:");
1429 vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
1432 vpanic("ppAMD64Instr");
1436 /* --------- Helpers for register allocation. --------- */
1438 void getRegUsage_AMD64Instr ( HRegUsage
* u
, const AMD64Instr
* i
, Bool mode64
)
1441 vassert(mode64
== True
);
1445 addHRegUse(u
, HRmWrite
, i
->Ain
.Imm64
.dst
);
1448 addRegUsage_AMD64RMI(u
, i
->Ain
.Alu64R
.src
);
1449 if (i
->Ain
.Alu64R
.op
== Aalu_MOV
) {
1450 addHRegUse(u
, HRmWrite
, i
->Ain
.Alu64R
.dst
);
1452 if (i
->Ain
.Alu64R
.src
->tag
== Armi_Reg
) {
1453 u
->isRegRegMove
= True
;
1454 u
->regMoveSrc
= i
->Ain
.Alu64R
.src
->Armi
.Reg
.reg
;
1455 u
->regMoveDst
= i
->Ain
.Alu64R
.dst
;
1459 if (i
->Ain
.Alu64R
.op
== Aalu_CMP
) {
1460 addHRegUse(u
, HRmRead
, i
->Ain
.Alu64R
.dst
);
1463 addHRegUse(u
, HRmModify
, i
->Ain
.Alu64R
.dst
);
1466 addRegUsage_AMD64RI(u
, i
->Ain
.Alu64M
.src
);
1467 addRegUsage_AMD64AMode(u
, i
->Ain
.Alu64M
.dst
);
1470 addHRegUse(u
, HRmModify
, i
->Ain
.Sh64
.dst
);
1471 if (i
->Ain
.Sh64
.src
== 0)
1472 addHRegUse(u
, HRmRead
, hregAMD64_RCX());
1475 addHRegUse(u
, HRmRead
, i
->Ain
.Test64
.dst
);
1478 addHRegUse(u
, HRmModify
, i
->Ain
.Unary64
.dst
);
1481 addRegUsage_AMD64AMode(u
, i
->Ain
.Lea64
.am
);
1482 addHRegUse(u
, HRmWrite
, i
->Ain
.Lea64
.dst
);
1485 vassert(i
->Ain
.Alu32R
.op
!= Aalu_MOV
);
1486 addRegUsage_AMD64RMI(u
, i
->Ain
.Alu32R
.src
);
1487 if (i
->Ain
.Alu32R
.op
== Aalu_CMP
) {
1488 addHRegUse(u
, HRmRead
, i
->Ain
.Alu32R
.dst
);
1491 addHRegUse(u
, HRmModify
, i
->Ain
.Alu32R
.dst
);
1494 addRegUsage_AMD64RM(u
, i
->Ain
.MulL
.src
, HRmRead
);
1495 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1496 addHRegUse(u
, HRmWrite
, hregAMD64_RDX());
1499 addRegUsage_AMD64RM(u
, i
->Ain
.Div
.src
, HRmRead
);
1500 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1501 addHRegUse(u
, HRmModify
, hregAMD64_RDX());
1504 addRegUsage_AMD64RMI(u
, i
->Ain
.Push
.src
);
1505 addHRegUse(u
, HRmModify
, hregAMD64_RSP());
1508 /* This is a bit subtle. */
1509 /* First off, claim it trashes all the caller-saved regs
1510 which fall within the register allocator's jurisdiction.
1511 These I believe to be: rax rcx rdx rdi rsi r8 r9 r10
1512 and all the xmm registers. */
1513 addHRegUse(u
, HRmWrite
, hregAMD64_RAX());
1514 addHRegUse(u
, HRmWrite
, hregAMD64_RCX());
1515 addHRegUse(u
, HRmWrite
, hregAMD64_RDX());
1516 addHRegUse(u
, HRmWrite
, hregAMD64_RDI());
1517 addHRegUse(u
, HRmWrite
, hregAMD64_RSI());
1518 addHRegUse(u
, HRmWrite
, hregAMD64_R8());
1519 addHRegUse(u
, HRmWrite
, hregAMD64_R9());
1520 addHRegUse(u
, HRmWrite
, hregAMD64_R10());
1521 addHRegUse(u
, HRmWrite
, hregAMD64_XMM0());
1522 addHRegUse(u
, HRmWrite
, hregAMD64_XMM1());
1523 addHRegUse(u
, HRmWrite
, hregAMD64_XMM3());
1524 addHRegUse(u
, HRmWrite
, hregAMD64_XMM4());
1525 addHRegUse(u
, HRmWrite
, hregAMD64_XMM5());
1526 addHRegUse(u
, HRmWrite
, hregAMD64_XMM6());
1527 addHRegUse(u
, HRmWrite
, hregAMD64_XMM7());
1528 addHRegUse(u
, HRmWrite
, hregAMD64_XMM8());
1529 addHRegUse(u
, HRmWrite
, hregAMD64_XMM9());
1530 addHRegUse(u
, HRmWrite
, hregAMD64_XMM10());
1531 addHRegUse(u
, HRmWrite
, hregAMD64_XMM11());
1532 addHRegUse(u
, HRmWrite
, hregAMD64_XMM12());
1534 /* Now we have to state any parameter-carrying registers
1535 which might be read. This depends on the regparmness. */
1536 switch (i
->Ain
.Call
.regparms
) {
1537 case 6: addHRegUse(u
, HRmRead
, hregAMD64_R9()); /*fallthru*/
1538 case 5: addHRegUse(u
, HRmRead
, hregAMD64_R8()); /*fallthru*/
1539 case 4: addHRegUse(u
, HRmRead
, hregAMD64_RCX()); /*fallthru*/
1540 case 3: addHRegUse(u
, HRmRead
, hregAMD64_RDX()); /*fallthru*/
1541 case 2: addHRegUse(u
, HRmRead
, hregAMD64_RSI()); /*fallthru*/
1542 case 1: addHRegUse(u
, HRmRead
, hregAMD64_RDI()); break;
1544 default: vpanic("getRegUsage_AMD64Instr:Call:regparms");
1546 /* Finally, there is the issue that the insn trashes a
1547 register because the literal target address has to be
1548 loaded into a register. Fortunately, r11 is stated in the
1549 ABI as a scratch register, and so seems a suitable victim. */
1550 addHRegUse(u
, HRmWrite
, hregAMD64_R11());
1551 /* Upshot of this is that the assembler really must use r11,
1552 and no other, as a destination temporary. */
1554 /* XDirect/XIndir/XAssisted are also a bit subtle. They
1555 conditionally exit the block. Hence we only need to list (1)
1556 the registers that they read, and (2) the registers that they
1557 write in the case where the block is not exited. (2) is
1558 empty, hence only (1) is relevant here. */
1560 /* Don't bother to mention the write to %r11, since it is not
1561 available to the allocator. */
1562 addRegUsage_AMD64AMode(u
, i
->Ain
.XDirect
.amRIP
);
1566 addHRegUse(u
, HRmRead
, i
->Ain
.XIndir
.dstGA
);
1567 addRegUsage_AMD64AMode(u
, i
->Ain
.XIndir
.amRIP
);
1570 /* Ditto re %r11 and %rbp (the baseblock ptr) */
1571 addHRegUse(u
, HRmRead
, i
->Ain
.XAssisted
.dstGA
);
1572 addRegUsage_AMD64AMode(u
, i
->Ain
.XAssisted
.amRIP
);
1575 addHRegUse(u
, HRmRead
, i
->Ain
.CMov64
.src
);
1576 addHRegUse(u
, HRmModify
, i
->Ain
.CMov64
.dst
);
1579 addRegUsage_AMD64AMode(u
, i
->Ain
.CLoad
.addr
);
1580 addHRegUse(u
, HRmModify
, i
->Ain
.CLoad
.dst
);
1583 addRegUsage_AMD64AMode(u
, i
->Ain
.CStore
.addr
);
1584 addHRegUse(u
, HRmRead
, i
->Ain
.CStore
.src
);
1587 addHRegUse(u
, HRmRead
, i
->Ain
.MovxLQ
.src
);
1588 addHRegUse(u
, HRmWrite
, i
->Ain
.MovxLQ
.dst
);
1591 addRegUsage_AMD64AMode(u
, i
->Ain
.LoadEX
.src
);
1592 addHRegUse(u
, HRmWrite
, i
->Ain
.LoadEX
.dst
);
1595 addHRegUse(u
, HRmRead
, i
->Ain
.Store
.src
);
1596 addRegUsage_AMD64AMode(u
, i
->Ain
.Store
.dst
);
1599 addHRegUse(u
, HRmWrite
, i
->Ain
.Set64
.dst
);
1602 addHRegUse(u
, HRmRead
, i
->Ain
.Bsfr64
.src
);
1603 addHRegUse(u
, HRmWrite
, i
->Ain
.Bsfr64
.dst
);
1608 addRegUsage_AMD64AMode(u
, i
->Ain
.ACAS
.addr
);
1609 addHRegUse(u
, HRmRead
, hregAMD64_RBX());
1610 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1613 addRegUsage_AMD64AMode(u
, i
->Ain
.DACAS
.addr
);
1614 addHRegUse(u
, HRmRead
, hregAMD64_RCX());
1615 addHRegUse(u
, HRmRead
, hregAMD64_RBX());
1616 addHRegUse(u
, HRmModify
, hregAMD64_RDX());
1617 addHRegUse(u
, HRmModify
, hregAMD64_RAX());
1621 case Ain_A87PushPop
:
1622 addRegUsage_AMD64AMode(u
, i
->Ain
.A87PushPop
.addr
);
1627 addRegUsage_AMD64AMode(u
, i
->Ain
.A87LdCW
.addr
);
1630 addRegUsage_AMD64AMode(u
, i
->Ain
.A87StSW
.addr
);
1633 addRegUsage_AMD64AMode(u
, i
->Ain
.LdMXCSR
.addr
);
1636 addHRegUse(u
, HRmRead
, i
->Ain
.SseUComIS
.srcL
);
1637 addHRegUse(u
, HRmRead
, i
->Ain
.SseUComIS
.srcR
);
1638 addHRegUse(u
, HRmWrite
, i
->Ain
.SseUComIS
.dst
);
1641 addHRegUse(u
, HRmRead
, i
->Ain
.SseSI2SF
.src
);
1642 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSI2SF
.dst
);
1645 addHRegUse(u
, HRmRead
, i
->Ain
.SseSF2SI
.src
);
1646 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSF2SI
.dst
);
1649 addHRegUse(u
, HRmRead
, i
->Ain
.SseSDSS
.src
);
1650 addHRegUse(u
, HRmWrite
, i
->Ain
.SseSDSS
.dst
);
1653 addRegUsage_AMD64AMode(u
, i
->Ain
.SseLdSt
.addr
);
1654 addHRegUse(u
, i
->Ain
.SseLdSt
.isLoad
? HRmWrite
: HRmRead
,
1655 i
->Ain
.SseLdSt
.reg
);
1658 addRegUsage_AMD64AMode(u
, i
->Ain
.SseCStore
.addr
);
1659 addHRegUse(u
, HRmRead
, i
->Ain
.SseCStore
.src
);
1662 addRegUsage_AMD64AMode(u
, i
->Ain
.SseCLoad
.addr
);
1663 addHRegUse(u
, HRmModify
, i
->Ain
.SseCLoad
.dst
);
1666 addRegUsage_AMD64AMode(u
, i
->Ain
.SseLdzLO
.addr
);
1667 addHRegUse(u
, HRmWrite
, i
->Ain
.SseLdzLO
.reg
);
1670 vassert(i
->Ain
.Sse32Fx4
.op
!= Asse_MOV
);
1671 unary
= toBool( i
->Ain
.Sse32Fx4
.op
== Asse_RCPF
1672 || i
->Ain
.Sse32Fx4
.op
== Asse_RSQRTF
1673 || i
->Ain
.Sse32Fx4
.op
== Asse_SQRTF
1674 || i
->Ain
.Sse32Fx4
.op
== Asse_I2F
1675 || i
->Ain
.Sse32Fx4
.op
== Asse_F2I
1676 || i
->Ain
.Sse32Fx4
.op
== Asse_F32toF16
1677 || i
->Ain
.Sse32Fx4
.op
== Asse_F16toF32
);
1678 addHRegUse(u
, HRmRead
, i
->Ain
.Sse32Fx4
.src
);
1679 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1680 i
->Ain
.Sse32Fx4
.dst
);
1683 vassert(i
->Ain
.Sse32FLo
.op
!= Asse_MOV
);
1684 unary
= toBool( i
->Ain
.Sse32FLo
.op
== Asse_RCPF
1685 || i
->Ain
.Sse32FLo
.op
== Asse_RSQRTF
1686 || i
->Ain
.Sse32FLo
.op
== Asse_SQRTF
);
1687 addHRegUse(u
, HRmRead
, i
->Ain
.Sse32FLo
.src
);
1688 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1689 i
->Ain
.Sse32FLo
.dst
);
1692 vassert(i
->Ain
.Sse64Fx2
.op
!= Asse_MOV
);
1693 unary
= toBool( i
->Ain
.Sse64Fx2
.op
== Asse_RCPF
1694 || i
->Ain
.Sse64Fx2
.op
== Asse_RSQRTF
1695 || i
->Ain
.Sse64Fx2
.op
== Asse_SQRTF
);
1696 addHRegUse(u
, HRmRead
, i
->Ain
.Sse64Fx2
.src
);
1697 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1698 i
->Ain
.Sse64Fx2
.dst
);
1701 vassert(i
->Ain
.Sse64FLo
.op
!= Asse_MOV
);
1702 unary
= toBool( i
->Ain
.Sse64FLo
.op
== Asse_RCPF
1703 || i
->Ain
.Sse64FLo
.op
== Asse_RSQRTF
1704 || i
->Ain
.Sse64FLo
.op
== Asse_SQRTF
);
1705 addHRegUse(u
, HRmRead
, i
->Ain
.Sse64FLo
.src
);
1706 addHRegUse(u
, unary
? HRmWrite
: HRmModify
,
1707 i
->Ain
.Sse64FLo
.dst
);
1710 if ( (i
->Ain
.SseReRg
.op
== Asse_XOR
1711 || i
->Ain
.SseReRg
.op
== Asse_CMPEQ32
)
1712 && sameHReg(i
->Ain
.SseReRg
.src
, i
->Ain
.SseReRg
.dst
)) {
1713 /* reg-alloc needs to understand 'xor r,r' and 'cmpeqd
1714 r,r' as a write of a value to r, and independent of any
1715 previous value in r */
1716 /* (as opposed to a rite of passage :-) */
1717 addHRegUse(u
, HRmWrite
, i
->Ain
.SseReRg
.dst
);
1719 addHRegUse(u
, HRmRead
, i
->Ain
.SseReRg
.src
);
1720 addHRegUse(u
, i
->Ain
.SseReRg
.op
== Asse_MOV
1721 ? HRmWrite
: HRmModify
,
1722 i
->Ain
.SseReRg
.dst
);
1724 if (i
->Ain
.SseReRg
.op
== Asse_MOV
) {
1725 u
->isRegRegMove
= True
;
1726 u
->regMoveSrc
= i
->Ain
.SseReRg
.src
;
1727 u
->regMoveDst
= i
->Ain
.SseReRg
.dst
;
1732 addHRegUse(u
, HRmRead
, i
->Ain
.SseCMov
.src
);
1733 addHRegUse(u
, HRmModify
, i
->Ain
.SseCMov
.dst
);
1736 addHRegUse(u
, HRmRead
, i
->Ain
.SseShuf
.src
);
1737 addHRegUse(u
, HRmWrite
, i
->Ain
.SseShuf
.dst
);
1740 addHRegUse(u
, HRmModify
, i
->Ain
.SseShiftN
.dst
);
1743 addHRegUse(u
, i
->Ain
.SseMOVQ
.toXMM
? HRmRead
: HRmWrite
,
1744 i
->Ain
.SseMOVQ
.gpr
);
1745 addHRegUse(u
, i
->Ain
.SseMOVQ
.toXMM
? HRmWrite
: HRmRead
,
1746 i
->Ain
.SseMOVQ
.xmm
);
1748 //uu case Ain_AvxLdSt:
1749 //uu addRegUsage_AMD64AMode(u, i->Ain.AvxLdSt.addr);
1750 //uu addHRegUse(u, i->Ain.AvxLdSt.isLoad ? HRmWrite : HRmRead,
1751 //uu i->Ain.AvxLdSt.reg);
1753 //uu case Ain_AvxReRg:
1754 //uu if ( (i->Ain.AvxReRg.op == Asse_XOR
1755 //uu || i->Ain.AvxReRg.op == Asse_CMPEQ32)
1756 //uu && i->Ain.AvxReRg.src == i->Ain.AvxReRg.dst) {
1757 //uu /* See comments on the case for Ain_SseReRg. */
1758 //uu addHRegUse(u, HRmWrite, i->Ain.AvxReRg.dst);
1760 //uu addHRegUse(u, HRmRead, i->Ain.AvxReRg.src);
1761 //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
1762 //uu ? HRmWrite : HRmModify,
1763 //uu i->Ain.AvxReRg.dst);
1765 //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
1766 //uu u->isRegRegMove = True;
1767 //uu u->regMoveSrc = i->Ain.AvxReRg.src;
1768 //uu u->regMoveDst = i->Ain.AvxReRg.dst;
1773 /* We expect both amodes only to mention %rbp, so this is in
1774 fact pointless, since %rbp isn't allocatable, but anyway.. */
1775 addRegUsage_AMD64AMode(u
, i
->Ain
.EvCheck
.amCounter
);
1776 addRegUsage_AMD64AMode(u
, i
->Ain
.EvCheck
.amFailAddr
);
1779 addHRegUse(u
, HRmWrite
, hregAMD64_R11());
1782 ppAMD64Instr(i
, mode64
);
1783 vpanic("getRegUsage_AMD64Instr");
1788 static inline void mapReg(HRegRemap
* m
, HReg
* r
)
1790 *r
= lookupHRegRemap(m
, *r
);
1793 void mapRegs_AMD64Instr ( HRegRemap
* m
, AMD64Instr
* i
, Bool mode64
)
1795 vassert(mode64
== True
);
1798 mapReg(m
, &i
->Ain
.Imm64
.dst
);
1801 mapRegs_AMD64RMI(m
, i
->Ain
.Alu64R
.src
);
1802 mapReg(m
, &i
->Ain
.Alu64R
.dst
);
1805 mapRegs_AMD64RI(m
, i
->Ain
.Alu64M
.src
);
1806 mapRegs_AMD64AMode(m
, i
->Ain
.Alu64M
.dst
);
1809 mapReg(m
, &i
->Ain
.Sh64
.dst
);
1812 mapReg(m
, &i
->Ain
.Test64
.dst
);
1815 mapReg(m
, &i
->Ain
.Unary64
.dst
);
1818 mapRegs_AMD64AMode(m
, i
->Ain
.Lea64
.am
);
1819 mapReg(m
, &i
->Ain
.Lea64
.dst
);
1822 mapRegs_AMD64RMI(m
, i
->Ain
.Alu32R
.src
);
1823 mapReg(m
, &i
->Ain
.Alu32R
.dst
);
1826 mapRegs_AMD64RM(m
, i
->Ain
.MulL
.src
);
1829 mapRegs_AMD64RM(m
, i
->Ain
.Div
.src
);
1832 mapRegs_AMD64RMI(m
, i
->Ain
.Push
.src
);
1837 mapRegs_AMD64AMode(m
, i
->Ain
.XDirect
.amRIP
);
1840 mapReg(m
, &i
->Ain
.XIndir
.dstGA
);
1841 mapRegs_AMD64AMode(m
, i
->Ain
.XIndir
.amRIP
);
1844 mapReg(m
, &i
->Ain
.XAssisted
.dstGA
);
1845 mapRegs_AMD64AMode(m
, i
->Ain
.XAssisted
.amRIP
);
1848 mapReg(m
, &i
->Ain
.CMov64
.src
);
1849 mapReg(m
, &i
->Ain
.CMov64
.dst
);
1852 mapRegs_AMD64AMode(m
, i
->Ain
.CLoad
.addr
);
1853 mapReg(m
, &i
->Ain
.CLoad
.dst
);
1856 mapRegs_AMD64AMode(m
, i
->Ain
.CStore
.addr
);
1857 mapReg(m
, &i
->Ain
.CStore
.src
);
1860 mapReg(m
, &i
->Ain
.MovxLQ
.src
);
1861 mapReg(m
, &i
->Ain
.MovxLQ
.dst
);
1864 mapRegs_AMD64AMode(m
, i
->Ain
.LoadEX
.src
);
1865 mapReg(m
, &i
->Ain
.LoadEX
.dst
);
1868 mapReg(m
, &i
->Ain
.Store
.src
);
1869 mapRegs_AMD64AMode(m
, i
->Ain
.Store
.dst
);
1872 mapReg(m
, &i
->Ain
.Set64
.dst
);
1875 mapReg(m
, &i
->Ain
.Bsfr64
.src
);
1876 mapReg(m
, &i
->Ain
.Bsfr64
.dst
);
1881 mapRegs_AMD64AMode(m
, i
->Ain
.ACAS
.addr
);
1884 mapRegs_AMD64AMode(m
, i
->Ain
.DACAS
.addr
);
1888 case Ain_A87PushPop
:
1889 mapRegs_AMD64AMode(m
, i
->Ain
.A87PushPop
.addr
);
1894 mapRegs_AMD64AMode(m
, i
->Ain
.A87LdCW
.addr
);
1897 mapRegs_AMD64AMode(m
, i
->Ain
.A87StSW
.addr
);
1900 mapRegs_AMD64AMode(m
, i
->Ain
.LdMXCSR
.addr
);
1903 mapReg(m
, &i
->Ain
.SseUComIS
.srcL
);
1904 mapReg(m
, &i
->Ain
.SseUComIS
.srcR
);
1905 mapReg(m
, &i
->Ain
.SseUComIS
.dst
);
1908 mapReg(m
, &i
->Ain
.SseSI2SF
.src
);
1909 mapReg(m
, &i
->Ain
.SseSI2SF
.dst
);
1912 mapReg(m
, &i
->Ain
.SseSF2SI
.src
);
1913 mapReg(m
, &i
->Ain
.SseSF2SI
.dst
);
1916 mapReg(m
, &i
->Ain
.SseSDSS
.src
);
1917 mapReg(m
, &i
->Ain
.SseSDSS
.dst
);
1920 mapReg(m
, &i
->Ain
.SseLdSt
.reg
);
1921 mapRegs_AMD64AMode(m
, i
->Ain
.SseLdSt
.addr
);
1924 mapRegs_AMD64AMode(m
, i
->Ain
.SseCStore
.addr
);
1925 mapReg(m
, &i
->Ain
.SseCStore
.src
);
1928 mapRegs_AMD64AMode(m
, i
->Ain
.SseCLoad
.addr
);
1929 mapReg(m
, &i
->Ain
.SseCLoad
.dst
);
1932 mapReg(m
, &i
->Ain
.SseLdzLO
.reg
);
1933 mapRegs_AMD64AMode(m
, i
->Ain
.SseLdzLO
.addr
);
1936 mapReg(m
, &i
->Ain
.Sse32Fx4
.src
);
1937 mapReg(m
, &i
->Ain
.Sse32Fx4
.dst
);
1940 mapReg(m
, &i
->Ain
.Sse32FLo
.src
);
1941 mapReg(m
, &i
->Ain
.Sse32FLo
.dst
);
1944 mapReg(m
, &i
->Ain
.Sse64Fx2
.src
);
1945 mapReg(m
, &i
->Ain
.Sse64Fx2
.dst
);
1948 mapReg(m
, &i
->Ain
.Sse64FLo
.src
);
1949 mapReg(m
, &i
->Ain
.Sse64FLo
.dst
);
1952 mapReg(m
, &i
->Ain
.SseReRg
.src
);
1953 mapReg(m
, &i
->Ain
.SseReRg
.dst
);
1956 mapReg(m
, &i
->Ain
.SseCMov
.src
);
1957 mapReg(m
, &i
->Ain
.SseCMov
.dst
);
1960 mapReg(m
, &i
->Ain
.SseShuf
.src
);
1961 mapReg(m
, &i
->Ain
.SseShuf
.dst
);
1964 mapReg(m
, &i
->Ain
.SseShiftN
.dst
);
1967 mapReg(m
, &i
->Ain
.SseMOVQ
.gpr
);
1968 mapReg(m
, &i
->Ain
.SseMOVQ
.xmm
);
1970 //uu case Ain_AvxLdSt:
1971 //uu mapReg(m, &i->Ain.AvxLdSt.reg);
1972 //uu mapRegs_AMD64AMode(m, i->Ain.AvxLdSt.addr);
1974 //uu case Ain_AvxReRg:
1975 //uu mapReg(m, &i->Ain.AvxReRg.src);
1976 //uu mapReg(m, &i->Ain.AvxReRg.dst);
1979 /* We expect both amodes only to mention %rbp, so this is in
1980 fact pointless, since %rbp isn't allocatable, but anyway.. */
1981 mapRegs_AMD64AMode(m
, i
->Ain
.EvCheck
.amCounter
);
1982 mapRegs_AMD64AMode(m
, i
->Ain
.EvCheck
.amFailAddr
);
1985 /* hardwires r11 -- nothing to modify. */
1988 ppAMD64Instr(i
, mode64
);
1989 vpanic("mapRegs_AMD64Instr");
/* Generate amd64 spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64M ( Aalu_MOV, AMD64RI_Reg(rreg), am );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( False/*store*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genSpill_AMD64: unimplemented regclass");
   }
}

void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   AMD64AMode* am;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   am = AMD64AMode_IR(offsetB, hregAMD64_RBP());
   switch (hregClass(rreg)) {
      case HRcInt64:
         *i1 = AMD64Instr_Alu64R ( Aalu_MOV, AMD64RMI_Mem(am), rreg );
         return;
      case HRcVec128:
         *i1 = AMD64Instr_SseLdSt ( True/*load*/, 16, rreg, am );
         return;
      default:
         ppHRegClass(hregClass(rreg));
         vpanic("genReload_AMD64: unimplemented regclass");
   }
}
* genMove_AMD64(HReg from
, HReg to
, Bool mode64
)
2043 switch (hregClass(from
)) {
2045 return AMD64Instr_Alu64R(Aalu_MOV
, AMD64RMI_Reg(from
), to
);
2047 return AMD64Instr_SseReRg(Asse_MOV
, from
, to
);
2049 ppHRegClass(hregClass(from
));
2050 vpanic("genMove_AMD64: unimplemented regclass");
AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
   vassert(spill_off >= 0 && spill_off < 10000); /* let's say */

   /* Deal with form: src=RMI_Reg, dst=Reg where src == vreg
      Convert to: src=RMI_Mem, dst=Reg
   */
   if (i->tag == Ain_Alu64R
       && (i->Ain.Alu64R.op == Aalu_MOV || i->Ain.Alu64R.op == Aalu_OR
           || i->Ain.Alu64R.op == Aalu_XOR)
       && i->Ain.Alu64R.src->tag == Armi_Reg
       && sameHReg(i->Ain.Alu64R.src->Armi.Reg.reg, vreg)) {
      vassert(! sameHReg(i->Ain.Alu64R.dst, vreg));
      return AMD64Instr_Alu64R(
                AMD64RMI_Mem( AMD64AMode_IR( spill_off, hregAMD64_RBP())),

   /* Deal with form: src=RMI_Imm, dst=Reg where dst == vreg
      Convert to: src=RI_Imm, dst=Mem
   */
   if (i->tag == Ain_Alu64R
       && (i->Ain.Alu64R.op == Aalu_CMP)
       && i->Ain.Alu64R.src->tag == Armi_Imm
       && sameHReg(i->Ain.Alu64R.dst, vreg)) {
      return AMD64Instr_Alu64M(
                AMD64RI_Imm( i->Ain.Alu64R.src->Armi.Imm.imm32 ),
                AMD64AMode_IR( spill_off, hregAMD64_RBP())
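/* Illustrative note, not part of the original source: as a concrete
   example of what directReload_AMD64 does, an instruction of the form
   "orq %vreg, %dst" where vreg has been spilled at offset 32 from %rbp
   can be rewritten as "orq 32(%rbp), %dst", avoiding a separate reload
   into a scratch register. */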
/* --------- The amd64 assembler (bleh.) --------- */

/* Produce the low three bits of an integer register number. */
inline static UInt iregEnc210 ( HReg r )
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);

/* Produce bit 3 of an integer register number. */
inline static UInt iregEnc3 ( HReg r )
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   return (n >> 3) & 1;

/* Produce a complete 4-bit integer register number. */
inline static UInt iregEnc3210 ( HReg r )
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);

/* Produce a complete 4-bit integer register number. */
inline static UInt vregEnc3210 ( HReg r )
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);

inline static UChar mkModRegRM ( UInt mod, UInt reg, UInt regmem )
   vassert((reg|regmem) < 8);
   return (UChar)( ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7) );

inline static UChar mkSIB ( UInt shift, UInt regindex, UInt regbase )
   vassert((regindex|regbase) < 8);
   return (UChar)( ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7) );
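/* Worked example (illustrative only, not from the original source):
   mkModRegRM(1, 0/*rax*/, 3/*rbx*/) packs mod=01, reg=000, rm=011 into
   the single byte 0x43, i.e. the ModRM byte of "movq %rax, d8(%rbx)".
   Likewise mkSIB(0, 1/*rcx*/, 4/*rsp*/) gives 0x0C, the SIB byte for
   an address of the form (%rsp,%rcx,1). */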
static UChar* emit32 ( UChar* p, UInt w32 )
   *p++ = toUChar((w32)       & 0x000000FF);
   *p++ = toUChar((w32 >>  8) & 0x000000FF);
   *p++ = toUChar((w32 >> 16) & 0x000000FF);
   *p++ = toUChar((w32 >> 24) & 0x000000FF);

static UChar* emit64 ( UChar* p, ULong w64 )
   p = emit32(p, toUInt(w64         & 0xFFFFFFFF));
   p = emit32(p, toUInt((w64 >> 32) & 0xFFFFFFFF));
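/* Illustrative example, not from the original: emit32(p, 0x11223344)
   stores the bytes 44 33 22 11 at p[0..3], since the target is
   little-endian; emit64 simply emits the low then the high 32-bit
   half in the same way. */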
/* Does a sign-extend of the lowest 8 bits give
   the original number? */
static Bool fits8bits ( UInt w32 )
   Int i32 = (Int)w32;
   return toBool(i32 == ((Int)(w32 << 24) >> 24));

/* Can the lower 32 bits be signedly widened to produce the whole
   64-bit value?  In other words, are the top 33 bits either all 0 or
   all 1 ? */
static Bool fitsIn32Bits ( ULong x )
   return toBool(x == y1);
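/* Illustrative values, not from the original: fits8bits(0x7F) and
   fits8bits(0xFFFFFF80) (-128 viewed as a signed 32-bit value) are
   True, while fits8bits(0x80) is False.  Similarly fitsIn32Bits
   accepts 0x000000007FFFFFFFULL and 0xFFFFFFFF80000000ULL but rejects
   0x0000000080000000ULL, whose top 33 bits are neither all 0 nor
   all 1. */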
/* Forming mod-reg-rm bytes and scale-index-base bytes.

     greg,  0(ereg)    |  ereg is not any of: RSP RBP R12 R13
                       =  00 greg ereg

     greg,  d8(ereg)   |  ereg is neither of: RSP R12
                       =  01 greg ereg, d8

     greg,  d32(ereg)  |  ereg is neither of: RSP R12
                       =  10 greg ereg, d32

     greg,  d8(ereg)   |  ereg is either: RSP R12
                       =  01 greg 100, 0x24, d8
                       (lowest bit of rex distinguishes R12/RSP)

     greg,  d32(ereg)  |  ereg is either: RSP R12
                       =  10 greg 100, 0x24, d32
                       (lowest bit of rex distinguishes R12/RSP)

     -----------------------------------------------

     greg,  d8(base,index,scale)
                       =  01 greg 100, scale index base, d8

     greg,  d32(base,index,scale)
                       =  10 greg 100, scale index base, d32
*/
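/* Worked example (illustrative, not from the original): for greg = %rax
   and am = 8(%r12), the IR case below must take the SIB path, because
   R12 shares its low three encoding bits with RSP.  The bytes produced
   are mod=01 greg=000 rm=100 (0x44), the placeholder SIB 0x24, and the
   displacement 0x08; the REX.B bit, supplied by rexAMode_M, is what
   distinguishes %r12 from %rsp. */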
static UChar* doAMode_M__wrk ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
   UInt gregEnc210 = gregEnc3210 & 7;
   if (am->tag == Aam_IR) {
      if (am->Aam.IR.imm == 0
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RBP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R13())
         *p++ = mkModRegRM(0, gregEnc210, iregEnc210(am->Aam.IR.reg));
      if (fits8bits(am->Aam.IR.imm)
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         *p++ = mkModRegRM(1, gregEnc210, iregEnc210(am->Aam.IR.reg));
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
      if (! sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
          && ! sameHReg(am->Aam.IR.reg, hregAMD64_R12())
         *p++ = mkModRegRM(2, gregEnc210, iregEnc210(am->Aam.IR.reg));
         p = emit32(p, am->Aam.IR.imm);
      if ((sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
           || sameHReg(am->Aam.IR.reg, hregAMD64_R12()))
          && fits8bits(am->Aam.IR.imm)) {
         *p++ = mkModRegRM(1, gregEnc210, 4);
         *p++ = toUChar(am->Aam.IR.imm & 0xFF);
      if (/* (sameHReg(am->Aam.IR.reg, hregAMD64_RSP())
             || wait for test case for RSP case */
          sameHReg(am->Aam.IR.reg, hregAMD64_R12())) {
         *p++ = mkModRegRM(2, gregEnc210, 4);
         p = emit32(p, am->Aam.IR.imm);
      vpanic("doAMode_M: can't emit amode IR");
   if (am->tag == Aam_IRRS) {
      if (fits8bits(am->Aam.IRRS.imm)
          && ! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(1, gregEnc210, 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
                      iregEnc210(am->Aam.IRRS.base));
         *p++ = toUChar(am->Aam.IRRS.imm & 0xFF);
      if (! sameHReg(am->Aam.IRRS.index, hregAMD64_RSP())) {
         *p++ = mkModRegRM(2, gregEnc210, 4);
         *p++ = mkSIB(am->Aam.IRRS.shift, iregEnc210(am->Aam.IRRS.index),
                      iregEnc210(am->Aam.IRRS.base));
         p = emit32(p, am->Aam.IRRS.imm);
      vpanic("doAMode_M: can't emit amode IRRS");
   vpanic("doAMode_M: unknown amode");

static UChar* doAMode_M ( UChar* p, HReg greg, AMD64AMode* am )
   return doAMode_M__wrk(p, iregEnc3210(greg), am);

static UChar* doAMode_M_enc ( UChar* p, UInt gregEnc3210, AMD64AMode* am )
   vassert(gregEnc3210 < 16);
   return doAMode_M__wrk(p, gregEnc3210, am);
/* Emit a mod-reg-rm byte when the rm bit denotes a reg. */

static UChar* doAMode_R__wrk ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
   *p++ = mkModRegRM(3, gregEnc3210 & 7, eregEnc3210 & 7);

static UChar* doAMode_R ( UChar* p, HReg greg, HReg ereg )
   return doAMode_R__wrk(p, iregEnc3210(greg), iregEnc3210(ereg));

static UChar* doAMode_R_enc_reg ( UChar* p, UInt gregEnc3210, HReg ereg )
   vassert(gregEnc3210 < 16);
   return doAMode_R__wrk(p, gregEnc3210, iregEnc3210(ereg));

static UChar* doAMode_R_reg_enc ( UChar* p, HReg greg, UInt eregEnc3210 )
   vassert(eregEnc3210 < 16);
   return doAMode_R__wrk(p, iregEnc3210(greg), eregEnc3210);

static UChar* doAMode_R_enc_enc ( UChar* p, UInt gregEnc3210, UInt eregEnc3210 )
   vassert( (gregEnc3210|eregEnc3210) < 16);
   return doAMode_R__wrk(p, gregEnc3210, eregEnc3210);
/* Clear the W bit on a REX byte, thereby changing the operand size
   back to whatever that instruction's default operand size is. */
static inline UChar clearWBit ( UChar rex )
   return rex & ~(1<<3);

static inline UChar setWBit ( UChar rex )
   return rex | (1<<3);
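/* Illustrative note, not from the original: a REX prefix has the form
   0100WRXB, so bit 3 is W.  clearWBit(0x48) gives 0x40 (the "no-op"
   REX), which several cases below then skip emitting entirely when a
   32-bit operand size and low register numbers make it redundant. */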
/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
inline static UChar rexAMode_M__wrk ( UInt gregEnc3210, AMD64AMode* am )
   if (am->tag == Aam_IR) {
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = (gregEnc3210 >> 3) & 1;
      UChar X = 0; /* not relevant */
      UChar B = iregEnc3(am->Aam.IR.reg);
      return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
   if (am->tag == Aam_IRRS) {
      UChar W = 1;  /* we want 64-bit mode */
      UChar R = (gregEnc3210 >> 3) & 1;
      UChar X = iregEnc3(am->Aam.IRRS.index);
      UChar B = iregEnc3(am->Aam.IRRS.base);
      return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));
   return 0; /*NOTREACHED*/

static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
   return rexAMode_M__wrk(iregEnc3210(greg), am);

static UChar rexAMode_M_enc ( UInt gregEnc3210, AMD64AMode* am )
   vassert(gregEnc3210 < 16);
   return rexAMode_M__wrk(gregEnc3210, am);

/* Make up a REX byte, with W=1 (size=64), for a (greg,ereg) pair. */
inline static UChar rexAMode_R__wrk ( UInt gregEnc3210, UInt eregEnc3210 )
   UChar W = 1;  /* we want 64-bit mode */
   UChar R = (gregEnc3210 >> 3) & 1;
   UChar X = 0; /* not relevant */
   UChar B = (eregEnc3210 >> 3) & 1;
   return 0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0));

static UChar rexAMode_R ( HReg greg, HReg ereg )
   return rexAMode_R__wrk(iregEnc3210(greg), iregEnc3210(ereg));

static UChar rexAMode_R_enc_reg ( UInt gregEnc3210, HReg ereg )
   vassert(gregEnc3210 < 16);
   return rexAMode_R__wrk(gregEnc3210, iregEnc3210(ereg));

static UChar rexAMode_R_reg_enc ( HReg greg, UInt eregEnc3210 )
   vassert(eregEnc3210 < 16);
   return rexAMode_R__wrk(iregEnc3210(greg), eregEnc3210);

static UChar rexAMode_R_enc_enc ( UInt gregEnc3210, UInt eregEnc3210 )
   vassert((gregEnc3210|eregEnc3210) < 16);
   return rexAMode_R__wrk(gregEnc3210, eregEnc3210);
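/* A minimal sketch (illustrative only, not part of the original
   emitter) of how the REX/ModRM helpers above combine to emit one
   complete instruction, here "movq %greg, am", the same pattern used
   by the Ain_Alu64M MOV case further down. */
#if 0
static UChar* example_emit_movq_to_mem ( UChar* p, HReg greg, AMD64AMode* am )
{
   *p++ = rexAMode_M(greg, am);   /* REX.W plus R/X/B as needed */
   *p++ = 0x89;                   /* MOV r/m64, r64 */
   p = doAMode_M(p, greg, am);    /* ModRM, optional SIB, displacement */
   return p;
}
#endif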
//uu /* May 2012: this VEX prefix stuff is currently unused, but has been
//uu    verified correct (I reckon).  Certainly it has been known to
//uu    produce correct VEX prefixes during testing. */

//uu /* Assemble a 2 or 3 byte VEX prefix from parts.  rexR, rexX, rexB and
//uu    notVvvvv need to be not-ed before packing.  mmmmm, rexW, L and pp go
//uu    in verbatim.  There's no range checking on the bits. */
//uu static UInt packVexPrefix ( UInt rexR, UInt rexX, UInt rexB,
//uu                             UInt mmmmm, UInt rexW, UInt notVvvv,
//uu                             UInt L, UInt pp )
//uu    UChar byte0 = 0;
//uu    UChar byte1 = 0;
//uu    UChar byte2 = 0;
//uu    if (rexX == 0 && rexB == 0 && mmmmm == 1 && rexW == 0) {
//uu       /* 2 byte encoding is possible. */
//uu       byte1 = ((rexR ^ 1) << 7) | ((notVvvv ^ 0xF) << 3)
//uu               | (L << 2) | pp;
//uu       /* 3 byte encoding is needed. */
//uu       byte1 = ((rexR ^ 1) << 7) | ((rexX ^ 1) << 6)
//uu               | ((rexB ^ 1) << 5) | mmmmm;
//uu       byte2 = (rexW << 7) | ((notVvvv ^ 0xF) << 3) | (L << 2) | pp;
//uu    return (((UInt)byte2) << 16) | (((UInt)byte1) << 8) | ((UInt)byte0);

//uu /* Make up a VEX prefix for a (greg,amode) pair.  First byte in bits
//uu    7:0 of result, second in 15:8, third (for a 3 byte prefix) in
//uu    23:16.  Has m-mmmm set to indicate a prefix of 0F, pp set to
//uu    indicate no SIMD prefix, W=0 (ignore), L=1 (size=256), and
//uu    vvvv=1111 (unused 3rd reg). */
//uu static UInt vexAMode_M ( HReg greg, AMD64AMode* am )
//uu    UChar L       = 1; /* size = 256 */
//uu    UChar pp      = 0; /* no SIMD prefix */
//uu    UChar mmmmm   = 1; /* 0F */
//uu    UChar notVvvv = 0; /* unused */
//uu    UChar rexW    = 0;
//uu    UChar rexR    = 0;
//uu    UChar rexX    = 0;
//uu    UChar rexB    = 0;
//uu    /* Same logic as in rexAMode_M. */
//uu    if (am->tag == Aam_IR) {
//uu       rexR = iregEnc3(greg);
//uu       rexX = 0; /* not relevant */
//uu       rexB = iregEnc3(am->Aam.IR.reg);
//uu    else if (am->tag == Aam_IRRS) {
//uu       rexR = iregEnc3(greg);
//uu       rexX = iregEnc3(am->Aam.IRRS.index);
//uu       rexB = iregEnc3(am->Aam.IRRS.base);
//uu    return packVexPrefix( rexR, rexX, rexB, mmmmm, rexW, notVvvv, L, pp );

//uu static UChar* emitVexPrefix ( UChar* p, UInt vex )
//uu    switch (vex & 0xFF) {
//uu       *p++ = (vex >> 8) & 0xFF;
//uu       vassert(0 == (vex >> 16));
//uu       *p++ = (vex >> 8) & 0xFF;
//uu       *p++ = (vex >> 16) & 0xFF;
//uu       vassert(0 == (vex >> 24));
/* Emit ffree %st(N) */
static UChar* do_ffree_st ( UChar* p, Int n )
   vassert(n >= 0 && n <= 7);
   *p++ = toUChar(0xC0 + n);
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
                      UChar* buf, Int nbuf, const AMD64Instr* i,
                      Bool mode64, VexEndness endness_host,
                      const void* disp_cp_chain_me_to_slowEP,
                      const void* disp_cp_chain_me_to_fastEP,
                      const void* disp_cp_xindir,
                      const void* disp_cp_xassisted )
   UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
   vassert(nbuf >= 64);
   vassert(mode64 == True);

   /* vex_printf("asm  "); ppAMD64Instr(i, mode64); vex_printf("\n"); */
      if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
         /* Use the short form (load into 32 bit reg, + default
            widening rule) for constants under 1 million.  We could
            use this form for the range 0 to 0x7FFFFFFF inclusive, but
            limit it to a smaller range for verifiability purposes. */
         if (1 & iregEnc3(i->Ain.Imm64.dst))
         *p++ = 0xB8 + iregEnc210(i->Ain.Imm64.dst);
         p = emit32(p, (UInt)i->Ain.Imm64.imm64);
         *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Imm64.dst)));
         *p++ = toUChar(0xB8 + iregEnc210(i->Ain.Imm64.dst));
         p = emit64(p, i->Ain.Imm64.imm64);
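      /* Illustrative example, not from the original: Ain_Imm64 with
         imm64 = 0x1234 and dst = %rcx takes the short form and emits
         B9 34 12 00 00 ("movl $0x1234, %ecx"), relying on the implicit
         zero-extension; a full 64-bit constant into %rcx would instead
         be 48 B9 followed by the eight immediate bytes ("movabsq"). */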
      /* Deal specially with MOV */
      if (i->Ain.Alu64R.op == Aalu_MOV) {
         switch (i->Ain.Alu64R.src->tag) {
               if (0 == (i->Ain.Alu64R.src->Armi.Imm.imm32 & ~0xFFFFF)) {
                  /* Actually we could use this form for constants in
                     the range 0 through 0x7FFFFFFF inclusive, but
                     limit it to a small range for verifiability
                     purposes. */
                  /* Generate "movl $imm32, 32-bit-register" and let
                     the default zero-extend rule cause the upper half
                     of the dst to be zeroed out too.  This saves 1
                     and sometimes 2 bytes compared to the more
                     obvious encoding in the 'else' branch. */
                  if (1 & iregEnc3(i->Ain.Alu64R.dst))
                  *p++ = 0xB8 + iregEnc210(i->Ain.Alu64R.dst);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
                  *p++ = toUChar(0x48 + (1 & iregEnc3(i->Ain.Alu64R.dst)));
                  *p++ = toUChar(0xC0 + iregEnc210(i->Ain.Alu64R.dst));
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
               *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
                                  i->Ain.Alu64R.dst );
               p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
               *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                 i->Ain.Alu64R.src->Armi.Mem.am);
               p = doAMode_M(p, i->Ain.Alu64R.dst,
                             i->Ain.Alu64R.src->Armi.Mem.am);
      if (i->Ain.Alu64R.op == Aalu_MUL) {
         switch (i->Ain.Alu64R.src->tag) {
               *p++ = rexAMode_R( i->Ain.Alu64R.dst,
                                  i->Ain.Alu64R.src->Armi.Reg.reg);
               p = doAMode_R(p, i->Ain.Alu64R.dst,
                             i->Ain.Alu64R.src->Armi.Reg.reg);
               *p++ = rexAMode_M(i->Ain.Alu64R.dst,
                                 i->Ain.Alu64R.src->Armi.Mem.am);
               p = doAMode_M(p, i->Ain.Alu64R.dst,
                             i->Ain.Alu64R.src->Armi.Mem.am);
               if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
                  *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
                  *p++ = rexAMode_R(i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  p = doAMode_R(p, i->Ain.Alu64R.dst, i->Ain.Alu64R.dst);
                  p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Ain.Alu64R.op) {
         case Aalu_ADC: opc = 0x13; opc_rr = 0x11;
                        subopc_imm = 2; opc_imma = 0x15; break;
         case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Aalu_SBB: opc = 0x1B; opc_rr = 0x19;
                        subopc_imm = 3; opc_imma = 0x1D; break;
         case Aalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Aalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
      switch (i->Ain.Alu64R.src->tag) {
            if (sameHReg(i->Ain.Alu64R.dst, hregAMD64_RAX())
                && !fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
               goto bad; /* FIXME: awaiting test case */
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
            if (fits8bits(i->Ain.Alu64R.src->Armi.Imm.imm32)) {
               *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst );
               p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
               *p++ = toUChar(0xFF & i->Ain.Alu64R.src->Armi.Imm.imm32);
               *p++ = rexAMode_R_enc_reg( 0, i->Ain.Alu64R.dst);
               p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu64R.dst);
               p = emit32(p, i->Ain.Alu64R.src->Armi.Imm.imm32);
            *p++ = rexAMode_R( i->Ain.Alu64R.src->Armi.Reg.reg,
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Ain.Alu64R.src->Armi.Reg.reg,
            *p++ = rexAMode_M( i->Ain.Alu64R.dst,
                               i->Ain.Alu64R.src->Armi.Mem.am);
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Ain.Alu64R.dst,
                          i->Ain.Alu64R.src->Armi.Mem.am);
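      /* Illustrative note, not from the original: the four values
         selected in the opcode table above follow the usual x86
         pattern.  For Aalu_ADD, opc 0x03 is "add reg <- reg/mem",
         opc_rr 0x01 is the reverse direction, subopc_imm 0 is the /0
         extension used with the 0x83/0x81 immediate group, and
         opc_imma 0x05 is the short "add $imm32, %rax" form (currently
         routed to 'bad' pending a test case). */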
      /* Deal specially with MOV */
      if (i->Ain.Alu64M.op == Aalu_MOV) {
         switch (i->Ain.Alu64M.src->tag) {
               *p++ = rexAMode_M(i->Ain.Alu64M.src->Ari.Reg.reg,
               p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
               *p++ = rexAMode_M_enc(0, i->Ain.Alu64M.dst);
               p = doAMode_M_enc(p, 0, i->Ain.Alu64M.dst);
               p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
      /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP.  MUL is not
         allowed here. (This is derived from the x86 version of same). */
      opc = subopc_imm = opc_imma = 0;
      switch (i->Ain.Alu64M.op) {
         case Aalu_CMP: opc = 0x39; subopc_imm = 7; break;
      switch (i->Ain.Alu64M.src->tag) {
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Ain.Alu64M.src->Ari.Reg.reg,
            if (fits8bits(i->Ain.Alu64M.src->Ari.Imm.imm32)) {
               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
               p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
               *p++ = toUChar(0xFF & i->Ain.Alu64M.src->Ari.Imm.imm32);
               *p++ = rexAMode_M_enc(subopc_imm, i->Ain.Alu64M.dst);
               p = doAMode_M_enc(p, subopc_imm, i->Ain.Alu64M.dst);
               p = emit32(p, i->Ain.Alu64M.src->Ari.Imm.imm32);
      opc_cl = opc_imm = subopc = 0;
      switch (i->Ain.Sh64.op) {
         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
      if (i->Ain.Sh64.src == 0) {
         *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
         *p++ = toUChar(opc_cl);
         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
         *p++ = rexAMode_R_enc_reg(0, i->Ain.Sh64.dst);
         *p++ = toUChar(opc_imm);
         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh64.dst);
         *p++ = (UChar)(i->Ain.Sh64.src);
      /* testq sign-extend($imm32), %reg */
      *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
      p = doAMode_R_enc_reg(p, 0, i->Ain.Test64.dst);
      p = emit32(p, i->Ain.Test64.imm32);
      if (i->Ain.Unary64.op == Aun_NOT) {
         *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
         p = doAMode_R_enc_reg(p, 2, i->Ain.Unary64.dst);
      if (i->Ain.Unary64.op == Aun_NEG) {
         *p++ = rexAMode_R_enc_reg(0, i->Ain.Unary64.dst);
         p = doAMode_R_enc_reg(p, 3, i->Ain.Unary64.dst);
      *p++ = rexAMode_M(i->Ain.Lea64.dst, i->Ain.Lea64.am);
      p = doAMode_M(p, i->Ain.Lea64.dst, i->Ain.Lea64.am);
      /* ADD/SUB/AND/OR/XOR/CMP */
      opc = opc_rr = subopc_imm = opc_imma = 0;
      switch (i->Ain.Alu32R.op) {
         case Aalu_ADD: opc = 0x03; opc_rr = 0x01;
                        subopc_imm = 0; opc_imma = 0x05; break;
         case Aalu_SUB: opc = 0x2B; opc_rr = 0x29;
                        subopc_imm = 5; opc_imma = 0x2D; break;
         case Aalu_AND: opc = 0x23; opc_rr = 0x21;
                        subopc_imm = 4; opc_imma = 0x25; break;
         case Aalu_XOR: opc = 0x33; opc_rr = 0x31;
                        subopc_imm = 6; opc_imma = 0x35; break;
         case Aalu_OR:  opc = 0x0B; opc_rr = 0x09;
                        subopc_imm = 1; opc_imma = 0x0D; break;
         case Aalu_CMP: opc = 0x3B; opc_rr = 0x39;
                        subopc_imm = 7; opc_imma = 0x3D; break;
      switch (i->Ain.Alu32R.src->tag) {
            if (sameHReg(i->Ain.Alu32R.dst, hregAMD64_RAX())
                && !fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
               goto bad; /* FIXME: awaiting test case */
               *p++ = toUChar(opc_imma);
               p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
            if (fits8bits(i->Ain.Alu32R.src->Armi.Imm.imm32)) {
               rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst ) );
               if (rex != 0x40) *p++ = rex;
               p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
               *p++ = toUChar(0xFF & i->Ain.Alu32R.src->Armi.Imm.imm32);
               rex = clearWBit( rexAMode_R_enc_reg( 0, i->Ain.Alu32R.dst) );
               if (rex != 0x40) *p++ = rex;
               p = doAMode_R_enc_reg(p, subopc_imm, i->Ain.Alu32R.dst);
               p = emit32(p, i->Ain.Alu32R.src->Armi.Imm.imm32);
                     rexAMode_R( i->Ain.Alu32R.src->Armi.Reg.reg,
                                 i->Ain.Alu32R.dst) );
            if (rex != 0x40) *p++ = rex;
            *p++ = toUChar(opc_rr);
            p = doAMode_R(p, i->Ain.Alu32R.src->Armi.Reg.reg,
                     rexAMode_M( i->Ain.Alu32R.dst,
                                 i->Ain.Alu32R.src->Armi.Mem.am) );
            if (rex != 0x40) *p++ = rex;
            *p++ = toUChar(opc);
            p = doAMode_M(p, i->Ain.Alu32R.dst,
                          i->Ain.Alu32R.src->Armi.Mem.am);
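      /* Illustrative note, not from the original: Ain_Alu32R reuses the
         64-bit encodings but passes every REX byte through clearWBit,
         and drops the prefix altogether when it collapses to the no-op
         0x40.  For example "addl %ecx, %eax" needs no REX byte at all,
         whereas "addl %r9d, %eax" still needs 0x44 for the REX.R
         bit. */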
      subopc = i->Ain.MulL.syned ? 5 : 4;
      switch (i->Ain.MulL.src->tag) {
            *p++ = rexAMode_M_enc(0, i->Ain.MulL.src->Arm.Mem.am);
            p = doAMode_M_enc(p, subopc, i->Ain.MulL.src->Arm.Mem.am);
            *p++ = rexAMode_R_enc_reg(0, i->Ain.MulL.src->Arm.Reg.reg);
            p = doAMode_R_enc_reg(p, subopc, i->Ain.MulL.src->Arm.Reg.reg);
      subopc = i->Ain.Div.syned ? 7 : 6;
      if (i->Ain.Div.sz == 4) {
         switch (i->Ain.Div.src->tag) {
               p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
                        rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg));
               p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
      if (i->Ain.Div.sz == 8) {
         switch (i->Ain.Div.src->tag) {
               *p++ = rexAMode_M_enc(0, i->Ain.Div.src->Arm.Mem.am);
               p = doAMode_M_enc(p, subopc, i->Ain.Div.src->Arm.Mem.am);
               *p++ = rexAMode_R_enc_reg(0, i->Ain.Div.src->Arm.Reg.reg);
               p = doAMode_R_enc_reg(p, subopc, i->Ain.Div.src->Arm.Reg.reg);
      switch (i->Ain.Push.src->tag) {
                     rexAMode_M_enc(0, i->Ain.Push.src->Armi.Mem.am));
            p = doAMode_M_enc(p, 6, i->Ain.Push.src->Armi.Mem.am);
            p = emit32(p, i->Ain.Push.src->Armi.Imm.imm32);
            *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.Push.src->Armi.Reg.reg)));
            *p++ = toUChar(0x50 + iregEnc210(i->Ain.Push.src->Armi.Reg.reg));
      /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
         above, %r11 is used as an address temporary. */
      /* If we don't need to do any fixup actions in the case that the
         call doesn't happen, just do the simple thing and emit
         straight-line code.  This is usually the case. */
      if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
          || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
         /* jump over the following two insns if the condition does
            not hold */
         Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
         if (i->Ain.Call.cond != Acc_ALWAYS) {
            *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
            *p++ = shortImm ? 10 : 13;
            /* 10 or 13 bytes in the next two insns */
         /* 7 bytes: movl sign-extend(imm32), %r11 */
         p = emit32(p, (UInt)i->Ain.Call.target);
         /* 10 bytes: movabsq $target, %r11 */
         p = emit64(p, i->Ain.Call.target);
         /* 3 bytes: call *%r11 */
         /* Complex case.  We have to generate an if-then-else diamond. */
         // movabsq $target, %r11
         // movabsq $0x5555555555555555, %rax  // possibly
         // movq %rax, %rdx  // possibly
         *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
         // movabsq $target, %r11
         p = emit64(p, i->Ain.Call.target);
         UChar* pPreElse = p;
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
         /* Do the 'else' actions */
         switch (i->Ain.Call.rloc.pri) {
               // movabsq $0x5555555555555555, %rax
               *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
               // movabsq $0x5555555555555555, %rax
               *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
               *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
            case RLPri_V128SpRel:
               if (i->Ain.Call.rloc.spOff == 0) {
                  // We could accept any |spOff| here, but that's more
                  // hassle and the only value we're ever going to get
                  // is zero (I believe.)  Hence take the easy path :)
                  // We need a scag register -- r11 can be it.
                  // movabsq $0x5555555555555555, %r11
                  *p++ = 0x49; *p++ = 0xBB;
                  p = emit64(p, 0x5555555555555555ULL);
                  // movq %r11, 0(%rsp)
                  *p++ = 0x4C; *p++ = 0x89; *p++ = 0x1C; *p++ = 0x24;
                  // movq %r11, 8(%rsp)
                  *p++ = 0x4C; *p++ = 0x89; *p++ = 0x5C; *p++ = 0x24;
               goto bad; //ATC for all other spOff values
            case RLPri_V256SpRel:
            case RLPri_None: case RLPri_INVALID: default:
               vassert(0); // should never get here
         // Fix up the branch offsets.  The +2s in the offset
         // calculations are there because x86 requires conditional
         // branches to have their offset stated relative to the
         // instruction immediately following the branch insn.  And in
         // both cases the branch insns are 2 bytes long.
         // First, the "j{!cond} else:" at pBefore.
         delta = (Int)(Long)(pElse - (pBefore + 2));
         vassert(delta >= 0 && delta < 100/*arbitrary*/);
         *(pBefore+1) = (UChar)delta;
         // And secondly, the "jmp after:" at pPreElse.
         delta = (Int)(Long)(pAfter - (pPreElse + 2));
         vassert(delta >= 0 && delta < 100/*arbitrary*/);
         *(pPreElse+1) = (UChar)delta;
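      /* Illustrative note, not from the original: in the simple case
         the two instructions referred to above are 49 C7 C3 <imm32>,
         the 7-byte sign-extending "movq $imm32, %r11", or 49 BB
         <imm64>, the 10-byte movabsq, followed in either case by the
         3-byte "call *%r11" (41 FF D3) -- hence the skip distance of
         10 or 13 bytes written into the conditional jump. */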
      /* NB: what goes on here has to be very closely coordinated with the
         chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
      /* We're generating chain-me requests here, so we need to be
         sure this is actually allowed -- no-redir translations can't
         use chain-me's.  Hence: */
      vassert(disp_cp_chain_me_to_slowEP != NULL);
      vassert(disp_cp_chain_me_to_fastEP != NULL);
      HReg r11 = hregAMD64_R11();
      /* Use ptmp for backpatching conditional jumps. */
      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Ain.XDirect.cond != Acc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* Update the guest RIP. */
      if (fitsIn32Bits(i->Ain.XDirect.dstGA)) {
         /* use a shorter encoding */
         /* movl sign-extend(dstGA), %r11 */
         p = emit32(p, (UInt)i->Ain.XDirect.dstGA);
         /* movabsq $dstGA, %r11 */
         p = emit64(p, i->Ain.XDirect.dstGA);
      /* movq %r11, amRIP */
      *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
      p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
      /* --- FIRST PATCHABLE BYTE follows --- */
      /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
         to) backs up the return address, so as to find the address of
         the first patchable byte.  So: don't change the length of the
         two instructions below. */
      /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
      const void* disp_cp_chain_me
         = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                   : disp_cp_chain_me_to_slowEP;
      p = emit64(p, (Addr)disp_cp_chain_me);
      /* --- END of PATCHABLE BYTES --- */
      /* Fix up the conditional jump, if there was one. */
      if (i->Ain.XDirect.cond != Acc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      /* We're generating transfers that could lead indirectly to a
         chain-me, so we need to be sure this is actually allowed --
         no-redir translations are not allowed to reach normal
         translations without going through the scheduler.  That means
         no XDirects or XIndirs out from no-redir translations.
         Hence: */
      vassert(disp_cp_xindir != NULL);
      /* Use ptmp for backpatching conditional jumps. */
      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Ain.XIndir.cond != Acc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
      *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
      p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
      /* get $disp_cp_xindir into %r11 */
      if (fitsIn32Bits((Addr)disp_cp_xindir)) {
         /* use a shorter encoding */
         /* movl sign-extend(disp_cp_xindir), %r11 */
         p = emit32(p, (UInt)(Addr)disp_cp_xindir);
         /* movabsq $disp_cp_xindir, %r11 */
         p = emit64(p, (Addr)disp_cp_xindir);
      /* Fix up the conditional jump, if there was one. */
      if (i->Ain.XIndir.cond != Acc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
   case Ain_XAssisted: {
      /* Use ptmp for backpatching conditional jumps. */
      /* First off, if this is conditional, create a conditional
         jump over the rest of it. */
      if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
         /* jmp fwds if !condition */
         *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
         ptmp = p; /* fill in this bit later */
         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
      *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
      p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
      /* movl $magic_number, %ebp.  Since these numbers are all small positive
         integers, we can get away with "movl $N, %ebp" rather than
         the longer "movq $N, %rbp". */
      switch (i->Ain.XAssisted.jk) {
         case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
         case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
         case Ijk_Sys_int32:   trcval = VEX_TRC_JMP_SYS_INT32;   break;
         case Ijk_Sys_int210:  trcval = VEX_TRC_JMP_SYS_INT210;  break;
         case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
         case Ijk_EmWarn:      trcval = VEX_TRC_JMP_EMWARN;      break;
         case Ijk_MapFail:     trcval = VEX_TRC_JMP_MAPFAIL;     break;
         case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
         case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
         case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
         case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
         case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
         case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
         /* We don't expect to see the following being assisted. */
            ppIRJumpKind(i->Ain.XAssisted.jk);
            vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
      vassert(trcval != 0);
      p = emit32(p, trcval);
      /* movabsq $disp_assisted, %r11 */
      p = emit64(p, (Addr)disp_cp_xassisted);
      /* Fix up the conditional jump, if there was one. */
      if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
         Int delta = p - ptmp;
         vassert(delta > 0 && delta < 40);
         *ptmp = toUChar(delta-1);
      vassert(i->Ain.CMov64.cond != Acc_ALWAYS);
      *p++ = rexAMode_R(i->Ain.CMov64.dst, i->Ain.CMov64.src);
      *p++ = toUChar(0x40 + (0xF & i->Ain.CMov64.cond));
      p = doAMode_R(p, i->Ain.CMov64.dst, i->Ain.CMov64.src);
      vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
      /* Only 32- or 64-bit variants are allowed. */
      vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
      /* Use ptmp for backpatching conditional jumps. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
      ptmp = p; /* fill in this bit later */
      *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* Now the load.  Either a normal 64 bit load or a normal 32 bit
         load, which, by the default zero-extension rule, zeroes out
         the upper half of the destination, as required. */
      rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
      *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
      p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
      /* Fix up the conditional branch */
      Int delta = p - ptmp;
      vassert(delta > 0 && delta < 40);
      *ptmp = toUChar(delta-1);
      /* AFAICS this is identical to Ain_CLoad except that the opcode
         is 0x89 instead of 0x8B. */
      vassert(i->Ain.CStore.cond != Acc_ALWAYS);
      /* Only 32- or 64-bit variants are allowed. */
      vassert(i->Ain.CStore.szB == 4 || i->Ain.CStore.szB == 8);
      /* Use ptmp for backpatching conditional jumps. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (0xF & (i->Ain.CStore.cond ^ 1)));
      ptmp = p; /* fill in this bit later */
      *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* Now the store. */
      rex = rexAMode_M(i->Ain.CStore.src, i->Ain.CStore.addr);
      *p++ = i->Ain.CStore.szB == 4 ? clearWBit(rex) : rex;
      p = doAMode_M(p, i->Ain.CStore.src, i->Ain.CStore.addr);
      /* Fix up the conditional branch */
      Int delta = p - ptmp;
      vassert(delta > 0 && delta < 40);
      *ptmp = toUChar(delta-1);
      /* No, _don't_ ask me why the sense of the args has to be
         different in the S vs Z case.  I don't know. */
      if (i->Ain.MovxLQ.syned) {
         /* Need REX.W = 1 here, but rexAMode_R does that for us. */
         *p++ = rexAMode_R(i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
         p = doAMode_R(p, i->Ain.MovxLQ.dst, i->Ain.MovxLQ.src);
         /* Produce a 32-bit reg-reg move, since the implicit
            zero-extend does what we want. */
                  rexAMode_R(i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst));
         p = doAMode_R(p, i->Ain.MovxLQ.src, i->Ain.MovxLQ.dst);
      if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
         *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
      if (i->Ain.LoadEX.szSmall == 2 && !i->Ain.LoadEX.syned) {
         *p++ = rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
      if (i->Ain.LoadEX.szSmall == 4 && !i->Ain.LoadEX.syned) {
         /* This isn't really an existing AMD64 instruction per se.
            Rather, we have to do a 32-bit load.  Because a 32-bit
            write implicitly clears the upper 32 bits of the target
            register, we get what we want. */
                  rexAMode_M(i->Ain.LoadEX.dst, i->Ain.LoadEX.src));
         p = doAMode_M(p, i->Ain.LoadEX.dst, i->Ain.LoadEX.src);
      /* Make the destination register be 1 or 0, depending on whether
         the relevant condition holds.  Complication: the top 56 bits
         of the destination should be forced to zero, but doing 'xorq
         %r,%r' kills the flag(s) we are about to read.  Sigh.  So
         start off by moving $0 into the dest. */
      reg = iregEnc3210(i->Ain.Set64.dst);
      *p++ = toUChar(reg >= 8 ? 0x49 : 0x48);
      *p++ = toUChar(0xC0 + (reg & 7));
      /* setb lo8(%dst) */
      /* note, 8-bit register rex trickyness.  Be careful here. */
      *p++ = toUChar(reg >= 8 ? 0x41 : 0x40);
      *p++ = toUChar(0x90 + (0x0F & i->Ain.Set64.cond));
      *p++ = toUChar(0xC0 + (reg & 7));
      *p++ = rexAMode_R(i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
      if (i->Ain.Bsfr64.isFwds) {
      p = doAMode_R(p, i->Ain.Bsfr64.dst, i->Ain.Bsfr64.src);
      *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
      if (i->Ain.ACAS.sz == 2) *p++ = 0x66;
      /* cmpxchg{b,w,l,q} %rbx,mem.  Expected-value in %rax, new value
         in %rbx.  The new-value register is hardwired to be %rbx
         since dealing with byte integer registers is too much hassle,
         so we force the register operand to %rbx (could equally be
         %rcx or %rdx). */
      rex = rexAMode_M( hregAMD64_RBX(), i->Ain.ACAS.addr );
      if (i->Ain.ACAS.sz != 8)
         rex = clearWBit(rex);
      *p++ = rex; /* this can emit 0x40, which is pointless. oh well. */
      if (i->Ain.ACAS.sz == 1) *p++ = 0xB0; else *p++ = 0xB1;
      p = doAMode_M(p, hregAMD64_RBX(), i->Ain.ACAS.addr);
      /* cmpxchg{8,16}b m{64,128}.  Expected-value in %rdx:%rax, new
         value in %rcx:%rbx.  All 4 regs are hardwired in the ISA, so
         aren't encoded in the insn. */
      rex = rexAMode_M_enc(1, i->Ain.DACAS.addr);
      if (i->Ain.DACAS.sz != 8)
         rex = clearWBit(rex);
      p = doAMode_M_enc(p, 1, i->Ain.DACAS.addr);
      vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
      for (j = 0; j < i->Ain.A87Free.nregs; j++) {
         p = do_ffree_st(p, 7-j);
   case Ain_A87PushPop:
      vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
      if (i->Ain.A87PushPop.isPush) {
         /* Load from memory into %st(0): flds/fldl amode */
                  rexAMode_M_enc(0, i->Ain.A87PushPop.addr) );
         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
         p = doAMode_M_enc(p, 0/*subopcode*/, i->Ain.A87PushPop.addr);
         /* Dump %st(0) to memory: fstps/fstpl amode */
                  rexAMode_M_enc(3, i->Ain.A87PushPop.addr) );
         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
         p = doAMode_M_enc(p, 3/*subopcode*/, i->Ain.A87PushPop.addr);
      switch (i->Ain.A87FpOp.op) {
         case Afp_SQRT:   *p++ = 0xD9; *p++ = 0xFA; break;
         case Afp_SIN:    *p++ = 0xD9; *p++ = 0xFE; break;
         case Afp_COS:    *p++ = 0xD9; *p++ = 0xFF; break;
         case Afp_ROUND:  *p++ = 0xD9; *p++ = 0xFC; break;
         case Afp_2XM1:   *p++ = 0xD9; *p++ = 0xF0; break;
         case Afp_SCALE:  *p++ = 0xD9; *p++ = 0xFD; break;
         case Afp_ATAN:   *p++ = 0xD9; *p++ = 0xF3; break;
         case Afp_YL2X:   *p++ = 0xD9; *p++ = 0xF1; break;
         case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break;
         case Afp_PREM:   *p++ = 0xD9; *p++ = 0xF8; break;
         case Afp_PREM1:  *p++ = 0xD9; *p++ = 0xF5; break;
            /* fptan pushes 1.0 on the FP stack, except when the
               argument is out of range.  Hence we have to do the
               instruction, then inspect C2 to see if there is an out
               of range condition.  If there is, we skip the fincstp
               that is used by the in-range case to get rid of this
               extra 1.0 value. */
            *p++ = 0xD9; *p++ = 0xF2; // fptan
            *p++ = 0x50;              // pushq %rax
            *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax
            *p++ = 0x66; *p++ = 0xA9;
            *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax
            *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp
            *p++ = 0xD9; *p++ = 0xF7; // fincstp
            *p++ = 0x58;              // after_fincstp: popq %rax
               rexAMode_M_enc(5, i->Ain.A87LdCW.addr) );
      p = doAMode_M_enc(p, 5/*subopcode*/, i->Ain.A87LdCW.addr);
               rexAMode_M_enc(7, i->Ain.A87StSW.addr) );
      p = doAMode_M_enc(p, 7/*subopcode*/, i->Ain.A87StSW.addr);
      if (i->Ain.Store.sz == 2) {
         /* This just goes to show the craziness of the instruction
            set encoding.  We have to insert two prefix bytes, but be
            careful to avoid a conflict in what the size should be, by
            ensuring that REX.W = 0. */
         *p++ = 0x66; /* override to 16-bits */
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
      if (i->Ain.Store.sz == 4) {
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
      if (i->Ain.Store.sz == 1) {
         /* This is one place where it would be wrong to skip emitting
            a rex byte of 0x40, since the mere presence of rex changes
            the meaning of the byte register access.  Be careful. */
         *p++ = clearWBit( rexAMode_M( i->Ain.Store.src, i->Ain.Store.dst) );
         p = doAMode_M(p, i->Ain.Store.src, i->Ain.Store.dst);
      *p++ = clearWBit(rexAMode_M_enc(0, i->Ain.LdMXCSR.addr));
      p = doAMode_M_enc(p, 2/*subopcode*/, i->Ain.LdMXCSR.addr);
      /* ucomi[sd] %srcL, %srcR ;  pushfq ; popq %dst */
      /* ucomi[sd] %srcL, %srcR */
      if (i->Ain.SseUComIS.sz == 8) {
      vassert(i->Ain.SseUComIS.sz == 4);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseUComIS.srcL),
                                   vregEnc3210(i->Ain.SseUComIS.srcR) ));
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseUComIS.srcL),
                            vregEnc3210(i->Ain.SseUComIS.srcR) );
      *p++ = toUChar(0x40 + (1 & iregEnc3(i->Ain.SseUComIS.dst)));
      *p++ = toUChar(0x58 + iregEnc210(i->Ain.SseUComIS.dst));
      /* cvtsi2s[sd] %src, %dst */
      rex = rexAMode_R_enc_reg( vregEnc3210(i->Ain.SseSI2SF.dst),
                                i->Ain.SseSI2SF.src );
      *p++ = toUChar(i->Ain.SseSI2SF.szD==4 ? 0xF3 : 0xF2);
      *p++ = toUChar(i->Ain.SseSI2SF.szS==4 ? clearWBit(rex) : rex);
      p = doAMode_R_enc_reg( p, vregEnc3210(i->Ain.SseSI2SF.dst),
                             i->Ain.SseSI2SF.src );
      /* cvts[sd]2si %src, %dst */
      rex = rexAMode_R_reg_enc( i->Ain.SseSF2SI.dst,
                                vregEnc3210(i->Ain.SseSF2SI.src) );
      *p++ = toUChar(i->Ain.SseSF2SI.szS==4 ? 0xF3 : 0xF2);
      *p++ = toUChar(i->Ain.SseSF2SI.szD==4 ? clearWBit(rex) : rex);
      p = doAMode_R_reg_enc( p, i->Ain.SseSF2SI.dst,
                             vregEnc3210(i->Ain.SseSF2SI.src) );
      /* cvtsd2ss/cvtss2sd %src, %dst */
      *p++ = toUChar(i->Ain.SseSDSS.from64 ? 0xF2 : 0xF3);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseSDSS.dst),
                                   vregEnc3210(i->Ain.SseSDSS.src) ));
      p = doAMode_R_enc_enc( p, vregEnc3210(i->Ain.SseSDSS.dst),
                             vregEnc3210(i->Ain.SseSDSS.src) );
      if (i->Ain.SseLdSt.sz == 8) {
      if (i->Ain.SseLdSt.sz == 4) {
      if (i->Ain.SseLdSt.sz != 16) {
               rexAMode_M_enc(vregEnc3210(i->Ain.SseLdSt.reg),
                              i->Ain.SseLdSt.addr));
      *p++ = toUChar(i->Ain.SseLdSt.isLoad ? 0x10 : 0x11);
      p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdSt.reg),
                        i->Ain.SseLdSt.addr);
   case Ain_SseCStore: {
      vassert(i->Ain.SseCStore.cond != Acc_ALWAYS);
      /* Use ptmp for backpatching conditional jumps. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCStore.cond ^ 1)));
      ptmp = p; /* fill in this bit later */
      *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
      /* Now the store. */
               rexAMode_M_enc(vregEnc3210(i->Ain.SseCStore.src),
                              i->Ain.SseCStore.addr));
      *p++ = toUChar(0x11);
      p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCStore.src),
                        i->Ain.SseCStore.addr);
      /* Fix up the conditional branch */
      Int delta = p - ptmp;
      vassert(delta > 0 && delta < 40);
      *ptmp = toUChar(delta-1);

   case Ain_SseCLoad: {
      vassert(i->Ain.SseCLoad.cond != Acc_ALWAYS);
      /* Use ptmp for backpatching conditional jumps. */
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (0xF & (i->Ain.SseCLoad.cond ^ 1)));
      ptmp = p; /* fill in this bit later */
      *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
               rexAMode_M_enc(vregEnc3210(i->Ain.SseCLoad.dst),
                              i->Ain.SseCLoad.addr));
      *p++ = toUChar(0x10);
      p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseCLoad.dst),
                        i->Ain.SseCLoad.addr);
      /* Fix up the conditional branch */
      Int delta = p - ptmp;
      vassert(delta > 0 && delta < 40);
      *ptmp = toUChar(delta-1);
      vassert(i->Ain.SseLdzLO.sz == 4 || i->Ain.SseLdzLO.sz == 8);
      /* movs[sd] amode, %xmm-dst */
      *p++ = toUChar(i->Ain.SseLdzLO.sz==4 ? 0xF3 : 0xF2);
               rexAMode_M_enc(vregEnc3210(i->Ain.SseLdzLO.reg),
                              i->Ain.SseLdzLO.addr));
      p = doAMode_M_enc(p, vregEnc3210(i->Ain.SseLdzLO.reg),
                        i->Ain.SseLdzLO.addr);
   case Ain_Sse32Fx4: {
      UInt srcRegNo = vregEnc3210(i->Ain.Sse32Fx4.src);
      UInt dstRegNo = vregEnc3210(i->Ain.Sse32Fx4.dst);
      // VEX encoded cases
      switch (i->Ain.Sse32Fx4.op) {
         case Asse_F16toF32: { // vcvtph2ps %xmmS, %xmmD
            // VCVTPH2PS %xmmS, %xmmD (s and d are both xmm regs, range 0 .. 15)
            // 0xC4 : ~d3 1 ~s3 0 0 0 1 0 : 0x79 : 0x13 : 1 1 d2 d1 d0 s2 s1 s0
            UInt byte2 = ((((~d)>>3)&1)<<7) | (1<<6)
                         | ((((~s)>>3)&1)<<5) | (1<<1);
            UInt byte5 = (1<<7) | (1<<6) | ((d&7) << 3) | ((s&7) << 0);
         case Asse_F32toF16: { // vcvtps2ph $4, %xmmS, %xmmD
            // VCVTPS2PH $4, %xmmS, %xmmD (s and d both xmm regs, range 0 .. 15)
            // 0xC4 : ~s3 1 ~d3 0 0 0 1 1 : 0x79
            //      : 0x1D : 11 s2 s1 s0 d2 d1 d0 : 0x4
            UInt byte2 = ((((~s)>>3)&1)<<7) | (1<<6)
                         | ((((~d)>>3)&1)<<5) | (1<<1) | (1 << 0);
            UInt byte5 = (1<<7) | (1<<6) | ((s&7) << 3) | ((d&7) << 0);
      // After this point, REX encoded cases only
      switch (i->Ain.Sse32Fx4.op) {
         case Asse_F2I: *p++ = 0x66; break;
      *p++ = clearWBit(rexAMode_R_enc_enc(dstRegNo, srcRegNo));
      switch (i->Ain.Sse32Fx4.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_RCPF:   *p++ = 0x53; break;
         case Asse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_I2F:    *p++ = 0x5B; break; // cvtdq2ps; no 0x66 pfx
         case Asse_F2I:    *p++ = 0x5B; break; // cvtps2dq; with 0x66 pfx
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, dstRegNo, srcRegNo);
      *p++ = toUChar(xtra & 0xFF);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64Fx2.dst),
                                   vregEnc3210(i->Ain.Sse64Fx2.src) ));
      switch (i->Ain.Sse64Fx2.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64Fx2.dst),
                            vregEnc3210(i->Ain.Sse64Fx2.src) );
      *p++ = toUChar(xtra & 0xFF);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse32FLo.dst),
                                   vregEnc3210(i->Ain.Sse32FLo.src) ));
      switch (i->Ain.Sse32FLo.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_RCPF:   *p++ = 0x53; break;
         case Asse_RSQRTF: *p++ = 0x52; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse32FLo.dst),
                            vregEnc3210(i->Ain.Sse32FLo.src) );
      *p++ = toUChar(xtra & 0xFF);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.Sse64FLo.dst),
                                   vregEnc3210(i->Ain.Sse64FLo.src) ));
      switch (i->Ain.Sse64FLo.op) {
         case Asse_ADDF:   *p++ = 0x58; break;
         case Asse_DIVF:   *p++ = 0x5E; break;
         case Asse_MAXF:   *p++ = 0x5F; break;
         case Asse_MINF:   *p++ = 0x5D; break;
         case Asse_MULF:   *p++ = 0x59; break;
         case Asse_SQRTF:  *p++ = 0x51; break;
         case Asse_SUBF:   *p++ = 0x5C; break;
         case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
         case Asse_CMPLTF: *p++ = 0xC2; xtra = 0x101; break;
         case Asse_CMPLEF: *p++ = 0xC2; xtra = 0x102; break;
         case Asse_CMPUNF: *p++ = 0xC2; xtra = 0x103; break;
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.Sse64FLo.dst),
                            vregEnc3210(i->Ain.Sse64FLo.src) );
      *p++ = toUChar(xtra & 0xFF);
#     define XX(_n) *p++ = (_n)
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseReRg.dst),
                                   vregEnc3210(i->Ain.SseReRg.src) ));
      switch (i->Ain.SseReRg.op) {
         case Asse_MOV:      /*movups*/ XX(rex); XX(0x0F); XX(0x10); break;
         case Asse_OR:                  XX(rex); XX(0x0F); XX(0x56); break;
         case Asse_XOR:                 XX(rex); XX(0x0F); XX(0x57); break;
         case Asse_AND:                 XX(rex); XX(0x0F); XX(0x54); break;
         case Asse_ANDN:                XX(rex); XX(0x0F); XX(0x55); break;
         case Asse_PACKSSD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6B); break;
         case Asse_PACKSSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x63); break;
         case Asse_PACKUSW:  XX(0x66); XX(rex); XX(0x0F); XX(0x67); break;
         case Asse_ADD8:     XX(0x66); XX(rex); XX(0x0F); XX(0xFC); break;
         case Asse_ADD16:    XX(0x66); XX(rex); XX(0x0F); XX(0xFD); break;
         case Asse_ADD32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFE); break;
         case Asse_ADD64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD4); break;
         case Asse_QADD8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEC); break;
         case Asse_QADD16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xED); break;
         case Asse_QADD8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xDC); break;
         case Asse_QADD16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xDD); break;
         case Asse_AVG8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xE0); break;
         case Asse_AVG16U:   XX(0x66); XX(rex); XX(0x0F); XX(0xE3); break;
         case Asse_CMPEQ8:   XX(0x66); XX(rex); XX(0x0F); XX(0x74); break;
         case Asse_CMPEQ16:  XX(0x66); XX(rex); XX(0x0F); XX(0x75); break;
         case Asse_CMPEQ32:  XX(0x66); XX(rex); XX(0x0F); XX(0x76); break;
         case Asse_CMPGT8S:  XX(0x66); XX(rex); XX(0x0F); XX(0x64); break;
         case Asse_CMPGT16S: XX(0x66); XX(rex); XX(0x0F); XX(0x65); break;
         case Asse_CMPGT32S: XX(0x66); XX(rex); XX(0x0F); XX(0x66); break;
         case Asse_MAX16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEE); break;
         case Asse_MAX8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDE); break;
         case Asse_MIN16S:   XX(0x66); XX(rex); XX(0x0F); XX(0xEA); break;
         case Asse_MIN8U:    XX(0x66); XX(rex); XX(0x0F); XX(0xDA); break;
         case Asse_MULHI16U: XX(0x66); XX(rex); XX(0x0F); XX(0xE4); break;
         case Asse_MULHI16S: XX(0x66); XX(rex); XX(0x0F); XX(0xE5); break;
         case Asse_MUL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD5); break;
         case Asse_SHL16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF1); break;
         case Asse_SHL32:    XX(0x66); XX(rex); XX(0x0F); XX(0xF2); break;
         case Asse_SHL64:    XX(0x66); XX(rex); XX(0x0F); XX(0xF3); break;
         case Asse_SAR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xE1); break;
         case Asse_SAR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xE2); break;
         case Asse_SHR16:    XX(0x66); XX(rex); XX(0x0F); XX(0xD1); break;
         case Asse_SHR32:    XX(0x66); XX(rex); XX(0x0F); XX(0xD2); break;
         case Asse_SHR64:    XX(0x66); XX(rex); XX(0x0F); XX(0xD3); break;
         case Asse_SUB8:     XX(0x66); XX(rex); XX(0x0F); XX(0xF8); break;
         case Asse_SUB16:    XX(0x66); XX(rex); XX(0x0F); XX(0xF9); break;
         case Asse_SUB32:    XX(0x66); XX(rex); XX(0x0F); XX(0xFA); break;
         case Asse_SUB64:    XX(0x66); XX(rex); XX(0x0F); XX(0xFB); break;
         case Asse_QSUB8S:   XX(0x66); XX(rex); XX(0x0F); XX(0xE8); break;
         case Asse_QSUB16S:  XX(0x66); XX(rex); XX(0x0F); XX(0xE9); break;
         case Asse_QSUB8U:   XX(0x66); XX(rex); XX(0x0F); XX(0xD8); break;
         case Asse_QSUB16U:  XX(0x66); XX(rex); XX(0x0F); XX(0xD9); break;
         case Asse_UNPCKHB:  XX(0x66); XX(rex); XX(0x0F); XX(0x68); break;
         case Asse_UNPCKHW:  XX(0x66); XX(rex); XX(0x0F); XX(0x69); break;
         case Asse_UNPCKHD:  XX(0x66); XX(rex); XX(0x0F); XX(0x6A); break;
         case Asse_UNPCKHQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6D); break;
         case Asse_UNPCKLB:  XX(0x66); XX(rex); XX(0x0F); XX(0x60); break;
         case Asse_UNPCKLW:  XX(0x66); XX(rex); XX(0x0F); XX(0x61); break;
         case Asse_UNPCKLD:  XX(0x66); XX(rex); XX(0x0F); XX(0x62); break;
         case Asse_UNPCKLQ:  XX(0x66); XX(rex); XX(0x0F); XX(0x6C); break;
         case Asse_PSHUFB:   XX(0x66); XX(rex);
                             XX(0x0F); XX(0x38); XX(0x00); break;
         case Asse_PMADDUBSW:XX(0x66); XX(rex);
                             XX(0x0F); XX(0x38); XX(0x04); break;
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseReRg.dst),
                            vregEnc3210(i->Ain.SseReRg.src) );
      /* jmp fwds if !condition */
      *p++ = toUChar(0x70 + (i->Ain.SseCMov.cond ^ 1));
      *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
      /* movaps %src, %dst */
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseCMov.dst),
                                   vregEnc3210(i->Ain.SseCMov.src) ));
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseCMov.dst),
                            vregEnc3210(i->Ain.SseCMov.src) );
      /* Fill in the jump offset. */
      *(ptmp-1) = toUChar(p - ptmp);
               rexAMode_R_enc_enc( vregEnc3210(i->Ain.SseShuf.dst),
                                   vregEnc3210(i->Ain.SseShuf.src) ));
      p = doAMode_R_enc_enc(p, vregEnc3210(i->Ain.SseShuf.dst),
                            vregEnc3210(i->Ain.SseShuf.src) );
      *p++ = (UChar)(i->Ain.SseShuf.order);
   case Ain_SseShiftN: {
      UInt shiftImm = i->Ain.SseShiftN.shiftBits;
      switch (i->Ain.SseShiftN.op) {
         case Asse_SHL16: limit = 15; opc = 0x71; subopc_imm = 6; break;
         case Asse_SHL32: limit = 31; opc = 0x72; subopc_imm = 6; break;
         case Asse_SHL64: limit = 63; opc = 0x73; subopc_imm = 6; break;
         case Asse_SAR16: limit = 15; opc = 0x71; subopc_imm = 4; break;
         case Asse_SAR32: limit = 31; opc = 0x72; subopc_imm = 4; break;
         case Asse_SHR16: limit = 15; opc = 0x71; subopc_imm = 2; break;
         case Asse_SHR32: limit = 31; opc = 0x72; subopc_imm = 2; break;
         case Asse_SHR64: limit = 63; opc = 0x73; subopc_imm = 2; break;
            if ((shiftImm & 7) != 0) goto bad;
            limit = 15; opc = 0x73; subopc_imm = 7;
            if ((shiftImm & 7) != 0) goto bad;
            limit = 15; opc = 0x73; subopc_imm = 3;
            // This should never happen .. SSE2 only offers the above 10 insns
            // for the "shift with immediate" case
      vassert(limit > 0 && opc > 0 && subopc_imm > 0);
      if (shiftImm > limit) goto bad;
               rexAMode_R_enc_enc( subopc_imm,
                                   vregEnc3210(i->Ain.SseShiftN.dst) ));
      p = doAMode_R_enc_enc(p, subopc_imm, vregEnc3210(i->Ain.SseShiftN.dst));
      Bool toXMM = i->Ain.SseMOVQ.toXMM;
      HReg gpr = i->Ain.SseMOVQ.gpr;
      HReg xmm = i->Ain.SseMOVQ.xmm;
      *p++ = setWBit( rexAMode_R_enc_enc( vregEnc3210(xmm), iregEnc3210(gpr)) );
      *p++ = toXMM ? 0x6E : 0x7E;
      p = doAMode_R_enc_enc( p, vregEnc3210(xmm), iregEnc3210(gpr) );
   //uu case Ain_AvxLdSt: {
   //uu    UInt vex = vexAMode_M( dvreg2ireg(i->Ain.AvxLdSt.reg),
   //uu                           i->Ain.AvxLdSt.addr );
   //uu    p = emitVexPrefix(p, vex);
   //uu    *p++ = toUChar(i->Ain.AvxLdSt.isLoad ? 0x10 : 0x11);
   //uu    p = doAMode_M(p, dvreg2ireg(i->Ain.AvxLdSt.reg), i->Ain.AvxLdSt.addr);
              (3 bytes)  decl 8(%rbp)    8 == offsetof(host_EvC_COUNTER)
              (2 bytes)  jns  nofail     expected taken
              (3 bytes)  jmp* 0(%rbp)    0 == offsetof(host_EvC_FAILADDR)
         */
         /* This is heavily asserted re instruction lengths.  It needs to
            be.  If we get given unexpected forms of .amCounter or
            .amFailAddr -- basically, anything that's not of the form
            uimm7(%rbp) -- they are likely to fail. */
         /* Note also that after the decl we must be very careful not to
            read the carry flag, else we get a partial flags stall.
            js/jns avoids that, though. */
         /* --- decl 8(%rbp) --- */
         /* Need to compute the REX byte for the decl in order to prove
            that we don't need it, since this is a 32-bit decl and all
            registers involved in the amode are < r8.  "1" because
            there's no register in this encoding; instead the register
            field is used as a sub opcode.  The encoding for "decl r/m32"
            is FF /1, hence the "1". */
         rex = clearWBit(rexAMode_M_enc(1, i->Ain.EvCheck.amCounter));
         if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
         *p++ = 0xFF; /* decl r/m32 == FF /1 */
         p = doAMode_M_enc(p, 1, i->Ain.EvCheck.amCounter);
         vassert(p - p0 == 3);
         /* --- jns nofail --- */
         *p++ = 0x79; /* jns rel8 */
         *p++ = 0x03; /* need to check this 0x03 after the next insn */
         vassert(p - p0 == 5);
         /* --- jmp* 0(%rbp) --- */
         /* Once again, verify we don't need REX.  The encoding is FF /4.
            We don't need REX.W since by default FF /4 in 64-bit mode
            implies a 64 bit load. */
         rex = clearWBit(rexAMode_M_enc(4, i->Ain.EvCheck.amFailAddr));
         if (rex != 0x40) goto bad;
         *p++ = 0xFF; /* jmp r/m64 == FF /4 */
         p = doAMode_M_enc(p, 4, i->Ain.EvCheck.amFailAddr);
         vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
         /* And crosscheck .. */
         vassert(evCheckSzB_AMD64() == 8);
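         /* For orientation: with the usual amodes 8(%rbp) and 0(%rbp),
            the 8 bytes laid down above are
               FF 4D 08     decl 8(%rbp)
               79 03        jns  nofail
               FF 65 00     jmp* 0(%rbp)
            (illustrative only; the asserts above are what actually pin
            the layout down). */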
         /* We generate   movabsq $0, %r11
                          incq (%r11)
            in the expectation that a later call to LibVEX_PatchProfInc
            will be used to fill in the immediate field once the right
            value is known.
              49 BB 00 00 00 00 00 00 00 00
              49 FF 03
         */
         *p++ = 0x49; *p++ = 0xBB;
         *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
         *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
         *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
         /* Tell the caller .. */
         vassert(!(*is_profInc));
         *is_profInc = True;
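         /* The eight 0x00 bytes are the movabsq immediate (p[2..9]); they
            stay zero until patchProfInc_AMD64 below overwrites them with
            the real counter address. */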
      ppAMD64Instr(i, mode64);
      vpanic("emit_AMD64Instr");

   vassert(p - &buf[0] <= 64);
/* How big is an event check?  See case for Ain_EvCheck in
   emit_AMD64Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_AMD64 (void)
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_AMD64 ( VexEndness endness_host,
                                   void* place_to_chain,
                                   const void* disp_cp_chain_me_EXPECTED,
                                   const void* place_to_jump_to )
{
   vassert(endness_host == VexEndnessLE);
   /* What we're expecting to see is:
        movabsq $disp_cp_chain_me_EXPECTED, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
        41 FF D3
   */
   UChar* p = (UChar*)place_to_chain;
   vassert(p[0] == 0x49);
   vassert(p[1] == 0xBB);
   vassert(read_misaligned_ULong_LE(&p[2]) == (Addr)disp_cp_chain_me_EXPECTED);
   vassert(p[10] == 0x41);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0xD3);
   /* And what we want to change it to is either:
        movabsq $place_to_jump_to, %r11
        jmpq *%r11
      viz
        49 BB <8 bytes value == place_to_jump_to>
        41 FF E3
      So it's the same length (convenient, huh) and we don't
      need to change all the bits.
      ---OR---
      in the case where the displacement falls within 32 bits
        jmpq disp32   where disp32 is relative to the next insn
        ud2; ud2; ud2; ud2
      viz
        E9 <4 bytes == disp32>
        0F 0B 0F 0B 0F 0B 0F 0B
      In both cases the replacement has the same length as the original.
      To remain sane & verifiable,
      (1) limit the displacement for the short form to
          (say) +/- one billion, so as to avoid wraparound
          off-by-ones
      (2) even if the short form is applicable, once every (say)
          1024 times use the long form anyway, so as to maintain
          verifiability
   */
   /* This is the delta we need to put into a JMP d32 insn.  It's
      relative to the start of the next insn, hence the -5. */
   Long delta   = (Long)((const UChar*)place_to_jump_to - (const UChar*)p) - 5;
   Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
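   /* Worked example (illustrative only): if place_to_jump_to sits
      0x1000 bytes beyond p, then delta = 0x1000 - 5 = 0xFFB, well inside
      +/- one billion, so the short form is eligible and the site would
      become E9 FB 0F 00 00 followed by four ud2s (0F 0B pairs) -- unless
      the 1-in-1024 long-form override below kicks in. */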
   static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
      shortCTR++; // thread safety bleh
      if (0 == (shortCTR & 0x3FF)) {
            vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
                       "using long jmp\n", shortCTR);
   /* And make the modifications. */
      write_misaligned_UInt_LE(&p[1], (UInt)(Int)delta);
      p[5]  = 0x0F; p[6]  = 0x0B;
      p[7]  = 0x0F; p[8]  = 0x0B;
      p[9]  = 0x0F; p[10] = 0x0B;
      p[11] = 0x0F; p[12] = 0x0B;
      /* sanity check on the delta -- top 32 are all 0 or all 1 */
      delta >>= 32;
      vassert(delta == 0LL || delta == -1LL);
      /* Minimal modifications from the starting sequence. */
      write_misaligned_ULong_LE(&p[2], (ULong)(Addr)place_to_jump_to);
   VexInvalRange vir = { (HWord)place_to_chain, 13 };
/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_AMD64 ( VexEndness endness_host,
                                     void* place_to_unchain,
                                     const void* place_to_jump_to_EXPECTED,
                                     const void* disp_cp_chain_me )
{
   vassert(endness_host == VexEndnessLE);
   /* What we're expecting to see is either:
        movabsq $place_to_jump_to_EXPECTED, %r11
        jmpq *%r11
      viz
        49 BB <8 bytes value == place_to_jump_to_EXPECTED>
        41 FF E3
      ---OR---
      in the case where the displacement falls within 32 bits
        jmpq disp32
        ud2; ud2; ud2; ud2
      viz
        E9 <4 bytes == disp32>
        0F 0B 0F 0B 0F 0B 0F 0B
   */
   UChar* p = (UChar*)place_to_unchain;
   if (p[0] == 0x49 && p[1] == 0xBB
       && read_misaligned_ULong_LE(&p[2])
          == (ULong)(Addr)place_to_jump_to_EXPECTED
       && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
      /* it's the long form */
       && p[5]  == 0x0F && p[6]  == 0x0B
       && p[7]  == 0x0F && p[8]  == 0x0B
       && p[9]  == 0x0F && p[10] == 0x0B
       && p[11] == 0x0F && p[12] == 0x0B) {
      /* It's the short form.  Check the offset is right. */
      Int  s32 = (Int)read_misaligned_UInt_LE(&p[1]);
      Long s64 = (Long)s32;
      if ((UChar*)p + 5 + s64 == place_to_jump_to_EXPECTED) {
            vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
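         /* Illustrative check: a short-form site with disp32 =
            0xFFFFF000 (s64 = -0x1000) jumps to p + 5 - 0x1000, so only
            that address is accepted as place_to_jump_to_EXPECTED. */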
   /* And what we want to change it to is:
        movabsq $disp_cp_chain_me, %r11
        call *%r11
      viz
        49 BB <8 bytes value == disp_cp_chain_me>
        41 FF D3
      So it's the same length (convenient, huh).
   */
   write_misaligned_ULong_LE(&p[2], (ULong)(Addr)disp_cp_chain_me);
   VexInvalRange vir = { (HWord)place_to_unchain, 13 };
/* Patch the counter address into a profile inc point, as previously
   created by the Ain_ProfInc case for emit_AMD64Instr. */
VexInvalRange patchProfInc_AMD64 ( VexEndness endness_host,
                                   void*  place_to_patch,
                                   const ULong* location_of_counter )
{
   vassert(endness_host == VexEndnessLE);
   vassert(sizeof(ULong*) == 8);
   UChar* p = (UChar*)place_to_patch;
   vassert(p[0]  == 0x49);
   vassert(p[1]  == 0xBB);
   vassert(p[2]  == 0x00);
   vassert(p[3]  == 0x00);
   vassert(p[4]  == 0x00);
   vassert(p[5]  == 0x00);
   vassert(p[6]  == 0x00);
   vassert(p[7]  == 0x00);
   vassert(p[8]  == 0x00);
   vassert(p[9]  == 0x00);
   vassert(p[10] == 0x49);
   vassert(p[11] == 0xFF);
   vassert(p[12] == 0x03);
   ULong imm64 = (ULong)(Addr)location_of_counter;
   p[2] = imm64 & 0xFF; imm64 >>= 8;
   p[3] = imm64 & 0xFF; imm64 >>= 8;
   p[4] = imm64 & 0xFF; imm64 >>= 8;
   p[5] = imm64 & 0xFF; imm64 >>= 8;
   p[6] = imm64 & 0xFF; imm64 >>= 8;
   p[7] = imm64 & 0xFF; imm64 >>= 8;
   p[8] = imm64 & 0xFF; imm64 >>= 8;
   p[9] = imm64 & 0xFF; imm64 >>= 8;
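   /* The eight stores above write imm64 into p[2..9] little-endian,
      i.e. they fill in the movabsq immediate; the effect is the same as
      write_misaligned_ULong_LE(&p[2], imm64), just spelled out byte by
      byte.  For example, a counter at 0x1122334455667788 leaves
      p[2..9] = 88 77 66 55 44 33 22 11. */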
   VexInvalRange vir = { (HWord)place_to_patch, 13 };


/*---------------------------------------------------------------*/
/*--- end                                   host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/