2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 static const char * const tcg_target_reg_names
[TCG_TARGET_NB_REGS
] = {
27 #if TCG_TARGET_REG_BITS == 64
28 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
29 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
31 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
36 static const int tcg_target_reg_alloc_order
[] = {
37 #if TCG_TARGET_REG_BITS == 64
64 static const int tcg_target_call_iarg_regs
[] = {
65 #if TCG_TARGET_REG_BITS == 64
79 static const int tcg_target_call_oarg_regs
[2] = {
84 static uint8_t *tb_ret_addr
;
86 static void patch_reloc(uint8_t *code_ptr
, int type
,
87 tcg_target_long value
, tcg_target_long addend
)
92 value
-= (uintptr_t)code_ptr
;
93 if (value
!= (int32_t)value
) {
96 *(uint32_t *)code_ptr
= value
;
99 value
-= (uintptr_t)code_ptr
;
100 if (value
!= (int8_t)value
) {
103 *(uint8_t *)code_ptr
= value
;
110 /* maximum number of registers used for input function arguments */
111 static inline int tcg_target_get_call_iarg_regs_count(int flags
)
113 if (TCG_TARGET_REG_BITS
== 64) {
117 flags
&= TCG_CALL_TYPE_MASK
;
119 case TCG_CALL_TYPE_STD
:
121 case TCG_CALL_TYPE_REGPARM_1
:
122 case TCG_CALL_TYPE_REGPARM_2
:
123 case TCG_CALL_TYPE_REGPARM
:
124 return flags
- TCG_CALL_TYPE_REGPARM_1
+ 1;
130 /* parse target specific constraints */
131 static int target_parse_constraint(TCGArgConstraint
*ct
, const char **pct_str
)
138 ct
->ct
|= TCG_CT_REG
;
139 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EAX
);
142 ct
->ct
|= TCG_CT_REG
;
143 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EBX
);
146 ct
->ct
|= TCG_CT_REG
;
147 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ECX
);
150 ct
->ct
|= TCG_CT_REG
;
151 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDX
);
154 ct
->ct
|= TCG_CT_REG
;
155 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_ESI
);
158 ct
->ct
|= TCG_CT_REG
;
159 tcg_regset_set_reg(ct
->u
.regs
, TCG_REG_EDI
);
162 ct
->ct
|= TCG_CT_REG
;
163 if (TCG_TARGET_REG_BITS
== 64) {
164 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
166 tcg_regset_set32(ct
->u
.regs
, 0, 0xf);
170 ct
->ct
|= TCG_CT_REG
;
171 if (TCG_TARGET_REG_BITS
== 64) {
172 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
174 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
178 /* qemu_ld/st address constraint */
180 ct
->ct
|= TCG_CT_REG
;
181 if (TCG_TARGET_REG_BITS
== 64) {
182 tcg_regset_set32(ct
->u
.regs
, 0, 0xffff);
183 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_RSI
);
184 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_RDI
);
186 tcg_regset_set32(ct
->u
.regs
, 0, 0xff);
187 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_EAX
);
188 tcg_regset_reset_reg(ct
->u
.regs
, TCG_REG_EDX
);
193 ct
->ct
|= TCG_CT_CONST_S32
;
196 ct
->ct
|= TCG_CT_CONST_U32
;
207 /* test if a constant matches the constraint */
208 static inline int tcg_target_const_match(tcg_target_long val
,
209 const TCGArgConstraint
*arg_ct
)
212 if (ct
& TCG_CT_CONST
) {
215 if ((ct
& TCG_CT_CONST_S32
) && val
== (int32_t)val
) {
218 if ((ct
& TCG_CT_CONST_U32
) && val
== (uint32_t)val
) {
224 #if TCG_TARGET_REG_BITS == 64
225 # define LOWREGMASK(x) ((x) & 7)
227 # define LOWREGMASK(x) (x)
230 #define P_EXT 0x100 /* 0x0f opcode prefix */
231 #define P_DATA16 0x200 /* 0x66 opcode prefix */
232 #if TCG_TARGET_REG_BITS == 64
233 # define P_ADDR32 0x400 /* 0x67 opcode prefix */
234 # define P_REXW 0x800 /* Set REX.W = 1 */
235 # define P_REXB_R 0x1000 /* REG field as byte register */
236 # define P_REXB_RM 0x2000 /* R/M field as byte register */
244 #define OPC_ARITH_EvIz (0x81)
245 #define OPC_ARITH_EvIb (0x83)
246 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
247 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
248 #define OPC_BSWAP (0xc8 | P_EXT)
249 #define OPC_CALL_Jz (0xe8)
250 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
251 #define OPC_DEC_r32 (0x48)
252 #define OPC_IMUL_GvEv (0xaf | P_EXT)
253 #define OPC_IMUL_GvEvIb (0x6b)
254 #define OPC_IMUL_GvEvIz (0x69)
255 #define OPC_INC_r32 (0x40)
256 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
257 #define OPC_JCC_short (0x70) /* ... plus condition code */
258 #define OPC_JMP_long (0xe9)
259 #define OPC_JMP_short (0xeb)
260 #define OPC_LEA (0x8d)
261 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
262 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
263 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
264 #define OPC_MOVL_EvIz (0xc7)
265 #define OPC_MOVL_Iv (0xb8)
266 #define OPC_MOVSBL (0xbe | P_EXT)
267 #define OPC_MOVSWL (0xbf | P_EXT)
268 #define OPC_MOVSLQ (0x63 | P_REXW)
269 #define OPC_MOVZBL (0xb6 | P_EXT)
270 #define OPC_MOVZWL (0xb7 | P_EXT)
271 #define OPC_POP_r32 (0x58)
272 #define OPC_PUSH_r32 (0x50)
273 #define OPC_PUSH_Iv (0x68)
274 #define OPC_PUSH_Ib (0x6a)
275 #define OPC_RET (0xc3)
276 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
277 #define OPC_SHIFT_1 (0xd1)
278 #define OPC_SHIFT_Ib (0xc1)
279 #define OPC_SHIFT_cl (0xd3)
280 #define OPC_TESTL (0x85)
281 #define OPC_XCHG_ax_r32 (0x90)
283 #define OPC_GRP3_Ev (0xf7)
284 #define OPC_GRP5 (0xff)
286 /* Group 1 opcode extensions for 0x80-0x83.
287 These are also used as modifiers for OPC_ARITH. */
297 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
304 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
312 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
313 #define EXT5_INC_Ev 0
314 #define EXT5_DEC_Ev 1
315 #define EXT5_CALLN_Ev 2
316 #define EXT5_JMPN_Ev 4
318 /* Condition codes to be added to OPC_JCC_{long,short}. */
337 static const uint8_t tcg_cond_to_jcc
[10] = {
338 [TCG_COND_EQ
] = JCC_JE
,
339 [TCG_COND_NE
] = JCC_JNE
,
340 [TCG_COND_LT
] = JCC_JL
,
341 [TCG_COND_GE
] = JCC_JGE
,
342 [TCG_COND_LE
] = JCC_JLE
,
343 [TCG_COND_GT
] = JCC_JG
,
344 [TCG_COND_LTU
] = JCC_JB
,
345 [TCG_COND_GEU
] = JCC_JAE
,
346 [TCG_COND_LEU
] = JCC_JBE
,
347 [TCG_COND_GTU
] = JCC_JA
,
350 #if TCG_TARGET_REG_BITS == 64
351 static void tcg_out_opc(TCGContext
*s
, int opc
, int r
, int rm
, int x
)
355 if (opc
& P_DATA16
) {
356 /* We should never be asking for both 16 and 64-bit operation. */
357 assert((opc
& P_REXW
) == 0);
360 if (opc
& P_ADDR32
) {
365 rex
|= (opc
& P_REXW
) >> 8; /* REX.W */
366 rex
|= (r
& 8) >> 1; /* REX.R */
367 rex
|= (x
& 8) >> 2; /* REX.X */
368 rex
|= (rm
& 8) >> 3; /* REX.B */
370 /* P_REXB_{R,RM} indicates that the given register is the low byte.
371 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
372 as otherwise the encoding indicates %[abcd]h. Note that the values
373 that are ORed in merely indicate that the REX byte must be present;
374 those bits get discarded in output. */
375 rex
|= opc
& (r
>= 4 ? P_REXB_R
: 0);
376 rex
|= opc
& (rm
>= 4 ? P_REXB_RM
: 0);
379 tcg_out8(s
, (uint8_t)(rex
| 0x40));
388 static void tcg_out_opc(TCGContext
*s
, int opc
)
390 if (opc
& P_DATA16
) {
398 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
399 the 32-bit compilation paths. This method works with all versions of gcc,
400 whereas relying on optimization may not be able to exclude them. */
401 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
404 static void tcg_out_modrm(TCGContext
*s
, int opc
, int r
, int rm
)
406 tcg_out_opc(s
, opc
, r
, rm
, 0);
407 tcg_out8(s
, 0xc0 | (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
410 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
411 We handle RM and/or INDEX being missing, signalled by a negative value. In 64-bit
412 mode for absolute addresses, ~RM is the size of the immediate operand
413 that will follow the instruction. */
415 static void tcg_out_modrm_sib_offset(TCGContext
*s
, int opc
, int r
, int rm
,
416 int index
, int shift
,
417 tcg_target_long offset
)
421 if (index
< 0 && rm
< 0) {
422 if (TCG_TARGET_REG_BITS
== 64) {
423 /* Try for a rip-relative addressing mode. This has replaced
424 the 32-bit-mode absolute addressing encoding. */
425 tcg_target_long pc
= (tcg_target_long
)s
->code_ptr
+ 5 + ~rm
;
426 tcg_target_long disp
= offset
- pc
;
427 if (disp
== (int32_t)disp
) {
428 tcg_out_opc(s
, opc
, r
, 0, 0);
429 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 5);
434 /* Try for an absolute address encoding. This requires the
435 use of the MODRM+SIB encoding and is therefore larger than
436 rip-relative addressing. */
437 if (offset
== (int32_t)offset
) {
438 tcg_out_opc(s
, opc
, r
, 0, 0);
439 tcg_out8(s
, (LOWREGMASK(r
) << 3) | 4);
440 tcg_out8(s
, (4 << 3) | 5);
441 tcg_out32(s
, offset
);
445 /* ??? The memory isn't directly addressable. */
448 /* Absolute address. */
449 tcg_out_opc(s
, opc
, r
, 0, 0);
450 tcg_out8(s
, (r
<< 3) | 5);
451 tcg_out32(s
, offset
);
456 /* Find the length of the immediate addend. Note that the encoding
457 that would be used for (%ebp) indicates absolute addressing. */
459 mod
= 0, len
= 4, rm
= 5;
460 } else if (offset
== 0 && LOWREGMASK(rm
) != TCG_REG_EBP
) {
462 } else if (offset
== (int8_t)offset
) {
468 /* Use a single byte MODRM format if possible. Note that the encoding
469 that would be used for %esp is the escape to the two byte form. */
470 if (index
< 0 && LOWREGMASK(rm
) != TCG_REG_ESP
) {
471 /* Single byte MODRM format. */
472 tcg_out_opc(s
, opc
, r
, rm
, 0);
473 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | LOWREGMASK(rm
));
475 /* Two byte MODRM+SIB format. */
477 /* Note that the encoding that would place %esp into the index
478 field indicates no index register. In 64-bit mode, the REX.X
479 bit counts, so %r12 can be used as the index. */
483 assert(index
!= TCG_REG_ESP
);
486 tcg_out_opc(s
, opc
, r
, rm
, index
);
487 tcg_out8(s
, mod
| (LOWREGMASK(r
) << 3) | 4);
488 tcg_out8(s
, (shift
<< 6) | (LOWREGMASK(index
) << 3) | LOWREGMASK(rm
));
493 } else if (len
== 4) {
494 tcg_out32(s
, offset
);
498 /* A simplification of the above with no index or shift. */
499 static inline void tcg_out_modrm_offset(TCGContext
*s
, int opc
, int r
,
500 int rm
, tcg_target_long offset
)
502 tcg_out_modrm_sib_offset(s
, opc
, r
, rm
, -1, 0, offset
);
505 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
506 static inline void tgen_arithr(TCGContext
*s
, int subop
, int dest
, int src
)
508 /* Propagate an opcode prefix, such as P_REXW. */
509 int ext
= subop
& ~0x7;
512 tcg_out_modrm(s
, OPC_ARITH_GvEv
+ (subop
<< 3) + ext
, dest
, src
);
515 static inline void tcg_out_mov(TCGContext
*s
, TCGType type
, int ret
, int arg
)
518 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
519 tcg_out_modrm(s
, opc
, ret
, arg
);
523 static void tcg_out_movi(TCGContext
*s
, TCGType type
,
524 int ret
, tcg_target_long arg
)
527 tgen_arithr(s
, ARITH_XOR
, ret
, ret
);
529 } else if (arg
== (uint32_t)arg
|| type
== TCG_TYPE_I32
) {
530 tcg_out_opc(s
, OPC_MOVL_Iv
+ LOWREGMASK(ret
), 0, ret
, 0);
532 } else if (arg
== (int32_t)arg
) {
533 tcg_out_modrm(s
, OPC_MOVL_EvIz
+ P_REXW
, 0, ret
);
536 tcg_out_opc(s
, OPC_MOVL_Iv
+ P_REXW
+ LOWREGMASK(ret
), 0, ret
, 0);
538 tcg_out32(s
, arg
>> 31 >> 1);
542 static inline void tcg_out_pushi(TCGContext
*s
, tcg_target_long val
)
544 if (val
== (int8_t)val
) {
545 tcg_out_opc(s
, OPC_PUSH_Ib
, 0, 0, 0);
547 } else if (val
== (int32_t)val
) {
548 tcg_out_opc(s
, OPC_PUSH_Iv
, 0, 0, 0);
555 static inline void tcg_out_push(TCGContext
*s
, int reg
)
557 tcg_out_opc(s
, OPC_PUSH_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
560 static inline void tcg_out_pop(TCGContext
*s
, int reg
)
562 tcg_out_opc(s
, OPC_POP_r32
+ LOWREGMASK(reg
), 0, reg
, 0);
565 static inline void tcg_out_ld(TCGContext
*s
, TCGType type
, int ret
,
566 int arg1
, tcg_target_long arg2
)
568 int opc
= OPC_MOVL_GvEv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
569 tcg_out_modrm_offset(s
, opc
, ret
, arg1
, arg2
);
572 static inline void tcg_out_st(TCGContext
*s
, TCGType type
, int arg
,
573 int arg1
, tcg_target_long arg2
)
575 int opc
= OPC_MOVL_EvGv
+ (type
== TCG_TYPE_I64
? P_REXW
: 0);
576 tcg_out_modrm_offset(s
, opc
, arg
, arg1
, arg2
);
579 static void tcg_out_shifti(TCGContext
*s
, int subopc
, int reg
, int count
)
581 /* Propagate an opcode prefix, such as P_DATA16. */
582 int ext
= subopc
& ~0x7;
586 tcg_out_modrm(s
, OPC_SHIFT_1
+ ext
, subopc
, reg
);
588 tcg_out_modrm(s
, OPC_SHIFT_Ib
+ ext
, subopc
, reg
);
593 static inline void tcg_out_bswap32(TCGContext
*s
, int reg
)
595 tcg_out_opc(s
, OPC_BSWAP
+ LOWREGMASK(reg
), 0, reg
, 0);
598 static inline void tcg_out_rolw_8(TCGContext
*s
, int reg
)
600 tcg_out_shifti(s
, SHIFT_ROL
+ P_DATA16
, reg
, 8);
603 static inline void tcg_out_ext8u(TCGContext
*s
, int dest
, int src
)
606 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
607 tcg_out_modrm(s
, OPC_MOVZBL
+ P_REXB_RM
, dest
, src
);
610 static void tcg_out_ext8s(TCGContext
*s
, int dest
, int src
, int rexw
)
613 assert(src
< 4 || TCG_TARGET_REG_BITS
== 64);
614 tcg_out_modrm(s
, OPC_MOVSBL
+ P_REXB_RM
+ rexw
, dest
, src
);
617 static inline void tcg_out_ext16u(TCGContext
*s
, int dest
, int src
)
620 tcg_out_modrm(s
, OPC_MOVZWL
, dest
, src
);
623 static inline void tcg_out_ext16s(TCGContext
*s
, int dest
, int src
, int rexw
)
626 tcg_out_modrm(s
, OPC_MOVSWL
+ rexw
, dest
, src
);
629 static inline void tcg_out_ext32u(TCGContext
*s
, int dest
, int src
)
631 /* 32-bit mov zero extends. */
632 tcg_out_modrm(s
, OPC_MOVL_GvEv
, dest
, src
);
635 static inline void tcg_out_ext32s(TCGContext
*s
, int dest
, int src
)
637 tcg_out_modrm(s
, OPC_MOVSLQ
, dest
, src
);
640 static inline void tcg_out_bswap64(TCGContext
*s
, int reg
)
642 tcg_out_opc(s
, OPC_BSWAP
+ P_REXW
+ LOWREGMASK(reg
), 0, reg
, 0);
645 static void tgen_arithi(TCGContext
*s
, int c
, int r0
,
646 tcg_target_long val
, int cf
)
650 if (TCG_TARGET_REG_BITS
== 64) {
655 /* ??? While INC/DEC are 2 bytes shorter than ADDL $1, they also induce
656 partial flags update stalls on Pentium4 and are not recommended
657 by current Intel optimization manuals. */
658 if (!cf
&& (c
== ARITH_ADD
|| c
== ARITH_SUB
) && (val
== 1 || val
== -1)) {
659 int is_inc
= (c
== ARITH_ADD
) ^ (val
< 0);
660 if (TCG_TARGET_REG_BITS
== 64) {
661 /* The single-byte increment encodings are re-tasked as the
662 REX prefixes. Use the MODRM encoding. */
663 tcg_out_modrm(s
, OPC_GRP5
+ rexw
,
664 (is_inc
? EXT5_INC_Ev
: EXT5_DEC_Ev
), r0
);
666 tcg_out8(s
, (is_inc
? OPC_INC_r32
: OPC_DEC_r32
) + r0
);
671 if (c
== ARITH_AND
) {
672 if (TCG_TARGET_REG_BITS
== 64) {
673 if (val
== 0xffffffffu
) {
674 tcg_out_ext32u(s
, r0
, r0
);
677 if (val
== (uint32_t)val
) {
678 /* AND with no high bits set can use a 32-bit operation. */
682 if (val
== 0xffu
&& (r0
< 4 || TCG_TARGET_REG_BITS
== 64)) {
683 tcg_out_ext8u(s
, r0
, r0
);
686 if (val
== 0xffffu
) {
687 tcg_out_ext16u(s
, r0
, r0
);
692 if (val
== (int8_t)val
) {
693 tcg_out_modrm(s
, OPC_ARITH_EvIb
+ rexw
, c
, r0
);
697 if (rexw
== 0 || val
== (int32_t)val
) {
698 tcg_out_modrm(s
, OPC_ARITH_EvIz
+ rexw
, c
, r0
);
706 static void tcg_out_addi(TCGContext
*s
, int reg
, tcg_target_long val
)
709 tgen_arithi(s
, ARITH_ADD
+ P_REXW
, reg
, val
, 0);
713 /* Use SMALL != 0 to force a short forward branch. */
714 static void tcg_out_jxx(TCGContext
*s
, int opc
, int label_index
, int small
)
717 TCGLabel
*l
= &s
->labels
[label_index
];
720 val
= l
->u
.value
- (tcg_target_long
)s
->code_ptr
;
722 if ((int8_t)val1
== val1
) {
724 tcg_out8(s
, OPC_JMP_short
);
726 tcg_out8(s
, OPC_JCC_short
+ opc
);
734 tcg_out8(s
, OPC_JMP_long
);
735 tcg_out32(s
, val
- 5);
737 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
738 tcg_out32(s
, val
- 6);
743 tcg_out8(s
, OPC_JMP_short
);
745 tcg_out8(s
, OPC_JCC_short
+ opc
);
747 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC8
, label_index
, -1);
751 tcg_out8(s
, OPC_JMP_long
);
753 tcg_out_opc(s
, OPC_JCC_long
+ opc
, 0, 0, 0);
755 tcg_out_reloc(s
, s
->code_ptr
, R_386_PC32
, label_index
, -4);
760 static void tcg_out_cmp(TCGContext
*s
, TCGArg arg1
, TCGArg arg2
,
761 int const_arg2
, int rexw
)
766 tcg_out_modrm(s
, OPC_TESTL
+ rexw
, arg1
, arg1
);
768 tgen_arithi(s
, ARITH_CMP
+ rexw
, arg1
, arg2
, 0);
771 tgen_arithr(s
, ARITH_CMP
+ rexw
, arg1
, arg2
);
775 static void tcg_out_brcond32(TCGContext
*s
, TCGCond cond
,
776 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
777 int label_index
, int small
)
779 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
780 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label_index
, small
);
783 #if TCG_TARGET_REG_BITS == 64
784 static void tcg_out_brcond64(TCGContext
*s
, TCGCond cond
,
785 TCGArg arg1
, TCGArg arg2
, int const_arg2
,
786 int label_index
, int small
)
788 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
789 tcg_out_jxx(s
, tcg_cond_to_jcc
[cond
], label_index
, small
);
792 /* XXX: we implement it at the target level to avoid having to
793 handle cross basic blocks temporaries */
794 static void tcg_out_brcond2(TCGContext
*s
, const TCGArg
*args
,
795 const int *const_args
, int small
)
798 label_next
= gen_new_label();
801 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
803 tcg_out_brcond32(s
, TCG_COND_EQ
, args
[1], args
[3], const_args
[3],
807 tcg_out_brcond32(s
, TCG_COND_NE
, args
[0], args
[2], const_args
[2],
809 tcg_out_brcond32(s
, TCG_COND_NE
, args
[1], args
[3], const_args
[3],
813 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
815 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
816 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
820 tcg_out_brcond32(s
, TCG_COND_LT
, args
[1], args
[3], const_args
[3],
822 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
823 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
827 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
829 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
830 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
834 tcg_out_brcond32(s
, TCG_COND_GT
, args
[1], args
[3], const_args
[3],
836 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
837 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
841 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
843 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
844 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[0], args
[2], const_args
[2],
848 tcg_out_brcond32(s
, TCG_COND_LTU
, args
[1], args
[3], const_args
[3],
850 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
851 tcg_out_brcond32(s
, TCG_COND_LEU
, args
[0], args
[2], const_args
[2],
855 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
857 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
858 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[0], args
[2], const_args
[2],
862 tcg_out_brcond32(s
, TCG_COND_GTU
, args
[1], args
[3], const_args
[3],
864 tcg_out_jxx(s
, JCC_JNE
, label_next
, 1);
865 tcg_out_brcond32(s
, TCG_COND_GEU
, args
[0], args
[2], const_args
[2],
871 tcg_out_label(s
, label_next
, (tcg_target_long
)s
->code_ptr
);
875 static void tcg_out_setcond32(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
876 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
878 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, 0);
879 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
880 tcg_out_ext8u(s
, dest
, dest
);
883 #if TCG_TARGET_REG_BITS == 64
884 static void tcg_out_setcond64(TCGContext
*s
, TCGCond cond
, TCGArg dest
,
885 TCGArg arg1
, TCGArg arg2
, int const_arg2
)
887 tcg_out_cmp(s
, arg1
, arg2
, const_arg2
, P_REXW
);
888 tcg_out_modrm(s
, OPC_SETCC
| tcg_cond_to_jcc
[cond
], 0, dest
);
889 tcg_out_ext8u(s
, dest
, dest
);
892 static void tcg_out_setcond2(TCGContext
*s
, const TCGArg
*args
,
893 const int *const_args
)
896 int label_true
, label_over
;
898 memcpy(new_args
, args
+1, 5*sizeof(TCGArg
));
900 if (args
[0] == args
[1] || args
[0] == args
[2]
901 || (!const_args
[3] && args
[0] == args
[3])
902 || (!const_args
[4] && args
[0] == args
[4])) {
903 /* When the destination overlaps with one of the argument
904 registers, don't do anything tricky. */
905 label_true
= gen_new_label();
906 label_over
= gen_new_label();
908 new_args
[5] = label_true
;
909 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
911 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
912 tcg_out_jxx(s
, JCC_JMP
, label_over
, 1);
913 tcg_out_label(s
, label_true
, (tcg_target_long
)s
->code_ptr
);
915 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 1);
916 tcg_out_label(s
, label_over
, (tcg_target_long
)s
->code_ptr
);
918 /* When the destination does not overlap one of the arguments,
919 clear the destination first, jump if cond false, and emit an
920 increment in the true case. This results in smaller code. */
922 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], 0);
924 label_over
= gen_new_label();
925 new_args
[4] = tcg_invert_cond(new_args
[4]);
926 new_args
[5] = label_over
;
927 tcg_out_brcond2(s
, new_args
, const_args
+1, 1);
929 tgen_arithi(s
, ARITH_ADD
, args
[0], 1, 0);
930 tcg_out_label(s
, label_over
, (tcg_target_long
)s
->code_ptr
);
935 static void tcg_out_branch(TCGContext
*s
, int call
, tcg_target_long dest
)
937 tcg_target_long disp
= dest
- (tcg_target_long
)s
->code_ptr
- 5;
939 if (disp
== (int32_t)disp
) {
940 tcg_out_opc(s
, call
? OPC_CALL_Jz
: OPC_JMP_long
, 0, 0, 0);
943 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_R10
, dest
);
944 tcg_out_modrm(s
, OPC_GRP5
,
945 call
? EXT5_CALLN_Ev
: EXT5_JMPN_Ev
, TCG_REG_R10
);
949 static inline void tcg_out_calli(TCGContext
*s
, tcg_target_long dest
)
951 tcg_out_branch(s
, 1, dest
);
954 static void tcg_out_jmp(TCGContext
*s
, tcg_target_long dest
)
956 tcg_out_branch(s
, 0, dest
);
959 #if defined(CONFIG_SOFTMMU)
961 #include "../../softmmu_defs.h"
963 static void *qemu_ld_helpers
[4] = {
970 static void *qemu_st_helpers
[4] = {
977 /* Perform the TLB load and compare.
980 ADDRLO_IDX contains the index into ARGS of the low part of the
981 address; the high part of the address is at ADDRLO_IDX+1.
983 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
985 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
986 This should be offsetof addr_read or addr_write.
989 LABEL_PTR is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
990 positions of the displacements of forward jumps to the TLB miss case.
992 First argument register is loaded with the low part of the address.
993 In the TLB hit case, it has been adjusted as indicated by the TLB
994 and so is a host address. In the TLB miss case, it continues to
995 hold a guest address.
997 Second argument register is clobbered. */
999 static inline void tcg_out_tlb_load(TCGContext
*s
, int addrlo_idx
,
1000 int mem_index
, int s_bits
,
1002 uint8_t **label_ptr
, int which
)
1004 const int addrlo
= args
[addrlo_idx
];
1005 const int r0
= tcg_target_call_iarg_regs
[0];
1006 const int r1
= tcg_target_call_iarg_regs
[1];
1007 TCGType type
= TCG_TYPE_I32
;
1010 if (TCG_TARGET_REG_BITS
== 64 && TARGET_LONG_BITS
== 64) {
1011 type
= TCG_TYPE_I64
;
1015 tcg_out_mov(s
, type
, r1
, addrlo
);
1016 tcg_out_mov(s
, type
, r0
, addrlo
);
1018 tcg_out_shifti(s
, SHIFT_SHR
+ rexw
, r1
,
1019 TARGET_PAGE_BITS
- CPU_TLB_ENTRY_BITS
);
1021 tgen_arithi(s
, ARITH_AND
+ rexw
, r0
,
1022 TARGET_PAGE_MASK
| ((1 << s_bits
) - 1), 0);
1023 tgen_arithi(s
, ARITH_AND
+ rexw
, r1
,
1024 (CPU_TLB_SIZE
- 1) << CPU_TLB_ENTRY_BITS
, 0);
1026 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ P_REXW
, r1
, TCG_AREG0
, r1
, 0,
1027 offsetof(CPUState
, tlb_table
[mem_index
][0])
1031 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
+ rexw
, r0
, r1
, 0);
1033 tcg_out_mov(s
, type
, r0
, addrlo
);
1036 tcg_out8(s
, OPC_JCC_short
+ JCC_JNE
);
1037 label_ptr
[0] = s
->code_ptr
;
1040 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1041 /* cmp 4(r1), addrhi */
1042 tcg_out_modrm_offset(s
, OPC_CMP_GvEv
, args
[addrlo_idx
+1], r1
, 4);
1045 tcg_out8(s
, OPC_JCC_short
+ JCC_JNE
);
1046 label_ptr
[1] = s
->code_ptr
;
1052 /* add addend(r1), r0 */
1053 tcg_out_modrm_offset(s
, OPC_ADD_GvEv
+ P_REXW
, r0
, r1
,
1054 offsetof(CPUTLBEntry
, addend
) - which
);
1058 static void tcg_out_qemu_ld_direct(TCGContext
*s
, int datalo
, int datahi
,
1059 int base
, tcg_target_long ofs
, int sizeop
)
1061 #ifdef TARGET_WORDS_BIGENDIAN
1062 const int bswap
= 1;
1064 const int bswap
= 0;
1068 tcg_out_modrm_offset(s
, OPC_MOVZBL
, datalo
, base
, ofs
);
1071 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ P_REXW
, datalo
, base
, ofs
);
1074 tcg_out_modrm_offset(s
, OPC_MOVZWL
, datalo
, base
, ofs
);
1076 tcg_out_rolw_8(s
, datalo
);
1081 tcg_out_modrm_offset(s
, OPC_MOVZWL
, datalo
, base
, ofs
);
1082 tcg_out_rolw_8(s
, datalo
);
1083 tcg_out_modrm(s
, OPC_MOVSWL
+ P_REXW
, datalo
, datalo
);
1085 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ P_REXW
, datalo
, base
, ofs
);
1089 tcg_out_ld(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1091 tcg_out_bswap32(s
, datalo
);
1094 #if TCG_TARGET_REG_BITS == 64
1097 tcg_out_ld(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1098 tcg_out_bswap32(s
, datalo
);
1099 tcg_out_ext32s(s
, datalo
, datalo
);
1101 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, datalo
, base
, ofs
);
1106 if (TCG_TARGET_REG_BITS
== 64) {
1107 tcg_out_ld(s
, TCG_TYPE_I64
, datalo
, base
, ofs
);
1109 tcg_out_bswap64(s
, datalo
);
1117 if (base
!= datalo
) {
1118 tcg_out_ld(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1119 tcg_out_ld(s
, TCG_TYPE_I32
, datahi
, base
, ofs
+ 4);
1121 tcg_out_ld(s
, TCG_TYPE_I32
, datahi
, base
, ofs
+ 4);
1122 tcg_out_ld(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1125 tcg_out_bswap32(s
, datalo
);
1126 tcg_out_bswap32(s
, datahi
);
1135 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1136 EAX. It will be useful once fixed registers globals are less
1138 static void tcg_out_qemu_ld(TCGContext
*s
, const TCGArg
*args
,
1141 int data_reg
, data_reg2
= 0;
1143 #if defined(CONFIG_SOFTMMU)
1144 int mem_index
, s_bits
, arg_idx
;
1145 uint8_t *label_ptr
[3];
1150 if (TCG_TARGET_REG_BITS
== 32 && opc
== 3) {
1151 data_reg2
= args
[1];
1155 #if defined(CONFIG_SOFTMMU)
1156 mem_index
= args
[addrlo_idx
+ 1 + (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
)];
1159 tcg_out_tlb_load(s
, addrlo_idx
, mem_index
, s_bits
, args
,
1160 label_ptr
, offsetof(CPUTLBEntry
, addr_read
));
1163 tcg_out_qemu_ld_direct(s
, data_reg
, data_reg2
,
1164 tcg_target_call_iarg_regs
[0], 0, opc
);
1167 tcg_out8(s
, OPC_JMP_short
);
1168 label_ptr
[2] = s
->code_ptr
;
1174 *label_ptr
[0] = s
->code_ptr
- label_ptr
[0] - 1;
1175 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1176 *label_ptr
[1] = s
->code_ptr
- label_ptr
[1] - 1;
1179 /* XXX: move that code to the end of the TB */
1180 /* The first argument is already loaded with addrlo. */
1182 if (TCG_TARGET_REG_BITS
== 32 && TARGET_LONG_BITS
== 64) {
1183 tcg_out_mov(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[arg_idx
++],
1184 args
[addrlo_idx
+ 1]);
1186 tcg_out_movi(s
, TCG_TYPE_I32
, tcg_target_call_iarg_regs
[arg_idx
],
1188 tcg_out_calli(s
, (tcg_target_long
)qemu_ld_helpers
[s_bits
]);
1192 tcg_out_ext8s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1195 tcg_out_ext16s(s
, data_reg
, TCG_REG_EAX
, P_REXW
);
1198 tcg_out_ext8u(s
, data_reg
, TCG_REG_EAX
);
1201 tcg_out_ext16u(s
, data_reg
, TCG_REG_EAX
);
1204 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1206 #if TCG_TARGET_REG_BITS == 64
1208 tcg_out_ext32s(s
, data_reg
, TCG_REG_EAX
);
1212 if (TCG_TARGET_REG_BITS
== 64) {
1213 tcg_out_mov(s
, TCG_TYPE_I64
, data_reg
, TCG_REG_RAX
);
1214 } else if (data_reg
== TCG_REG_EDX
) {
1215 /* xchg %edx, %eax */
1216 tcg_out_opc(s
, OPC_XCHG_ax_r32
+ TCG_REG_EDX
, 0, 0, 0);
1217 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg2
, TCG_REG_EAX
);
1219 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg
, TCG_REG_EAX
);
1220 tcg_out_mov(s
, TCG_TYPE_I32
, data_reg2
, TCG_REG_EDX
);
1228 *label_ptr
[2] = s
->code_ptr
- label_ptr
[2] - 1;
1231 int32_t offset
= GUEST_BASE
;
1232 int base
= args
[addrlo_idx
];
1234 if (TCG_TARGET_REG_BITS
== 64) {
1235 /* ??? We assume all operations have left us with register
1236 contents that are zero extended. So far this appears to
1237 be true. If we want to enforce this, we can either do
1238 an explicit zero-extension here, or (if GUEST_BASE == 0)
1239 use the ADDR32 prefix. For now, do nothing. */
1241 if (offset
!= GUEST_BASE
) {
1242 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_RDI
, GUEST_BASE
);
1243 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_RDI
, base
);
1244 base
= TCG_REG_RDI
, offset
= 0;
1248 tcg_out_qemu_ld_direct(s
, data_reg
, data_reg2
, base
, offset
, opc
);
1253 static void tcg_out_qemu_st_direct(TCGContext
*s
, int datalo
, int datahi
,
1254 int base
, tcg_target_long ofs
, int sizeop
)
1256 #ifdef TARGET_WORDS_BIGENDIAN
1257 const int bswap
= 1;
1259 const int bswap
= 0;
1261 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1262 we could perform the bswap twice to restore the original value
1263 instead of moving to the scratch. But as it is, the L constraint
1264 means that the second argument reg is definitely free here. */
1265 int scratch
= tcg_target_call_iarg_regs
[1];
1269 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
+ P_REXB_R
, datalo
, base
, ofs
);
1273 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1274 tcg_out_rolw_8(s
, scratch
);
1277 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
+ P_DATA16
, datalo
, base
, ofs
);
1281 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1282 tcg_out_bswap32(s
, scratch
);
1285 tcg_out_st(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1288 if (TCG_TARGET_REG_BITS
== 64) {
1290 tcg_out_mov(s
, TCG_TYPE_I64
, scratch
, datalo
);
1291 tcg_out_bswap64(s
, scratch
);
1294 tcg_out_st(s
, TCG_TYPE_I64
, datalo
, base
, ofs
);
1296 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datahi
);
1297 tcg_out_bswap32(s
, scratch
);
1298 tcg_out_st(s
, TCG_TYPE_I32
, scratch
, base
, ofs
);
1299 tcg_out_mov(s
, TCG_TYPE_I32
, scratch
, datalo
);
1300 tcg_out_bswap32(s
, scratch
);
1301 tcg_out_st(s
, TCG_TYPE_I32
, scratch
, base
, ofs
+ 4);
1303 tcg_out_st(s
, TCG_TYPE_I32
, datalo
, base
, ofs
);
1304 tcg_out_st(s
, TCG_TYPE_I32
, datahi
, base
, ofs
+ 4);
1312 static void tcg_out_qemu_st(TCGContext
*s
, const TCGArg
*args
,
1315 int data_reg
, data_reg2
= 0;
1317 #if defined(CONFIG_SOFTMMU)
1318 int mem_index
, s_bits
;
1320 uint8_t *label_ptr
[3];
1325 if (TCG_TARGET_REG_BITS
== 32 && opc
== 3) {
1326 data_reg2
= args
[1];
1330 #if defined(CONFIG_SOFTMMU)
1331 mem_index
= args
[addrlo_idx
+ 1 + (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
)];
1334 tcg_out_tlb_load(s
, addrlo_idx
, mem_index
, s_bits
, args
,
1335 label_ptr
, offsetof(CPUTLBEntry
, addr_write
));
1338 tcg_out_qemu_st_direct(s
, data_reg
, data_reg2
,
1339 tcg_target_call_iarg_regs
[0], 0, opc
);
1342 tcg_out8(s
, OPC_JMP_short
);
1343 label_ptr
[2] = s
->code_ptr
;
1349 *label_ptr
[0] = s
->code_ptr
- label_ptr
[0] - 1;
1350 if (TARGET_LONG_BITS
> TCG_TARGET_REG_BITS
) {
1351 *label_ptr
[1] = s
->code_ptr
- label_ptr
[1] - 1;
1354 /* XXX: move that code to the end of the TB */
1355 if (TCG_TARGET_REG_BITS
== 64) {
1356 tcg_out_mov(s
, (opc
== 3 ? TCG_TYPE_I64
: TCG_TYPE_I32
),
1357 TCG_REG_RSI
, data_reg
);
1358 tcg_out_movi(s
, TCG_TYPE_I32
, TCG_REG_RDX
, mem_index
);
1360 } else if (TARGET_LONG_BITS
== 32) {
1361 tcg_out_mov(s
, TCG_TYPE_I32
, TCG_REG_EDX
, data_reg
);
1363 tcg_out_mov(s
, TCG_TYPE_I32
, TCG_REG_ECX
, data_reg2
);
1364 tcg_out_pushi(s
, mem_index
);
1367 tcg_out_movi(s
, TCG_TYPE_I32
, TCG_REG_ECX
, mem_index
);
1372 tcg_out_mov(s
, TCG_TYPE_I32
, TCG_REG_EDX
, args
[addrlo_idx
+ 1]);
1373 tcg_out_pushi(s
, mem_index
);
1374 tcg_out_push(s
, data_reg2
);
1375 tcg_out_push(s
, data_reg
);
1378 tcg_out_mov(s
, TCG_TYPE_I32
, TCG_REG_EDX
, args
[addrlo_idx
+ 1]);
1381 tcg_out_ext8u(s
, TCG_REG_ECX
, data_reg
);
1384 tcg_out_ext16u(s
, TCG_REG_ECX
, data_reg
);
1387 tcg_out_mov(s
, TCG_TYPE_I32
, TCG_REG_ECX
, data_reg
);
1390 tcg_out_pushi(s
, mem_index
);
1395 tcg_out_calli(s
, (tcg_target_long
)qemu_st_helpers
[s_bits
]);
1397 if (stack_adjust
== (TCG_TARGET_REG_BITS
/ 8)) {
1398 /* Pop and discard. This is 2 bytes smaller than the add. */
1399 tcg_out_pop(s
, TCG_REG_ECX
);
1400 } else if (stack_adjust
!= 0) {
1401 tcg_out_addi(s
, TCG_REG_CALL_STACK
, stack_adjust
);
1405 *label_ptr
[2] = s
->code_ptr
- label_ptr
[2] - 1;
1408 int32_t offset
= GUEST_BASE
;
1409 int base
= args
[addrlo_idx
];
1411 if (TCG_TARGET_REG_BITS
== 64) {
1412 /* ??? We assume all operations have left us with register
1413 contents that are zero extended. So far this appears to
1414 be true. If we want to enforce this, we can either do
1415 an explicit zero-extension here, or (if GUEST_BASE == 0)
1416 use the ADDR32 prefix. For now, do nothing. */
1418 if (offset
!= GUEST_BASE
) {
1419 tcg_out_movi(s
, TCG_TYPE_I64
, TCG_REG_RDI
, GUEST_BASE
);
1420 tgen_arithr(s
, ARITH_ADD
+ P_REXW
, TCG_REG_RDI
, base
);
1421 base
= TCG_REG_RDI
, offset
= 0;
1425 tcg_out_qemu_st_direct(s
, data_reg
, data_reg2
, base
, offset
, opc
);
/*
 * Emit host code for a single TCG opcode.
 *
 * s          - code generation context the instructions are appended to.
 * opc        - the opcode being translated; selects the case below.
 * args       - operands of the op (registers, immediates, offsets or label
 *              indices, depending on the opcode).
 * const_args - per-operand flags; where an entry is non-zero the code below
 *              takes the immediate form of the instruction for the matching
 *              args[] entry, otherwise the register form.
 *
 * rexw is added to the opcode of most arithmetic cases; it is set to
 * P_REXW by the _i64 case labels generated through OP_32_64.
 */
1430 static inline void tcg_out_op(TCGContext
*s
, TCGOpcode opc
,
1431 const TCGArg
*args
, const int *const_args
)
/* OP_32_64(x) expands to the case label(s) for op x.  On a 64-bit host the
   _i64 label sets rexw and falls through to the _i32 label; the second
   definition yields only the _i32 label (the preprocessor line between the
   two definitions is not visible in this chunk). */
1435 #if TCG_TARGET_REG_BITS == 64
1436 # define OP_32_64(x) \
1437 case glue(glue(INDEX_op_, x), _i64): \
1438 rexw = P_REXW; /* FALLTHRU */ \
1439 case glue(glue(INDEX_op_, x), _i32)
1441 # define OP_32_64(x) \
1442 case glue(glue(INDEX_op_, x), _i32)
/* exit_tb: load args[0] into EAX and jump to tb_ret_addr. */
1446 case INDEX_op_exit_tb
:
1447 tcg_out_movi(s
, TCG_TYPE_PTR
, TCG_REG_EAX
, args
[0]);
1448 tcg_out_jmp(s
, (tcg_target_long
) tb_ret_addr
);
1450 case INDEX_op_goto_tb
:
1451 if (s
->tb_jmp_offset
) {
1452 /* direct jump method */
1453 tcg_out8(s
, OPC_JMP_long
); /* jmp im */
1454 s
->tb_jmp_offset
[args
[0]] = s
->code_ptr
- s
->code_buf
;
1457 /* indirect jump method */
1458 tcg_out_modrm_offset(s
, OPC_GRP5
, EXT5_JMPN_Ev
, -1,
1459 (tcg_target_long
)(s
->tb_next
+ args
[0]));
1461 s
->tb_next_offset
[args
[0]] = s
->code_ptr
- s
->code_buf
;
1464 if (const_args
[0]) {
1465 tcg_out_calli(s
, args
[0]);
1468 tcg_out_modrm(s
, OPC_GRP5
, EXT5_CALLN_Ev
, args
[0]);
1472 if (const_args
[0]) {
1473 tcg_out_jmp(s
, args
[0]);
1476 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, args
[0]);
1480 tcg_out_jxx(s
, JCC_JMP
, args
[0], 0);
1482 case INDEX_op_movi_i32
:
1483 tcg_out_movi(s
, TCG_TYPE_I32
, args
[0], args
[1]);
1486 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1487 tcg_out_modrm_offset(s
, OPC_MOVZBL
, args
[0], args
[1], args
[2]);
1490 tcg_out_modrm_offset(s
, OPC_MOVSBL
+ rexw
, args
[0], args
[1], args
[2]);
1493 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1494 tcg_out_modrm_offset(s
, OPC_MOVZWL
, args
[0], args
[1], args
[2]);
1497 tcg_out_modrm_offset(s
, OPC_MOVSWL
+ rexw
, args
[0], args
[1], args
[2]);
1499 #if TCG_TARGET_REG_BITS == 64
1500 case INDEX_op_ld32u_i64
:
1502 case INDEX_op_ld_i32
:
1503 tcg_out_ld(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1507 tcg_out_modrm_offset(s
, OPC_MOVB_EvGv
| P_REXB_R
,
1508 args
[0], args
[1], args
[2]);
1511 tcg_out_modrm_offset(s
, OPC_MOVL_EvGv
| P_DATA16
,
1512 args
[0], args
[1], args
[2]);
1514 #if TCG_TARGET_REG_BITS == 64
1515 case INDEX_op_st32_i64
:
1517 case INDEX_op_st_i32
:
1518 tcg_out_st(s
, TCG_TYPE_I32
, args
[0], args
[1], args
[2]);
1522 /* For 3-operand addition, use LEA. */
1523 if (args
[0] != args
[1]) {
1524 TCGArg a0
= args
[0], a1
= args
[1], a2
= args
[2], c3
= 0;
1526 if (const_args
[2]) {
1528 } else if (a0
== a2
) {
1529 /* Watch out for dest = src + dest, since we've removed
1530 the matching constraint on the add. */
1531 tgen_arithr(s
, ARITH_ADD
+ rexw
, a0
, a1
);
1535 tcg_out_modrm_sib_offset(s
, OPC_LEA
+ rexw
, a0
, a1
, a2
, 0, c3
);
1553 if (const_args
[2]) {
1554 tgen_arithi(s
, c
+ rexw
, args
[0], args
[2], 0);
1556 tgen_arithr(s
, c
+ rexw
, args
[0], args
[2]);
1561 if (const_args
[2]) {
/* A constant that fits in a signed byte uses the Ib (byte immediate) form
   of IMUL; any other constant uses the Iz form. */
1564 if (val
== (int8_t)val
) {
1565 tcg_out_modrm(s
, OPC_IMUL_GvEvIb
+ rexw
, args
[0], args
[0]);
1568 tcg_out_modrm(s
, OPC_IMUL_GvEvIz
+ rexw
, args
[0], args
[0]);
1572 tcg_out_modrm(s
, OPC_IMUL_GvEv
+ rexw
, args
[0], args
[2]);
1577 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_IDIV
, args
[4]);
1580 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_DIV
, args
[4]);
1599 if (const_args
[2]) {
1600 tcg_out_shifti(s
, c
+ rexw
, args
[0], args
[2]);
1602 tcg_out_modrm(s
, OPC_SHIFT_cl
+ rexw
, c
, args
[0]);
1606 case INDEX_op_brcond_i32
:
1607 tcg_out_brcond32(s
, args
[2], args
[0], args
[1], const_args
[1],
1610 case INDEX_op_setcond_i32
:
1611 tcg_out_setcond32(s
, args
[3], args
[0], args
[1],
1612 args
[2], const_args
[2]);
1616 tcg_out_rolw_8(s
, args
[0]);
1619 tcg_out_bswap32(s
, args
[0]);
1623 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NEG
, args
[0]);
1626 tcg_out_modrm(s
, OPC_GRP3_Ev
+ rexw
, EXT3_NOT
, args
[0]);
1630 tcg_out_ext8s(s
, args
[0], args
[1], rexw
);
1633 tcg_out_ext16s(s
, args
[0], args
[1], rexw
);
1636 tcg_out_ext8u(s
, args
[0], args
[1]);
1639 tcg_out_ext16u(s
, args
[0], args
[1]);
/* Guest loads and stores.  The last argument is the size code 0..3 for
   8/16/32/64-bit accesses; loads OR in 4 for the sign-extending variants
   (ld8s, ld16s, ld32s). */
1642 case INDEX_op_qemu_ld8u
:
1643 tcg_out_qemu_ld(s
, args
, 0);
1645 case INDEX_op_qemu_ld8s
:
1646 tcg_out_qemu_ld(s
, args
, 0 | 4);
1648 case INDEX_op_qemu_ld16u
:
1649 tcg_out_qemu_ld(s
, args
, 1);
1651 case INDEX_op_qemu_ld16s
:
1652 tcg_out_qemu_ld(s
, args
, 1 | 4);
1654 #if TCG_TARGET_REG_BITS == 64
1655 case INDEX_op_qemu_ld32u
:
1657 case INDEX_op_qemu_ld32
:
1658 tcg_out_qemu_ld(s
, args
, 2);
1660 case INDEX_op_qemu_ld64
:
1661 tcg_out_qemu_ld(s
, args
, 3);
1664 case INDEX_op_qemu_st8
:
1665 tcg_out_qemu_st(s
, args
, 0);
1667 case INDEX_op_qemu_st16
:
1668 tcg_out_qemu_st(s
, args
, 1);
1670 case INDEX_op_qemu_st32
:
1671 tcg_out_qemu_st(s
, args
, 2);
1673 case INDEX_op_qemu_st64
:
1674 tcg_out_qemu_st(s
, args
, 3);
/* Ops handled only when the host registers are 32 bits wide. */
1677 #if TCG_TARGET_REG_BITS == 32
1678 case INDEX_op_brcond2_i32
:
1679 tcg_out_brcond2(s
, args
, const_args
, 0);
1681 case INDEX_op_setcond2_i32
:
1682 tcg_out_setcond2(s
, args
, const_args
);
1684 case INDEX_op_mulu2_i32
:
1685 tcg_out_modrm(s
, OPC_GRP3_Ev
, EXT3_MUL
, args
[3]);
/* add2: ADD on args[0]/args[4] followed by ADC on args[1]/args[5];
   presumably the low and high words of a double-word addition. */
1687 case INDEX_op_add2_i32
:
1688 if (const_args
[4]) {
1689 tgen_arithi(s
, ARITH_ADD
, args
[0], args
[4], 1);
1691 tgen_arithr(s
, ARITH_ADD
, args
[0], args
[4]);
1693 if (const_args
[5]) {
1694 tgen_arithi(s
, ARITH_ADC
, args
[1], args
[5], 1);
1696 tgen_arithr(s
, ARITH_ADC
, args
[1], args
[5]);
/* sub2: same shape as add2, using SUB followed by SBB. */
1699 case INDEX_op_sub2_i32
:
1700 if (const_args
[4]) {
1701 tgen_arithi(s
, ARITH_SUB
, args
[0], args
[4], 1);
1703 tgen_arithr(s
, ARITH_SUB
, args
[0], args
[4]);
1705 if (const_args
[5]) {
1706 tgen_arithi(s
, ARITH_SBB
, args
[1], args
[5], 1);
1708 tgen_arithr(s
, ARITH_SBB
, args
[1], args
[5]);
1711 #else /* TCG_TARGET_REG_BITS == 64 */
1712 case INDEX_op_movi_i64
:
1713 tcg_out_movi(s
, TCG_TYPE_I64
, args
[0], args
[1]);
1715 case INDEX_op_ld32s_i64
:
1716 tcg_out_modrm_offset(s
, OPC_MOVSLQ
, args
[0], args
[1], args
[2]);
1718 case INDEX_op_ld_i64
:
1719 tcg_out_ld(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
1721 case INDEX_op_st_i64
:
1722 tcg_out_st(s
, TCG_TYPE_I64
, args
[0], args
[1], args
[2]);
1724 case INDEX_op_qemu_ld32s
:
1725 tcg_out_qemu_ld(s
, args
, 2 | 4);
1728 case INDEX_op_brcond_i64
:
1729 tcg_out_brcond64(s
, args
[2], args
[0], args
[1], const_args
[1],
1732 case INDEX_op_setcond_i64
:
1733 tcg_out_setcond64(s
, args
[3], args
[0], args
[1],
1734 args
[2], const_args
[2]);
1737 case INDEX_op_bswap64_i64
:
1738 tcg_out_bswap64(s
, args
[0]);
1740 case INDEX_op_ext32u_i64
:
1741 tcg_out_ext32u(s
, args
[0], args
[1]);
1743 case INDEX_op_ext32s_i64
:
1744 tcg_out_ext32s(s
, args
[0], args
[1]);
/*
 * Operand constraint table for this backend: each entry pairs an opcode
 * index with one constraint string per operand.  The table is handed to
 * tcg_add_target_add_op_defs() from tcg_target_init().
 *
 * The constraint letters ("r", "q", "a", "d", "c", "i", "e", "Z", "L", ...)
 * are interpreted by code that is not visible in this chunk.  A digit such
 * as "0" or "1" appears to be a matching constraint tying an input to the
 * output operand of that index (see the comment on the add case in
 * tcg_out_op) -- confirm against the constraint parser.
 */
1755 static const TCGTargetOpDef x86_op_defs
[] = {
1756 { INDEX_op_exit_tb
, { } },
1757 { INDEX_op_goto_tb
, { } },
1758 { INDEX_op_call
, { "ri" } },
1759 { INDEX_op_jmp
, { "ri" } },
1760 { INDEX_op_br
, { } },
1761 { INDEX_op_mov_i32
, { "r", "r" } },
1762 { INDEX_op_movi_i32
, { "r" } },
1763 { INDEX_op_ld8u_i32
, { "r", "r" } },
1764 { INDEX_op_ld8s_i32
, { "r", "r" } },
1765 { INDEX_op_ld16u_i32
, { "r", "r" } },
1766 { INDEX_op_ld16s_i32
, { "r", "r" } },
1767 { INDEX_op_ld_i32
, { "r", "r" } },
1768 { INDEX_op_st8_i32
, { "q", "r" } },
1769 { INDEX_op_st16_i32
, { "r", "r" } },
1770 { INDEX_op_st_i32
, { "r", "r" } },
/* add_i32 is the one 32-bit arithmetic op here without a "0" on its first
   input. */
1772 { INDEX_op_add_i32
, { "r", "r", "ri" } },
1773 { INDEX_op_sub_i32
, { "r", "0", "ri" } },
1774 { INDEX_op_mul_i32
, { "r", "0", "ri" } },
1775 { INDEX_op_div2_i32
, { "a", "d", "0", "1", "r" } },
1776 { INDEX_op_divu2_i32
, { "a", "d", "0", "1", "r" } },
1777 { INDEX_op_and_i32
, { "r", "0", "ri" } },
1778 { INDEX_op_or_i32
, { "r", "0", "ri" } },
1779 { INDEX_op_xor_i32
, { "r", "0", "ri" } },
1781 { INDEX_op_shl_i32
, { "r", "0", "ci" } },
1782 { INDEX_op_shr_i32
, { "r", "0", "ci" } },
1783 { INDEX_op_sar_i32
, { "r", "0", "ci" } },
1784 { INDEX_op_rotl_i32
, { "r", "0", "ci" } },
1785 { INDEX_op_rotr_i32
, { "r", "0", "ci" } },
1787 { INDEX_op_brcond_i32
, { "r", "ri" } },
1789 { INDEX_op_bswap16_i32
, { "r", "0" } },
1790 { INDEX_op_bswap32_i32
, { "r", "0" } },
1792 { INDEX_op_neg_i32
, { "r", "0" } },
1794 { INDEX_op_not_i32
, { "r", "0" } },
1796 { INDEX_op_ext8s_i32
, { "r", "q" } },
1797 { INDEX_op_ext16s_i32
, { "r", "r" } },
1798 { INDEX_op_ext8u_i32
, { "r", "q" } },
1799 { INDEX_op_ext16u_i32
, { "r", "r" } },
1801 { INDEX_op_setcond_i32
, { "q", "r", "ri" } },
/* Entries for 32-bit hosts only, followed by the _i64 entries.
   NOTE(review): the preprocessor line separating the two groups is not
   visible in this chunk. */
1803 #if TCG_TARGET_REG_BITS == 32
1804 { INDEX_op_mulu2_i32
, { "a", "d", "a", "r" } },
1805 { INDEX_op_add2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
1806 { INDEX_op_sub2_i32
, { "r", "r", "0", "1", "ri", "ri" } },
1807 { INDEX_op_brcond2_i32
, { "r", "r", "ri", "ri" } },
1808 { INDEX_op_setcond2_i32
, { "r", "r", "r", "ri", "ri" } },
1810 { INDEX_op_mov_i64
, { "r", "r" } },
1811 { INDEX_op_movi_i64
, { "r" } },
1812 { INDEX_op_ld8u_i64
, { "r", "r" } },
1813 { INDEX_op_ld8s_i64
, { "r", "r" } },
1814 { INDEX_op_ld16u_i64
, { "r", "r" } },
1815 { INDEX_op_ld16s_i64
, { "r", "r" } },
1816 { INDEX_op_ld32u_i64
, { "r", "r" } },
1817 { INDEX_op_ld32s_i64
, { "r", "r" } },
1818 { INDEX_op_ld_i64
, { "r", "r" } },
1819 { INDEX_op_st8_i64
, { "r", "r" } },
1820 { INDEX_op_st16_i64
, { "r", "r" } },
1821 { INDEX_op_st32_i64
, { "r", "r" } },
1822 { INDEX_op_st_i64
, { "r", "r" } },
1824 { INDEX_op_add_i64
, { "r", "0", "re" } },
1825 { INDEX_op_mul_i64
, { "r", "0", "re" } },
1826 { INDEX_op_div2_i64
, { "a", "d", "0", "1", "r" } },
1827 { INDEX_op_divu2_i64
, { "a", "d", "0", "1", "r" } },
1828 { INDEX_op_sub_i64
, { "r", "0", "re" } },
1829 { INDEX_op_and_i64
, { "r", "0", "reZ" } },
1830 { INDEX_op_or_i64
, { "r", "0", "re" } },
1831 { INDEX_op_xor_i64
, { "r", "0", "re" } },
1833 { INDEX_op_shl_i64
, { "r", "0", "ci" } },
1834 { INDEX_op_shr_i64
, { "r", "0", "ci" } },
1835 { INDEX_op_sar_i64
, { "r", "0", "ci" } },
1836 { INDEX_op_rotl_i64
, { "r", "0", "ci" } },
1837 { INDEX_op_rotr_i64
, { "r", "0", "ci" } },
1839 { INDEX_op_brcond_i64
, { "r", "re" } },
1840 { INDEX_op_setcond_i64
, { "r", "r", "re" } },
1842 { INDEX_op_bswap16_i64
, { "r", "0" } },
1843 { INDEX_op_bswap32_i64
, { "r", "0" } },
1844 { INDEX_op_bswap64_i64
, { "r", "0" } },
1845 { INDEX_op_neg_i64
, { "r", "0" } },
1846 { INDEX_op_not_i64
, { "r", "0" } },
1848 { INDEX_op_ext8s_i64
, { "r", "r" } },
1849 { INDEX_op_ext16s_i64
, { "r", "r" } },
1850 { INDEX_op_ext32s_i64
, { "r", "r" } },
1851 { INDEX_op_ext8u_i64
, { "r", "r" } },
1852 { INDEX_op_ext16u_i64
, { "r", "r" } },
1853 { INDEX_op_ext32u_i64
, { "r", "r" } },
/* Guest load/store constraints come in three variants: 64-bit host; host
   where the guest address fits in one register; and a final group in which
   every entry carries one additional "L" operand (NOTE(review): presumably
   the second half of a guest address wider than a host register -- the
   preprocessor line introducing that group is not visible in this chunk). */
1856 #if TCG_TARGET_REG_BITS == 64
1857 { INDEX_op_qemu_ld8u
, { "r", "L" } },
1858 { INDEX_op_qemu_ld8s
, { "r", "L" } },
1859 { INDEX_op_qemu_ld16u
, { "r", "L" } },
1860 { INDEX_op_qemu_ld16s
, { "r", "L" } },
1861 { INDEX_op_qemu_ld32
, { "r", "L" } },
1862 { INDEX_op_qemu_ld32u
, { "r", "L" } },
1863 { INDEX_op_qemu_ld32s
, { "r", "L" } },
1864 { INDEX_op_qemu_ld64
, { "r", "L" } },
1866 { INDEX_op_qemu_st8
, { "L", "L" } },
1867 { INDEX_op_qemu_st16
, { "L", "L" } },
1868 { INDEX_op_qemu_st32
, { "L", "L" } },
1869 { INDEX_op_qemu_st64
, { "L", "L" } },
1870 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1871 { INDEX_op_qemu_ld8u
, { "r", "L" } },
1872 { INDEX_op_qemu_ld8s
, { "r", "L" } },
1873 { INDEX_op_qemu_ld16u
, { "r", "L" } },
1874 { INDEX_op_qemu_ld16s
, { "r", "L" } },
1875 { INDEX_op_qemu_ld32
, { "r", "L" } },
1876 { INDEX_op_qemu_ld64
, { "r", "r", "L" } },
1878 { INDEX_op_qemu_st8
, { "cb", "L" } },
1879 { INDEX_op_qemu_st16
, { "L", "L" } },
1880 { INDEX_op_qemu_st32
, { "L", "L" } },
1881 { INDEX_op_qemu_st64
, { "L", "L", "L" } },
1883 { INDEX_op_qemu_ld8u
, { "r", "L", "L" } },
1884 { INDEX_op_qemu_ld8s
, { "r", "L", "L" } },
1885 { INDEX_op_qemu_ld16u
, { "r", "L", "L" } },
1886 { INDEX_op_qemu_ld16s
, { "r", "L", "L" } },
1887 { INDEX_op_qemu_ld32
, { "r", "L", "L" } },
1888 { INDEX_op_qemu_ld64
, { "r", "r", "L", "L" } },
1890 { INDEX_op_qemu_st8
, { "cb", "L", "L" } },
1891 { INDEX_op_qemu_st16
, { "L", "L", "L" } },
1892 { INDEX_op_qemu_st32
, { "L", "L", "L" } },
1893 { INDEX_op_qemu_st64
, { "L", "L", "L", "L" } },
/*
 * Registers saved and restored around generated code: the prologue pushes
 * every entry in array order and the epilogue pops them in reverse.  The
 * list differs by host register width; only the entries that carry a
 * comment are visible in this chunk.
 */
1898 static int tcg_target_callee_save_regs
[] = {
1899 #if TCG_TARGET_REG_BITS == 64
1904 TCG_REG_R14
, /* Currently used for the global env. */
1907 TCG_REG_EBP
, /* Currently used for the global env. */
1914 /* Generate global QEMU prologue and epilogue code */
/*
 * Frame layout computed below:
 *   push_size    - one register-sized slot per callee-saved register plus
 *                  one extra slot (NOTE(review): presumably the return
 *                  address -- confirm);
 *   frame_size   - push_size + TCG_STATIC_CALL_ARGS_SIZE +
 *                  CPU_TEMP_BUF_NLONGS longs, rounded up to
 *                  TCG_TARGET_STACK_ALIGN;
 *   stack_addend - the part of the frame still to be subtracted from the
 *                  stack pointer once the pushes are done.
 * The same stack_addend is added back in the epilogue before the pops.
 */
1915 static void tcg_target_qemu_prologue(TCGContext
*s
)
1917 int i
, frame_size
, push_size
, stack_addend
;
1921 /* Reserve some stack space, also for TCG temps. */
1922 push_size
= 1 + ARRAY_SIZE(tcg_target_callee_save_regs
);
1923 push_size
*= TCG_TARGET_REG_BITS
/ 8;
1925 frame_size
= push_size
+ TCG_STATIC_CALL_ARGS_SIZE
+
1926 CPU_TEMP_BUF_NLONGS
* sizeof(long);
1927 frame_size
= (frame_size
+ TCG_TARGET_STACK_ALIGN
- 1) &
1928 ~(TCG_TARGET_STACK_ALIGN
- 1);
1929 stack_addend
= frame_size
- push_size
;
/* The temp buffer is placed at offset TCG_STATIC_CALL_ARGS_SIZE from the
   call stack register. */
1930 tcg_set_frame(s
, TCG_REG_CALL_STACK
, TCG_STATIC_CALL_ARGS_SIZE
,
1931 CPU_TEMP_BUF_NLONGS
* sizeof(long));
1933 /* Save all callee saved registers. */
1934 for (i
= 0; i
< ARRAY_SIZE(tcg_target_callee_save_regs
); i
++) {
1935 tcg_out_push(s
, tcg_target_callee_save_regs
[i
]);
1938 tcg_out_addi(s
, TCG_REG_ESP
, -stack_addend
);
/* Copy the first call argument into TCG_AREG0, then jump indirectly through
   the register holding the second call argument. */
1940 tcg_out_mov(s
, TCG_TYPE_PTR
, TCG_AREG0
, tcg_target_call_iarg_regs
[0]);
1943 tcg_out_modrm(s
, OPC_GRP5
, EXT5_JMPN_Ev
, tcg_target_call_iarg_regs
[1]);
/* The epilogue starts here; the exit_tb case of tcg_out_op jumps to
   tb_ret_addr. */
1946 tb_ret_addr
= s
->code_ptr
;
1948 tcg_out_addi(s
, TCG_REG_CALL_STACK
, stack_addend
);
/* Restore the saved registers in the reverse order of the pushes. */
1950 for (i
= ARRAY_SIZE(tcg_target_callee_save_regs
) - 1; i
>= 0; i
--) {
1951 tcg_out_pop(s
, tcg_target_callee_save_regs
[i
]);
1953 tcg_out_opc(s
, OPC_RET
, 0, 0, 0);
/*
 * Describe the host to the TCG core: which registers are available for
 * each value type, which registers are marked call-clobbered, which are
 * reserved, and the operand constraint table (x86_op_defs).
 */
1956 static void tcg_target_init(TCGContext
*s
)
1958 #if !defined(CONFIG_USER_ONLY)
/* Sanity check that CPUTLBEntry is exactly (1 << CPU_TLB_ENTRY_BITS) bytes.
   The statement executed on a mismatch is not visible in this chunk. */
1960 if ((1 << CPU_TLB_ENTRY_BITS
) != sizeof(CPUTLBEntry
))
/* Register mask 0xffff (16 registers) for both I32 and I64 on a 64-bit
   host; the remaining call uses mask 0xff (8 registers) for I32. */
1964 if (TCG_TARGET_REG_BITS
== 64) {
1965 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xffff);
1966 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I64
], 0, 0xffff);
1968 tcg_regset_set32(tcg_target_available_regs
[TCG_TYPE_I32
], 0, 0xff);
/* EAX, EDX and ECX are always marked call-clobbered; a 64-bit host adds
   RDI, RSI and R8..R11. */
1971 tcg_regset_clear(tcg_target_call_clobber_regs
);
1972 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EAX
);
1973 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_EDX
);
1974 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_ECX
);
1975 if (TCG_TARGET_REG_BITS
== 64) {
1976 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RDI
);
1977 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_RSI
);
1978 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R8
);
1979 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R9
);
1980 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R10
);
1981 tcg_regset_set_reg(tcg_target_call_clobber_regs
, TCG_REG_R11
);
/* Only the call stack register is marked reserved. */
1984 tcg_regset_clear(s
->reserved_regs
);
1985 tcg_regset_set_reg(s
->reserved_regs
, TCG_REG_CALL_STACK
);
1987 tcg_add_target_add_op_defs(x86_op_defs
);