/*
 *  x86-64 code generator for TCC
 *
 *  Copyright (c) 2008 Shinichiro Hamaji
 *
 *  Based on i386-gen.c by Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#ifdef TARGET_DEFS_ONLY

/* number of available registers */
#define NB_ASM_REGS 16
#define CONFIG_TCC_ASM
/* a register can belong to several classes. The classes must be
   sorted from more general to more precise (see gv2() code which
   makes assumptions about it). */
#define RC_INT     0x0001 /* generic integer register */
#define RC_FLOAT   0x0002 /* generic float register */
#define RC_ST0     0x0080 /* only for long double */
#define RC_XMM0    0x1000
#define RC_XMM1    0x2000
#define RC_XMM2    0x4000
#define RC_XMM3    0x8000
#define RC_XMM4    0x10000
#define RC_XMM5    0x20000
#define RC_XMM6    0x40000
#define RC_XMM7    0x80000
#define RC_IRET    RC_RAX  /* function return: integer register */
#define RC_LRET    RC_RDX  /* function return: second integer register */
#define RC_FRET    RC_XMM0 /* function return: float register */
#define RC_QRET    RC_XMM1 /* function return: second float register */
/* pretty names for the registers */

#define REX_BASE(reg) (((reg) >> 3) & 1)
#define REG_VALUE(reg) ((reg) & 7)

/* return registers for function */
#define REG_IRET TREG_RAX /* single word int return register */
#define REG_LRET TREG_RDX /* second word return register (for long long) */
#define REG_FRET TREG_XMM0 /* float return register */
#define REG_QRET TREG_XMM1 /* second float return register */
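/* Note on REX_BASE/REG_VALUE above: register indices in this generator need
   4 bits.  The low 3 bits go into the ModRM/opcode byte (REG_VALUE) and the
   4th bit has to travel in a REX prefix bit (REX_BASE).  For example the
   hardware number of %r9 is 9, which encodes as REG_VALUE = 1 plus a set
   REX extension bit. */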
/* defined if function parameters must be evaluated in reverse order */
#define INVERT_FUNC_PARAMS

/* pointer size, in bytes */
#define PTR_SIZE 8

/* long double size and alignment, in bytes */
#define LDOUBLE_SIZE  16
#define LDOUBLE_ALIGN 16

/* maximum alignment (for aligned attribute support) */

/******************************************************/
#else /* ! TARGET_DEFS_ONLY */
/******************************************************/
#include "tcc.h"

ST_DATA const int reg_classes[NB_REGS] = {
    /* eax */ RC_INT | RC_RAX,
    /* ecx */ RC_INT | RC_RCX,
    /* edx */ RC_INT | RC_RDX,
    /* xmm0 */ RC_FLOAT | RC_XMM0,
    /* xmm1 */ RC_FLOAT | RC_XMM1,
    /* xmm2 */ RC_FLOAT | RC_XMM2,
    /* xmm3 */ RC_FLOAT | RC_XMM3,
    /* xmm4 */ RC_FLOAT | RC_XMM4,
    /* xmm5 */ RC_FLOAT | RC_XMM5,
    /* xmm6 and xmm7 are included so gv() can be used on them,
       but they are not tagged with RC_FLOAT because they are
       callee saved on Windows */
};

static unsigned long func_sub_sp_offset;
static int func_ret_sub;
/* XXX: make it faster ? */
ST_FUNC void g(int c)
{
    int ind1;
    if (nocode_wanted)
        return;
    ind1 = ind + 1;
    if (ind1 > cur_text_section->data_allocated)
        section_realloc(cur_text_section, ind1);
    cur_text_section->data[ind] = c;
    ind = ind1;
}

ST_FUNC void o(unsigned int c)
{
    while (c) {
        g(c);
        c = c >> 8;
    }
}

ST_FUNC void gen_le16(int v)
{
    g(v);
    g(v >> 8);
}

ST_FUNC void gen_le32(int c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
}

ST_FUNC void gen_le64(int64_t c)
{
    g(c);
    g(c >> 8);
    g(c >> 16);
    g(c >> 24);
    g(c >> 32);
    g(c >> 40);
    g(c >> 48);
    g(c >> 56);
}

static void orex(int ll, int r, int r2, int b)
{
    if ((r & VT_VALMASK) >= VT_CONST)
        r = 0;
    if ((r2 & VT_VALMASK) >= VT_CONST)
        r2 = 0;
    if (ll || REX_BASE(r) || REX_BASE(r2))
        o(0x40 | REX_BASE(r) | (REX_BASE(r2) << 2) | (ll << 3));
    o(b);
}
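/* For reference, the REX prefix byte emitted by orex() is 0100WRXB in
   binary, i.e. 0x40 | (W << 3) | (R << 2) | (X << 1) | B: 'll' supplies W
   (64-bit operand size), 'r2' supplies R (extends the ModRM reg field) and
   'r' supplies B (extends the ModRM r/m or opcode register field).  The X
   bit (SIB index extension) is left clear by orex(). */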
/* output a symbol and patch all calls to it */
ST_FUNC void gsym_addr(int t, int a)
{
    while (t) {
        unsigned char *ptr = cur_text_section->data + t;
        uint32_t n = read32le(ptr); /* next value */
        write32le(ptr, a - t - 4);
        t = n;
    }
}
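/* Forward references to a not-yet-defined label are chained through the
   code itself: each pending jmp/jcc stores the offset of the previous
   pending instruction in its own 32-bit displacement field.  gsym_addr()
   walks that chain (the read32le value above) and overwrites every entry
   with the real PC-relative displacement a - t - 4, the -4 accounting for
   the displacement being measured from the end of the 4-byte field. */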
static int is64_type(int t)
{
    return ((t & VT_BTYPE) == VT_PTR ||
            (t & VT_BTYPE) == VT_FUNC ||
            (t & VT_BTYPE) == VT_LLONG);
}

/* instruction + 4 bytes data. Return the address of the data */
static int oad(int c, int s)
{
    int t;
    if (nocode_wanted)
        return s;
    o(c);
    t = ind;
    gen_le32(s);
    return t;
}

/* generate jmp to a label */
#define gjmp2(instr,lbl) oad(instr,lbl)
/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0;
    gen_le32(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addr64(int r, Sym *sym, int64_t c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0;
    gen_le64(c);
}

/* output constant with relocation if 'r & VT_SYM' is true */
ST_FUNC void gen_addrpc32(int r, Sym *sym, int c)
{
    if (r & VT_SYM)
        greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4;
    gen_le32(c-4);
}

/* output got address with relocation */
static void gen_gotpcrel(int r, Sym *sym, int c)
{
#ifdef TCC_TARGET_PE
    tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n",
        get_tok_str(sym->v, NULL), c, r,
        cur_text_section->data[ind-3],
        cur_text_section->data[ind-2],
        cur_text_section->data[ind-1]
        );
#endif
    greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4);
    gen_le32(0);
    if (c) {
        /* we use add c, %xxx for displacement */
        orex(1, r, 0, 0x81);
        o(0xc0 + REG_VALUE(r));
        gen_le32(c);
    }
}
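/* The -4 addends on the PC-relative relocations above exist because on
   x86-64 a displacement is relative to the address of the *next*
   instruction, i.e. the end of the 4-byte field being relocated, while the
   relocation itself is emitted at the start of that field. */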
static void gen_modrm_impl(int op_reg, int r, Sym *sym, int c, int is_got)
{
    op_reg = REG_VALUE(op_reg) << 3;
    if ((r & VT_VALMASK) == VT_CONST) {
        /* constant memory reference */
        o(0x05 | op_reg);
        if (is_got) {
            gen_gotpcrel(r, sym, c);
        } else {
            gen_addrpc32(r, sym, c);
        }
    } else if ((r & VT_VALMASK) == VT_LOCAL) {
        /* currently, we use only ebp as base */
        if (c == (char)c) {
            /* short reference */
            o(0x45 | op_reg);
            g(c);
        } else {
            oad(0x85 | op_reg, c);
        }
    } else if ((r & VT_VALMASK) >= TREG_MEM) {
        if (c) {
            g(0x80 | op_reg | REG_VALUE(r));
            gen_le32(c);
        } else {
            g(0x00 | op_reg | REG_VALUE(r));
        }
    } else {
        g(0x00 | op_reg | REG_VALUE(r));
    }
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm(int op_reg, int r, Sym *sym, int c)
{
    gen_modrm_impl(op_reg, r, sym, c, 0);
}

/* generate a modrm reference. 'op_reg' contains the additional 3
   opcode bits */
static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c)
{
    int is_got;
    is_got = (op_reg & TREG_MEM) && !(sym->type.t & VT_STATIC);
    orex(1, r, op_reg, opcode);
    gen_modrm_impl(op_reg, r, sym, c, is_got);
}
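/* The ModRM byte built above is laid out as mod(2 bits):reg(3 bits):rm(3
   bits), which is why op_reg is shifted left by 3: it lands in the reg
   field, while the addressing form (0x05 for RIP-relative, 0x45/0x85 for
   EBP-relative with 8- or 32-bit displacement, 0x80+reg for a register
   base with disp32) occupies the mod and rm fields. */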
/* load 'r' from value 'sv' */
void load(int r, SValue *sv)
{
    int v, t, ft, fc, fr;
    SValue v1;

#ifdef TCC_TARGET_PE
    SValue v2;
    sv = pe_getimport(sv, &v2);
#endif

    fr = sv->r;
    ft = sv->type.t & ~VT_DEFSIGN;
    fc = sv->c.i;
    if (fc != sv->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in load");

    ft &= ~(VT_VOLATILE | VT_CONSTANT);

#ifndef TCC_TARGET_PE
    /* we use indirect access via got */
    if ((fr & VT_VALMASK) == VT_CONST && (fr & VT_SYM) &&
        (fr & VT_LVAL) && !(sv->sym->type.t & VT_STATIC)) {
        /* use the result register as a temporary register */
        int tr = r | TREG_MEM;
        if (is_float(ft)) {
            /* we cannot use float registers as a temporary register */
            tr = get_reg(RC_INT) | TREG_MEM;
        }
        gen_modrm64(0x8b, tr, fr, sv->sym, 0);
        /* load from the temporary register */
        fr = tr | VT_LVAL;
    }
#endif

    v = fr & VT_VALMASK;
    if (fr & VT_LVAL) {
        int b, ll;
        if (v == VT_LLOCAL) {
            v1.type.t = VT_PTR;
            v1.r = VT_LOCAL | VT_LVAL;
            v1.c.i = fc;
            fr = r;
            if (!(reg_classes[fr] & (RC_INT|RC_R11)))
                fr = get_reg(RC_INT);
            load(fr, &v1);
        }
        ll = 0;
        /* Like GCC we can load from small enough properly sized
           structs and unions as well.
           XXX maybe move to generic operand handling, but should
           occur only with asm, so tccasm.c might also be a better place */
        if ((ft & VT_BTYPE) == VT_STRUCT) {
            int align;
            switch (type_size(&sv->type, &align)) {
            case 1: ft = VT_BYTE; break;
            case 2: ft = VT_SHORT; break;
            case 4: ft = VT_INT; break;
            case 8: ft = VT_LLONG; break;
            default:
                tcc_error("invalid aggregate type for register load");
                break;
            }
        }
        if ((ft & VT_BTYPE) == VT_FLOAT) {
            r = REG_VALUE(r); /* movd */
        } else if ((ft & VT_BTYPE) == VT_DOUBLE) {
            b = 0x7e0ff3; /* movq */
        } else if ((ft & VT_BTYPE) == VT_LDOUBLE) {
            b = 0xdb, r = 5; /* fldt */
        } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) {
            b = 0xbe0f;   /* movsbl */
        } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) {
            b = 0xb60f;   /* movzbl */
        } else if ((ft & VT_TYPE) == VT_SHORT) {
            b = 0xbf0f;   /* movswl */
        } else if ((ft & VT_TYPE) == (VT_SHORT | VT_UNSIGNED)) {
            b = 0xb70f;   /* movzwl */
        } else {
            assert(((ft & VT_BTYPE) == VT_INT)
                   || ((ft & VT_BTYPE) == VT_LLONG)
                   || ((ft & VT_BTYPE) == VT_PTR)
                   || ((ft & VT_BTYPE) == VT_FUNC));
            ll = is64_type(ft);
            b = 0x8b;
        }
        if (ll) {
            gen_modrm64(b, r, fr, sv->sym, fc);
        } else {
            gen_modrm(r, fr, sv->sym, fc);
        }
    } else {
        if (v == VT_CONST) {
            if (fr & VT_SYM) {
#ifdef TCC_TARGET_PE
                orex(1,0,r,0x8d);
                o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                gen_addrpc32(fr, sv->sym, fc);
#else
                if (sv->sym->type.t & VT_STATIC) {
                    orex(1,0,r,0x8d);
                    o(0x05 + REG_VALUE(r) * 8); /* lea xx(%rip), r */
                    gen_addrpc32(fr, sv->sym, fc);
                } else {
                    orex(1,0,r,0x8b);
                    o(0x05 + REG_VALUE(r) * 8); /* mov xx(%rip), r */
                    gen_gotpcrel(r, sv->sym, fc);
                }
#endif
            } else if (is64_type(ft)) {
                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le64(sv->c.i);
            } else {
                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
                gen_le32(fc);
            }
        } else if (v == VT_LOCAL) {
            orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
            gen_modrm(r, VT_LOCAL, sv->sym, fc);
        } else if (v == VT_CMP) {
            if ((fc & ~0x100) != TOK_NE)
                oad(0xb8 + REG_VALUE(r), 0); /* mov $0, r */
            else
                oad(0xb8 + REG_VALUE(r), 1); /* mov $1, r */
            if (fc & 0x100) {
                /* This was a float compare.  If the parity bit is
                   set the result was unordered, meaning false for everything
                   except TOK_NE, and true for TOK_NE. */
                o(0x037a + (REX_BASE(r) << 8));
            }
            orex(0,r,0, 0x0f); /* setxx %br */
            o(0xc0 + REG_VALUE(r));
        } else if (v == VT_JMP || v == VT_JMPI) {
            t = v & 1;
            oad(0xb8 + REG_VALUE(r), t); /* mov $1, r */
            o(0x05eb + (REX_BASE(r) << 8)); /* jmp after */
            oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */
        } else if (v != r) {
            if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) {
                if (v == TREG_ST0) {
                    /* gen_cvt_ftof(VT_DOUBLE); */
                    o(0xf0245cdd); /* fstpl -0x10(%rsp) */
                    /* movsd -0x10(%rsp),%xmmN */
                    o(0x100ff2);
                    o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                    o(0xf024);
                } else {
                    assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                    if ((ft & VT_BTYPE) == VT_FLOAT) {
                        o(0x100ff3);
                    } else {
                        assert((ft & VT_BTYPE) == VT_DOUBLE);
                        o(0x100ff2);
                    }
                    o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8);
                }
            } else if (r == TREG_ST0) {
                assert((v >= TREG_XMM0) && (v <= TREG_XMM7));
                /* gen_cvt_ftof(VT_LDOUBLE); */
                /* movsd %xmmN,-0x10(%rsp) */
                o(0x110ff2);
                o(0x44 + REG_VALUE(r)*8); /* %xmmN */
                o(0xf024);
                o(0xf02444dd); /* fldl -0x10(%rsp) */
            } else {
                orex(1,r,v, 0x89);
                o(0xc0 + REG_VALUE(r) + REG_VALUE(v) * 8); /* mov v, r */
            }
        }
    }
}
/* store register 'r' in lvalue 'v' */
void store(int r, SValue *v)
{
    int fr, bt, ft, fc;
    int op64 = 0;
    /* store the REX prefix in this variable when PIC is enabled */
    int pic = 0;

#ifdef TCC_TARGET_PE
    SValue v2;
    v = pe_getimport(v, &v2);
#endif

    fr = v->r & VT_VALMASK;
    ft = v->type.t;
    fc = v->c.i;
    if (fc != v->c.i && (fr & VT_SYM))
        tcc_error("64 bit addend in store");
    ft &= ~(VT_VOLATILE | VT_CONSTANT);
    bt = ft & VT_BTYPE;

#ifndef TCC_TARGET_PE
    /* we need to access the variable via got */
    if (fr == VT_CONST && (v->r & VT_SYM)) {
        /* mov xx(%rip), %r11 */
        o(0x1d8b4c);
        gen_gotpcrel(TREG_R11, v->sym, v->c.i);
        pic = is64_type(bt) ? 0x49 : 0x41;
    }
#endif

    /* XXX: incorrect if float reg to reg */
    if (bt == VT_FLOAT) {
        o(0x66);
        o(pic);
        o(0x7e0f); /* movd */
        r = REG_VALUE(r);
    } else if (bt == VT_DOUBLE) {
        o(0x66);
        o(pic);
        o(0xd60f); /* movq */
        r = REG_VALUE(r);
    } else if (bt == VT_LDOUBLE) {
        o(0xc0d9); /* fld %st(0) */
        o(pic);
        o(0xdb); /* fstpt */
        r = 7;
    } else {
        if (bt == VT_SHORT)
            o(0x66);
        o(pic);
        if (bt == VT_BYTE || bt == VT_BOOL)
            orex(0, 0, r, 0x88);
        else if (is64_type(bt))
            op64 = 0x89;
        else
            orex(0, 0, r, 0x89);
    }

    if (pic) {
        /* xxx r, (%r11) where xxx is mov, movq, fld, or etc */
        if (op64)
            o(op64);
        o(3 + (r << 3));
    } else if (op64) {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm64(op64, r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    } else {
        if (fr == VT_CONST || fr == VT_LOCAL || (v->r & VT_LVAL)) {
            gen_modrm(r, v->r, v->sym, fc);
        } else if (fr != r) {
            /* XXX: don't we really come here? */
            abort();
            o(0xc0 + fr + r * 8); /* mov r, fr */
        }
    }
}
/* 'is_jmp' is '1' if it is a jump */
static void gcall_or_jmp(int is_jmp)
{
    int r;
    if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST &&
        ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) {
        if (vtop->r & VT_SYM) {
            /* relocation case */
#ifdef TCC_TARGET_PE
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4));
#else
            greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4));
#endif
        } else {
            /* put an empty PC32 relocation */
            put_elf_reloca(symtab_section, cur_text_section,
                           ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4));
        }
        oad(0xe8 + is_jmp, 0); /* call/jmp im */
    } else {
        /* otherwise, indirect call */
        r = TREG_R11;
        load(r, vtop);
        o(0x41); /* REX */
        o(0xff); /* call/jmp *r */
        o(0xd0 + REG_VALUE(r) + (is_jmp << 4));
    }
}
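/* Encoding note for the two emission paths above: direct transfers use
   opcode 0xe8 (call rel32) or 0xe9 (jmp rel32), hence 0xe8 + is_jmp;
   indirect transfers use 0xff with ModRM reg field /2 (call *r) or /4
   (jmp *r), i.e. a ModRM byte of 0xd0 + reg or 0xe0 + reg, hence
   0xd0 + REG_VALUE(r) + (is_jmp << 4). */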
#if defined(CONFIG_TCC_BCHECK)
#ifndef TCC_TARGET_PE
static addr_t func_bound_offset;
static unsigned long func_bound_ind;
#endif

static void gen_static_call(int v)
{
    Sym *sym = external_global_sym(v, &func_old_type, 0);
    oad(0xe8, 0);
    greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
}

/* generate a bounded pointer addition */
ST_FUNC void gen_bounded_ptr_add(void)
{
    /* save all temporary registers */
    save_regs(0);

    /* prepare fast x86_64 function call */
    gv(RC_RAX);
    o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size
    vswap();

    gv(RC_RAX);
    o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr
    vtop--;

    /* do a fast function call */
    gen_static_call(TOK___bound_ptr_add);

    /* returned pointer is in rax */
    vtop++;
    vtop->r = TREG_RAX | VT_BOUNDED;

    /* relocation offset of the bounding function call point */
    vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela)));
}
/* patch pointer addition in vtop so that pointer dereferencing is
   also tested */
ST_FUNC void gen_bounded_ptr_deref(void)
{
    addr_t func;
    int size, align;
    ElfW(Rela) *rel;
    Sym *sym;

    size = 0;
    /* XXX: put that code in generic part of tcc */
    if (!is_float(vtop->type.t)) {
        if (vtop->r & VT_LVAL_BYTE)
            size = 1;
        else if (vtop->r & VT_LVAL_SHORT)
            size = 2;
    }
    if (!size)
        size = type_size(&vtop->type, &align);
    switch(size) {
    case  1: func = TOK___bound_ptr_indir1; break;
    case  2: func = TOK___bound_ptr_indir2; break;
    case  4: func = TOK___bound_ptr_indir4; break;
    case  8: func = TOK___bound_ptr_indir8; break;
    case 12: func = TOK___bound_ptr_indir12; break;
    case 16: func = TOK___bound_ptr_indir16; break;
    default:
        tcc_error("unhandled size when dereferencing bounded pointer");
        func = 0;
        break;
    }

    sym = external_global_sym(func, &func_old_type, 0);
    if (!sym->c)
        put_extern_sym(sym, NULL, 0, 0);

    /* patch relocation */
    /* XXX: find a better solution ? */

    rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i);
    rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info));
}
#endif
#ifdef TCC_TARGET_PE

#define REGN 4
static const uint8_t arg_regs[REGN] = {
    TREG_RCX, TREG_RDX, TREG_R8, TREG_R9
};

/* Prepare arguments in R10 and R11 rather than RCX and RDX
   because gv() will not ever use these */
static int arg_prepare_reg(int idx) {
    if (idx == 0 || idx == 1)
        /* idx=0: r10, idx=1: r11 */
        return idx + 10;
    else
        return arg_regs[idx];
}

static int func_scratch, func_alloca;

/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
static void gen_offs_sp(int b, int r, int d)
{
    orex(1,0,r & 0x100 ? 0 : r, b);
    if (d == (char)d) {
        o(0x2444 | (REG_VALUE(r) << 3));
        g(d);
    } else {
        o(0x2484 | (REG_VALUE(r) << 3));
        gen_le32(d);
    }
}
static int using_regs(int size)
{
    return !(size > 8 || (size & (size - 1)));
}
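/* using_regs() is true exactly when the size is at most 8 bytes and a power
   of two (1, 2, 4 or 8): size & (size - 1) clears the lowest set bit and is
   therefore zero only for powers of two.  Those are the aggregate sizes the
   Windows x64 calling convention passes directly in a register; everything
   else is passed by reference to a stack copy. */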
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align;
    *ret_align = 1; // Never have to re-align return values for x86-64
    size = type_size(vt, &align);
    if (!using_regs(size))
        return 0;
static int is_sse_float(int t) {
    int bt;
    bt = t & VT_BTYPE;
    return bt == VT_DOUBLE || bt == VT_FLOAT;
}

static int gfunc_arg_size(CType *type) {
    int align;
    if (type->t & (VT_ARRAY|VT_BITFIELD))
        return 8;
    return type_size(type, &align);
}
void gfunc_call(int nb_args)
{
    int size, r, args_size, i, d, bt, struct_size;
    int arg;

    args_size = (nb_args < REGN ? REGN : nb_args) * PTR_SIZE;
    arg = nb_args;

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    struct_size = args_size;
    for(i = 0; i < nb_args; i++) {
        SValue *sv = &vtop[-i];
        bt = (sv->type.t & VT_BTYPE);
        size = gfunc_arg_size(&sv->type);

        if (using_regs(size))
            continue; /* arguments smaller than 8 bytes passed in registers or on stack */

        if (bt == VT_STRUCT) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            /* generate structure store */
            r = get_reg(RC_INT);
            gen_offs_sp(0x8d, r, struct_size);
            struct_size += size;

            /* generate memcpy call */
            vset(&sv->type, r | VT_LVAL, 0);
        } else if (bt == VT_LDOUBLE) {
            gv(RC_ST0);
            gen_offs_sp(0xdb, 0x107, struct_size);
            struct_size += 16;
        }
    }

    if (func_scratch < struct_size)
        func_scratch = struct_size;

    arg = nb_args;
    struct_size = args_size;

    for(i = 0; i < nb_args; i++) {
        --arg;
        bt = (vtop->type.t & VT_BTYPE);

        size = gfunc_arg_size(&vtop->type);
        if (!using_regs(size)) {
            /* align to stack align size */
            size = (size + 15) & ~15;
            if (arg >= REGN) {
                d = get_reg(RC_INT);
                gen_offs_sp(0x8d, d, struct_size);
                gen_offs_sp(0x89, d, arg*8);
            } else {
                d = arg_prepare_reg(arg);
                gen_offs_sp(0x8d, d, struct_size);
            }
            struct_size += size;
        } else {
            if (is_sse_float(vtop->type.t)) {
                if (tcc_state->nosse)
                    tcc_error("SSE disabled");
                gv(RC_XMM0); /* only use one float register */
                if (arg >= REGN) {
                    /* movq %xmm0, j*8(%rsp) */
                    gen_offs_sp(0xd60f66, 0x100, arg*8);
                } else {
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (arg << 3));
                    d = arg_prepare_reg(arg);
                    /* mov %xmm0, %rxx */
                    o(0x66);
                    orex(1,d,0, 0x7e0f);
                    o(0xc0 + REG_VALUE(d));
                }
            } else {
                if (bt == VT_STRUCT) {
                    vtop->type.ref = NULL;
                    vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT
                        : size > 1 ? VT_SHORT : VT_BYTE;
                }
                r = gv(RC_INT);
                if (arg >= REGN) {
                    gen_offs_sp(0x89, r, arg*8);
                } else {
                    d = arg_prepare_reg(arg);
                    orex(1,d,r,0x89); /* mov */
                    o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
                }
            }
        }
        vtop--;
    }
    save_regs(0);

    /* Copy R10 and R11 into RCX and RDX, respectively */
    if (nb_args > 0) {
        o(0xd1894c); /* mov %r10, %rcx */
        if (nb_args > 1) {
            o(0xda894c); /* mov %r11, %rdx */
        }
    }
    gcall_or_jmp(0);

    if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) {
        /* need to add the "func_scratch" area after alloca */
        o(0x0548), gen_le32(func_alloca), func_alloca = ind - 4;
    }

    /* other compilers don't clear the upper bits when returning char/short */
    bt = vtop->type.ref->type.t & (VT_BTYPE | VT_UNSIGNED);
    if (bt == (VT_BYTE | VT_UNSIGNED))
        o(0xc0b60f); /* movzbl %al, %eax */
    else if (bt == VT_BYTE)
        o(0xc0be0f); /* movsbl %al, %eax */
    else if (bt == VT_SHORT)
        o(0x98); /* cwtl */
    else if (bt == (VT_SHORT | VT_UNSIGNED))
        o(0xc0b70f); /* movzwl %ax, %eax */
#if 0 /* handled in gen_cast() */
    else if (bt == VT_INT)
        o(0x9848); /* cltq */
    else if (bt == (VT_INT | VT_UNSIGNED))
        o(0xc089); /* mov %eax,%eax */
#endif
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    int addr, reg_param_index, bt, size;
    Sym *sym;
    CType *type;

    func_ret_sub = 0;
    func_scratch = 0;
    func_alloca = 0;
    loc = 0;

    addr = PTR_SIZE * 2;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    reg_param_index = 0;

    sym = func_type->ref;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    func_var = (sym->f.func_type == FUNC_ELLIPSIS);
    size = gfunc_arg_size(&func_vt);
    if (!using_regs(size)) {
        gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
        reg_param_index++;
        addr += 8;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        bt = type->t & VT_BTYPE;
        size = gfunc_arg_size(type);
        if (!using_regs(size)) {
            if (reg_param_index < REGN) {
                gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LLOCAL | VT_LVAL, addr);
        } else {
            if (reg_param_index < REGN) {
                /* save arguments passed by register */
                if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) {
                    if (tcc_state->nosse)
                        tcc_error("SSE disabled");
                    o(0xd60f66); /* movq */
                    gen_modrm(reg_param_index, VT_LOCAL, NULL, addr);
                } else {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
                }
            }
            sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr);
        }
        addr += 8;
        reg_param_index++;
    }

    while (reg_param_index < REGN) {
        if (func_type->ref->f.func_type == FUNC_ELLIPSIS) {
            gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);
            addr += 8;
        }
        reg_param_index++;
    }
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }

    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    /* align local size to word & save local variables */
    v = (func_scratch + -loc + 15) & -16;

    if (v >= 4096) {
        Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0);
        oad(0xb8, v); /* mov stacksize, %eax */
        oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */
        greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4);
        o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */
    } else {
        o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
        o(0xec8148); /* sub rsp, stacksize */
        gen_le32(v);
    }

    /* add the "func_scratch" area after each alloca seen */
    while (func_alloca) {
        unsigned char *ptr = cur_text_section->data + func_alloca;
        func_alloca = read32le(ptr);
        write32le(ptr, func_scratch);
    }

    cur_text_section->data_offset = saved_ind;
    pe_add_unwind_data(ind, saved_ind, v);
    ind = cur_text_section->data_offset;
}
#else

static void gadd_sp(int val)
{
    if (val == (char)val) {
        o(0xc48348);
        g(val);
    } else {
        oad(0xc48148, val); /* add $xxx, %rsp */
    }
}
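/* The split above mirrors the two x86-64 encodings of "add $imm, %rsp":
   opcode 0x83 takes a sign-extended 8-bit immediate while 0x81 takes a full
   32-bit immediate, so small stack adjustments get the shorter form. */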
typedef enum X86_64_Mode {
    x86_64_mode_none,
    x86_64_mode_memory,
    x86_64_mode_integer,
    x86_64_mode_x87,
    x86_64_mode_sse
} X86_64_Mode;

static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b)
{
    if (a == b)
        return a;
    else if (a == x86_64_mode_none)
        return b;
    else if (b == x86_64_mode_none)
        return a;
    else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory))
        return x86_64_mode_memory;
    else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer))
        return x86_64_mode_integer;
    else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87))
        return x86_64_mode_memory;
    else
        return x86_64_mode_sse;
}
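/* A sketch of how the merge rule plays out in the System V classification
   this generator approximates: for "struct { double d; long l; }" the
   double field classifies as sse and the long as integer, so the merged
   class is integer and the whole struct is passed in general-purpose
   registers; add a long double field and the x87 class forces the struct
   into memory. */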
static X86_64_Mode classify_x86_64_inner(CType *ty)
{
    X86_64_Mode mode;
    Sym *f;

    switch (ty->t & VT_BTYPE) {
    case VT_VOID: return x86_64_mode_none;

    case VT_INT:
    case VT_BYTE:
    case VT_SHORT:
    case VT_LLONG:
    case VT_BOOL:
    case VT_PTR:
    case VT_FUNC:
        return x86_64_mode_integer;

    case VT_FLOAT:
    case VT_DOUBLE: return x86_64_mode_sse;

    case VT_LDOUBLE: return x86_64_mode_x87;

    case VT_STRUCT:
        f = ty->ref;

        mode = x86_64_mode_none;
        for (f = f->next; f; f = f->next)
            mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type));

        return mode;
    }
    assert(0);
    return 0;
}
static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count)
{
    X86_64_Mode mode;
    int size, align, ret_t = 0;

    if (ty->t & (VT_BITFIELD|VT_ARRAY)) {
        *psize = 8;
        *palign = 8;
        *reg_count = 1;
        ret_t = ty->t;
        mode = x86_64_mode_integer;
    } else {
        size = type_size(ty, &align);
        *psize = (size + 7) & ~7;
        *palign = (align + 7) & ~7;

        if (size > 16) {
            mode = x86_64_mode_memory;
        } else {
            mode = classify_x86_64_inner(ty);
            switch (mode) {
            case x86_64_mode_integer:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QLONG;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_LLONG : VT_INT;
                }
                break;

            case x86_64_mode_x87:
                *reg_count = 1;
                ret_t = VT_LDOUBLE;
                break;

            case x86_64_mode_sse:
                if (size > 8) {
                    *reg_count = 2;
                    ret_t = VT_QFLOAT;
                } else {
                    *reg_count = 1;
                    ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT;
                }
                break;
            default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/
            }
        }
    }

    if (ret) {
        ret->ref = NULL;
        ret->t = ret_t;
    }

    return mode;
}
ST_FUNC int classify_x86_64_va_arg(CType *ty)
{
    /* This definition must be synced with stdarg.h */
    enum __va_arg_type {
        __va_gen_reg, __va_float_reg, __va_stack
    };
    int size, align, reg_count;
    X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, &reg_count);
    switch (mode) {
    default: return __va_stack;
    case x86_64_mode_integer: return __va_gen_reg;
    case x86_64_mode_sse: return __va_float_reg;
    }
}
/* Return the number of registers needed to return the struct, or 0 if
   returning via struct pointer. */
ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize)
{
    int size, align, reg_count;
    *ret_align = 1; // Never have to re-align return values for x86-64
    *regsize = 8;
    return (classify_x86_64_arg(vt, ret, &size, &align, &reg_count) != x86_64_mode_memory);
}
#define REGN 6
static const uint8_t arg_regs[REGN] = {
    TREG_RDI, TREG_RSI, TREG_RDX, TREG_RCX, TREG_R8, TREG_R9
};

static int arg_prepare_reg(int idx) {
    if (idx == 2 || idx == 3)
        /* idx=2: r10, idx=3: r11 */
        return idx + 8;
    else
        return arg_regs[idx];
}
/* Generate function call. The function address is pushed first, then
   all the parameters in call order. This function pops all the
   parameters and the function address. */
void gfunc_call(int nb_args)
{
    X86_64_Mode mode;
    CType type;
    int size, align, r, args_size, stack_adjust, i, reg_count;
    int nb_reg_args = 0;
    int nb_sse_args = 0;
    int sse_reg, gen_reg;
    char _onstack[nb_args], *onstack = _onstack;

    /* calculate the number of integer/float register arguments, remember
       arguments to be passed via stack (in onstack[]), and also remember
       if we have to align the stack pointer to 16 (onstack[i] == 2).  Needs
       to be done in a left-to-right pass over arguments. */
    stack_adjust = 0;
    for(i = nb_args - 1; i >= 0; i--) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) {
            nb_sse_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) {
            nb_reg_args += reg_count;
            onstack[i] = 0;
        } else if (mode == x86_64_mode_none) {
            onstack[i] = 0;
        } else {
            if (align == 16 && (stack_adjust &= 15)) {
                onstack[i] = 2;
                stack_adjust = 0;
            } else
                onstack[i] = 1;
            stack_adjust += size;
        }
    }

    if (nb_sse_args && tcc_state->nosse)
        tcc_error("SSE disabled but floating point arguments passed");

    /* fetch cpu flag before generating any code */
    if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP)
        gv(RC_INT);

    /* for struct arguments, we need to call memcpy and the function
       call breaks register passing arguments we are preparing.
       So, we process arguments which will be passed by stack first. */
    gen_reg = nb_reg_args;
    sse_reg = nb_sse_args;
    args_size = 0;
    stack_adjust &= 15;
    for (i = 0; i < nb_args;) {
        mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count);
        if (!onstack[i]) {
            ++i;
            continue;
        }
        /* Possibly adjust stack to align SSE boundary.  We're processing
           args from right to left while allocating happens left to right
           (stack grows down), so the adjustment needs to happen _after_
           an argument that requires it. */
        if (stack_adjust) {
            o(0x50); /* push %rax; aka sub $8,%rsp */
            args_size += 8;
            stack_adjust = 0;
        }
        if (onstack[i] == 2)
            stack_adjust = 1;

        vrotb(i+1);

        switch (vtop->type.t & VT_BTYPE) {
        case VT_STRUCT:
            /* allocate the necessary size on stack */
            oad(0xec81, size); /* sub $xxx, %rsp */
            /* generate structure store */
            r = get_reg(RC_INT);
            orex(1, r, 0, 0x89); /* mov %rsp, r */
            o(0xe0 + REG_VALUE(r));
            vset(&vtop->type, r | VT_LVAL, 0);
            vswap();
            vstore();
            break;

        case VT_LDOUBLE:
            gv(RC_ST0);
            oad(0xec8148, size); /* sub $xxx, %rsp */
            o(0x7cdb); /* fstpt 0(%rsp) */
            g(0x24);
            g(0x00);
            break;

        case VT_FLOAT:
        case VT_DOUBLE:
            assert(mode == x86_64_mode_sse);
            r = gv(RC_FLOAT);
            o(0x50); /* push $rax */
            /* movq %xmmN, (%rsp) */
            o(0xd60f66);
            o(0x04 + REG_VALUE(r)*8);
            o(0x24);
            break;

        default:
            assert(mode == x86_64_mode_integer);
            /* simple type */
            /* XXX: implicit cast ? */
            r = gv(RC_INT);
            orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */
            break;
        }
        args_size += size;

        vpop();
        --nb_args;
        onstack++;
    }

    /* XXX This should be superfluous. */
    save_regs(0); /* save used temporary registers */

    /* then, we prepare register passing arguments.
       Note that we cannot set RDX and RCX in this loop because gv()
       may break these temporary registers. Let's use R10 and R11
       instead of them */
    assert(gen_reg <= REGN);
    assert(sse_reg <= 8);
    for(i = 0; i < nb_args; i++) {
        mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count);
        /* Alter stack entry type so that gv() knows how to treat it */
        vtop->type = type;
        if (mode == x86_64_mode_sse) {
            if (reg_count == 2) {
                sse_reg -= 2;
                gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */
                if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */
                    /* movaps %xmm0, %xmmN */
                    o(0x280f);
                    o(0xc0 + (sse_reg << 3));
                    /* movaps %xmm1, %xmmN */
                    o(0x280f);
                    o(0xc1 + ((sse_reg+1) << 3));
                }
            } else {
                assert(reg_count == 1);
                --sse_reg;
                /* Load directly to register */
                gv(RC_XMM0 << sse_reg);
            }
        } else if (mode == x86_64_mode_integer) {
            int d;
            /* simple type */
            /* XXX: implicit cast ? */
            gen_reg -= reg_count;
            r = gv(RC_INT);
            d = arg_prepare_reg(gen_reg);
            orex(1,d,r,0x89); /* mov */
            o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d));
            if (reg_count == 2) {
                d = arg_prepare_reg(gen_reg+1);
                orex(1,d,vtop->r2,0x89); /* mov */
                o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d));
            }
        }
        vtop--;
    }
    assert(gen_reg == 0);
    assert(sse_reg == 0);

    /* We shouldn't have many operands on the stack anymore, but the
       call address itself is still there, and it might be in %eax
       (or edx/ecx) currently, which the below writes would clobber.
       So evict all remaining operands here. */
    save_regs(0);

    /* Copy R10 and R11 into RDX and RCX, respectively */
    if (nb_reg_args > 2) {
        o(0xd2894c); /* mov %r10, %rdx */
        if (nb_reg_args > 3) {
            o(0xd9894c); /* mov %r11, %rcx */
        }
    }

    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */
        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */
    gcall_or_jmp(0);
    if (args_size)
        gadd_sp(args_size);
    vtop--;
}
#define FUNC_PROLOG_SIZE 11

static void push_arg_reg(int i) {
    loc -= 8;
    gen_modrm64(0x89, arg_regs[i], VT_LOCAL, NULL, loc);
}
/* generate function prolog of type 't' */
void gfunc_prolog(CType *func_type)
{
    X86_64_Mode mode;
    int i, addr, align, size, reg_count;
    int param_addr = 0, reg_param_index, sse_param_index;
    Sym *sym;
    CType *type;

    sym = func_type->ref;
    addr = PTR_SIZE * 2;
    loc = 0;
    ind += FUNC_PROLOG_SIZE;
    func_sub_sp_offset = ind;
    func_ret_sub = 0;

    if (sym->f.func_type == FUNC_ELLIPSIS) {
        int seen_reg_num, seen_sse_num, seen_stack_size;
        seen_reg_num = seen_sse_num = 0;
        /* frame pointer and return address */
        seen_stack_size = PTR_SIZE * 2;
        /* count the number of seen parameters */
        sym = func_type->ref;
        while ((sym = sym->next) != NULL) {
            type = &sym->type;
            mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
            switch (mode) {
            default:
            stack_arg:
                seen_stack_size = ((seen_stack_size + align - 1) & -align) + size;
                break;

            case x86_64_mode_integer:
                if (seen_reg_num + reg_count > REGN)
                    goto stack_arg;
                seen_reg_num += reg_count;
                break;

            case x86_64_mode_sse:
                if (seen_sse_num + reg_count > 8)
                    goto stack_arg;
                seen_sse_num += reg_count;
                break;
            }
        }

        loc -= 16;
        /* movl $0x????????, -0x10(%rbp) */
        o(0xf045c7);
        gen_le32(seen_reg_num * 8);
        /* movl $0x????????, -0xc(%rbp) */
        o(0xf445c7);
        gen_le32(seen_sse_num * 16 + 48);
        /* movl $0x????????, -0x8(%rbp) */
        o(0xf845c7);
        gen_le32(seen_stack_size);

        /* save all register passing arguments */
        for (i = 0; i < 8; i++) {
            loc -= 16;
            if (!tcc_state->nosse) {
                o(0xd60f66); /* movq */
                gen_modrm(7 - i, VT_LOCAL, NULL, loc);
            }
            /* movq $0, loc+8(%rbp) */
        }
        for (i = 0; i < REGN; i++) {
            push_arg_reg(REGN-1-i);
        }
    }

    sym = func_type->ref;
    reg_param_index = 0;
    sse_param_index = 0;

    /* if the function returns a structure, then add an
       implicit pointer parameter */
    func_vt = sym->type;
    mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count);
    if (mode == x86_64_mode_memory) {
        push_arg_reg(reg_param_index);
        reg_param_index++;
    }

    /* define parameters */
    while ((sym = sym->next) != NULL) {
        type = &sym->type;
        mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count);
        switch (mode) {
        case x86_64_mode_sse:
            if (tcc_state->nosse)
                tcc_error("SSE disabled but floating point arguments used");
            if (sse_param_index + reg_count <= 8) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    o(0xd60f66); /* movq */
                    gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8);
                    ++sse_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;

        case x86_64_mode_memory:
        case x86_64_mode_x87:
            addr = (addr + align - 1) & -align;
            param_addr = addr;
            addr += size;
            break;

        case x86_64_mode_integer: {
            if (reg_param_index + reg_count <= REGN) {
                /* save arguments passed by register */
                loc -= reg_count * 8;
                param_addr = loc;
                for (i = 0; i < reg_count; ++i) {
                    gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8);
                    ++reg_param_index;
                }
            } else {
                addr = (addr + align - 1) & -align;
                param_addr = addr;
                addr += size;
            }
            break;
        }
        default: break; /* nothing to be done for x86_64_mode_none */
        }
        sym_push(sym->v & ~SYM_FIELD, type,
                 VT_LOCAL | VT_LVAL, param_addr);
    }

#ifdef CONFIG_TCC_BCHECK
    /* leave some room for bound checking code */
    if (tcc_state->do_bounds_check) {
        func_bound_offset = lbounds_section->data_offset;
        func_bound_ind = ind;
        oad(0xb8, 0); /* lbound section pointer */
        o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */
        oad(0xb8, 0); /* call to function */
    }
#endif
}
/* generate function epilog */
void gfunc_epilog(void)
{
    int v, saved_ind;

#ifdef CONFIG_TCC_BCHECK
    if (tcc_state->do_bounds_check
        && func_bound_offset != lbounds_section->data_offset)
    {
        addr_t *bounds_ptr;
        Sym *sym_data;

        /* add end of table info */
        bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t));
        *bounds_ptr = 0;

        /* generate bound local allocation */
        sym_data = get_sym_ref(&char_pointer_type, lbounds_section,
                               func_bound_offset, lbounds_section->data_offset);
        saved_ind = ind;
        ind = func_bound_ind;
        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
        gen_static_call(TOK___bound_local_new);
        ind = saved_ind;

        /* generate bound check local freeing */
        o(0x5250); /* save returned value, if any */
        greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0);
        oad(0xb8, 0); /* mov xxx, %rax */
        o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */
        gen_static_call(TOK___bound_local_delete);
        o(0x585a); /* restore returned value, if any */
    }
#endif
    o(0xc9); /* leave */
    if (func_ret_sub == 0) {
        o(0xc3); /* ret */
    } else {
        o(0xc2); /* ret n */
        g(func_ret_sub);
        g(func_ret_sub >> 8);
    }
    /* align local size to word & save local variables */
    v = (-loc + 15) & -16;
    saved_ind = ind;
    ind = func_sub_sp_offset - FUNC_PROLOG_SIZE;
    o(0xe5894855); /* push %rbp, mov %rsp, %rbp */
    o(0xec8148); /* sub rsp, stacksize */
    gen_le32(v);
    ind = saved_ind;
}

#endif /* not TCC_TARGET_PE */
/* generate a jump to a label */
ST_FUNC int gjmp(int t)
{
    return gjmp2(0xe9, t);
}

/* generate a jump to a fixed address */
void gjmp_addr(int a)
{
    int r;
    r = a - ind - 2;
    if (r == (char)r) {
        g(0xeb);
        g(r);
    } else {
        oad(0xe9, a - ind - 5);
    }
}
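/* The displacement math above follows from jump displacements being
   relative to the end of the jump instruction: the short form (0xeb rel8)
   is 2 bytes long, hence a - ind - 2, and the near form (0xe9 rel32) is
   5 bytes long, hence a - ind - 5. */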
ST_FUNC void gtst_addr(int inv, int a)
{
    int v = vtop->r & VT_VALMASK;
    if (v == VT_CMP) {
        inv ^= (vtop--)->c.i;
        a -= ind + 2;
        if (a == (char)a) {
            g(inv - 32);
            g(a);
        } else {
            g(0x0f);
            oad(inv - 16, a - 4);
        }
    } else if ((v & ~1) == VT_JMP) {
        if ((v & 1) != inv) {
            gjmp_addr(a);
            gsym(vtop->c.i);
        } else {
            gsym(vtop->c.i);
            gjmp_addr(a);
        }
        vtop--;
    }
}
/* generate a test. set 'inv' to invert test. Stack entry is popped */
ST_FUNC int gtst(int inv, int t)
{
    int v = vtop->r & VT_VALMASK;

    if (nocode_wanted) {
        ;
    } else if (v == VT_CMP) {
        /* fast case : can jump directly since flags are set */
        if (vtop->c.i & 0x100) {
            /* This was a float compare.  If the parity flag is set
               the result was unordered.  For anything except != this
               means false and we don't jump (anding both conditions).
               For != this means true (oring both).
               Take care about inverting the test.  We need to jump
               to our target if the result was unordered and test wasn't NE,
               otherwise if unordered we don't want to jump. */
            vtop->c.i &= ~0x100;
            if (inv == (vtop->c.i == TOK_NE)) {
                o(0x067a); /* jp +6 */
            } else {
                g(0x0f);
                t = gjmp2(0x8a, t); /* jp t */
            }
        }
        g(0x0f);
        t = gjmp2((vtop->c.i - 16) ^ inv, t);
    } else if (v == VT_JMP || v == VT_JMPI) {
        /* && or || optimization */
        if ((v & 1) == inv) {
            /* insert vtop->c jump list in t */
            uint32_t n1, n = vtop->c.i;
            if (n) {
                while ((n1 = read32le(cur_text_section->data + n)))
                    n = n1;
                write32le(cur_text_section->data + n, t);
                t = vtop->c.i;
            }
        } else {
            t = gjmp(t);
            gsym(vtop->c.i);
        }
    }
    vtop--;
    return t;
}
/* generate an integer binary operation */
void gen_opi(int op)
{
    int r, fr, opc, c;
    int ll, uu, cc;

    ll = is64_type(vtop[-1].type.t);
    uu = (vtop[-1].type.t & VT_UNSIGNED) != 0;
    cc = (vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST;

    switch(op) {
    case '+':
    case TOK_ADDC1: /* add with carry generation */
        opc = 0;
    gen_op8:
        if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            c = vtop->c.i;
            if (c == (char)c) {
                /* XXX: generate inc and dec for smaller code ? */
                orex(ll, r, 0, 0x83);
                o(0xc0 | (opc << 3) | REG_VALUE(r));
                g(c);
            } else {
                orex(ll, r, 0, 0x81);
                oad(0xc0 | (opc << 3) | REG_VALUE(r), c);
            }
        } else {
            gv2(RC_INT, RC_INT);
            r = vtop[-1].r;
            fr = vtop[0].r;
            orex(ll, r, fr, (opc << 3) | 0x01);
            o(0xc0 + REG_VALUE(r) + REG_VALUE(fr) * 8);
        }
        vtop--;
        if (op >= TOK_ULT && op <= TOK_GT) {
            vtop->r = VT_CMP;
            vtop->c.i = op;
        }
        break;
    case '-':
    case TOK_SUBC1: /* sub with carry generation */
        opc = 5;
        goto gen_op8;
    case TOK_ADDC2: /* add with carry use */
        opc = 2;
        goto gen_op8;
    case TOK_SUBC2: /* sub with carry use */
        opc = 3;
        goto gen_op8;
    case '&':
        opc = 4;
        goto gen_op8;
    case '^':
        opc = 6;
        goto gen_op8;
    case '|':
        opc = 1;
        goto gen_op8;
    case '*':
        gv2(RC_INT, RC_INT);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        orex(ll, fr, r, 0xaf0f); /* imul fr, r */
        o(0xc0 + REG_VALUE(fr) + REG_VALUE(r) * 8);
        break;
    case TOK_SHL:
        opc = 4;
        goto gen_shift;
    case TOK_SHR:
        opc = 5;
        goto gen_shift;
    case TOK_SAR:
        opc = 7;
    gen_shift:
        opc = 0xc0 | (opc << 3);
        if (cc) {
            /* constant case */
            vswap();
            r = gv(RC_INT);
            vswap();
            orex(ll, r, 0, 0xc1); /* shl/shr/sar $xxx, r */
            o(opc | REG_VALUE(r));
            g(vtop->c.i & (ll ? 63 : 31));
        } else {
            /* we generate the shift in ecx */
            gv2(RC_INT, RC_RCX);
            r = vtop[-1].r;
            orex(ll, r, 0, 0xd3); /* shl/shr/sar %cl, r */
            o(opc | REG_VALUE(r));
        }
        vtop--;
        break;
    case TOK_UDIV:
    case TOK_UMOD:
        uu = 1;
        goto divmod;
    case '/':
    case '%':
    case TOK_PDIV:
        uu = 0;
    divmod:
        /* first operand must be in eax */
        /* XXX: need better constraint for second operand */
        gv2(RC_RAX, RC_RCX);
        r = vtop[-1].r;
        fr = vtop[0].r;
        vtop--;
        save_reg(TREG_RDX);
        orex(ll, 0, 0, uu ? 0xd231 : 0x99); /* xor %edx,%edx : cqto */
        orex(ll, fr, 0, 0xf7); /* div fr, %eax */
        o((uu ? 0xf0 : 0xf8) + REG_VALUE(fr));
        if (op == '%' || op == TOK_UMOD)
            r = TREG_RDX;
        else
            r = TREG_RAX;
        vtop->r = r;
        break;
    default:
        opc = 7;
        goto gen_op8;
    }
}

void gen_opl(int op)
{
    gen_opi(op);
}
/* generate a floating point operation 'v = t1 op t2' instruction. The
   two operands are guaranteed to have the same floating point type */
/* XXX: need to use ST1 too */
void gen_opf(int op)
{
    int a, ft, fc, swapped, r;
    int float_type =
        (vtop->type.t & VT_BTYPE) == VT_LDOUBLE ? RC_ST0 : RC_FLOAT;

    /* convert constants to memory references */
    if ((vtop[-1].r & (VT_VALMASK | VT_LVAL)) == VT_CONST) {
        vswap();
        gv(float_type);
        vswap();
    }
    if ((vtop[0].r & (VT_VALMASK | VT_LVAL)) == VT_CONST)
        gv(float_type);

    /* must put at least one value in the floating point register */
    if ((vtop[-1].r & VT_LVAL) &&
        (vtop[0].r & VT_LVAL)) {
        vswap();
        gv(float_type);
        vswap();
    }
    swapped = 0;
    /* swap the stack if needed so that t1 is the register and t2 is
       the memory reference */
    if (vtop[-1].r & VT_LVAL) {
        vswap();
        swapped = 1;
    }
    if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* load on stack second operand */
            load(TREG_ST0, vtop);
            save_reg(TREG_RAX); /* eax is used by FP comparison code */
            if (op == TOK_GE || op == TOK_GT)
                swapped = !swapped;
            else if (op == TOK_EQ || op == TOK_NE)
                swapped = 0;
            if (swapped)
                o(0xc9d9); /* fxch %st(1) */
            if (op == TOK_EQ || op == TOK_NE)
                o(0xe9da); /* fucompp */
            else
                o(0xd9de); /* fcompp */
            o(0xe0df); /* fnstsw %ax */
            if (op == TOK_EQ) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40fC80); /* cmp $0x40, %ah */
            } else if (op == TOK_NE) {
                o(0x45e480); /* and $0x45, %ah */
                o(0x40f480); /* xor $0x40, %ah */
                op = TOK_NE;
            } else if (op == TOK_GE || op == TOK_LE) {
                o(0x05c4f6); /* test $0x05, %ah */
                op = TOK_EQ;
            } else {
                o(0x45c4f6); /* test $0x45, %ah */
                op = TOK_EQ;
            }
            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op;
        } else {
            /* no memory reference possible for long double operations */
            load(TREG_ST0, vtop);
            swapped = !swapped;

            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                if (swapped)
                    a++;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                if (swapped)
                    a++;
                break;
            }
            o(0xde); /* fxxxp %st, %st(1) */
            o(0xc1 + (a << 3));
            vtop--;
        }
    } else {
        if (op >= TOK_ULT && op <= TOK_GT) {
            /* if saved lvalue, then we must reload it */
            r = vtop->r;
            fc = vtop->c.i;
            if ((r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            if (op == TOK_EQ || op == TOK_NE) {
                swapped = 0;
            } else {
                if (op == TOK_LE || op == TOK_LT)
                    swapped = !swapped;
                if (op == TOK_LE || op == TOK_GE) {
                    op = 0x93; /* setae */
                } else {
                    op = 0x97; /* seta */
                }
            }

            if (swapped) {
                gv(RC_FLOAT);
                vswap();
            }
            assert(!(vtop[-1].r & VT_LVAL));

            if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE)
                o(0x66);
            if (op == TOK_EQ || op == TOK_NE)
                o(0x2e0f); /* ucomisd */
            else
                o(0x2f0f); /* comisd */

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
            vtop->r = VT_CMP;
            vtop->c.i = op | 0x100;
        } else {
            assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE);
            switch(op) {
            default:
            case '+':
                a = 0;
                break;
            case '-':
                a = 4;
                break;
            case '*':
                a = 1;
                break;
            case '/':
                a = 6;
                break;
            }
            ft = vtop->type.t;
            fc = vtop->c.i;
            assert((ft & VT_BTYPE) != VT_LDOUBLE);

            r = vtop->r;
            /* if saved lvalue, then we must reload it */
            if ((vtop->r & VT_VALMASK) == VT_LLOCAL) {
                SValue v1;
                r = get_reg(RC_INT);
                v1.type.t = VT_PTR;
                v1.r = VT_LOCAL | VT_LVAL;
                v1.c.i = fc;
                load(r, &v1);
                fc = 0;
            }

            assert(!(vtop[-1].r & VT_LVAL));
            if (swapped) {
                assert(vtop->r & VT_LVAL);
                gv(RC_FLOAT);
                vswap();
            }

            if ((ft & VT_BTYPE) == VT_DOUBLE) {
                o(0xf2);
            } else {
                o(0xf3);
            }
            o(0x0f);
            o(0x58 + a);

            if (vtop->r & VT_LVAL) {
                gen_modrm(vtop[-1].r, r, vtop->sym, fc);
            } else {
                o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8);
            }

            vtop--;
        }
    }
}
/* convert integers to fp 't' type. Must handle 'int', 'unsigned int'
   and 'long long' cases. */
void gen_cvt_itof(int t)
{
    if ((t & VT_BTYPE) == VT_LDOUBLE) {
        save_reg(TREG_ST0);
        gv(RC_INT);
        if ((vtop->type.t & VT_BTYPE) == VT_LLONG) {
            /* signed long long to float/double/long double (unsigned case
               is handled generically) */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        } else if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
                   (VT_INT | VT_UNSIGNED)) {
            /* unsigned int to float/double/long double */
            o(0x6a); /* push $0 */
            g(0x00);
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x242cdf); /* fildll (%rsp) */
            o(0x10c48348); /* add $16, %rsp */
        } else {
            /* int to float/double/long double */
            o(0x50 + (vtop->r & VT_VALMASK)); /* push r */
            o(0x2404db); /* fildl (%rsp) */
            o(0x08c48348); /* add $8, %rsp */
        }
        vtop->r = TREG_ST0;
    } else {
        int r = get_reg(RC_FLOAT);
        gv(RC_INT);
        o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0));
        if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) ==
            (VT_INT | VT_UNSIGNED) ||
            (vtop->type.t & VT_BTYPE) == VT_LLONG) {
            o(0x48); /* REX */
        }
        o(0x2a0f);
        o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */
        vtop->r = r;
    }
}
/* convert from one floating point type to another */
void gen_cvt_ftof(int t)
{
    int ft, bt, tbt;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    tbt = t & VT_BTYPE;

    if (bt == VT_FLOAT) {
        gv(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0x140f); /* unpcklps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f); /* cvtps2pd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            /* movss %xmm0,-0x10(%rsp) */
            o(0x110ff3);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444d9); /* flds -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else if (bt == VT_DOUBLE) {
        gv(RC_FLOAT);
        if (tbt == VT_FLOAT) {
            o(0x140f66); /* unpcklpd */
            o(0xc0 + REG_VALUE(vtop->r)*9);
            o(0x5a0f66); /* cvtpd2ps */
            o(0xc0 + REG_VALUE(vtop->r)*9);
        } else if (tbt == VT_LDOUBLE) {
            /* movsd %xmm0,-0x10(%rsp) */
            o(0x110ff2);
            o(0x44 + REG_VALUE(vtop->r)*8);
            o(0xf024);
            o(0xf02444dd); /* fldl -0x10(%rsp) */
            vtop->r = TREG_ST0;
        }
    } else {
        int r;
        gv(RC_ST0);
        r = get_reg(RC_FLOAT);
        if (tbt == VT_DOUBLE) {
            o(0xf0245cdd); /* fstpl -0x10(%rsp) */
            /* movsd -0x10(%rsp),%xmm0 */
            o(0x100ff2);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        } else if (tbt == VT_FLOAT) {
            o(0xf0245cd9); /* fstps -0x10(%rsp) */
            /* movss -0x10(%rsp),%xmm0 */
            o(0x100ff3);
            o(0x44 + REG_VALUE(r)*8);
            o(0xf024);
            vtop->r = r;
        }
    }
}
/* convert fp to int 't' type */
void gen_cvt_ftoi(int t)
{
    int ft, bt, size, r;

    ft = vtop->type.t;
    bt = ft & VT_BTYPE;
    if (bt == VT_LDOUBLE) {
        gen_cvt_ftof(VT_DOUBLE);
        bt = VT_DOUBLE;
    }

    gv(RC_FLOAT);
    if (t != VT_INT)
        size = 8;
    else
        size = 4;

    r = get_reg(RC_INT);
    if (bt == VT_FLOAT) {
        o(0xf3);
    } else if (bt == VT_DOUBLE) {
        o(0xf2);
    } else {
        assert(0);
    }
    orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */
    o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8);
    vtop->r = r;
}

/* computed goto support */
void ggoto(void)
{
    gcall_or_jmp(1);
    vtop--;
}
/* Save the stack pointer onto the stack and return the location of its address */
ST_FUNC void gen_vla_sp_save(int addr) {
    /* mov %rsp,addr(%rbp)*/
    gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Restore the SP from a location on the stack */
ST_FUNC void gen_vla_sp_restore(int addr) {
    gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr);
}

/* Subtract from the stack pointer, and push the resulting value onto the stack */
ST_FUNC void gen_vla_alloc(CType *type, int align) {
#ifdef TCC_TARGET_PE
    /* alloca does more than just adjust %rsp on Windows */
    vpush_global_sym(&func_old_type, TOK_alloca);
    vswap(); /* Move alloca ref past allocation size */
    gfunc_call(1);
#else
    int r;
    r = gv(RC_INT); /* allocation size */
    /* sub r,%rsp */
    o(0x2b48);
    o(0xe0 | REG_VALUE(r));
    /* We align to 16 bytes rather than align */
    /* and ~15, %rsp */
    o(0xf0e48348);
    vpop();
#endif
}
/* end of x86-64 code generator */
/*************************************************************/
#endif /* ! TARGET_DEFS_ONLY */
/******************************************************/