/*
 * mini-amd64.c: AMD64 backend for the Mono code generator
 *
 * Paolo Molaro (lupus@ximian.com)
 * Dietmar Maurer (dietmar@ximian.com)
 * Zoltan Varga (vargaz@gmail.com)
 *
 * (C) 2003 Ximian, Inc.
 */
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>

#include "mini-amd64.h"
#include "cpu-amd64.h"
/*
 * Can't define this in mini-amd64.h cause that would turn on the generic code in
 */
#define MONO_ARCH_IMT_REG AMD64_R11
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;
#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif
#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))

#define IS_IMM32(val) ((((guint64)val) >> 32) == 0)

#define IS_REX(inst) (((inst) >= 0x40) && ((inst) <= 0x4f))
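/*
 * Illustrative note (added for clarity, not part of the original source):
 * ALIGN_TO rounds a value up to the next multiple of a power-of-two alignment,
 * e.g. ALIGN_TO (13, 8) == 16 and ALIGN_TO (16, 8) == 16, while IS_IMM32 checks
 * whether a 64 bit value fits in a zero-extended 32 bit immediate, e.g.
 * IS_IMM32 (0x7fffffff) is 1 and IS_IMM32 (0x100000000ULL) is 0.
 */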
#ifdef PLATFORM_WIN32
/* Under windows, the calling convention is never stdcall */
#define CALLCONV_IS_STDCALL(call_conv) (FALSE)
#else
#define CALLCONV_IS_STDCALL(call_conv) ((call_conv) == MONO_CALL_STDCALL)
#endif
/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;
MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
#ifdef PLATFORM_WIN32
/* On Win64 always reserve first 32 bytes for first four arguments */
#define ARGS_OFFSET 48
#else
#define ARGS_OFFSET 16
#endif
#define GP_SCRATCH_REG AMD64_R11
/*
 * AMD64 register usage:
 * - callee saved registers are used for global register allocation
 * - %r11 is used for materializing 64 bit constants in opcodes
 * - the rest is used for local allocation
 */

/*
 * Floating point comparison results:
 */
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case AMD64_RAX: return "%rax";
	case AMD64_RBX: return "%rbx";
	case AMD64_RCX: return "%rcx";
	case AMD64_RDX: return "%rdx";
	case AMD64_RSP: return "%rsp";
	case AMD64_RBP: return "%rbp";
	case AMD64_RDI: return "%rdi";
	case AMD64_RSI: return "%rsi";
	case AMD64_R8: return "%r8";
	case AMD64_R9: return "%r9";
	case AMD64_R10: return "%r10";
	case AMD64_R11: return "%r11";
	case AMD64_R12: return "%r12";
	case AMD64_R13: return "%r13";
	case AMD64_R14: return "%r14";
	case AMD64_R15: return "%r15";
	}
	return "unknown";
}
static const char * xmmregs [] = {
	"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8",
	"xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};
const char*
mono_arch_fregname (int reg)
{
	if (reg < AMD64_XMM_NREG)
		return xmmregs [reg];
	else
		return "unknown";
}
G_GNUC_UNUSED static void
break_count (void)
{
}

G_GNUC_UNUSED static gboolean
debug_count (void)
{
	static int count = 0;
	count ++;

	if (!getenv ("COUNT"))
		return TRUE;

	if (count == atoi (getenv ("COUNT"))) {
		break_count ();
	}

	if (count > atoi (getenv ("COUNT"))) {
		return FALSE;
	}

	return TRUE;
}

static gboolean
debug_omit_fp (void)
{
	return debug_count ();
}
static inline gboolean
amd64_is_near_call (guint8 *code)
{
	/* Skip REX */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f))
		code += 1;

	return code [0] == 0xe8;
}
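/*
 * Note (added for clarity, not part of the original source): 0x40-0x4f are the
 * AMD64 REX prefix bytes (see IS_REX above) and 0xe8 is the opcode of the
 * rel32 "call" instruction, so a near call is a direct call with a 32 bit
 * displacement, possibly preceded by a REX prefix.
 */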
void
amd64_patch (unsigned char* code, gpointer target)
{
	guint8 rex = FALSE;

	/* Skip REX */
	if ((code [0] >= 0x40) && (code [0] <= 0x4f)) {
		rex = code [0];
		code += 1;
	}

	if ((code [0] & 0xf8) == 0xb8) {
		/* amd64_set_reg_template */
		*(guint64*)(code + 1) = (guint64)target;
	}
	else if ((code [0] == 0x8b) && rex && x86_modrm_mod (code [1]) == 0 && x86_modrm_rm (code [1]) == 5) {
		/* mov 0(%rip), %dreg */
		*(guint32*)(code + 2) = (guint32)(guint64)target - 7;
	}
	else if ((code [0] == 0xff) && (code [1] == 0x15)) {
		/* call *<OFFSET>(%rip) */
		*(guint32*)(code + 2) = ((guint32)(guint64)target) - 7;
	}
	else if ((code [0] == 0xe8)) {
		gint64 disp = (guint8*)target - (guint8*)code;
		g_assert (amd64_is_imm32 (disp));
		x86_patch (code, (unsigned char*)target);
	}
	else
		x86_patch (code, (unsigned char*)target);
}
void
mono_amd64_patch (unsigned char* code, gpointer target)
{
	amd64_patch (code, target);
}
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgValuetypeAddrInIReg,
	ArgNone /* only in pair_storage */
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
#define DEBUG(a) if (cfg->verbose_level > 1) a
#ifdef PLATFORM_WIN32
#define PARAM_REGS 4

static AMD64_Reg_No param_regs [] = { AMD64_RCX, AMD64_RDX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#else
#define PARAM_REGS 6

static AMD64_Reg_No param_regs [] = { AMD64_RDI, AMD64_RSI, AMD64_RDX, AMD64_RCX, AMD64_R8, AMD64_R9 };

static AMD64_Reg_No return_regs [] = { AMD64_RAX, AMD64_RDX };
#endif
static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}
#ifdef PLATFORM_WIN32
#define FLOAT_PARAM_REGS 4
#else
#define FLOAT_PARAM_REGS 8
#endif
static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
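/*
 * Worked example (added for clarity, not part of the original source), assuming
 * the non-Windows param_regs above: for a static call "void m (long a, double b,
 * long c)", add_general/add_float assign a -> RDI (gr 0 -> 1), b -> XMM0
 * (fr 0 -> 1) and c -> RSI (gr 1 -> 2); once gr reaches PARAM_REGS or fr reaches
 * FLOAT_PARAM_REGS, further arguments get ArgOnStack and grow stack_size by
 * sizeof (gpointer) each.
 */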
typedef enum ArgumentClass {
	ARG_CLASS_NO_CLASS,
	ARG_CLASS_MEMORY,
	ARG_CLASS_INTEGER,
	ARG_CLASS_SSE
} ArgumentClass;
static ArgumentClass
merge_argument_class_from_type (MonoType *type, ArgumentClass class1)
{
	ArgumentClass class2 = ARG_CLASS_NO_CLASS;
	MonoType *ptype;

	ptype = mini_type_get_underlying_type (NULL, type);
	switch (ptype->type) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_STRING:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_ARRAY:
		class2 = ARG_CLASS_INTEGER;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
#ifdef PLATFORM_WIN32
		class2 = ARG_CLASS_INTEGER;
#else
		class2 = ARG_CLASS_SSE;
#endif
		break;

	case MONO_TYPE_TYPEDBYREF:
		g_assert_not_reached ();

	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (ptype)) {
			class2 = ARG_CLASS_INTEGER;
			break;
		}
		/* fall through */
	case MONO_TYPE_VALUETYPE: {
		MonoMarshalType *info = mono_marshal_load_type_info (ptype->data.klass);
		int i;

		for (i = 0; i < info->num_fields; ++i) {
			class2 = merge_argument_class_from_type (info->fields [i].field->type, class2);
		}
		break;
	}
	default:
		g_assert_not_reached ();
	}

	/* Merge */
	if (class1 == class2)
		;
	else if (class1 == ARG_CLASS_NO_CLASS)
		class1 = class2;
	else if ((class1 == ARG_CLASS_MEMORY) || (class2 == ARG_CLASS_MEMORY))
		class1 = ARG_CLASS_MEMORY;
	else if ((class1 == ARG_CLASS_INTEGER) || (class2 == ARG_CLASS_INTEGER))
		class1 = ARG_CLASS_INTEGER;
	else
		class1 = ARG_CLASS_SSE;

	return class1;
}
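/*
 * Worked example (added for clarity, not part of the original source): for a
 * pinvoke struct { double d; long l; } (16 bytes, two eightbyte quads), the
 * first quad only contains the double field and classifies as ARG_CLASS_SSE,
 * the second only the long field and classifies as ARG_CLASS_INTEGER, so
 * add_valuetype below passes it in one SSE and one integer register; any quad
 * that merges to ARG_CLASS_MEMORY forces the whole value onto the stack.
 */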
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size, quad, nquads, i;
	ArgumentClass args [2];
	MonoMarshalType *info = NULL;
	MonoClass *klass;
	MonoGenericSharingContext tmp_gsctx;

	/*
	 * The gsctx currently contains no data, it is only used for checking whenever
	 * open types are allowed, some callers like mono_arch_get_argument_info ()
	 * don't pass it to us, so work around that.
	 */
	if (!gsctx)
		gsctx = &tmp_gsctx;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);
#ifndef PLATFORM_WIN32
	if (!sig->pinvoke && !disable_vtypes_in_regs && ((is_return && (size == 8)) || (!is_return && (size <= 16)))) {
		/* We pass and return vtypes of size 8 in a register */
	} else if (!sig->pinvoke || (size == 0) || (size > 16)) {
#endif
		/* Always pass in memory */
		ainfo->offset = *stack_size;
		*stack_size += ALIGN_TO (size, 8);
		ainfo->storage = ArgOnStack;

		return;
	}

	/* FIXME: Handle structs smaller than 8 bytes */
	//if ((size % 8) != 0)

	if (size > 8)
		nquads = 2;
	else
		nquads = 1;

	if (!sig->pinvoke) {
		/* Always pass in 1 or 2 integer registers */
		args [0] = ARG_CLASS_INTEGER;
		args [1] = ARG_CLASS_INTEGER;
		/* Only the simplest cases are supported */
		if (is_return && nquads != 1) {
			args [0] = ARG_CLASS_MEMORY;
			args [1] = ARG_CLASS_MEMORY;
		}
	}
	else {
		/*
		 * Implement the algorithm from section 3.2.3 of the X86_64 ABI.
		 * The X87 and SSEUP stuff is left out since there are no such types in
		 * the CLR.
		 */
		guint32 align;

		info = mono_marshal_load_type_info (klass);

#ifndef PLATFORM_WIN32
		if (info->native_size > 16) {
			ainfo->offset = *stack_size;
			*stack_size += ALIGN_TO (info->native_size, 8);
			ainfo->storage = ArgOnStack;

			return;
		}
#else
		switch (info->native_size) {
		case 1: case 2: case 4: case 8:
			break;
		default:
			if (is_return) {
				ainfo->storage = ArgOnStack;
				ainfo->offset = *stack_size;
				*stack_size += ALIGN_TO (info->native_size, 8);
			}
			else {
				ainfo->storage = ArgValuetypeAddrInIReg;

				if (*gr < PARAM_REGS) {
					ainfo->pair_storage [0] = ArgInIReg;
					ainfo->pair_regs [0] = param_regs [*gr];
					(*gr) ++;
				}
				else {
					ainfo->pair_storage [0] = ArgOnStack;
					ainfo->offset = *stack_size;
					*stack_size += 8;
				}
			}

			return;
		}
#endif

		args [0] = ARG_CLASS_NO_CLASS;
		args [1] = ARG_CLASS_NO_CLASS;
		for (quad = 0; quad < nquads; ++quad) {
			ArgumentClass class1;

			if (info->num_fields == 0)
				class1 = ARG_CLASS_MEMORY;
			else
				class1 = ARG_CLASS_NO_CLASS;
			for (i = 0; i < info->num_fields; ++i) {
				size = mono_marshal_type_size (info->fields [i].field->type,
							       info->fields [i].mspec,
							       &align, TRUE, klass->unicode);
				if ((info->fields [i].offset < 8) && (info->fields [i].offset + size) > 8) {
					/* Unaligned field */
				}

				/* Skip fields in other quad */
				if ((quad == 0) && (info->fields [i].offset >= 8))
					continue;
				if ((quad == 1) && (info->fields [i].offset < 8))
					continue;

				class1 = merge_argument_class_from_type (info->fields [i].field->type, class1);
			}
			g_assert (class1 != ARG_CLASS_NO_CLASS);
			args [quad] = class1;
		}
	}

	/* Post merger cleanup */
	if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY))
		args [0] = args [1] = ARG_CLASS_MEMORY;

	/* Allocate registers */
	ainfo->storage = ArgValuetypeInReg;
	ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;
	for (quad = 0; quad < nquads; ++quad) {
		switch (args [quad]) {
		case ARG_CLASS_INTEGER:
			if (*gr >= PARAM_REGS)
				args [quad] = ARG_CLASS_MEMORY;
			else {
				ainfo->pair_storage [quad] = ArgInIReg;
				if (is_return)
					ainfo->pair_regs [quad] = return_regs [*gr];
				else
					ainfo->pair_regs [quad] = param_regs [*gr];
				(*gr) ++;
			}
			break;
		case ARG_CLASS_SSE:
			if (*fr >= FLOAT_PARAM_REGS)
				args [quad] = ARG_CLASS_MEMORY;
			else {
				ainfo->pair_storage [quad] = ArgInDoubleSSEReg;
				ainfo->pair_regs [quad] = *fr;
				(*fr) ++;
			}
			break;
		case ARG_CLASS_MEMORY:
			break;
		default:
			g_assert_not_reached ();
		}
	}

	if ((args [0] == ARG_CLASS_MEMORY) || (args [1] == ARG_CLASS_MEMORY)) {
		/* Revert possible register assignments */
		ainfo->offset = *stack_size;
		if (sig->pinvoke)
			*stack_size += ALIGN_TO (info->native_size, 8);
		else
			*stack_size += nquads * sizeof (gpointer);
		ainfo->storage = ArgOnStack;
	}
}
/*
 * Obtain information about a call according to the calling convention.
 * For AMD64, see the "System V ABI, x86-64 Architecture Processor Supplement
 * Draft Version 0.23" document for more information.
 */
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
	switch (ret_type->type) {
	case MONO_TYPE_BOOLEAN:
	case MONO_TYPE_I1:
	case MONO_TYPE_U1:
	case MONO_TYPE_I2:
	case MONO_TYPE_U2:
	case MONO_TYPE_CHAR:
	case MONO_TYPE_I4:
	case MONO_TYPE_U4:
	case MONO_TYPE_I:
	case MONO_TYPE_U:
	case MONO_TYPE_PTR:
	case MONO_TYPE_FNPTR:
	case MONO_TYPE_CLASS:
	case MONO_TYPE_OBJECT:
	case MONO_TYPE_SZARRAY:
	case MONO_TYPE_ARRAY:
	case MONO_TYPE_STRING:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_U8:
	case MONO_TYPE_I8:
		cinfo->ret.storage = ArgInIReg;
		cinfo->ret.reg = AMD64_RAX;
		break;
	case MONO_TYPE_R4:
		cinfo->ret.storage = ArgInFloatSSEReg;
		cinfo->ret.reg = AMD64_XMM0;
		break;
	case MONO_TYPE_R8:
		cinfo->ret.storage = ArgInDoubleSSEReg;
		cinfo->ret.reg = AMD64_XMM0;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = AMD64_RAX;
			break;
		}
		/* fall through */
	case MONO_TYPE_VALUETYPE: {
		guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

		add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
		if (cinfo->ret.storage == ArgOnStack)
			/* The caller passes the address where the value is stored */
			add_general (&gr, &stack_size, &cinfo->ret);
		break;
	}
	case MONO_TYPE_TYPEDBYREF:
		/* Same as a valuetype with size 24 */
		add_general (&gr, &stack_size, &cinfo->ret);
		break;
	case MONO_TYPE_VOID:
		break;
	default:
		g_error ("Can't handle as return value 0x%x", sig->ret->type);
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

#ifdef PLATFORM_WIN32
		/* The float param registers and other param registers must be the same index on Windows x64.*/
#endif

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (ptype)) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
#ifdef PLATFORM_WIN32
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
#else
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
#endif
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

#ifdef PLATFORM_WIN32
	// There always is 32 bytes reserved on the stack when calling on Winx64
	stack_size += 0x20;
#endif

	if (stack_size & 0x8) {
		/* The AMD64 ABI requires each stack frame to be 16 byte aligned */
		cinfo->need_stack_align = TRUE;
		stack_size += 8;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
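/*
 * Illustrative example (added for clarity, not part of the original source),
 * assuming the non-Windows registers above: for an instance method
 * "int Foo (string s, double d)", get_call_info yields this -> RDI, s -> RSI,
 * d -> XMM0 and the return value in RAX (ArgInIReg) with stack_usage == 0;
 * a struct return larger than 16 bytes instead becomes ArgOnStack and the
 * caller passes the destination address as a hidden argument in the next
 * integer register (see the ArgOnStack handling in the return-value switch).
 */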
/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k;
	CallInfo *cinfo = get_call_info (NULL, NULL, csig, FALSE);
	guint32 args_size = cinfo->stack_usage;

	/* The arguments are saved to a stack area in mono_arch_instrument_prolog */
	arg_info [0].offset = 0;

	for (k = 0; k < param_count; k++) {
		arg_info [k + 1].offset = ((k + csig->hasthis) * 8);
		arg_info [k + 1].size = 0;
	}

	g_free (cinfo);

	return args_size;
}
static gboolean
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	__asm__ __volatile__ ("cpuid"
		: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
		: "a" (id));
	return TRUE;
}
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
#ifndef _MSC_VER
	guint16 fpcw;

	/* spec compliance requires running with double precision */
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__ __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__ __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	/* TODO: This is crashing on Win64 right now.
	 * _control87 (_PC_53, MCW_PC);
	 */
#endif
}
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);
}
/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
	}

	return opts;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		if (mono_is_regsize_var (ins->inst_vtype)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
/*
 * mono_arch_compute_omit_fp:
 *
 * Determine whenever the frame pointer can be eliminated.
 */
static void
mono_arch_compute_omit_fp (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	int i, locals_size;
	CallInfo *cinfo;

	if (cfg->arch.omit_fp_computed)
		return;

	header = mono_method_get_header (cfg->method);

	sig = mono_method_signature (cfg->method);

	if (!cfg->arch.cinfo)
		cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
	cinfo = cfg->arch.cinfo;

	/*
	 * FIXME: Remove some of the restrictions.
	 */
	cfg->arch.omit_fp = TRUE;
	cfg->arch.omit_fp_computed = TRUE;

	if (cfg->disable_omit_fp)
		cfg->arch.omit_fp = FALSE;

	if (!debug_omit_fp ())
		cfg->arch.omit_fp = FALSE;

	if (cfg->method->save_lmf)
		cfg->arch.omit_fp = FALSE;
	if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		cfg->arch.omit_fp = FALSE;
	if (header->num_clauses)
		cfg->arch.omit_fp = FALSE;
	if (cfg->param_area)
		cfg->arch.omit_fp = FALSE;
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		cfg->arch.omit_fp = FALSE;
	if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
	    (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		cfg->arch.omit_fp = FALSE;
	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];

		if (ainfo->storage == ArgOnStack) {
			/*
			 * The stack offset can only be determined when the frame
			 * size is known.
			 */
			cfg->arch.omit_fp = FALSE;
		}
	}

	locals_size = 0;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		int ialign;

		locals_size += mono_type_size (ins->inst_vtype, &ialign);
	}

	if ((cfg->num_varinfo > 10000) || (locals_size >= (1 << 15))) {
		/* Avoid hitting the stack_alloc_size < (1 << 16) assertion in emit_epilog () */
		cfg->arch.omit_fp = FALSE;
	}
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	mono_arch_compute_omit_fp (cfg);

	if (cfg->globalra) {
		if (cfg->arch.omit_fp)
			regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

		regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
		regs = g_list_prepend (regs, (gpointer)AMD64_R12);
		regs = g_list_prepend (regs, (gpointer)AMD64_R13);
		regs = g_list_prepend (regs, (gpointer)AMD64_R14);
		regs = g_list_prepend (regs, (gpointer)AMD64_R15);

		regs = g_list_prepend (regs, (gpointer)AMD64_R10);
		regs = g_list_prepend (regs, (gpointer)AMD64_R9);
		regs = g_list_prepend (regs, (gpointer)AMD64_R8);
		regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
		regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
		regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
		regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
		regs = g_list_prepend (regs, (gpointer)AMD64_RAX);
	} else {
		if (cfg->arch.omit_fp)
			regs = g_list_prepend (regs, (gpointer)AMD64_RBP);

		/* We use the callee saved registers for global allocation */
		regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
		regs = g_list_prepend (regs, (gpointer)AMD64_R12);
		regs = g_list_prepend (regs, (gpointer)AMD64_R13);
		regs = g_list_prepend (regs, (gpointer)AMD64_R14);
		regs = g_list_prepend (regs, (gpointer)AMD64_R15);
#ifdef PLATFORM_WIN32
		regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
		regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
#endif
	}

	return regs;
}
GList*
mono_arch_get_global_fp_regs (MonoCompile *cfg)
{
	GList *regs = NULL;
	int i;

	/* All XMM registers */
	for (i = 0; i < 16; ++i)
		regs = g_list_prepend (regs, GINT_TO_POINTER (i));

	return regs;
}
GList*
mono_arch_get_iregs_clobbered_by_call (MonoCallInst *call)
{
	static GList *r = NULL;

	if (r == NULL) {
		GList *regs = NULL;

		regs = g_list_prepend (regs, (gpointer)AMD64_RBP);
		regs = g_list_prepend (regs, (gpointer)AMD64_RBX);
		regs = g_list_prepend (regs, (gpointer)AMD64_R12);
		regs = g_list_prepend (regs, (gpointer)AMD64_R13);
		regs = g_list_prepend (regs, (gpointer)AMD64_R14);
		regs = g_list_prepend (regs, (gpointer)AMD64_R15);

		regs = g_list_prepend (regs, (gpointer)AMD64_R10);
		regs = g_list_prepend (regs, (gpointer)AMD64_R9);
		regs = g_list_prepend (regs, (gpointer)AMD64_R8);
		regs = g_list_prepend (regs, (gpointer)AMD64_RDI);
		regs = g_list_prepend (regs, (gpointer)AMD64_RSI);
		regs = g_list_prepend (regs, (gpointer)AMD64_RDX);
		regs = g_list_prepend (regs, (gpointer)AMD64_RCX);
		regs = g_list_prepend (regs, (gpointer)AMD64_RAX);

		InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
	}

	return r;
}
GList*
mono_arch_get_fregs_clobbered_by_call (MonoCallInst *call)
{
	static GList *r = NULL;
	int i;

	if (r == NULL) {
		GList *regs = NULL;

		for (i = 0; i < AMD64_XMM_NREG; ++i)
			regs = g_list_prepend (regs, GINT_TO_POINTER (MONO_MAX_IREGS + i));

		InterlockedCompareExchangePointer ((gpointer*)&r, regs, NULL);
	}

	return r;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		/* subtract 1 for the invisible store in the prolog */
		return (ins->opcode == OP_ARG) ? 0 : 1;
	else
		return (ins->opcode == OP_ARG) ? 1 : 2;
}
/*
 * mono_arch_fill_argument_info:
 *
 * Populate cfg->args, cfg->ret and cfg->vret_addr with information about the arguments
 * of the method.
 */
void
mono_arch_fill_argument_info (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *ins;
	int i;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);

	sig = mono_method_signature (cfg->method);

	cinfo = cfg->arch.cinfo;

	/*
	 * Contrary to mono_arch_allocate_vars (), the information should describe
	 * where the arguments are at the beginning of the method, not where they can be
	 * accessed during the execution of the method. The later makes no sense for the
	 * global register allocator, since a variable can be in more than one location.
	 */
	if (sig->ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
				cfg->vret_addr->opcode = OP_REGVAR;
				cfg->vret_addr->inst_c0 = cinfo->ret.reg;
			}
			else {
				cfg->ret->opcode = OP_REGVAR;
				cfg->ret->inst_c0 = cinfo->ret.reg;
			}
			break;
		case ArgValuetypeInReg:
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = -1;
			cfg->ret->inst_offset = -1;
			break;
		default:
			g_assert_not_reached ();
		}
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		MonoType *arg_type;

		ins = cfg->args [i];

		if (sig->hasthis && (i == 0))
			arg_type = &mono_defaults.object_class->byval_arg;
		else
			arg_type = sig->params [i - sig->hasthis];

		switch (ainfo->storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			ins->opcode = OP_REGVAR;
			ins->inst_c0 = ainfo->reg;
			break;
		case ArgOnStack:
			ins->opcode = OP_REGOFFSET;
			ins->inst_basereg = -1;
			ins->inst_offset = -1;
			break;
		case ArgValuetypeInReg:
			ins->opcode = OP_NOP;
			break;
		default:
			g_assert_not_reached ();
		}
	}
}
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *ins;
	int i, offset;
	guint32 locals_stack_size, locals_stack_align;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);

	sig = mono_method_signature (cfg->method);

	cinfo = cfg->arch.cinfo;

	mono_arch_compute_omit_fp (cfg);

	/*
	 * We use the ABI calling conventions for managed code as well.
	 * Exception: valuetypes are never passed or returned in registers.
	 */

	offset = 0;

	if (cfg->arch.omit_fp) {
		cfg->flags |= MONO_CFG_HAS_SPILLUP;
		cfg->frame_reg = AMD64_RSP;
	} else {
		/* Locals are allocated backwards from %fp */
		cfg->frame_reg = AMD64_RBP;
	}

	if (cfg->method->save_lmf) {
		/* Reserve stack space for saving LMF */
		/* mono_arch_find_jit_info () expects to find the LMF at a fixed offset */
		g_assert (offset == 0);
		if (cfg->arch.omit_fp) {
			cfg->arch.lmf_offset = offset;
			offset += sizeof (MonoLMF);
		}
		else {
			offset += sizeof (MonoLMF);
			cfg->arch.lmf_offset = -offset;
		}
	} else {
		if (cfg->arch.omit_fp)
			cfg->arch.reg_save_area_offset = offset;
		/* Reserve space for caller saved registers */
		for (i = 0; i < AMD64_NREG; ++i)
			if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
				offset += sizeof (gpointer);
			}
	}

	if (sig->ret->type != MONO_TYPE_VOID) {
		switch (cinfo->ret.storage) {
		case ArgInIReg:
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			if ((MONO_TYPE_ISSTRUCT (sig->ret) && !mono_class_from_mono_type (sig->ret)->enumtype) || (sig->ret->type == MONO_TYPE_TYPEDBYREF)) {
				if (cfg->globalra) {
					cfg->vret_addr->opcode = OP_REGVAR;
					cfg->vret_addr->inst_c0 = cinfo->ret.reg;
				} else {
					/* The register is volatile */
					cfg->vret_addr->opcode = OP_REGOFFSET;
					cfg->vret_addr->inst_basereg = cfg->frame_reg;
					if (cfg->arch.omit_fp) {
						cfg->vret_addr->inst_offset = offset;
						offset += 8;
					} else {
						offset += 8;
						cfg->vret_addr->inst_offset = -offset;
					}
					if (G_UNLIKELY (cfg->verbose_level > 1)) {
						printf ("vret_addr =");
						mono_print_ins (cfg->vret_addr);
					}
				}
			}
			else {
				cfg->ret->opcode = OP_REGVAR;
				cfg->ret->inst_c0 = cinfo->ret.reg;
			}
			break;
		case ArgValuetypeInReg:
			/* Allocate a local to hold the result, the epilog will copy it to the correct place */
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = cfg->frame_reg;
			if (cfg->arch.omit_fp) {
				cfg->ret->inst_offset = offset;
				offset += 16;
			} else {
				offset += 16;
				cfg->ret->inst_offset = - offset;
			}
			break;
		default:
			g_assert_not_reached ();
		}
		cfg->ret->dreg = cfg->ret->inst_c0;
	}

	/* Allocate locals */
	if (!cfg->globalra) {
		offsets = mono_allocate_stack_slots_full (cfg, cfg->arch.omit_fp ? FALSE : TRUE, &locals_stack_size, &locals_stack_align);
		if (locals_stack_align) {
			offset += (locals_stack_align - 1);
			offset &= ~(locals_stack_align - 1);
		}
		for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
			if (offsets [i] != -1) {
				MonoInst *ins = cfg->varinfo [i];
				ins->opcode = OP_REGOFFSET;
				ins->inst_basereg = cfg->frame_reg;
				if (cfg->arch.omit_fp)
					ins->inst_offset = (offset + offsets [i]);
				else
					ins->inst_offset = - (offset + offsets [i]);
				//printf ("allocated local %d to ", i); mono_print_tree_nl (ins);
			}
		}
		offset += locals_stack_size;
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG)) {
		g_assert (!cfg->arch.omit_fp);
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ins = cfg->args [i];
		if (ins->opcode != OP_REGVAR) {
			ArgInfo *ainfo = &cinfo->args [i];
			gboolean inreg = TRUE;
			MonoType *arg_type;

			if (sig->hasthis && (i == 0))
				arg_type = &mono_defaults.object_class->byval_arg;
			else
				arg_type = sig->params [i - sig->hasthis];

			if (cfg->globalra) {
				/* The new allocator needs info about the original locations of the arguments */
				switch (ainfo->storage) {
				case ArgInIReg:
				case ArgInFloatSSEReg:
				case ArgInDoubleSSEReg:
					ins->opcode = OP_REGVAR;
					ins->inst_c0 = ainfo->reg;
					break;
				case ArgOnStack:
					g_assert (!cfg->arch.omit_fp);
					ins->opcode = OP_REGOFFSET;
					ins->inst_basereg = cfg->frame_reg;
					ins->inst_offset = ainfo->offset + ARGS_OFFSET;
					break;
				case ArgValuetypeInReg:
					ins->opcode = OP_REGOFFSET;
					ins->inst_basereg = cfg->frame_reg;
					/* These arguments are saved to the stack in the prolog */
					offset = ALIGN_TO (offset, sizeof (gpointer));
					if (cfg->arch.omit_fp) {
						ins->inst_offset = offset;
						offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
					} else {
						offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
						ins->inst_offset = - offset;
					}
					break;
				default:
					g_assert_not_reached ();
				}

				continue;
			}

			/* FIXME: Allocate volatile arguments to registers */
			if (ins->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT))
				inreg = FALSE;

			/*
			 * Under AMD64, all registers used to pass arguments to functions
			 * are volatile across calls.
			 * FIXME: Optimize this.
			 */
			if ((ainfo->storage == ArgInIReg) || (ainfo->storage == ArgInFloatSSEReg) || (ainfo->storage == ArgInDoubleSSEReg) || (ainfo->storage == ArgValuetypeInReg))
				inreg = FALSE;

			ins->opcode = OP_REGOFFSET;

			switch (ainfo->storage) {
			case ArgInIReg:
			case ArgInFloatSSEReg:
			case ArgInDoubleSSEReg:
				if (inreg) {
					ins->opcode = OP_REGVAR;
					ins->dreg = ainfo->reg;
				}
				break;
			case ArgOnStack:
				g_assert (!cfg->arch.omit_fp);
				ins->opcode = OP_REGOFFSET;
				ins->inst_basereg = cfg->frame_reg;
				ins->inst_offset = ainfo->offset + ARGS_OFFSET;
				break;
			case ArgValuetypeInReg:
				break;
			case ArgValuetypeAddrInIReg: {
				MonoInst *indir;
				g_assert (!cfg->arch.omit_fp);

				MONO_INST_NEW (cfg, indir, 0);
				indir->opcode = OP_REGOFFSET;
				if (ainfo->pair_storage [0] == ArgInIReg) {
					indir->inst_basereg = cfg->frame_reg;
					offset = ALIGN_TO (offset, sizeof (gpointer));
					offset += (sizeof (gpointer));
					indir->inst_offset = - offset;
				}
				else {
					indir->inst_basereg = cfg->frame_reg;
					indir->inst_offset = ainfo->offset + ARGS_OFFSET;
				}

				ins->opcode = OP_VTARG_ADDR;
				ins->inst_left = indir;

				break;
			}
			default:
				g_assert_not_reached ();
			}

			if (!inreg && (ainfo->storage != ArgOnStack) && (ainfo->storage != ArgValuetypeAddrInIReg)) {
				ins->opcode = OP_REGOFFSET;
				ins->inst_basereg = cfg->frame_reg;
				/* These arguments are saved to the stack in the prolog */
				offset = ALIGN_TO (offset, sizeof (gpointer));
				if (cfg->arch.omit_fp) {
					ins->inst_offset = offset;
					offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
				} else {
					offset += (ainfo->storage == ArgValuetypeInReg) ? 2 * sizeof (gpointer) : sizeof (gpointer);
					ins->inst_offset = - offset;
				}
			}
		}
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	if (!cfg->arch.cinfo)
		cfg->arch.cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
	cinfo = cfg->arch.cinfo;

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;

	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
		if (G_UNLIKELY (cfg->verbose_level > 1)) {
			printf ("vret_addr = ");
			mono_print_ins (cfg->vret_addr);
		}
	}
}
*call
, ArgStorage storage
, int reg
, MonoInst
*tree
)
1516 MONO_INST_NEW (cfg
, ins
, OP_MOVE
);
1517 ins
->dreg
= mono_alloc_ireg (cfg
);
1518 ins
->sreg1
= tree
->dreg
;
1519 MONO_ADD_INS (cfg
->cbb
, ins
);
1520 mono_call_inst_add_outarg_reg (cfg
, call
, ins
->dreg
, reg
, FALSE
);
1522 case ArgInFloatSSEReg
:
1523 MONO_INST_NEW (cfg
, ins
, OP_AMD64_SET_XMMREG_R4
);
1524 ins
->dreg
= mono_alloc_freg (cfg
);
1525 ins
->sreg1
= tree
->dreg
;
1526 MONO_ADD_INS (cfg
->cbb
, ins
);
1528 mono_call_inst_add_outarg_reg (cfg
, call
, ins
->dreg
, reg
, TRUE
);
1530 case ArgInDoubleSSEReg
:
1531 MONO_INST_NEW (cfg
, ins
, OP_FMOVE
);
1532 ins
->dreg
= mono_alloc_freg (cfg
);
1533 ins
->sreg1
= tree
->dreg
;
1534 MONO_ADD_INS (cfg
->cbb
, ins
);
1536 mono_call_inst_add_outarg_reg (cfg
, call
, ins
->dreg
, reg
, TRUE
);
1540 g_assert_not_reached ();
static int
arg_storage_to_load_membase (ArgStorage storage)
{
	switch (storage) {
	case ArgInIReg:
		return OP_LOAD_MEMBASE;
	case ArgInDoubleSSEReg:
		return OP_LOADR8_MEMBASE;
	case ArgInFloatSSEReg:
		return OP_LOADR4_MEMBASE;
	default:
		g_assert_not_reached ();
	}

	return -1;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoInst *arg, *sig_arg;
	MonoMethodSignature *tmp_sig;

	if (call->tail_call)
		NOT_IMPLEMENTED;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	g_assert (cinfo->sig_cookie.storage == ArgOnStack);

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_INST_NEW (cfg, sig_arg, OP_ICONST);
	sig_arg->dreg = mono_alloc_ireg (cfg);
	sig_arg->inst_p0 = tmp_sig;
	MONO_ADD_INS (cfg->cbb, sig_arg);

	MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
	arg->sreg1 = sig_arg->dreg;
	MONO_ADD_INS (cfg->cbb, arg);
}
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n, stack_size;
	CallInfo *cinfo;
	ArgInfo *ainfo;

	stack_size = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, sig->pinvoke);

	if (cinfo->need_stack_align) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
	}

	/*
	 * Emit all parameters passed in registers in non-reverse order for better readability
	 * and to help the optimization in emit_prolog ().
	 */
	for (i = 0; i < n; ++i) {
		ainfo = cinfo->args + i;

		in = call->args [i];

		if (ainfo->storage == ArgInIReg)
			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
	}

	for (i = n - 1; i >= 0; --i) {
		ainfo = cinfo->args + i;

		in = call->args [i];

		switch (ainfo->storage) {
		case ArgInIReg:
			/* Already done */
			break;
		case ArgInFloatSSEReg:
		case ArgInDoubleSSEReg:
			add_outarg_reg (cfg, call, ainfo->storage, ainfo->reg, in);
			break;
		case ArgOnStack:
		case ArgValuetypeInReg:
		case ArgValuetypeAddrInIReg:
			if (ainfo->storage == ArgOnStack && call->tail_call) {
				MonoInst *call_inst = (MonoInst*)call;
				cfg->args [i]->flags |= MONO_INST_VOLATILE;
				EMIT_NEW_ARGSTORE (cfg, call_inst, i, in);
			} else if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(sig->params [i - sig->hasthis]))) {
				guint32 size, align;

				if (sig->params [i - sig->hasthis]->type == MONO_TYPE_TYPEDBYREF) {
					size = sizeof (MonoTypedRef);
					align = sizeof (gpointer);
				}
				else {
					if (sig->pinvoke)
						size = mono_type_native_stack_size (&in->klass->byval_arg, &align);
					else {
						/*
						 * Other backends use mono_type_stack_size (), but that
						 * aligns the size to 8, which is larger than the size of
						 * the source, leading to reads of invalid memory if the
						 * source is at the end of address space.
						 */
						size = mono_class_value_size (in->klass, &align);
					}
				}
				g_assert (in->klass);

				MONO_INST_NEW (cfg, arg, OP_OUTARG_VT);
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;
				arg->inst_p0 = call;
				arg->inst_p1 = mono_mempool_alloc (cfg->mempool, sizeof (ArgInfo));
				memcpy (arg->inst_p1, ainfo, sizeof (ArgInfo));

				MONO_ADD_INS (cfg->cbb, arg);
			} else {
				MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
				arg->sreg1 = in->dreg;
				if (!sig->params [i - sig->hasthis]->byref) {
					if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (sig->params [i - sig->hasthis]->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					}
				}
				MONO_ADD_INS (cfg->cbb, arg);
			}
			break;
		default:
			g_assert_not_reached ();
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos))
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sig->sentinelpos))
		emit_sig_cookie (cfg, call, cinfo);

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
				/*
				 * Tell the JIT to use a more efficient calling convention: call using
				 * OP_CALL, compute the result location after the call, and save the
				 * result there.
				 */
				call->vret_in_reg = TRUE;
				/*
				 * Nullify the instruction computing the vret addr to enable
				 * future optimizations.
				 */
				NULLIFY_INS (call->vret_var);
			} else {
				if (call->tail_call)
					NOT_IMPLEMENTED;
				/*
				 * The valuetype is in RAX:RDX after the call, need to be copied to
				 * the stack. Push the address here, so the call instruction can
				 * access it.
				 */
				if (!cfg->arch.vret_addr_loc) {
					cfg->arch.vret_addr_loc = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_LOCAL);
					/* Prevent it from being register allocated or optimized away */
					((MonoInst*)cfg->arch.vret_addr_loc)->flags |= MONO_INST_VOLATILE;
				}

				MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, ((MonoInst*)cfg->arch.vret_addr_loc)->dreg, call->vret_var->dreg);
			}
		}
		else {
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->vret_var->dreg;
			vtarg->dreg = mono_alloc_preg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		}
	}

#ifdef PLATFORM_WIN32
	if (call->inst.opcode != OP_JMP && OP_TAILCALL != call->inst.opcode) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 0x20);
	}
#endif

	if (cfg->method->save_lmf) {
		MONO_INST_NEW (cfg, arg, OP_AMD64_SAVE_SP_TO_LMF);
		MONO_ADD_INS (cfg->cbb, arg);
	}

	call->stack_usage = cinfo->stack_usage;
}
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	MonoCallInst *call = (MonoCallInst*)ins->inst_p0;
	ArgInfo *ainfo = (ArgInfo*)ins->inst_p1;
	int size = ins->backend.size;

	if (ainfo->storage == ArgValuetypeInReg) {
		MonoInst *load;
		int part;

		for (part = 0; part < 2; ++part) {
			if (ainfo->pair_storage [part] == ArgNone)
				continue;

			MONO_INST_NEW (cfg, load, arg_storage_to_load_membase (ainfo->pair_storage [part]));
			load->inst_basereg = src->dreg;
			load->inst_offset = part * sizeof (gpointer);

			switch (ainfo->pair_storage [part]) {
			case ArgInIReg:
				load->dreg = mono_alloc_ireg (cfg);
				break;
			case ArgInDoubleSSEReg:
			case ArgInFloatSSEReg:
				load->dreg = mono_alloc_freg (cfg);
				break;
			default:
				g_assert_not_reached ();
			}
			MONO_ADD_INS (cfg->cbb, load);

			add_outarg_reg (cfg, call, ainfo->pair_storage [part], ainfo->pair_regs [part], load);
		}
	} else if (ainfo->storage == ArgValuetypeAddrInIReg) {
		MonoInst *vtaddr, *load;
		vtaddr = mono_compile_create_var (cfg, &ins->klass->byval_arg, OP_LOCAL);

		MONO_INST_NEW (cfg, load, OP_LDADDR);
		load->inst_p0 = vtaddr;
		vtaddr->flags |= MONO_INST_INDIRECT;
		load->type = STACK_MP;
		load->klass = vtaddr->klass;
		load->dreg = mono_alloc_ireg (cfg);
		MONO_ADD_INS (cfg->cbb, load);
		mini_emit_memcpy (cfg, load->dreg, 0, src->dreg, 0, size, 4);

		if (ainfo->pair_storage [0] == ArgInIReg) {
			MONO_INST_NEW (cfg, arg, OP_X86_LEA_MEMBASE);
			arg->dreg = mono_alloc_ireg (cfg);
			arg->sreg1 = load->dreg;
			MONO_ADD_INS (cfg->cbb, arg);
			mono_call_inst_add_outarg_reg (cfg, call, arg->dreg, ainfo->pair_regs [0], FALSE);
		} else {
			MONO_INST_NEW (cfg, arg, OP_X86_PUSH);
			arg->sreg1 = load->dreg;
			MONO_ADD_INS (cfg->cbb, arg);
		}
	} else {
		if (size == 8) {
			/* Can't use this for < 8 since it does an 8 byte memory load */
			MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
			arg->inst_basereg = src->dreg;
			arg->inst_offset = 0;
			MONO_ADD_INS (cfg->cbb, arg);
		} else if (size <= 40) {
			MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 8));
			mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
		} else {
			MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
			arg->inst_basereg = src->dreg;
			arg->inst_offset = 0;
			arg->inst_imm = size;
			MONO_ADD_INS (cfg->cbb, arg);
		}
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (NULL, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			MONO_EMIT_NEW_UNALU (cfg, OP_AMD64_SET_XMMREG_R4, cfg->ret->dreg, val->dreg);
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			MONO_EMIT_NEW_UNALU (cfg, OP_FMOVE, cfg->ret->dreg, val->dreg);
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
/* emit an exception if condition is fail */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	amd64_fcompp (code);  \
	amd64_fnstsw (code); \
} while (0);
#define EMIT_SSE2_FPFUNC(code, op, dreg, sreg1) do { \
	amd64_movsd_membase_reg (code, AMD64_RSP, -8, (sreg1)); \
	amd64_fld_membase (code, AMD64_RSP, -8, TRUE); \
	amd64_ ##op (code); \
	amd64_fst_membase (code, AMD64_RSP, -8, TRUE, TRUE); \
	amd64_movsd_reg_membase (code, (dreg), AMD64_RSP, -8); \
} while (0);
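/*
 * Note (added for clarity, not part of the original source): EMIT_SSE2_FPFUNC
 * spills the SSE double just below the stack pointer, reloads it onto the x87
 * stack, runs an x87-only instruction (the "op" parameter, e.g. a hypothetical
 * EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1)), then stores the x87
 * result back and reloads it into the destination XMM register, since these
 * operations have no SSE2 equivalent.
 */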
static guint8*
emit_call_body (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	gboolean no_patch = FALSE;

	/*
	 * FIXME: Add support for thunks
	 */
	{
		gboolean near_call = FALSE;

		/*
		 * Indirect calls are expensive so try to make a near call if possible.
		 * The caller memory is allocated by the code manager so it is
		 * guaranteed to be at a 32 bit offset.
		 */

		if (patch_type != MONO_PATCH_INFO_ABS) {
			/* The target is in memory allocated using the code manager */
			near_call = TRUE;

			if ((patch_type == MONO_PATCH_INFO_METHOD) || (patch_type == MONO_PATCH_INFO_METHOD_JUMP)) {
				if (((MonoMethod*)data)->klass->image->aot_module)
					/* The callee might be an AOT method */
					near_call = FALSE;
				if (((MonoMethod*)data)->dynamic)
					/* The target is in malloc-ed memory */
					near_call = FALSE;
			}

			if (patch_type == MONO_PATCH_INFO_INTERNAL_METHOD) {
				/*
				 * The call might go directly to a native function without
				 * the wrapper.
				 */
				MonoJitICallInfo *mi = mono_find_jit_icall_by_name (data);
				if (mi) {
					gconstpointer target = mono_icall_get_wrapper (mi);
					if ((((guint64)target) >> 32) != 0)
						near_call = FALSE;
				}
			}
		}
		else {
			if (cfg->abs_patches && g_hash_table_lookup (cfg->abs_patches, data)) {
				/*
				 * This is not really an optimization, but required because the
				 * generic class init trampolines use R11 to pass the vtable.
				 */
				near_call = TRUE;
			} else {
				MonoJitICallInfo *info = mono_find_jit_icall_by_addr (data);
				if (info) {
					if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) &&
					    strstr (cfg->method->name, info->name)) {
						/* A call to the wrapped function */
						if ((((guint64)data) >> 32) == 0)
							near_call = TRUE;
						no_patch = TRUE;
					}
					else if (info->func == info->wrapper) {
						/* No wrapper */
						if ((((guint64)info->func) >> 32) == 0)
							near_call = TRUE;
					}
					else {
						/* See the comment in mono_codegen () */
						if ((info->name [0] != 'v') || (strstr (info->name, "ves_array_new_va_") == NULL && strstr (info->name, "ves_array_element_address_") == NULL))
							near_call = TRUE;
					}
				}
				else if ((((guint64)data) >> 32) == 0) {
					near_call = TRUE;
					no_patch = TRUE;
				}
			}
		}

		if (cfg->method->dynamic)
			/* These methods are allocated using malloc */
			near_call = FALSE;

		if (cfg->compile_aot) {
			near_call = TRUE;
			no_patch = TRUE;
		}

#ifdef MONO_ARCH_NOMAP32BIT
		near_call = FALSE;
#endif

		if (near_call) {
			/*
			 * Align the call displacement to an address divisible by 4 so it does
			 * not span cache lines. This is required for code patching to work on SMP
			 * systems.
			 */
			if (!no_patch && ((guint32)(code + 1 - cfg->native_code) % 4) != 0)
				amd64_padding (code, 4 - ((guint32)(code + 1 - cfg->native_code) % 4));
			mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
			amd64_call_code (code, 0);
		}
		else {
			mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
			amd64_set_reg_template (code, GP_SCRATCH_REG);
			amd64_call_reg (code, GP_SCRATCH_REG);
		}
	}

	return code;
}
static inline guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data, gboolean win64_adjust_stack)
{
#ifdef PLATFORM_WIN32
	if (win64_adjust_stack)
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 32);
#endif
	code = emit_call_body (cfg, code, patch_type, data);
#ifdef PLATFORM_WIN32
	if (win64_adjust_stack)
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 32);
#endif

	return code;
}
static int
store_membase_imm_to_store_membase_reg (int opcode)
{
	switch (opcode) {
	case OP_STORE_MEMBASE_IMM:
		return OP_STORE_MEMBASE_REG;
	case OP_STOREI4_MEMBASE_IMM:
		return OP_STOREI4_MEMBASE_REG;
	case OP_STOREI8_MEMBASE_IMM:
		return OP_STOREI8_MEMBASE_REG;
	}

	return -1;
}
#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB) || ((opcode) == OP_ISBB_IMM)))
/*
 * mono_arch_peephole_pass_1:
 *
 *   Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_ADD_IMM:
		case OP_IADD_IMM:
		case OP_LADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS) && (ins->inst_imm > 0)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction. inst_imm > 0 is needed since LEA sign-extends
				 * its operand to 64 bit.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			}
			break;
		case OP_LXOR:
		case OP_IXOR:
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				MonoInst *ins2;

				/*
				 * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since
				 * the latter has length 2-3 instead of 6 (reverse constant
				 * propagation). These instruction sequences are very common
				 * in the initlocals bblock.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
						ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
						ins2->sreg1 = ins->dreg;
					} else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
						/* Continue */
					} else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
						/* Continue */
					} else {
						break;
					}
				}
			}
			break;
		case OP_COMPARE_IMM:
		case OP_LCOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_AMD64_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_AMD64_TEST_NULL;
			break;
		case OP_ICOMPARE_IMM:
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
					ins->opcode = OP_ICOMPARE_IMM;
					ins->sreg1 = last_ins->sreg1;

					/* check if we can remove cmp reg,0 with test null */
					if (!ins->inst_imm)
						ins->opcode = OP_X86_TEST_NULL;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
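/*
 * Illustrative before/after (added for clarity, not part of the original
 * source) for the reverse constant propagation performed above on the
 * initlocals block: once "xor %reg, %reg" materializes a zero, a following
 * "STOREI8_MEMBASE_IMM 0 -> [fp + off]" is rewritten to
 * "STOREI8_MEMBASE_REG %reg -> [fp + off]", which encodes in 2-3 bytes
 * instead of the 6 byte store of an immediate.
 */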
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
		case OP_I8CONST: {
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we cant do it always */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				ins->opcode = OP_LXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;
				/* Fall through */
			} else {
				break;
			}
		}
		case OP_LXOR:
			/*
			 * Use IXOR to avoid a rex prefix if possible. The cpu will sign extend the
			 * 0 result into 64 bits.
			 */
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				ins->opcode = OP_IXOR;
			}
			/* Fall through */
		case OP_IXOR:
			if ((ins->sreg1 == ins->sreg2) && (ins->sreg1 == ins->dreg)) {
				MonoInst *ins2;

				/*
				 * Replace STORE_MEMBASE_IMM 0 with STORE_MEMBASE_REG since
				 * the latter has length 2-3 instead of 6 (reverse constant
				 * propagation). These instruction sequences are very common
				 * in the initlocals bblock.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if (((ins2->opcode == OP_STORE_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_IMM) || (ins2->opcode == OP_STOREI8_MEMBASE_IMM) || (ins2->opcode == OP_STORE_MEMBASE_IMM)) && (ins2->inst_imm == 0)) {
						ins2->opcode = store_membase_imm_to_store_membase_reg (ins2->opcode);
						ins2->sreg1 = ins->dreg;
					} else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM) || (ins2->opcode == OP_STOREI4_MEMBASE_REG) || (ins2->opcode == OP_STOREI8_MEMBASE_REG) || (ins2->opcode == OP_STORE_MEMBASE_REG)) {
						/* Continue */
					} else if (((ins2->opcode == OP_ICONST) || (ins2->opcode == OP_I8CONST)) && (ins2->dreg == ins->dreg) && (ins2->inst_c0 == 0)) {
						/* Continue */
					} else {
						break;
					}
				}
			}
			break;
		case OP_IADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
#define NEW_INS(cfg,ins,dest,op) do { \
	MONO_INST_NEW ((cfg), (dest), (op)); \
	(dest)->cil_code = (ins)->cil_code; \
	mono_bblock_insert_before_ins (bb, ins, (dest)); \
} while (0)
/*
 * mono_arch_lowering_pass:
 *
 *  Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n, *temp;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		case OP_MUL_IMM:
			/* Keep the opcode if we can implement it efficiently */
			if (!((ins->inst_imm > 0) && (mono_is_power_of_two (ins->inst_imm) != -1)))
				mono_decompose_op_imm (cfg, bb, ins);
			break;
		case OP_COMPARE_IMM:
		case OP_LCOMPARE_IMM:
			if (!amd64_is_imm32 (ins->inst_imm)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_imm;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_COMPARE;
				ins->sreg2 = temp->dreg;
			}
			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI8_MEMBASE:
			if (!amd64_is_imm32 (ins->inst_offset)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_offset;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_AMD64_LOADI8_MEMINDEX;
				ins->inst_indexreg = temp->dreg;
			}
			break;
		case OP_STORE_MEMBASE_IMM:
		case OP_STOREI8_MEMBASE_IMM:
			if (!amd64_is_imm32 (ins->inst_imm)) {
				NEW_INS (cfg, ins, temp, OP_I8CONST);
				temp->inst_c0 = ins->inst_imm;
				temp->dreg = mono_alloc_ireg (cfg);
				ins->opcode = OP_STOREI8_MEMBASE_REG;
				ins->sreg1 = temp->dreg;
			}
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const guchar
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const guchar
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const gboolean
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};

/*#include "cprop.c"*/
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int sreg, int size, gboolean is_signed)
{
	amd64_sse_cvttsd2si_reg_reg (code, dreg, sreg);

	if (size == 1)
		amd64_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		amd64_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	if (!(tree->flags & MONO_INST_INIT))
		need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently commited.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		amd64_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 0x1000);
		amd64_test_membase_reg (code, AMD64_RSP, 0, AMD64_RSP);
		amd64_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		amd64_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		amd64_patch (br[3], br[2]);
		amd64_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		amd64_patch (br[0], code);
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, sreg);
		amd64_patch (br[1], code);
		amd64_patch (br[4], code);
	}
	else
		amd64_alu_reg_reg (code, X86_SUB, AMD64_RSP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;

		if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX) {
			amd64_push_reg (code, AMD64_RAX);
			offset += 8;
		}
		if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX) {
			amd64_push_reg (code, AMD64_RCX);
			offset += 8;
		}
		if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI) {
			amd64_push_reg (code, AMD64_RDI);
			offset += 8;
		}

		amd64_shift_reg_imm (code, X86_SHR, sreg, 3);
		if (sreg != AMD64_RCX)
			amd64_mov_reg_reg (code, AMD64_RCX, sreg, 8);
		amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);

		amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, offset);
		amd64_cld (code);
		amd64_prefix (code, X86_REP_PREFIX);
		amd64_stosl (code);

		if (tree->dreg != AMD64_RDI && sreg != AMD64_RDI)
			amd64_pop_reg (code, AMD64_RDI);
		if (tree->dreg != AMD64_RCX && sreg != AMD64_RCX)
			amd64_pop_reg (code, AMD64_RCX);
		if (tree->dreg != AMD64_RAX && sreg != AMD64_RAX)
			amd64_pop_reg (code, AMD64_RAX);
	}
	return code;
}
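/*
 * Plain C sketch of the probing loop emitted above (illustration only, not
 * called by the JIT): committing the new stack area one page at a time keeps
 * every access within one page of the guard page, so the OS keeps growing
 * the stack; a single large RSP adjustment could jump straight past it.
 */
G_GNUC_UNUSED static void
example_probe_stack (volatile guint8 *sp, gsize size)
{
	const gsize page_size = 0x1000;

	while (size >= page_size) {
		sp -= page_size;
		(void)*sp;		/* "touch" the new page, like test [rsp], rsp */
		size -= page_size;
	}
	sp -= size;			/* remainder smaller than one page */
}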
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	/* FIXME: do this in the local reg allocator */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
	case OP_LCALL:
	case OP_LCALL_REG:
	case OP_LCALL_MEMBASE:
		g_assert (ins->dreg == AMD64_RAX);
		break;
	case OP_FCALL:
	case OP_FCALL_REG:
	case OP_FCALL_MEMBASE:
		if (((MonoCallInst*)ins)->signature->ret->type == MONO_TYPE_R4) {
			amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, AMD64_XMM0);
		}
		else {
			if (ins->dreg != AMD64_XMM0)
				amd64_sse_movsd_reg_reg (code, ins->dreg, AMD64_XMM0);
		}
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
	case OP_VCALL2:
	case OP_VCALL2_REG:
	case OP_VCALL2_MEMBASE:
		cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			MonoInst *loc = cfg->arch.vret_addr_loc;

			/* Load the destination address */
			g_assert (loc->opcode == OP_REGOFFSET);
			amd64_mov_reg_membase (code, AMD64_RCX, loc->inst_basereg, loc->inst_offset, 8);

			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					amd64_mov_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad], 8);
					break;
				case ArgInFloatSSEReg:
					amd64_movss_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
					break;
				case ArgInDoubleSSEReg:
					amd64_movsd_membase_reg (code, AMD64_RCX, (quad * 8), cinfo->ret.pair_regs [quad]);
					break;
				default:
					break;
				}
			}
		}
		break;
	}

	return code;
}
/*
 * mono_amd64_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_amd64_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
guint8*
mono_amd64_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_GS_PREFIX);
	amd64_mov_reg_mem (code, dreg, (tls_offset * 8) + 0x1480, 8);
#else
	if (optimize_for_xen) {
		x86_prefix (code, X86_FS_PREFIX);
		amd64_mov_reg_mem (code, dreg, 0, 8);
		amd64_mov_reg_membase (code, dreg, dreg, tls_offset, 8);
	} else {
		x86_prefix (code, X86_FS_PREFIX);
		amd64_mov_reg_mem (code, dreg, tls_offset, 8);
	}
#endif
	return code;
}
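/*
 * Usage sketch (illustration only; the helper name is made up): emitting a
 * TLS load of the current appdomain, assuming appdomain_tls_offset was
 * resolved at startup. On Linux this is a single "mov %fs:<offset>, %reg";
 * the Xen variant goes through the TLS block pointer stored at %fs:0.
 */
G_GNUC_UNUSED static guint8*
example_emit_get_appdomain (guint8 *code, int dreg)
{
	if (appdomain_tls_offset != -1)
		code = mono_amd64_emit_tls_get (code, dreg, appdomain_tls_offset);
	return code;
}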
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
amd64_push_reg (code, AMD64_RAX); \
amd64_push_reg (code, AMD64_RDX); \
amd64_push_reg (code, AMD64_RCX); \
amd64_push_reg (code, reg); \
amd64_push_imm (code, reg); \
amd64_push_imm (code, text " %d %p\n"); \
amd64_mov_reg_imm (code, AMD64_RAX, printf); \
amd64_call_reg (code, AMD64_RAX); \
amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 3*4); \
amd64_pop_reg (code, AMD64_RCX); \
amd64_pop_reg (code, AMD64_RDX); \
amd64_pop_reg (code, AMD64_RAX);

/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins;
	MonoCallInst *call;
	guint offset;
	guint8 *code = cfg->native_code + cfg->code_len;
	MonoInst *last_ins = NULL;
	guint last_offset = 0;
	int max_len, cpos, i;

	if (cfg->opt & MONO_OPT_LOOP) {
		int pad, align = LOOP_ALIGNMENT;
		/* set alignment depending on cpu */
		if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
			pad = align - pad;
			/*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
			amd64_padding (code, pad);
			cfg->code_len += pad;
			bb->native_offset = cfg->code_len;
		}
	}

	if (cfg->verbose_level > 2)
		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);

	cpos = bb->max_offset;

	if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
		MonoProfileCoverageInfo *cov = cfg->coverage_info;
		g_assert (!cfg->compile_aot);

		cov->data [bb->dfn].cil_code = bb->cil_code;
		amd64_mov_reg_imm (code, AMD64_R11, (guint64)&cov->data [bb->dfn].count);
		/* this is not thread safe, but good enough */
		amd64_inc_membase (code, AMD64_R11, 0);
	}

	offset = code - cfg->native_code;

	mono_debug_open_block (cfg, bb, offset);

	if (mono_break_at_bb_method && mono_method_desc_full_match (mono_break_at_bb_method, cfg->method) && bb->block_num == mono_break_at_bb_bb_num)
		x86_breakpoint (code);

	MONO_BB_FOR_EACH_INS (bb, ins) {
		offset = code - cfg->native_code;

		max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];

		if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
			cfg->code_size *= 2;
			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
			code = cfg->native_code + offset;
			mono_jit_stats.code_reallocs++;
		}

		if (cfg->debug_info)
			mono_debug_record_line_number (cfg, ins, offset);

		switch (ins->opcode) {
		case OP_BIGMUL:
			amd64_mul_reg (code, ins->sreg2, TRUE);
			break;
		case OP_BIGMUL_UN:
			amd64_mul_reg (code, ins->sreg2, FALSE);
			break;
		case OP_X86_SETEQ_MEMBASE:
			amd64_set_membase (code, X86_CC_EQ, ins->inst_basereg, ins->inst_offset, TRUE);
			break;
		case OP_STOREI1_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
			break;
		case OP_STOREI2_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
			break;
		case OP_STOREI4_MEMBASE_IMM:
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_STOREI1_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
			break;
		case OP_STOREI2_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
			break;
		case OP_STORE_MEMBASE_REG:
		case OP_STOREI8_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 8);
			break;
		case OP_STOREI4_MEMBASE_REG:
			amd64_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
			break;
		case OP_STORE_MEMBASE_IMM:
		case OP_STOREI8_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_LOAD_MEM:
		case OP_LOADI8_MEM:
			// FIXME: Decompose this earlier
			if (amd64_is_imm32 (ins->inst_imm))
				amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, sizeof (gpointer));
			else {
				amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
				amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 8);
			}
			break;
		case OP_LOADI4_MEM:
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_movsxd_reg_membase (code, ins->dreg, ins->dreg, 0);
			break;
		case OP_LOADU4_MEM:
			// FIXME: Decompose this earlier
			if (amd64_is_imm32 (ins->inst_imm))
				amd64_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
			else {
				amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
				amd64_mov_reg_membase (code, ins->dreg, ins->dreg, 0, 4);
			}
			break;
		case OP_LOADU1_MEM:
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, FALSE);
			break;
		case OP_LOADU2_MEM:
			amd64_mov_reg_imm (code, ins->dreg, ins->inst_imm);
			amd64_widen_membase (code, ins->dreg, ins->dreg, 0, FALSE, TRUE);
			break;
		case OP_LOAD_MEMBASE:
		case OP_LOADI8_MEMBASE:
			g_assert (amd64_is_imm32 (ins->inst_offset));
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, sizeof (gpointer));
			break;
		case OP_LOADI4_MEMBASE:
			amd64_movsxd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_LOADU4_MEMBASE:
			amd64_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_LOADU1_MEMBASE:
			/* The cpu zero extends the result into 64 bits */
			amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE, 4);
			break;
		case OP_LOADI1_MEMBASE:
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
			break;
		case OP_LOADU2_MEMBASE:
			/* The cpu zero extends the result into 64 bits */
			amd64_widen_membase_size (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE, 4);
			break;
		case OP_LOADI2_MEMBASE:
			amd64_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
			break;
		case OP_AMD64_LOADI8_MEMINDEX:
			amd64_mov_reg_memindex_size (code, ins->dreg, ins->inst_basereg, 0, ins->inst_indexreg, 0, 8);
			break;
		case OP_LCONV_TO_I1:
		case OP_ICONV_TO_I1:
		case OP_SEXT_I1:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
			break;
		case OP_LCONV_TO_I2:
		case OP_ICONV_TO_I2:
		case OP_SEXT_I2:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
			break;
		case OP_LCONV_TO_U1:
		case OP_ICONV_TO_U1:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
			break;
		case OP_LCONV_TO_U2:
		case OP_ICONV_TO_U2:
			amd64_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
			break;
		case OP_ZEXT_I4:
			/* Clean out the upper word */
			amd64_mov_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_SEXT_I4:
			amd64_movsxd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_COMPARE:
		case OP_LCOMPARE:
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			break;
		case OP_COMPARE_IMM:
		case OP_LCOMPARE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_COMPARE_REG_MEMBASE:
			amd64_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
			break;
		case OP_X86_TEST_NULL:
			amd64_test_reg_reg_size (code, ins->sreg1, ins->sreg1, 4);
			break;
		case OP_AMD64_TEST_NULL:
			amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
			break;
		case OP_X86_ADD_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_SUB_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_AND_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_OR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_XOR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_X86_ADD_MEMBASE_IMM:
			/* FIXME: Make a 64 version too */
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_X86_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_X86_INC_MEMBASE:
			amd64_inc_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_INC_REG:
			amd64_inc_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_DEC_MEMBASE:
			amd64_dec_membase_size (code, ins->inst_basereg, ins->inst_offset, 4);
			break;
		case OP_X86_DEC_REG:
			amd64_dec_reg_size (code, ins->dreg, 4);
			break;
		case OP_X86_MUL_REG_MEMBASE:
		case OP_X86_MUL_MEMBASE_REG:
			amd64_imul_reg_membase_size (code, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 4);
			break;
		case OP_AMD64_ICOMPARE_MEMBASE_IMM:
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_COMPARE_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_COMPARE_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_X86_COMPARE_MEMBASE8_IMM:
			amd64_alu_membase8_imm_size (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 4);
			break;
		case OP_AMD64_ICOMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 4);
			break;
		case OP_AMD64_COMPARE_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_ADD_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_SUB_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_AND_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_OR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_XOR_REG_MEMBASE:
			amd64_alu_reg_membase_size (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset, 8);
			break;
		case OP_AMD64_ADD_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_AND_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_OR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_REG:
			amd64_alu_membase_reg_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2, 8);
			break;
		case OP_AMD64_ADD_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_SUB_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_AND_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_OR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_AMD64_XOR_MEMBASE_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_membase_imm_size (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm, 8);
			break;
		case OP_BREAK:
			amd64_breakpoint (code);
			break;
		case OP_RELAXED_NOP:
			x86_prefix (code, X86_REP_PREFIX);
			x86_nop (code);
			break;
		case OP_DUMMY_STORE:
		case OP_NOT_REACHED:
			break;
		case OP_ADDCC:
		case OP_LADD:
			amd64_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
			break;
		case OP_ADC:
			amd64_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
			break;
		case OP_ADD_IMM:
		case OP_LADD_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
			break;
		case OP_ADC_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
			break;
		case OP_SUBCC:
		case OP_LSUB:
			amd64_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
			break;
		case OP_SBB:
			amd64_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
			break;
		case OP_SUB_IMM:
		case OP_LSUB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
			break;
		case OP_SBB_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
			break;
		case OP_LAND:
			amd64_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
			break;
		case OP_AND_IMM:
		case OP_LAND_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
			break;
		case OP_LMUL:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			break;
		case OP_MUL_IMM:
		case OP_LMUL_IMM:
		case OP_IMUL_IMM: {
			guint32 size = (ins->opcode == OP_IMUL_IMM) ? 4 : 8;

			switch (ins->inst_imm) {
			case 2:
				/* MOV r1, r2 */
				/* ADD r1, r1 */
				if (ins->dreg != ins->sreg1)
					amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, size);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 3:
				/* LEA r1, [r2 + r2*2] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				break;
			case 5:
				/* LEA r1, [r2 + r2*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				break;
			case 6:
				/* LEA r1, [r2 + r2*2] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 9:
				/* LEA r1, [r2 + r2*8] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
				break;
			case 10:
				/* LEA r1, [r2 + r2*4] */
				/* ADD r1, r1          */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
				break;
			case 12:
				/* LEA r1, [r2 + r2*2] */
				/* SHL r1, 2           */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				break;
			case 25:
				/* LEA r1, [r2 + r2*4] */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			case 100:
				/* LEA r1, [r2 + r2*4] */
				/* SHL r1, 2           */
				/* LEA r1, [r1 + r1*4] */
				amd64_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
				amd64_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
				amd64_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
				break;
			default:
				amd64_imul_reg_reg_imm_size (code, ins->dreg, ins->sreg1, ins->inst_imm, size);
				break;
			}
			break;
		}
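		/*
		 * Illustration only (plain C, not emitted code): the LEA/ADD/SHL
		 * sequences above are ordinary strength reduction, e.g.
		 *     t = x + x*4;          x*10  == t + t;
		 *     t = (x + x*4) << 2;   x*100 == t + t*4;
		 * so the common small multipliers never need an IMUL.
		 */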
		case OP_LDIV:
		case OP_LREM:
			/* Regalloc magic makes the div/rem cases the same */
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq (code);
				amd64_div_membase (code, AMD64_RSP, -8, TRUE);
			} else {
				amd64_cdq (code);
				amd64_div_reg (code, ins->sreg2, TRUE);
			}
			break;
		case OP_LDIV_UN:
		case OP_LREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase (code, AMD64_RSP, -8, FALSE);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg (code, ins->sreg2, FALSE);
			}
			break;
		case OP_IDIV:
		case OP_IREM:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_cdq_size (code, 4);
				amd64_div_membase_size (code, AMD64_RSP, -8, TRUE, 4);
			} else {
				amd64_cdq_size (code, 4);
				amd64_div_reg_size (code, ins->sreg2, TRUE, 4);
			}
			break;
		case OP_IDIV_UN:
		case OP_IREM_UN:
			if (ins->sreg2 == AMD64_RDX) {
				amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RDX, 8);
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_membase_size (code, AMD64_RSP, -8, FALSE, 4);
			} else {
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RDX, AMD64_RDX);
				amd64_div_reg_size (code, ins->sreg2, FALSE, 4);
			}
			break;
		case OP_IREM_IMM: {
			int power = mono_is_power_of_two (ins->inst_imm);

			g_assert (ins->sreg1 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);
			g_assert (power >= 0);

			/* Based on gcc code */

			/* Add compensation for negative dividends */
			amd64_mov_reg_reg_size (code, AMD64_RDX, AMD64_RAX, 4);
			amd64_shift_reg_imm_size (code, X86_SAR, AMD64_RDX, 31, 4);
			amd64_shift_reg_imm_size (code, X86_SHR, AMD64_RDX, 32 - power, 4);
			amd64_alu_reg_reg_size (code, X86_ADD, AMD64_RAX, AMD64_RDX, 4);
			/* Compute remainder */
			amd64_alu_reg_imm_size (code, X86_AND, AMD64_RAX, (1 << power) - 1, 4);
			/* Remove compensation */
			amd64_alu_reg_reg_size (code, X86_SUB, AMD64_RAX, AMD64_RDX, 4);
			break;
		}
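		/*
		 * Illustration only (plain C version of the sequence above, for a
		 * positive power-of-two divisor 1 << power):
		 *     edx = eax >> 31;                     // SAR: 0 or -1
		 *     edx = (guint32)edx >> (32 - power);  // SHR: 0 or 2^power - 1
		 *     eax = ((eax + edx) & ((1 << power) - 1)) - edx;
		 * The compensation makes the masked remainder follow the usual C
		 * semantics for negative dividends (result keeps the dividend's sign).
		 */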
		case OP_LMUL_OVF:
			amd64_imul_reg_reg (code, ins->sreg1, ins->sreg2);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case OP_LOR:
			amd64_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
			break;
		case OP_OR_IMM:
		case OP_LOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LXOR:
			amd64_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
			break;
		case OP_XOR_IMM:
		case OP_LXOR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
			break;
		case OP_LSHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHL, ins->dreg);
			break;
		case OP_LSHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SAR, ins->dreg);
			break;
		case OP_SHR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_LSHR_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
			break;
		case OP_SHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_LSHR_UN_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
			break;
		case OP_LSHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg (code, X86_SHR, ins->dreg);
			break;
		case OP_SHL_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_LSHL_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
			break;
		case OP_IADDCC:
		case OP_IADD:
			amd64_alu_reg_reg_size (code, X86_ADD, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADC:
			amd64_alu_reg_reg_size (code, X86_ADC, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IADD_IMM:
			amd64_alu_reg_imm_size (code, X86_ADD, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IADC_IMM:
			amd64_alu_reg_imm_size (code, X86_ADC, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISUBCC:
		case OP_ISUB:
			amd64_alu_reg_reg_size (code, X86_SUB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISBB:
			amd64_alu_reg_reg_size (code, X86_SBB, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ISUB_IMM:
			amd64_alu_reg_imm_size (code, X86_SUB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISBB_IMM:
			amd64_alu_reg_imm_size (code, X86_SBB, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IAND:
			amd64_alu_reg_reg_size (code, X86_AND, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IAND_IMM:
			amd64_alu_reg_imm_size (code, X86_AND, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IOR:
			amd64_alu_reg_reg_size (code, X86_OR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IOR_IMM:
			amd64_alu_reg_imm_size (code, X86_OR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IXOR:
			amd64_alu_reg_reg_size (code, X86_XOR, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IXOR_IMM:
			amd64_alu_reg_imm_size (code, X86_XOR, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_INEG:
			amd64_neg_reg_size (code, ins->sreg1, 4);
			break;
		case OP_INOT:
			amd64_not_reg_size (code, ins->sreg1, 4);
			break;
		case OP_ISHL:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHL, ins->dreg, 4);
			break;
		case OP_ISHR:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SAR, ins->dreg, 4);
			break;
		case OP_ISHR_IMM:
			amd64_shift_reg_imm_size (code, X86_SAR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN_IMM:
			amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_ISHR_UN:
			g_assert (ins->sreg2 == AMD64_RCX);
			amd64_shift_reg_size (code, X86_SHR, ins->dreg, 4);
			break;
		case OP_ISHL_IMM:
			amd64_shift_reg_imm_size (code, X86_SHL, ins->dreg, ins->inst_imm, 4);
			break;
		case OP_IMUL:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_IMUL_OVF:
			amd64_imul_reg_reg_size (code, ins->sreg1, ins->sreg2, 4);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		case OP_IMUL_OVF_UN:
		case OP_LMUL_OVF_UN: {
			/* the mul operation and the exception check should most likely be split */
			int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
			int size = (ins->opcode == OP_IMUL_OVF_UN) ? 4 : 8;
			/*g_assert (ins->sreg2 == X86_EAX);
			g_assert (ins->dreg == X86_EAX);*/
			if (ins->sreg2 == X86_EAX) {
				non_eax_reg = ins->sreg1;
			} else if (ins->sreg1 == X86_EAX) {
				non_eax_reg = ins->sreg2;
			} else {
				/* no need to save since we're going to store to it anyway */
				if (ins->dreg != X86_EAX) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
				amd64_mov_reg_reg (code, X86_EAX, ins->sreg1, size);
				non_eax_reg = ins->sreg2;
			}
			if (ins->dreg == X86_EDX) {
				if (!saved_eax) {
					saved_eax = TRUE;
					amd64_push_reg (code, X86_EAX);
				}
			} else {
				saved_edx = TRUE;
				amd64_push_reg (code, X86_EDX);
			}
			amd64_mul_reg_size (code, non_eax_reg, FALSE, size);
			/* save before the check since pop and mov don't change the flags */
			if (ins->dreg != X86_EAX)
				amd64_mov_reg_reg (code, ins->dreg, X86_EAX, size);
			if (saved_edx)
				amd64_pop_reg (code, X86_EDX);
			if (saved_eax)
				amd64_pop_reg (code, X86_EAX);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
			break;
		}
		case OP_ICOMPARE:
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_ICOMPARE_IMM:
			amd64_alu_reg_imm_size (code, X86_CMP, ins->sreg1, ins->inst_imm, 4);
			break;
		case OP_IBEQ:
		case OP_IBLT:
		case OP_IBGT:
		case OP_IBGE:
		case OP_IBLE:
		case OP_IBNE_UN:
		case OP_IBLT_UN:
		case OP_IBGT_UN:
		case OP_IBGE_UN:
		case OP_IBLE_UN:
		case OP_LBEQ:
		case OP_LBLT:
		case OP_LBGT:
		case OP_LBGE:
		case OP_LBLE:
		case OP_LBNE_UN:
		case OP_LBLT_UN:
		case OP_LBGT_UN:
		case OP_LBGE_UN:
		case OP_LBLE_UN:
			EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			break;
		case OP_CMOV_IEQ:
		case OP_CMOV_IGE:
		case OP_CMOV_IGT:
		case OP_CMOV_ILE:
		case OP_CMOV_ILT:
		case OP_CMOV_INE_UN:
		case OP_CMOV_IGE_UN:
		case OP_CMOV_IGT_UN:
		case OP_CMOV_ILE_UN:
		case OP_CMOV_ILT_UN:
		case OP_CMOV_LEQ:
		case OP_CMOV_LGE:
		case OP_CMOV_LGT:
		case OP_CMOV_LLE:
		case OP_CMOV_LLT:
		case OP_CMOV_LNE_UN:
		case OP_CMOV_LGE_UN:
		case OP_CMOV_LGT_UN:
		case OP_CMOV_LLE_UN:
		case OP_CMOV_LLT_UN:
			g_assert (ins->dreg == ins->sreg1);
			/* This needs to operate on 64 bit values */
			amd64_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
			break;
		case OP_LNOT:
			amd64_not_reg (code, ins->sreg1);
			break;
		case OP_LNEG:
			amd64_neg_reg (code, ins->sreg1);
			break;
		case OP_ICONST:
		case OP_I8CONST:
			if ((((guint64)ins->inst_c0) >> 32) == 0)
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 4);
			else
				amd64_mov_reg_imm_size (code, ins->dreg, ins->inst_c0, 8);
			break;
		case OP_AOTCONST:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_membase (code, ins->dreg, AMD64_RIP, 0, 8);
			break;
		case OP_JUMP_TABLE:
			mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
			amd64_mov_reg_imm_size (code, ins->dreg, 0, 8);
			break;
		case OP_MOVE:
			amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, sizeof (gpointer));
			break;
		case OP_AMD64_SET_XMMREG_R4: {
			amd64_sse_cvtsd2ss_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		}
		case OP_AMD64_SET_XMMREG_R8: {
			if (ins->dreg != ins->sreg1)
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		}
		case OP_JMP: {
			/*
			 * Note: this 'frame destruction' logic is useful for tail calls, too.
			 * Keep in sync with the code in emit_epilog.
			 */
			int pos = 0;

			/* FIXME: no tracing support... */
			if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
				code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);

			g_assert (!cfg->method->save_lmf);

			if (cfg->arch.omit_fp) {
				guint32 save_offset = 0;
				/* Pop callee-saved registers */
				for (i = 0; i < AMD64_NREG; ++i)
					if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
						amd64_mov_reg_membase (code, i, AMD64_RSP, save_offset, 8);
						save_offset += 8;
					}
				amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, cfg->arch.stack_alloc_size);
			} else {
				for (i = 0; i < AMD64_NREG; ++i)
					if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i)))
						pos -= sizeof (gpointer);

				if (pos)
					amd64_lea_membase (code, AMD64_RSP, AMD64_RBP, pos);

				/* Pop registers in reverse order */
				for (i = AMD64_NREG - 1; i > 0; --i)
					if (AMD64_IS_CALLEE_SAVED_REG (i) && (cfg->used_int_regs & (1 << i))) {
						amd64_pop_reg (code, i);
					}
			}

			offset = code - cfg->native_code;
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
			if (cfg->compile_aot)
				amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, 8);
			else
				amd64_set_reg_template (code, AMD64_R11);
			amd64_jump_reg (code, AMD64_R11);
			break;
		}
		case OP_CHECK_THIS:
			/* ensure ins->sreg1 is not NULL */
			amd64_alu_membase_imm_size (code, X86_CMP, ins->sreg1, 0, 0, 4);
			break;
		case OP_ARGLIST: {
			amd64_lea_membase (code, AMD64_R11, cfg->frame_reg, cfg->sig_cookie);
			amd64_mov_membase_reg (code, ins->sreg1, 0, AMD64_R11, 8);
			break;
		}
		case OP_FCALL:
		case OP_LCALL:
		case OP_VCALL:
		case OP_VCALL2:
		case OP_VOIDCALL:
		case OP_CALL:
			call = (MonoCallInst*)ins;
			/*
			 * The AMD64 ABI forces callers to know about varargs.
			 */
			if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke))
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
			else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
				/*
				 * Since the unmanaged calling convention doesn't contain a
				 * 'vararg' entry, we have to treat every pinvoke call as a
				 * potential vararg call.
				 */
				guint32 nregs = 0;
				for (i = 0; i < AMD64_XMM_NREG; ++i)
					if (call->used_fregs & (1 << i))
						nregs ++;
				if (!nregs)
					amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
				else
					amd64_mov_reg_imm (code, AMD64_RAX, nregs);
			}

			if (ins->flags & MONO_INST_HAS_METHOD)
				code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method, FALSE);
			else
				code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr, FALSE);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
				amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_REG:
		case OP_LCALL_REG:
		case OP_VCALL_REG:
		case OP_VCALL2_REG:
		case OP_VOIDCALL_REG:
		case OP_CALL_REG:
			call = (MonoCallInst*)ins;

			if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
				amd64_mov_reg_reg (code, AMD64_R11, ins->sreg1, 8);
				ins->sreg1 = AMD64_R11;
			}

			/*
			 * The AMD64 ABI forces callers to know about varargs.
			 */
			if ((call->signature->call_convention == MONO_CALL_VARARG) && (call->signature->pinvoke)) {
				if (ins->sreg1 == AMD64_RAX) {
					amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
					ins->sreg1 = AMD64_R11;
				}
				amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
			} else if ((cfg->method->wrapper_type == MONO_WRAPPER_MANAGED_TO_NATIVE) && (cfg->method->klass->image != mono_defaults.corlib)) {
				/*
				 * Since the unmanaged calling convention doesn't contain a
				 * 'vararg' entry, we have to treat every pinvoke call as a
				 * potential vararg call.
				 */
				guint32 nregs = 0;
				for (i = 0; i < AMD64_XMM_NREG; ++i)
					if (call->used_fregs & (1 << i))
						nregs ++;
				if (ins->sreg1 == AMD64_RAX) {
					amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
					ins->sreg1 = AMD64_R11;
				}
				if (!nregs)
					amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
				else
					amd64_mov_reg_imm (code, AMD64_RAX, nregs);
			}

			amd64_call_reg (code, ins->sreg1);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
				amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_FCALL_MEMBASE:
		case OP_LCALL_MEMBASE:
		case OP_VCALL_MEMBASE:
		case OP_VCALL2_MEMBASE:
		case OP_VOIDCALL_MEMBASE:
		case OP_CALL_MEMBASE:
			call = (MonoCallInst*)ins;

			if (AMD64_IS_ARGUMENT_REG (ins->sreg1)) {
				/*
				 * Can't use R11 because it is clobbered by the trampoline
				 * code, and the reg value is needed by get_vcall_slot_addr.
				 */
				amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
				ins->sreg1 = AMD64_RAX;
			}

			if (call->method && ins->inst_offset < 0) {
				gssize val;

				/*
				 * This is a possible IMT call so save the IMT method in the proper
				 * register. We don't use the generic code in method-to-ir.c, because
				 * we need to disassemble this in get_vcall_slot_addr (), so we have to
				 * maintain control over the layout of the code.
				 * Also put the base reg in %rax to simplify find_imt_method ().
				 */
				if (ins->sreg1 != AMD64_RAX) {
					amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
					ins->sreg1 = AMD64_RAX;
				}
				val = (gssize)(gpointer)call->method;

				// FIXME: Generics sharing
				if ((((guint64)val) >> 32) == 0)
					amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 4);
				else
					amd64_mov_reg_imm_size (code, MONO_ARCH_IMT_REG, val, 8);
			}

			amd64_call_membase (code, ins->sreg1, ins->inst_offset);
			if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature->call_convention))
				amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, call->stack_usage);
			code = emit_move_return_value (cfg, ins, code);
			break;
		case OP_AMD64_SAVE_SP_TO_LMF:
			amd64_mov_membase_reg (code, cfg->frame_reg, cfg->arch.lmf_offset + G_STRUCT_OFFSET (MonoLMF, rsp), AMD64_RSP, 8);
			break;
		case OP_X86_PUSH:
			amd64_push_reg (code, ins->sreg1);
			break;
		case OP_X86_PUSH_IMM:
			g_assert (amd64_is_imm32 (ins->inst_imm));
			amd64_push_imm (code, ins->inst_imm);
			break;
		case OP_X86_PUSH_MEMBASE:
			amd64_push_membase (code, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_X86_PUSH_OBJ: {
			int size = ALIGN_TO (ins->inst_imm, 8);

			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
			amd64_push_reg (code, AMD64_RDI);
			amd64_push_reg (code, AMD64_RSI);
			amd64_push_reg (code, AMD64_RCX);
			if (ins->inst_offset)
				amd64_lea_membase (code, AMD64_RSI, ins->inst_basereg, ins->inst_offset);
			else
				amd64_mov_reg_reg (code, AMD64_RSI, ins->inst_basereg, 8);
			amd64_lea_membase (code, AMD64_RDI, AMD64_RSP, (3 * 8));
			amd64_mov_reg_imm (code, AMD64_RCX, (size >> 3));
			amd64_cld (code);
			amd64_prefix (code, X86_REP_PREFIX);
			amd64_movsd (code);
			amd64_pop_reg (code, AMD64_RCX);
			amd64_pop_reg (code, AMD64_RSI);
			amd64_pop_reg (code, AMD64_RDI);
			break;
		}
		case OP_X86_LEA:
			amd64_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
			break;
		case OP_X86_LEA_MEMBASE:
			amd64_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
			break;
		case OP_X86_XCHG:
			amd64_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
			break;
		case OP_LOCALLOC:
			/* keep alignment */
			amd64_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_FRAME_ALIGNMENT - 1);
			amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_FRAME_ALIGNMENT - 1));
			code = mono_emit_stack_alloc (code, ins);
			amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			break;
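		/*
		 * The "keep alignment" step above is the usual add-then-mask
		 * round-up, in C (align must be a power of two):
		 *     size = (size + align - 1) & ~(align - 1);
		 */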
		case OP_LOCALLOC_IMM: {
			guint32 size = ins->inst_imm;

			size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);

			if (ins->flags & MONO_INST_INIT) {
				if (size < 64) {
					amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
					amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);

					for (i = 0; i < size; i += 8)
						amd64_mov_membase_reg (code, AMD64_RSP, i, ins->dreg, 8);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				} else {
					amd64_mov_reg_imm (code, ins->dreg, size);
					ins->sreg1 = ins->dreg;

					code = mono_emit_stack_alloc (code, ins);
					amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
				}
			} else {
				amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, size);
				amd64_mov_reg_reg (code, ins->dreg, AMD64_RSP, 8);
			}
			break;
		}
		case OP_THROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
					  (gpointer)"mono_arch_throw_exception", FALSE);
			break;
		}
		case OP_RETHROW: {
			amd64_mov_reg_reg (code, AMD64_ARG_REG1, ins->sreg1, 8);
			code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
					  (gpointer)"mono_arch_rethrow_exception", FALSE);
			break;
		}
		case OP_CALL_HANDLER:
			/* Align stack */
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 8);
			mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
			amd64_call_imm (code, 0);
			/* Restore stack alignment */
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 8);
			break;
		case OP_START_HANDLER: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, AMD64_RSP, 8);
			break;
		}
		case OP_ENDFINALLY: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
			amd64_ret (code);
			break;
		}
		case OP_ENDFILTER: {
			MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
			amd64_mov_reg_membase (code, AMD64_RSP, spvar->inst_basereg, spvar->inst_offset, 8);
			/* The local allocator will put the result into RAX */
			amd64_ret (code);
			break;
		}
		case OP_LABEL:
			ins->inst_c0 = code - cfg->native_code;
			break;
		case OP_BR:
			//g_print ("target: %p, next: %p, curr: %p, last: %p\n", ins->inst_target_bb, bb->next_bb, ins, bb->last_ins);
			//if ((ins->inst_target_bb == bb->next_bb) && ins == bb->last_ins)
			//	break;
			if (ins->flags & MONO_INST_BRLABEL) {
				if (ins->inst_i0->inst_c0) {
					amd64_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
						x86_jump8 (code, 0);
					else
						x86_jump32 (code, 0);
				}
			} else {
				if (ins->inst_target_bb->native_offset) {
					amd64_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
				} else {
					mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
					if ((cfg->opt & MONO_OPT_BRANCH) &&
					    x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
						x86_jump8 (code, 0);
					else
						x86_jump32 (code, 0);
				}
			}
			break;
		case OP_BR_REG:
			amd64_jump_reg (code, ins->sreg1);
			break;
		case OP_CEQ:
		case OP_LCEQ:
		case OP_ICEQ:
		case OP_CLT:
		case OP_LCLT:
		case OP_ICLT:
		case OP_CGT:
		case OP_LCGT:
		case OP_ICGT:
		case OP_CLT_UN:
		case OP_LCLT_UN:
		case OP_ICLT_UN:
		case OP_CGT_UN:
		case OP_LCGT_UN:
		case OP_ICGT_UN:
			amd64_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
			amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
			break;
		case OP_COND_EXC_EQ:
		case OP_COND_EXC_NE_UN:
		case OP_COND_EXC_LT:
		case OP_COND_EXC_LT_UN:
		case OP_COND_EXC_GT:
		case OP_COND_EXC_GT_UN:
		case OP_COND_EXC_GE:
		case OP_COND_EXC_GE_UN:
		case OP_COND_EXC_LE:
		case OP_COND_EXC_LE_UN:
		case OP_COND_EXC_IEQ:
		case OP_COND_EXC_INE_UN:
		case OP_COND_EXC_ILT:
		case OP_COND_EXC_ILT_UN:
		case OP_COND_EXC_IGT:
		case OP_COND_EXC_IGT_UN:
		case OP_COND_EXC_IGE:
		case OP_COND_EXC_IGE_UN:
		case OP_COND_EXC_ILE:
		case OP_COND_EXC_ILE_UN:
			EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
			break;
		case OP_COND_EXC_OV:
		case OP_COND_EXC_NO:
		case OP_COND_EXC_C:
		case OP_COND_EXC_NC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ],
						    (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
			break;
		case OP_COND_EXC_IOV:
		case OP_COND_EXC_INO:
		case OP_COND_EXC_IC:
		case OP_COND_EXC_INC:
			EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ],
						    (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
			break;
		/* floating point opcodes */
		case OP_R8CONST: {
			double d = *(double *)ins->inst_p0;

			if ((d == 0.0) && (mono_signbit (d) == 0)) {
				amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
			}
			else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, ins->inst_p0);
				amd64_sse_movsd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
			}
			break;
		}
		case OP_R4CONST: {
			float f = *(float *)ins->inst_p0;

			if ((f == 0.0) && (mono_signbit (f) == 0)) {
				amd64_sse_xorpd_reg_reg (code, ins->dreg, ins->dreg);
			}
			else {
				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R4, ins->inst_p0);
				amd64_sse_movss_reg_membase (code, ins->dreg, AMD64_RIP, 0);
				amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			}
			break;
		}
		case OP_STORER8_MEMBASE_REG:
			amd64_sse_movsd_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1);
			break;
		case OP_LOADR8_SPILL_MEMBASE:
			g_assert_not_reached ();
			break;
		case OP_LOADR8_MEMBASE:
			amd64_sse_movsd_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			break;
		case OP_STORER4_MEMBASE_REG:
			/* This requires a double->single conversion */
			amd64_sse_cvtsd2ss_reg_reg (code, AMD64_XMM15, ins->sreg1);
			amd64_sse_movss_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, AMD64_XMM15);
			break;
		case OP_LOADR4_MEMBASE:
			amd64_sse_movss_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset);
			amd64_sse_cvtss2sd_reg_reg (code, ins->dreg, ins->dreg);
			break;
		case OP_ICONV_TO_R4: /* FIXME: change precision */
		case OP_ICONV_TO_R8:
			amd64_sse_cvtsi2sd_reg_reg_size (code, ins->dreg, ins->sreg1, 4);
			break;
		case OP_LCONV_TO_R4: /* FIXME: change precision */
		case OP_LCONV_TO_R8:
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_FCONV_TO_R4:
			/* FIXME: nothing to do ?? */
			break;
		case OP_FCONV_TO_I1:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, TRUE);
			break;
		case OP_FCONV_TO_U1:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 1, FALSE);
			break;
		case OP_FCONV_TO_I2:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, TRUE);
			break;
		case OP_FCONV_TO_U2:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 2, FALSE);
			break;
		case OP_FCONV_TO_U4:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE);
			break;
		case OP_FCONV_TO_I4:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, TRUE);
			break;
		case OP_FCONV_TO_I8:
			code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 8, TRUE);
			break;
		case OP_LCONV_TO_R_UN: {
			guint8 *br [2];

			/* Based on gcc code */
			amd64_test_reg_reg (code, ins->sreg1, ins->sreg1);
			br [0] = code; x86_branch8 (code, X86_CC_S, 0, TRUE);

			/* Positive case */
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, ins->sreg1);
			br [1] = code; x86_jump8 (code, 0);
			amd64_patch (br [0], code);

			/* Negative case */
			/* Save to the red zone */
			amd64_mov_membase_reg (code, AMD64_RSP, -8, AMD64_RAX, 8);
			amd64_mov_membase_reg (code, AMD64_RSP, -16, AMD64_RCX, 8);
			amd64_mov_reg_reg (code, AMD64_RCX, ins->sreg1, 8);
			amd64_mov_reg_reg (code, AMD64_RAX, ins->sreg1, 8);
			amd64_alu_reg_imm (code, X86_AND, AMD64_RCX, 1);
			amd64_shift_reg_imm (code, X86_SHR, AMD64_RAX, 1);
			amd64_alu_reg_imm (code, X86_OR, AMD64_RAX, AMD64_RCX);
			amd64_sse_cvtsi2sd_reg_reg (code, ins->dreg, AMD64_RAX);
			amd64_sse_addsd_reg_reg (code, ins->dreg, ins->dreg);
			/* Restore the saved registers */
			amd64_mov_reg_membase (code, AMD64_RCX, AMD64_RSP, -16, 8);
			amd64_mov_reg_membase (code, AMD64_RAX, AMD64_RSP, -8, 8);
			amd64_patch (br [1], code);
			break;
		}
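		/*
		 * Illustration only: the branchy sequence above handles
		 * unsigned 64-bit -> double when the sign bit is set, since
		 * cvtsi2sd only accepts signed input. Roughly, in C:
		 *     if ((gint64)x >= 0)
		 *         d = (double)(gint64)x;
		 *     else
		 *         d = (double)(gint64)((x >> 1) | (x & 1)) * 2.0;
		 * Halving with the low bit folded back in keeps the rounding correct.
		 */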
		case OP_LCONV_TO_OVF_U4:
			amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_LT, TRUE, "OverflowException");
			amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
			break;
		case OP_LCONV_TO_OVF_I4_UN:
			amd64_alu_reg_imm (code, X86_CMP, ins->sreg1, 0x7fffffff);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_GT, FALSE, "OverflowException");
			amd64_mov_reg_reg (code, ins->dreg, ins->sreg1, 8);
			break;
		case OP_FMOVE:
			if (ins->dreg != ins->sreg1)
				amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
			break;
		case OP_FADD:
			amd64_sse_addsd_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_FSUB:
			amd64_sse_subsd_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_FMUL:
			amd64_sse_mulsd_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_FDIV:
			amd64_sse_divsd_reg_reg (code, ins->dreg, ins->sreg2);
			break;
		case OP_FNEG: {
			static double r8_0 = -0.0;

			g_assert (ins->sreg1 == ins->dreg);

			mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &r8_0);
			amd64_sse_xorpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
			break;
		}
		case OP_SIN:
			EMIT_SSE2_FPFUNC (code, fsin, ins->dreg, ins->sreg1);
			break;
		case OP_COS:
			EMIT_SSE2_FPFUNC (code, fcos, ins->dreg, ins->sreg1);
			break;
		case OP_ABS: {
			static guint64 d = 0x7fffffffffffffffUL;

			g_assert (ins->sreg1 == ins->dreg);

			mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_R8, &d);
			amd64_sse_andpd_reg_membase (code, ins->dreg, AMD64_RIP, 0);
			break;
		}
		case OP_SQRT:
			EMIT_SSE2_FPFUNC (code, fsqrt, ins->dreg, ins->sreg1);
			break;
		case OP_IMIN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMIN_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMAX:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_IMAX_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg_size (code, X86_CMP, ins->sreg1, ins->sreg2, 4);
			amd64_cmov_reg_size (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2, 4);
			break;
		case OP_LMIN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_LMIN_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
			break;
		case OP_LMAX:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
			break;
		case OP_LMAX_UN:
			g_assert (cfg->opt & MONO_OPT_CMOV);
			g_assert (ins->dreg == ins->sreg1);
			amd64_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
			amd64_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
			break;
		case OP_FCOMPARE:
			/*
			 * The two arguments are swapped because the fbranch instructions
			 * depend on this for the non-sse case to work.
			 */
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			break;
		case OP_FCEQ: {
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			guchar *unordered_check;
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg1, ins->sreg2);
			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
			amd64_patch (unordered_check, code);
			break;
		}
		case OP_FCLT:
		case OP_FCLT_UN:
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			if (ins->opcode == OP_FCLT_UN) {
				guchar *unordered_check = code;
				guchar *jump_to_end;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
			} else {
				amd64_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
			}
			break;
		case OP_FCGT:
		case OP_FCGT_UN: {
			/* zeroing the register at the start results in
			 * shorter and faster code (we can also remove the widening op)
			 */
			guchar *unordered_check;
			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_reg (code, ins->sreg2, ins->sreg1);
			if (ins->opcode == OP_FCGT) {
				unordered_check = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
				amd64_patch (unordered_check, code);
			} else {
				amd64_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
			}
			break;
		}
		case OP_FCLT_MEMBASE:
		case OP_FCGT_MEMBASE:
		case OP_FCLT_UN_MEMBASE:
		case OP_FCGT_UN_MEMBASE:
		case OP_FCEQ_MEMBASE: {
			guchar *unordered_check, *jump_to_end;
			int x86_cond;

			amd64_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
			amd64_sse_comisd_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);

			switch (ins->opcode) {
			case OP_FCEQ_MEMBASE:
				x86_cond = X86_CC_EQ;
				break;
			case OP_FCLT_MEMBASE:
			case OP_FCLT_UN_MEMBASE:
				x86_cond = X86_CC_LT;
				break;
			case OP_FCGT_MEMBASE:
			case OP_FCGT_UN_MEMBASE:
				x86_cond = X86_CC_GT;
				break;
			default:
				g_assert_not_reached ();
			}

			unordered_check = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			amd64_set_reg (code, x86_cond, ins->dreg, FALSE);

			switch (ins->opcode) {
			case OP_FCEQ_MEMBASE:
			case OP_FCLT_MEMBASE:
			case OP_FCGT_MEMBASE:
				amd64_patch (unordered_check, code);
				break;
			case OP_FCLT_UN_MEMBASE:
			case OP_FCGT_UN_MEMBASE:
				jump_to_end = code;
				x86_jump8 (code, 0);
				amd64_patch (unordered_check, code);
				amd64_inc_reg (code, ins->dreg);
				amd64_patch (jump_to_end, code);
				break;
			default:
				break;
			}
			break;
		}
		case OP_FBEQ: {
			guchar *jump = code;
			x86_branch8 (code, X86_CC_P, 0, TRUE);
			EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
			amd64_patch (jump, code);
			break;
		}
		case OP_FBNE_UN:
			/* Branch if C013 != 100 */
			/* branch if !ZF or (PF|CF) */
			EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
			break;
		case OP_FBLT:
			EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
			break;
		case OP_FBLT_UN:
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
			break;
		case OP_FBGT:
		case OP_FBGT_UN:
			if (ins->opcode == OP_FBGT) {
				guchar *br1;

				/* skip branch if C1=1 */
				br1 = code;
				x86_branch8 (code, X86_CC_P, 0, FALSE);
				/* branch if (C0 | C3) = 1 */
				EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
				amd64_patch (br1, code);
				break;
			} else {
				EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
			}
			break;
		case OP_FBGE: {
			/* Branch if C013 == 100 or 001 */
			guchar *br1;

			/* skip branch if C1=1 */
			br1 = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			/* branch if (C0 | C3) = 1 */
			EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
			amd64_patch (br1, code);
			break;
		}
		case OP_FBGE_UN:
			/* Branch if C013 == 000 */
			EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
			break;
		case OP_FBLE: {
			/* Branch if C013=000 or 100 */
			guchar *br1;

			/* skip branch if C1=1 */
			br1 = code;
			x86_branch8 (code, X86_CC_P, 0, FALSE);
			/* branch if C0=0 */
			EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
			amd64_patch (br1, code);
			break;
		}
		case OP_FBLE_UN:
			/* Branch if C013 != 001 */
			EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
			EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
			break;
		case OP_CKFINITE:
			/* Transfer value to the fp stack */
			amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, 16);
			amd64_movsd_membase_reg (code, AMD64_RSP, 0, ins->sreg1);
			amd64_fld_membase (code, AMD64_RSP, 0, TRUE);

			amd64_push_reg (code, AMD64_RAX);
			amd64_fxam (code);
			amd64_fnstsw (code);
			amd64_alu_reg_imm (code, X86_AND, AMD64_RAX, 0x4100);
			amd64_alu_reg_imm (code, X86_CMP, AMD64_RAX, X86_FP_C0);
			amd64_pop_reg (code, AMD64_RAX);
			amd64_fstp (code, 0);
			EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
			amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, 16);
			break;
		case OP_TLS_GET:
			code = mono_amd64_emit_tls_get (code, ins->dreg, ins->inst_offset);
			break;
		case OP_MEMORY_BARRIER: {
			/* Not needed on amd64 */
			break;
		}
		case OP_ATOMIC_ADD_I4:
		case OP_ATOMIC_ADD_I8: {
			int dreg = ins->dreg;
			guint32 size = (ins->opcode == OP_ATOMIC_ADD_I4) ? 4 : 8;

			if (dreg == ins->inst_basereg)
				dreg = AMD64_R11;

			if (dreg != ins->sreg2)
				amd64_mov_reg_reg (code, ins->dreg, ins->sreg2, size);

			x86_prefix (code, X86_LOCK_PREFIX);
			amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);

			if (dreg != ins->dreg)
				amd64_mov_reg_reg (code, ins->dreg, dreg, size);
			break;
		}
		case OP_ATOMIC_ADD_NEW_I4:
		case OP_ATOMIC_ADD_NEW_I8: {
			int dreg = ins->dreg;
			guint32 size = (ins->opcode == OP_ATOMIC_ADD_NEW_I4) ? 4 : 8;

			if ((dreg == ins->sreg2) || (dreg == ins->inst_basereg))
				dreg = AMD64_R11;

			amd64_mov_reg_reg (code, dreg, ins->sreg2, size);
			amd64_prefix (code, X86_LOCK_PREFIX);
			amd64_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, size);
			/* dreg contains the old value, add with sreg2 value */
			amd64_alu_reg_reg_size (code, X86_ADD, dreg, ins->sreg2, size);

			if (ins->dreg != dreg)
				amd64_mov_reg_reg (code, ins->dreg, dreg, size);
			break;
		}
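		/*
		 * Semantics sketch (illustration only): both cases above boil
		 * down to an atomic fetch-and-add via LOCK XADD. OP_ATOMIC_ADD_NEW_*
		 * returns the updated value, so the old value fetched by XADD gets
		 * sreg2 added back in. Roughly, in GCC-builtin terms:
		 *     old = __sync_fetch_and_add (ptr, value);   // ATOMIC_ADD
		 *     new = __sync_add_and_fetch (ptr, value);   // ATOMIC_ADD_NEW
		 */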
			break;
		}
		case OP_ATOMIC_EXCHANGE_I4:
		case OP_ATOMIC_EXCHANGE_I8:
		case OP_ATOMIC_CAS_IMM_I4: {
			guchar *br [2];
			int sreg2 = ins->sreg2;
			int breg = ins->inst_basereg;
			guint32 size;
			gboolean need_push = FALSE, rdx_pushed = FALSE;

			if (ins->opcode == OP_ATOMIC_EXCHANGE_I8)
				size = 8;
			else
				size = 4;

			/*
			 * See http://msdn.microsoft.com/en-us/magazine/cc302329.aspx for
			 * an explanation of how this works.
			 */

			/* cmpxchg uses eax as comparand, need to make sure we can use it
			 * hack to overcome limits in x86 reg allocator
			 * (req: dreg == eax and sreg2 != eax and breg != eax)
			 */
			g_assert (ins->dreg == AMD64_RAX);

			if (breg == AMD64_RAX && ins->sreg2 == AMD64_RAX)
				/* Highly unlikely, but possible */
				need_push = TRUE;

			/* The pushes invalidate rsp */
			if ((breg == AMD64_RAX) || need_push) {
				amd64_mov_reg_reg (code, AMD64_R11, breg, 8);
				breg = AMD64_R11;
			}

			/* We need the EAX reg for the comparand */
			if (ins->sreg2 == AMD64_RAX) {
				if (breg != AMD64_R11) {
					amd64_mov_reg_reg (code, AMD64_R11, AMD64_RAX, 8);
					sreg2 = AMD64_R11;
				} else {
					g_assert (need_push);
					amd64_push_reg (code, AMD64_RDX);
					amd64_mov_reg_reg (code, AMD64_RDX, AMD64_RAX, size);
					sreg2 = AMD64_RDX;
					rdx_pushed = TRUE;
				}
			}

			if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
				if (ins->backend.data == NULL)
					amd64_alu_reg_reg (code, X86_XOR, AMD64_RAX, AMD64_RAX);
				else
					amd64_mov_reg_imm (code, AMD64_RAX, ins->backend.data);

				amd64_prefix (code, X86_LOCK_PREFIX);
				amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
			} else {
				amd64_mov_reg_membase (code, AMD64_RAX, breg, ins->inst_offset, size);

				br [0] = code; amd64_prefix (code, X86_LOCK_PREFIX);
				amd64_cmpxchg_membase_reg_size (code, breg, ins->inst_offset, sreg2, size);
				br [1] = code; amd64_branch8 (code, X86_CC_NE, -1, FALSE);
				amd64_patch (br [1], br [0]);
			}

			if (rdx_pushed)
				amd64_pop_reg (code, AMD64_RDX);
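			/*
			 * Background note (editor's summary): `lock cmpxchg m, r` implicitly uses
			 * RAX as the comparand and leaves the old memory value in RAX, which is
			 * why the register allocator is asked to place dreg in RAX and why
			 * breg/sreg2 are shuffled into R11/RDX above when they collide with it.
			 * For the plain exchange, the loop formed by br[0]/br[1] simply retries
			 * the cmpxchg until the value read into RAX is still current.
			 */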
			break;
		}
		case OP_LIVERANGE_START: {
			if (cfg->verbose_level > 1)
				printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
			MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
			break;
		}
		case OP_LIVERANGE_END: {
			if (cfg->verbose_level > 1)
				printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
			MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
			break;
		}
		default:
			g_warning ("unknown opcode %s in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__);
			g_assert_not_reached ();
		}

		if ((code - cfg->native_code - offset) > max_len) {
			g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %ld)",
				   mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
			g_assert_not_reached ();
		}

		last_offset = offset;
	}

	cfg->code_len = code - cfg->native_code;
}

#endif /* DISABLE_JIT */

void
mono_arch_register_lowlevel_calls (void)
{
	/* The signature doesn't matter */
	mono_register_jit_icall (mono_amd64_throw_exception, "mono_amd64_throw_exception", mono_create_icall_signature ("void"), TRUE);
}
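
/*
 * Editor's note: mono_amd64_throw_exception is registered as a JIT icall here,
 * presumably so that code emitted elsewhere in this backend can reference it
 * through the usual MONO_PATCH_INFO_INTERNAL_METHOD patching machinery. Since it
 * is only ever reached from generated code, the registered signature is
 * irrelevant, hence the dummy "void" signature above.
 */
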
void
mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
{
	MonoJumpInfo *patch_info;
	gboolean compile_aot = !run_cctors;

	for (patch_info = ji; patch_info; patch_info = patch_info->next) {
		unsigned char *ip = patch_info->ip.i + code;
		unsigned char *target;

		target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);

		if (compile_aot) {
			switch (patch_info->type) {
			case MONO_PATCH_INFO_BB:
			case MONO_PATCH_INFO_LABEL:
				break;
			default:
				/* No need to patch these */
				continue;
			}
		}

		switch (patch_info->type) {
		case MONO_PATCH_INFO_NONE:
			continue;
		case MONO_PATCH_INFO_METHOD_REL:
		case MONO_PATCH_INFO_R8:
		case MONO_PATCH_INFO_R4:
			g_assert_not_reached ();
			continue;
		case MONO_PATCH_INFO_BB:
			break;
		default:
			break;
		}

		/*
		 * Debug code to help track down problems where the target of a near call
		 * is not valid.
		 */
		if (amd64_is_near_call (ip)) {
			gint64 disp = (guint8*)target - (guint8*)ip;

			if (!amd64_is_imm32 (disp)) {
				printf ("TYPE: %d\n", patch_info->type);
				switch (patch_info->type) {
				case MONO_PATCH_INFO_INTERNAL_METHOD:
					printf ("V: %s\n", patch_info->data.name);
					break;
				case MONO_PATCH_INFO_METHOD_JUMP:
				case MONO_PATCH_INFO_METHOD:
					printf ("V: %s\n", patch_info->data.method->name);
					break;
				default:
					break;
				}
			}
		}

		amd64_patch (ip, (gpointer)target);
	}
}
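
/*
 * Editor's note on the near-call check above: a rel32 call/jump on amd64 can only
 * reach targets within a signed 32-bit displacement of the call site, so when the
 * resolved target is further away the debug block prints which patch type
 * produced it. Generated code and its targets are normally expected to be
 * allocated close enough together for amd64_is_imm32 (disp) to hold.
 */
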
static guint32
get_max_epilog_size (MonoCompile *cfg)
{
	int max_epilog_size = 16;

	if (cfg->method->save_lmf)
		max_epilog_size += 256;

	if (mono_jit_trace_calls != NULL)
		max_epilog_size += 50;

	if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
		max_epilog_size += 50;

	max_epilog_size += (AMD64_NREG * 2);

	return max_epilog_size;
}

/*
 * This macro is used for testing whether the unwinder works correctly at every point
 * where an async exception can happen.
 */
/* This will generate a SIGSEGV at the given point in the code */
#define async_exc_point(code) do { \
	if (mono_inject_async_exc_method && mono_method_desc_full_match (mono_inject_async_exc_method, cfg->method)) { \
		if (cfg->arch.async_point_count == mono_inject_async_exc_pos) \
			amd64_mov_reg_mem (code, AMD64_RAX, 0, 4); \
		cfg->arch.async_point_count ++; \
	} \
} while (0)
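
/*
 * Editor's note on async_exc_point: when the test hooks
 * mono_inject_async_exc_method/mono_inject_async_exc_pos select the method being
 * compiled, the Nth expansion of the macro emits a load from address 0, i.e. a
 * deliberate SIGSEGV, so the unwinder can be exercised at that exact spot in the
 * prolog/epilog. In normal runs the condition is false and nothing is emitted.
 */
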
4392 mono_arch_emit_prolog (MonoCompile
*cfg
)
4394 MonoMethod
*method
= cfg
->method
;
4396 MonoMethodSignature
*sig
;
4398 int alloc_size
, pos
, max_offset
, i
, cfa_offset
, quad
, max_epilog_size
;
4401 gint32 lmf_offset
= cfg
->arch
.lmf_offset
;
4402 gboolean args_clobbered
= FALSE
;
4403 gboolean trace
= FALSE
;
4405 cfg
->code_size
= MAX (((MonoMethodNormal
*)method
)->header
->code_size
* 4, 10240);
4407 code
= cfg
->native_code
= g_malloc (cfg
->code_size
);
4409 if (mono_jit_trace_calls
!= NULL
&& mono_trace_eval (method
))
4412 /* Amount of stack space allocated by register saving code */
4415 /* Offset between RSP and the CFA */
4419 * The prolog consists of the following parts:
4421 * - push rbp, mov rbp, rsp
4422 * - save callee saved regs using pushes
4424 * - save rgctx if needed
4425 * - save lmf if needed
4428 * - save rgctx if needed
4429 * - save lmf if needed
4430 * - save callee saved regs using moves
4435 mono_emit_unwind_op_def_cfa (cfg
, code
, AMD64_RSP
, 8);
4436 // IP saved at CFA - 8
4437 mono_emit_unwind_op_offset (cfg
, code
, AMD64_RIP
, -cfa_offset
);
4438 async_exc_point (code
);
4440 if (!cfg
->arch
.omit_fp
) {
4441 amd64_push_reg (code
, AMD64_RBP
);
4443 mono_emit_unwind_op_def_cfa_offset (cfg
, code
, cfa_offset
);
4444 mono_emit_unwind_op_offset (cfg
, code
, AMD64_RBP
, - cfa_offset
);
4445 async_exc_point (code
);
4446 #ifdef PLATFORM_WIN32
4447 mono_arch_unwindinfo_add_push_nonvol (&cfg
->arch
.unwindinfo
, cfg
->native_code
, code
, AMD64_RBP
);
4450 amd64_mov_reg_reg (code
, AMD64_RBP
, AMD64_RSP
, sizeof (gpointer
));
4451 mono_emit_unwind_op_def_cfa_reg (cfg
, code
, AMD64_RBP
);
4452 async_exc_point (code
);
4453 #ifdef PLATFORM_WIN32
4454 mono_arch_unwindinfo_add_set_fpreg (&cfg
->arch
.unwindinfo
, cfg
->native_code
, code
, AMD64_RBP
);
4458 /* Save callee saved registers */
4459 if (!cfg
->arch
.omit_fp
&& !method
->save_lmf
) {
4460 int offset
= cfa_offset
;
4462 for (i
= 0; i
< AMD64_NREG
; ++i
)
4463 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
))) {
4464 amd64_push_reg (code
, i
);
4465 pos
+= sizeof (gpointer
);
4467 mono_emit_unwind_op_offset (cfg
, code
, i
, - offset
);
4468 async_exc_point (code
);
4472 if (cfg
->arch
.omit_fp
) {
4474 * On enter, the stack is misaligned by the the pushing of the return
4475 * address. It is either made aligned by the pushing of %rbp, or by
4478 alloc_size
= ALIGN_TO (cfg
->stack_offset
, 8);
4479 if ((alloc_size
% 16) == 0)
4482 alloc_size
= ALIGN_TO (cfg
->stack_offset
, MONO_ARCH_FRAME_ALIGNMENT
);
4487 cfg
->arch
.stack_alloc_size
= alloc_size
;
4489 /* Allocate stack frame */
4491 /* See mono_emit_stack_alloc */
4492 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4493 guint32 remaining_size
= alloc_size
;
4494 while (remaining_size
>= 0x1000) {
4495 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, 0x1000);
4496 if (cfg
->arch
.omit_fp
) {
4497 cfa_offset
+= 0x1000;
4498 mono_emit_unwind_op_def_cfa_offset (cfg
, code
, cfa_offset
);
4500 async_exc_point (code
);
4501 #ifdef PLATFORM_WIN32
4502 if (cfg
->arch
.omit_fp
)
4503 mono_arch_unwindinfo_add_alloc_stack (&cfg
->arch
.unwindinfo
, cfg
->native_code
, code
, 0x1000);
4506 amd64_test_membase_reg (code
, AMD64_RSP
, 0, AMD64_RSP
);
4507 remaining_size
-= 0x1000;
4509 if (remaining_size
) {
4510 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, remaining_size
);
4511 if (cfg
->arch
.omit_fp
) {
4512 cfa_offset
+= remaining_size
;
4513 mono_emit_unwind_op_def_cfa_offset (cfg
, code
, cfa_offset
);
4514 async_exc_point (code
);
4516 #ifdef PLATFORM_WIN32
4517 if (cfg
->arch
.omit_fp
)
4518 mono_arch_unwindinfo_add_alloc_stack (&cfg
->arch
.unwindinfo
, cfg
->native_code
, code
, remaining_size
);
4522 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, alloc_size
);
4523 if (cfg
->arch
.omit_fp
) {
4524 cfa_offset
+= alloc_size
;
4525 mono_emit_unwind_op_def_cfa_offset (cfg
, code
, cfa_offset
);
4526 async_exc_point (code
);
4531 /* Stack alignment check */
4534 amd64_mov_reg_reg (code
, AMD64_RAX
, AMD64_RSP
, 8);
4535 amd64_alu_reg_imm (code
, X86_AND
, AMD64_RAX
, 0xf);
4536 amd64_alu_reg_imm (code
, X86_CMP
, AMD64_RAX
, 0);
4537 x86_branch8 (code
, X86_CC_EQ
, 2, FALSE
);
4538 amd64_breakpoint (code
);
4543 if (method
->save_lmf
) {
4545 * The ip field is not set, the exception handling code will obtain it from the stack location pointed to by the sp field.
4547 /* sp is saved right before calls */
4548 /* Skip method (only needed for trampoline LMF frames) */
4549 /* Save callee saved regs */
4550 for (i
= 0; i
< MONO_MAX_IREGS
; ++i
) {
4554 case AMD64_RBX
: offset
= G_STRUCT_OFFSET (MonoLMF
, rbx
); break;
4555 case AMD64_RBP
: offset
= G_STRUCT_OFFSET (MonoLMF
, rbp
); break;
4556 case AMD64_R12
: offset
= G_STRUCT_OFFSET (MonoLMF
, r12
); break;
4557 case AMD64_R13
: offset
= G_STRUCT_OFFSET (MonoLMF
, r13
); break;
4558 case AMD64_R14
: offset
= G_STRUCT_OFFSET (MonoLMF
, r14
); break;
4559 case AMD64_R15
: offset
= G_STRUCT_OFFSET (MonoLMF
, r15
); break;
4560 #ifdef PLATFORM_WIN32
4561 case AMD64_RDI
: offset
= G_STRUCT_OFFSET (MonoLMF
, rdi
); break;
4562 case AMD64_RSI
: offset
= G_STRUCT_OFFSET (MonoLMF
, rsi
); break;
4570 amd64_mov_membase_reg (code
, cfg
->frame_reg
, lmf_offset
+ offset
, i
, 8);
4571 if (cfg
->arch
.omit_fp
|| (i
!= AMD64_RBP
))
4572 mono_emit_unwind_op_offset (cfg
, code
, i
, - (cfa_offset
- (lmf_offset
+ offset
)));
4577 /* Save callee saved registers */
4578 if (cfg
->arch
.omit_fp
&& !method
->save_lmf
) {
4579 gint32 save_area_offset
= cfg
->arch
.reg_save_area_offset
;
4581 /* Save caller saved registers after sp is adjusted */
4582 /* The registers are saved at the bottom of the frame */
4583 /* FIXME: Optimize this so the regs are saved at the end of the frame in increasing order */
4584 for (i
= 0; i
< AMD64_NREG
; ++i
)
4585 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
))) {
4586 amd64_mov_membase_reg (code
, AMD64_RSP
, save_area_offset
, i
, 8);
4587 mono_emit_unwind_op_offset (cfg
, code
, i
, - (cfa_offset
- save_area_offset
));
4588 save_area_offset
+= 8;
4589 async_exc_point (code
);
4593 /* store runtime generic context */
4594 if (cfg
->rgctx_var
) {
4595 g_assert (cfg
->rgctx_var
->opcode
== OP_REGOFFSET
&&
4596 (cfg
->rgctx_var
->inst_basereg
== AMD64_RBP
|| cfg
->rgctx_var
->inst_basereg
== AMD64_RSP
));
4598 amd64_mov_membase_reg (code
, cfg
->rgctx_var
->inst_basereg
, cfg
->rgctx_var
->inst_offset
, MONO_ARCH_RGCTX_REG
, 8);
4601 /* compute max_offset in order to use short forward jumps */
4603 max_epilog_size
= get_max_epilog_size (cfg
);
4604 if (cfg
->opt
& MONO_OPT_BRANCH
) {
4605 for (bb
= cfg
->bb_entry
; bb
; bb
= bb
->next_bb
) {
4607 bb
->max_offset
= max_offset
;
4609 if (cfg
->prof_options
& MONO_PROFILE_COVERAGE
)
4611 /* max alignment for loops */
4612 if ((cfg
->opt
& MONO_OPT_LOOP
) && bb_is_loop_start (bb
))
4613 max_offset
+= LOOP_ALIGNMENT
;
4615 MONO_BB_FOR_EACH_INS (bb
, ins
) {
4616 if (ins
->opcode
== OP_LABEL
)
4617 ins
->inst_c1
= max_offset
;
4619 max_offset
+= ((guint8
*)ins_get_spec (ins
->opcode
))[MONO_INST_LEN
];
4622 if (mono_jit_trace_calls
&& bb
== cfg
->bb_exit
)
4623 /* The tracing code can be quite large */
4624 max_offset
+= max_epilog_size
;
4628 sig
= mono_method_signature (method
);
4631 cinfo
= cfg
->arch
.cinfo
;
4633 if (sig
->ret
->type
!= MONO_TYPE_VOID
) {
4634 /* Save volatile arguments to the stack */
4635 if (cfg
->vret_addr
&& (cfg
->vret_addr
->opcode
!= OP_REGVAR
))
4636 amd64_mov_membase_reg (code
, cfg
->vret_addr
->inst_basereg
, cfg
->vret_addr
->inst_offset
, cinfo
->ret
.reg
, 8);
4639 /* Keep this in sync with emit_load_volatile_arguments */
4640 for (i
= 0; i
< sig
->param_count
+ sig
->hasthis
; ++i
) {
4641 ArgInfo
*ainfo
= cinfo
->args
+ i
;
4642 gint32 stack_offset
;
4645 ins
= cfg
->args
[i
];
4647 if ((ins
->flags
& MONO_INST_IS_DEAD
) && !trace
)
4648 /* Unused arguments */
4651 if (sig
->hasthis
&& (i
== 0))
4652 arg_type
= &mono_defaults
.object_class
->byval_arg
;
4654 arg_type
= sig
->params
[i
- sig
->hasthis
];
4656 stack_offset
= ainfo
->offset
+ ARGS_OFFSET
;
4658 if (cfg
->globalra
) {
4659 /* All the other moves are done by the register allocator */
4660 switch (ainfo
->storage
) {
4661 case ArgInFloatSSEReg
:
4662 amd64_sse_cvtss2sd_reg_reg (code
, ainfo
->reg
, ainfo
->reg
);
4664 case ArgValuetypeInReg
:
4665 for (quad
= 0; quad
< 2; quad
++) {
4666 switch (ainfo
->pair_storage
[quad
]) {
4668 amd64_mov_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
], sizeof (gpointer
));
4670 case ArgInFloatSSEReg
:
4671 amd64_movss_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
]);
4673 case ArgInDoubleSSEReg
:
4674 amd64_movsd_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
]);
4679 g_assert_not_reached ();
4690 /* Save volatile arguments to the stack */
4691 if (ins
->opcode
!= OP_REGVAR
) {
4692 switch (ainfo
->storage
) {
4698 if (stack_offset & 0x1)
4700 else if (stack_offset & 0x2)
4702 else if (stack_offset & 0x4)
4707 amd64_mov_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
, ainfo
->reg
, size
);
4710 case ArgInFloatSSEReg
:
4711 amd64_movss_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
, ainfo
->reg
);
4713 case ArgInDoubleSSEReg
:
4714 amd64_movsd_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
, ainfo
->reg
);
4716 case ArgValuetypeInReg
:
4717 for (quad
= 0; quad
< 2; quad
++) {
4718 switch (ainfo
->pair_storage
[quad
]) {
4720 amd64_mov_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
], sizeof (gpointer
));
4722 case ArgInFloatSSEReg
:
4723 amd64_movss_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
]);
4725 case ArgInDoubleSSEReg
:
4726 amd64_movsd_membase_reg (code
, ins
->inst_basereg
, ins
->inst_offset
+ (quad
* sizeof (gpointer
)), ainfo
->pair_regs
[quad
]);
4731 g_assert_not_reached ();
4735 case ArgValuetypeAddrInIReg
:
4736 if (ainfo
->pair_storage
[0] == ArgInIReg
)
4737 amd64_mov_membase_reg (code
, ins
->inst_left
->inst_basereg
, ins
->inst_left
->inst_offset
, ainfo
->pair_regs
[0], sizeof (gpointer
));
4743 /* Argument allocated to (non-volatile) register */
4744 switch (ainfo
->storage
) {
4746 amd64_mov_reg_reg (code
, ins
->dreg
, ainfo
->reg
, 8);
4749 amd64_mov_reg_membase (code
, ins
->dreg
, AMD64_RBP
, ARGS_OFFSET
+ ainfo
->offset
, 8);
4752 g_assert_not_reached ();
4757 /* Might need to attach the thread to the JIT or change the domain for the callback */
4758 if (method
->wrapper_type
== MONO_WRAPPER_NATIVE_TO_MANAGED
) {
4759 guint64 domain
= (guint64
)cfg
->domain
;
4761 args_clobbered
= TRUE
;
4764 * The call might clobber argument registers, but they are already
4765 * saved to the stack/global regs.
4767 if (appdomain_tls_offset
!= -1 && lmf_tls_offset
!= -1) {
4768 guint8
*buf
, *no_domain_branch
;
4770 code
= mono_amd64_emit_tls_get (code
, AMD64_RAX
, appdomain_tls_offset
);
4771 if ((domain
>> 32) == 0)
4772 amd64_mov_reg_imm_size (code
, AMD64_ARG_REG1
, domain
, 4);
4774 amd64_mov_reg_imm_size (code
, AMD64_ARG_REG1
, domain
, 8);
4775 amd64_alu_reg_reg (code
, X86_CMP
, AMD64_RAX
, AMD64_ARG_REG1
);
4776 no_domain_branch
= code
;
4777 x86_branch8 (code
, X86_CC_NE
, 0, 0);
4778 code
= mono_amd64_emit_tls_get ( code
, AMD64_RAX
, lmf_addr_tls_offset
);
4779 amd64_test_reg_reg (code
, AMD64_RAX
, AMD64_RAX
);
4781 x86_branch8 (code
, X86_CC_NE
, 0, 0);
4782 amd64_patch (no_domain_branch
, code
);
4783 code
= emit_call (cfg
, code
, MONO_PATCH_INFO_INTERNAL_METHOD
,
4784 (gpointer
)"mono_jit_thread_attach", TRUE
);
4785 amd64_patch (buf
, code
);
4786 #ifdef PLATFORM_WIN32
4787 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4788 /* FIXME: Add a separate key for LMF to avoid this */
4789 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RAX
, G_STRUCT_OFFSET (MonoJitTlsData
, lmf
));
4792 g_assert (!cfg
->compile_aot
);
4793 if ((domain
>> 32) == 0)
4794 amd64_mov_reg_imm_size (code
, AMD64_ARG_REG1
, domain
, 4);
4796 amd64_mov_reg_imm_size (code
, AMD64_ARG_REG1
, domain
, 8);
4797 code
= emit_call (cfg
, code
, MONO_PATCH_INFO_INTERNAL_METHOD
,
4798 (gpointer
)"mono_jit_thread_attach", TRUE
);
4802 if (method
->save_lmf
) {
4803 if ((lmf_tls_offset
!= -1) && !optimize_for_xen
) {
4805 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4806 * through the mono_lmf_addr TLS variable.
4808 /* %rax = previous_lmf */
4809 x86_prefix (code
, X86_FS_PREFIX
);
4810 amd64_mov_reg_mem (code
, AMD64_RAX
, lmf_tls_offset
, 8);
4812 /* Save previous_lmf */
4813 amd64_mov_membase_reg (code
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, previous_lmf
), AMD64_RAX
, 8);
4815 if (lmf_offset
== 0) {
4816 x86_prefix (code
, X86_FS_PREFIX
);
4817 amd64_mov_mem_reg (code
, lmf_tls_offset
, cfg
->frame_reg
, 8);
4819 amd64_lea_membase (code
, AMD64_R11
, cfg
->frame_reg
, lmf_offset
);
4820 x86_prefix (code
, X86_FS_PREFIX
);
4821 amd64_mov_mem_reg (code
, lmf_tls_offset
, AMD64_R11
, 8);
4824 if (lmf_addr_tls_offset
!= -1) {
4825 /* Load lmf quicky using the FS register */
4826 code
= mono_amd64_emit_tls_get (code
, AMD64_RAX
, lmf_addr_tls_offset
);
4827 #ifdef PLATFORM_WIN32
4828 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4829 /* FIXME: Add a separate key for LMF to avoid this */
4830 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RAX
, G_STRUCT_OFFSET (MonoJitTlsData
, lmf
));
4835 * The call might clobber argument registers, but they are already
4836 * saved to the stack/global regs.
4838 args_clobbered
= TRUE
;
4839 code
= emit_call (cfg
, code
, MONO_PATCH_INFO_INTERNAL_METHOD
,
4840 (gpointer
)"mono_get_lmf_addr", TRUE
);
4844 amd64_mov_membase_reg (code
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, lmf_addr
), AMD64_RAX
, 8);
4845 /* Save previous_lmf */
4846 amd64_mov_reg_membase (code
, AMD64_R11
, AMD64_RAX
, 0, 8);
4847 amd64_mov_membase_reg (code
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, previous_lmf
), AMD64_R11
, 8);
4849 amd64_lea_membase (code
, AMD64_R11
, cfg
->frame_reg
, lmf_offset
);
4850 amd64_mov_membase_reg (code
, AMD64_RAX
, 0, AMD64_R11
, 8);
4855 args_clobbered
= TRUE
;
4856 code
= mono_arch_instrument_prolog (cfg
, mono_trace_enter_method
, code
, TRUE
);
4859 if (cfg
->prof_options
& MONO_PROFILE_ENTER_LEAVE
)
4860 args_clobbered
= TRUE
;
4863 * Optimize the common case of the first bblock making a call with the same
4864 * arguments as the method. This works because the arguments are still in their
4865 * original argument registers.
4866 * FIXME: Generalize this
4868 if (!args_clobbered
) {
4869 MonoBasicBlock
*first_bb
= cfg
->bb_entry
;
4872 next
= mono_bb_first_ins (first_bb
);
4873 if (!next
&& first_bb
->next_bb
) {
4874 first_bb
= first_bb
->next_bb
;
4875 next
= mono_bb_first_ins (first_bb
);
4878 if (first_bb
->in_count
> 1)
4881 for (i
= 0; next
&& i
< sig
->param_count
+ sig
->hasthis
; ++i
) {
4882 ArgInfo
*ainfo
= cinfo
->args
+ i
;
4883 gboolean match
= FALSE
;
4885 ins
= cfg
->args
[i
];
4886 if (ins
->opcode
!= OP_REGVAR
) {
4887 switch (ainfo
->storage
) {
4889 if (((next
->opcode
== OP_LOAD_MEMBASE
) || (next
->opcode
== OP_LOADI4_MEMBASE
)) && next
->inst_basereg
== ins
->inst_basereg
&& next
->inst_offset
== ins
->inst_offset
) {
4890 if (next
->dreg
== ainfo
->reg
) {
4894 next
->opcode
= OP_MOVE
;
4895 next
->sreg1
= ainfo
->reg
;
4896 /* Only continue if the instruction doesn't change argument regs */
4897 if (next
->dreg
== ainfo
->reg
|| next
->dreg
== AMD64_RAX
)
4907 /* Argument allocated to (non-volatile) register */
4908 switch (ainfo
->storage
) {
4910 if (next
->opcode
== OP_MOVE
&& next
->sreg1
== ins
->dreg
&& next
->dreg
== ainfo
->reg
) {
4922 //next = mono_inst_list_next (&next->node, &first_bb->ins_list);
4929 cfg
->code_len
= code
- cfg
->native_code
;
4931 g_assert (cfg
->code_len
< cfg
->code_size
);
4937 mono_arch_emit_epilog (MonoCompile
*cfg
)
4939 MonoMethod
*method
= cfg
->method
;
4942 int max_epilog_size
;
4944 gint32 lmf_offset
= cfg
->arch
.lmf_offset
;
4946 max_epilog_size
= get_max_epilog_size (cfg
);
4948 while (cfg
->code_len
+ max_epilog_size
> (cfg
->code_size
- 16)) {
4949 cfg
->code_size
*= 2;
4950 cfg
->native_code
= g_realloc (cfg
->native_code
, cfg
->code_size
);
4951 mono_jit_stats
.code_reallocs
++;
4954 code
= cfg
->native_code
+ cfg
->code_len
;
4956 if (mono_jit_trace_calls
!= NULL
&& mono_trace_eval (method
))
4957 code
= mono_arch_instrument_epilog (cfg
, mono_trace_leave_method
, code
, TRUE
);
4959 /* the code restoring the registers must be kept in sync with OP_JMP */
4962 if (method
->save_lmf
) {
4963 /* check if we need to restore protection of the stack after a stack overflow */
4964 if (mono_get_jit_tls_offset () != -1) {
4966 code
= mono_amd64_emit_tls_get (code
, X86_ECX
, mono_get_jit_tls_offset ());
4967 /* we load the value in a separate instruction: this mechanism may be
4968 * used later as a safer way to do thread interruption
4970 amd64_mov_reg_membase (code
, X86_ECX
, X86_ECX
, G_STRUCT_OFFSET (MonoJitTlsData
, restore_stack_prot
), 8);
4971 x86_alu_reg_imm (code
, X86_CMP
, X86_ECX
, 0);
4973 x86_branch8 (code
, X86_CC_Z
, 0, FALSE
);
4974 /* note that the call trampoline will preserve eax/edx */
4975 x86_call_reg (code
, X86_ECX
);
4976 x86_patch (patch
, code
);
4978 /* FIXME: maybe save the jit tls in the prolog */
4980 if ((lmf_tls_offset
!= -1) && !optimize_for_xen
) {
4982 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4983 * through the mono_lmf_addr TLS variable.
4985 /* reg = previous_lmf */
4986 amd64_mov_reg_membase (code
, AMD64_R11
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, previous_lmf
), 8);
4987 x86_prefix (code
, X86_FS_PREFIX
);
4988 amd64_mov_mem_reg (code
, lmf_tls_offset
, AMD64_R11
, 8);
4990 /* Restore previous lmf */
4991 amd64_mov_reg_membase (code
, AMD64_RCX
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, previous_lmf
), 8);
4992 amd64_mov_reg_membase (code
, AMD64_R11
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, lmf_addr
), 8);
4993 amd64_mov_membase_reg (code
, AMD64_R11
, 0, AMD64_RCX
, 8);
4996 /* Restore caller saved regs */
4997 if (cfg
->used_int_regs
& (1 << AMD64_RBP
)) {
4998 amd64_mov_reg_membase (code
, AMD64_RBP
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, rbp
), 8);
5000 if (cfg
->used_int_regs
& (1 << AMD64_RBX
)) {
5001 amd64_mov_reg_membase (code
, AMD64_RBX
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, rbx
), 8);
5003 if (cfg
->used_int_regs
& (1 << AMD64_R12
)) {
5004 amd64_mov_reg_membase (code
, AMD64_R12
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, r12
), 8);
5006 if (cfg
->used_int_regs
& (1 << AMD64_R13
)) {
5007 amd64_mov_reg_membase (code
, AMD64_R13
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, r13
), 8);
5009 if (cfg
->used_int_regs
& (1 << AMD64_R14
)) {
5010 amd64_mov_reg_membase (code
, AMD64_R14
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, r14
), 8);
5012 if (cfg
->used_int_regs
& (1 << AMD64_R15
)) {
5013 amd64_mov_reg_membase (code
, AMD64_R15
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, r15
), 8);
5015 #ifdef PLATFORM_WIN32
5016 if (cfg
->used_int_regs
& (1 << AMD64_RDI
)) {
5017 amd64_mov_reg_membase (code
, AMD64_RDI
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, rdi
), 8);
5019 if (cfg
->used_int_regs
& (1 << AMD64_RSI
)) {
5020 amd64_mov_reg_membase (code
, AMD64_RSI
, cfg
->frame_reg
, lmf_offset
+ G_STRUCT_OFFSET (MonoLMF
, rsi
), 8);
5025 if (cfg
->arch
.omit_fp
) {
5026 gint32 save_area_offset
= cfg
->arch
.reg_save_area_offset
;
5028 for (i
= 0; i
< AMD64_NREG
; ++i
)
5029 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
))) {
5030 amd64_mov_reg_membase (code
, i
, AMD64_RSP
, save_area_offset
, 8);
5031 save_area_offset
+= 8;
5035 for (i
= 0; i
< AMD64_NREG
; ++i
)
5036 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
)))
5037 pos
-= sizeof (gpointer
);
5040 if (pos
== - sizeof (gpointer
)) {
5041 /* Only one register, so avoid lea */
5042 for (i
= AMD64_NREG
- 1; i
> 0; --i
)
5043 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
))) {
5044 amd64_mov_reg_membase (code
, i
, AMD64_RBP
, pos
, 8);
5048 amd64_lea_membase (code
, AMD64_RSP
, AMD64_RBP
, pos
);
5050 /* Pop registers in reverse order */
5051 for (i
= AMD64_NREG
- 1; i
> 0; --i
)
5052 if (AMD64_IS_CALLEE_SAVED_REG (i
) && (cfg
->used_int_regs
& (1 << i
))) {
5053 amd64_pop_reg (code
, i
);
5060 /* Load returned vtypes into registers if needed */
5061 cinfo
= cfg
->arch
.cinfo
;
5062 if (cinfo
->ret
.storage
== ArgValuetypeInReg
) {
5063 ArgInfo
*ainfo
= &cinfo
->ret
;
5064 MonoInst
*inst
= cfg
->ret
;
5066 for (quad
= 0; quad
< 2; quad
++) {
5067 switch (ainfo
->pair_storage
[quad
]) {
5069 amd64_mov_reg_membase (code
, ainfo
->pair_regs
[quad
], inst
->inst_basereg
, inst
->inst_offset
+ (quad
* sizeof (gpointer
)), sizeof (gpointer
));
5071 case ArgInFloatSSEReg
:
5072 amd64_movss_reg_membase (code
, ainfo
->pair_regs
[quad
], inst
->inst_basereg
, inst
->inst_offset
+ (quad
* sizeof (gpointer
)));
5074 case ArgInDoubleSSEReg
:
5075 amd64_movsd_reg_membase (code
, ainfo
->pair_regs
[quad
], inst
->inst_basereg
, inst
->inst_offset
+ (quad
* sizeof (gpointer
)));
5080 g_assert_not_reached ();
5085 if (cfg
->arch
.omit_fp
) {
5086 if (cfg
->arch
.stack_alloc_size
)
5087 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RSP
, cfg
->arch
.stack_alloc_size
);
5091 async_exc_point (code
);
5094 cfg
->code_len
= code
- cfg
->native_code
;
5096 g_assert (cfg
->code_len
< cfg
->code_size
);
5098 if (cfg
->arch
.omit_fp
) {
5100 * Encode the stack size into used_int_regs so the exception handler
5103 g_assert (cfg
->arch
.stack_alloc_size
< (1 << 16));
5104 cfg
->used_int_regs
|= (1 << 31) | (cfg
->arch
.stack_alloc_size
<< 16);
5109 mono_arch_emit_exceptions (MonoCompile
*cfg
)
5111 MonoJumpInfo
*patch_info
;
5114 MonoClass
*exc_classes
[16];
5115 guint8
*exc_throw_start
[16], *exc_throw_end
[16];
5116 guint32 code_size
= 0;
5118 /* Compute needed space */
5119 for (patch_info
= cfg
->patch_info
; patch_info
; patch_info
= patch_info
->next
) {
5120 if (patch_info
->type
== MONO_PATCH_INFO_EXC
)
5122 if (patch_info
->type
== MONO_PATCH_INFO_R8
)
5123 code_size
+= 8 + 15; /* sizeof (double) + alignment */
5124 if (patch_info
->type
== MONO_PATCH_INFO_R4
)
5125 code_size
+= 4 + 15; /* sizeof (float) + alignment */
5128 while (cfg
->code_len
+ code_size
> (cfg
->code_size
- 16)) {
5129 cfg
->code_size
*= 2;
5130 cfg
->native_code
= g_realloc (cfg
->native_code
, cfg
->code_size
);
5131 mono_jit_stats
.code_reallocs
++;
5134 code
= cfg
->native_code
+ cfg
->code_len
;
5136 /* add code to raise exceptions */
5138 for (patch_info
= cfg
->patch_info
; patch_info
; patch_info
= patch_info
->next
) {
5139 switch (patch_info
->type
) {
5140 case MONO_PATCH_INFO_EXC
: {
5141 MonoClass
*exc_class
;
5145 amd64_patch (patch_info
->ip
.i
+ cfg
->native_code
, code
);
5147 exc_class
= mono_class_from_name (mono_defaults
.corlib
, "System", patch_info
->data
.name
);
5148 g_assert (exc_class
);
5149 throw_ip
= patch_info
->ip
.i
;
5151 //x86_breakpoint (code);
5152 /* Find a throw sequence for the same exception class */
5153 for (i
= 0; i
< nthrows
; ++i
)
5154 if (exc_classes
[i
] == exc_class
)
5157 amd64_mov_reg_imm (code
, AMD64_ARG_REG2
, (exc_throw_end
[i
] - cfg
->native_code
) - throw_ip
);
5158 x86_jump_code (code
, exc_throw_start
[i
]);
5159 patch_info
->type
= MONO_PATCH_INFO_NONE
;
5163 amd64_mov_reg_imm_size (code
, AMD64_ARG_REG2
, 0xf0f0f0f0, 4);
5167 exc_classes
[nthrows
] = exc_class
;
5168 exc_throw_start
[nthrows
] = code
;
5170 amd64_mov_reg_imm (code
, AMD64_ARG_REG1
, exc_class
->type_token
);
5172 patch_info
->type
= MONO_PATCH_INFO_NONE
;
5174 code
= emit_call_body (cfg
, code
, MONO_PATCH_INFO_INTERNAL_METHOD
, "mono_arch_throw_corlib_exception");
5176 amd64_mov_reg_imm (buf
, AMD64_ARG_REG2
, (code
- cfg
->native_code
) - throw_ip
);
5181 exc_throw_end
[nthrows
] = code
;
5193 /* Handle relocations with RIP relative addressing */
5194 for (patch_info
= cfg
->patch_info
; patch_info
; patch_info
= patch_info
->next
) {
5195 gboolean remove
= FALSE
;
5197 switch (patch_info
->type
) {
5198 case MONO_PATCH_INFO_R8
:
5199 case MONO_PATCH_INFO_R4
: {
5202 /* The SSE opcodes require a 16 byte alignment */
5203 code
= (guint8
*)ALIGN_TO (code
, 16);
5205 pos
= cfg
->native_code
+ patch_info
->ip
.i
;
5207 if (IS_REX (pos
[1]))
5208 *(guint32
*)(pos
+ 5) = (guint8
*)code
- pos
- 9;
5210 *(guint32
*)(pos
+ 4) = (guint8
*)code
- pos
- 8;
5212 if (patch_info
->type
== MONO_PATCH_INFO_R8
) {
5213 *(double*)code
= *(double*)patch_info
->data
.target
;
5214 code
+= sizeof (double);
5216 *(float*)code
= *(float*)patch_info
->data
.target
;
5217 code
+= sizeof (float);
5228 if (patch_info
== cfg
->patch_info
)
5229 cfg
->patch_info
= patch_info
->next
;
5233 for (tmp
= cfg
->patch_info
; tmp
->next
!= patch_info
; tmp
= tmp
->next
)
5235 tmp
->next
= patch_info
->next
;
5240 cfg
->code_len
= code
- cfg
->native_code
;
5242 g_assert (cfg
->code_len
< cfg
->code_size
);
}

void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	CallInfo *cinfo = NULL;
	MonoMethodSignature *sig;
	MonoInst *inst;
	int i, n, stack_area = 0;

	/* Keep this in sync with mono_arch_get_argument_info */

	if (enable_arguments) {
		/* Allocate a new area on the stack and save arguments there */
		sig = mono_method_signature (cfg->method);

		cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

		n = sig->param_count + sig->hasthis;

		stack_area = ALIGN_TO (n * 8, 16);

		amd64_alu_reg_imm (code, X86_SUB, AMD64_RSP, stack_area);

		for (i = 0; i < n; ++i) {
			inst = cfg->args [i];

			if (inst->opcode == OP_REGVAR)
				amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), inst->dreg, 8);
			else {
				amd64_mov_reg_membase (code, AMD64_R11, inst->inst_basereg, inst->inst_offset, 8);
				amd64_mov_membase_reg (code, AMD64_RSP, (i * 8), AMD64_R11, 8);
			}
		}
	}

	mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
	amd64_set_reg_template (code, AMD64_ARG_REG1);
	amd64_mov_reg_reg (code, AMD64_ARG_REG2, AMD64_RSP, 8);
	code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, (gpointer)func, TRUE);

	if (enable_arguments)
		amd64_alu_reg_imm (code, X86_ADD, AMD64_RSP, stack_area);

	return code;
}
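
/*
 * Layout note for mono_arch_instrument_prolog (editor's summary): when argument
 * tracing is enabled, the arguments are copied into a freshly reserved,
 * 16-byte aligned block at the top of the stack, ARG_REG1 receives the method
 * (patched in via MONO_PATCH_INFO_METHODCONST) and ARG_REG2 points at that
 * block, so the tracing function sees a simple (method, args) pair.
 */
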
5301 mono_arch_instrument_epilog (MonoCompile
*cfg
, void *func
, void *p
, gboolean enable_arguments
)
5304 int save_mode
= SAVE_NONE
;
5305 MonoMethod
*method
= cfg
->method
;
5306 int rtype
= mini_type_get_underlying_type (NULL
, mono_method_signature (method
)->ret
)->type
;
5309 case MONO_TYPE_VOID
:
5310 /* special case string .ctor icall */
5311 if (strcmp (".ctor", method
->name
) && method
->klass
== mono_defaults
.string_class
)
5312 save_mode
= SAVE_EAX
;
5314 save_mode
= SAVE_NONE
;
5318 save_mode
= SAVE_EAX
;
5322 save_mode
= SAVE_XMM
;
5324 case MONO_TYPE_GENERICINST
:
5325 if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method
)->ret
)) {
5326 save_mode
= SAVE_EAX
;
5330 case MONO_TYPE_VALUETYPE
:
5331 save_mode
= SAVE_STRUCT
;
5334 save_mode
= SAVE_EAX
;
5338 /* Save the result and copy it into the proper argument register */
5339 switch (save_mode
) {
5341 amd64_push_reg (code
, AMD64_RAX
);
5343 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, 8);
5344 if (enable_arguments
)
5345 amd64_mov_reg_reg (code
, AMD64_ARG_REG2
, AMD64_RAX
, 8);
5349 if (enable_arguments
)
5350 amd64_mov_reg_imm (code
, AMD64_ARG_REG2
, 0);
5353 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, 8);
5354 amd64_movsd_membase_reg (code
, AMD64_RSP
, 0, AMD64_XMM0
);
5356 amd64_alu_reg_imm (code
, X86_SUB
, AMD64_RSP
, 8);
5358 * The result is already in the proper argument register so no copying
5365 g_assert_not_reached ();
5368 /* Set %al since this is a varargs call */
5369 if (save_mode
== SAVE_XMM
)
5370 amd64_mov_reg_imm (code
, AMD64_RAX
, 1);
5372 amd64_mov_reg_imm (code
, AMD64_RAX
, 0);
5374 mono_add_patch_info (cfg
, code
-cfg
->native_code
, MONO_PATCH_INFO_METHODCONST
, method
);
5375 amd64_set_reg_template (code
, AMD64_ARG_REG1
);
5376 code
= emit_call (cfg
, code
, MONO_PATCH_INFO_ABS
, (gpointer
)func
, TRUE
);
5378 /* Restore result */
5379 switch (save_mode
) {
5381 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RSP
, 8);
5382 amd64_pop_reg (code
, AMD64_RAX
);
5388 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RSP
, 8);
5389 amd64_movsd_reg_membase (code
, AMD64_XMM0
, AMD64_RSP
, 0);
5390 amd64_alu_reg_imm (code
, X86_ADD
, AMD64_RSP
, 8);
5395 g_assert_not_reached ();
	}

	return code;
}

void
mono_arch_flush_icache (guint8 *code, gint size)
{
}

void
mono_arch_flush_register_windows (void)
{
}

gboolean
mono_arch_is_inst_imm (gint64 imm)
{
	return amd64_is_imm32 (imm);
}
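
/*
 * Example (illustrative): mono_arch_is_inst_imm (0x7fffffff) is TRUE, while
 * mono_arch_is_inst_imm (0x100000000) is FALSE, because most amd64 ALU forms
 * only accept a sign-extended 32-bit immediate; larger constants typically have
 * to be materialized in a register first.
 */
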
5419 * Determine whenever the trap whose info is in SIGINFO is caused by
5423 mono_arch_is_int_overflow (void *sigctx
, void *info
)
5430 mono_arch_sigctx_to_monoctx (sigctx
, &ctx
);
5432 rip
= (guint8
*)ctx
.rip
;
5434 if (IS_REX (rip
[0])) {
5435 reg
= amd64_rex_b (rip
[0]);
5441 if ((rip
[0] == 0xf7) && (x86_modrm_mod (rip
[1]) == 0x3) && (x86_modrm_reg (rip
[1]) == 0x7)) {
5443 reg
+= x86_modrm_rm (rip
[1]);
5483 g_assert_not_reached ();
5495 mono_arch_get_patch_offset (guint8
*code
)
/*
 * mono_breakpoint_clean_code:
 *
 * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
 * breakpoints in the original code, they are removed in the copy.
 *
 * Returns TRUE if no sw breakpoint was present.
 */
gboolean
mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
{
	int i;
	gboolean can_write = TRUE;
	/*
	 * If method_start is non-NULL we need to perform bound checks, since we access memory
	 * at code - offset we could go before the start of the method and end up in a different
	 * page of memory that is not mapped or read incorrect data anyway. We zero-fill the bytes
	 * that cannot be read safely instead.
	 */
	if (!method_start || code - offset >= method_start) {
		memcpy (buf, code - offset, size);
	} else {
		int diff = code - method_start;
		memset (buf, 0, size);
		memcpy (buf + offset - diff, method_start, diff + size - offset);
	}
	for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
		int idx = mono_breakpoint_info_index [i];
		guint8 *ptr;
		if (idx < 1)
			continue;
		ptr = mono_breakpoint_info [idx].address;
		if (ptr >= code && ptr < code + size) {
			guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
			can_write = FALSE;
			/*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
			buf [ptr - code] = saved_byte;
		}
	}
	return can_write;
}
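
/*
 * Usage note (editor's summary): callers such as the vcall-slot decoder below
 * pass a small window of bytes ending at a call site; mono_breakpoint_clean_code
 * hands them a copy of those bytes with any temporarily patched debugger
 * breakpoint bytes restored from mono_breakpoint_info, so instruction decoding
 * is not confused by debugger patches.
 */
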
5544 mono_arch_get_vcall_slot (guint8
*code
, gpointer
*regs
, int *displacement
)
5551 mono_breakpoint_clean_code (NULL
, code
, 9, buf
, sizeof (buf
));
5556 /* go to the start of the call instruction
5558 * address_byte = (m << 6) | (o << 3) | reg
5559 * call opcode: 0xff address_byte displacement
5561 * 0xff m=2,o=2 imm32
5566 * A given byte sequence can match more than case here, so we have to be
5567 * really careful about the ordering of the cases. Longer sequences
5569 * Some of the rules are only needed because the imm in the mov could
5571 * code [2] == 0xe8 case below.
5573 #ifdef MONO_ARCH_HAVE_IMT
5574 if ((code
[-2] == 0x41) && (code
[-1] == 0xbb) && (code
[4] == 0xff) && (x86_modrm_mod (code
[5]) == 1) && (x86_modrm_reg (code
[5]) == 2) && ((signed char)code
[6] < 0)) {
5575 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == r11
5576 * 41 bb 14 f8 28 08 mov $0x828f814,%r11d
5577 * ff 50 fc call *0xfffffffc(%rax)
5579 reg
= amd64_modrm_rm (code
[5]);
5580 disp
= (signed char)code
[6];
5581 /* R10 is clobbered by the IMT thunk code */
5582 g_assert (reg
!= AMD64_R10
);
5588 else if ((code
[-2] == 0x41) && (code
[-1] == 0xbb) && (code
[4] == 0xff) && (amd64_modrm_reg (code
[5]) == 0x2) && (amd64_modrm_mod (code
[5]) == 0x1)) {
5590 * 41 bb e8 e8 e8 e8 mov $0xe8e8e8e8,%r11d
5591 * ff 50 60 callq *0x60(%rax)
5593 if (IS_REX (code
[3]))
5595 reg
= amd64_modrm_rm (code
[5]);
5596 disp
= *(gint8
*)(code
+ 6);
5597 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
5598 } else if ((code
[-1] == 0x8b) && (amd64_modrm_mod (code
[0]) == 0x2) && (code
[5] == 0xff) && (amd64_modrm_reg (code
[6]) == 0x2) && (amd64_modrm_mod (code
[6]) == 0x0)) {
5600 * This is a interface call
5601 * 48 8b 80 f0 e8 ff ff mov 0xffffffffffffe8f0(%rax),%rax
5602 * ff 10 callq *(%rax)
5604 if (IS_REX (code
[4]))
5606 reg
= amd64_modrm_rm (code
[6]);
5608 /* R10 is clobbered by the IMT thunk code */
5609 g_assert (reg
!= AMD64_R10
);
5610 } else if ((code
[-1] >= 0xb8) && (code
[-1] < 0xb8 + 8) && (code
[4] == 0xff) && (amd64_modrm_reg (code
[5]) == 0x2) && (amd64_modrm_mod (code
[5]) == 0x1)) {
5612 * ba e8 e8 e8 e8 mov $0xe8e8e8e8,%edx
5613 * ff 50 60 callq *0x60(%rax)
5615 if (IS_REX (code
[3]))
5617 reg
= amd64_modrm_rm (code
[5]);
5618 disp
= *(gint8
*)(code
+ 6);
5619 } else if ((code
[0] == 0x41) && (code
[1] == 0xff) && (code
[2] == 0x15)) {
5620 /* call OFFSET(%rip) */
5621 disp
= *(guint32
*)(code
+ 3);
5622 return (gpointer
*)(code
+ disp
+ 7);
5623 } else if ((code
[0] == 0xff) && (amd64_modrm_reg (code
[1]) == 0x2) && (amd64_modrm_mod (code
[1]) == 0x2) && (amd64_modrm_reg (code
[2]) == X86_ESP
) && (amd64_modrm_mod (code
[2]) == 0) && (amd64_modrm_rm (code
[2]) == X86_ESP
)) {
5624 /* call *[r12+disp32] */
5625 if (IS_REX (code
[-1]))
5628 disp
= *(gint32
*)(code
+ 3);
5629 } else if ((code
[1] == 0xff) && (amd64_modrm_reg (code
[2]) == 0x2) && (amd64_modrm_mod (code
[2]) == 0x2)) {
5630 /* call *[reg+disp32] */
5631 if (IS_REX (code
[0]))
5633 reg
= amd64_modrm_rm (code
[2]);
5634 disp
= *(gint32
*)(code
+ 3);
5635 /* R10 is clobbered by the IMT thunk code */
5636 g_assert (reg
!= AMD64_R10
);
5637 } else if (code
[2] == 0xe8) {
5640 } else if ((code
[3] == 0xff) && (amd64_modrm_reg (code
[4]) == 0x2) && (amd64_modrm_mod (code
[4]) == 0x1) && (amd64_modrm_reg (code
[5]) == X86_ESP
) && (amd64_modrm_mod (code
[5]) == 0) && (amd64_modrm_rm (code
[5]) == X86_ESP
)) {
5641 /* call *[r12+disp32] */
5642 if (IS_REX (code
[2]))
5645 disp
= *(gint8
*)(code
+ 6);
5646 } else if (IS_REX (code
[4]) && (code
[5] == 0xff) && (amd64_modrm_reg (code
[6]) == 0x2) && (amd64_modrm_mod (code
[6]) == 0x3)) {
5649 } else if ((code
[4] == 0xff) && (amd64_modrm_reg (code
[5]) == 0x2) && (amd64_modrm_mod (code
[5]) == 0x1)) {
5650 /* call *[reg+disp8] */
5651 if (IS_REX (code
[3]))
5653 reg
= amd64_modrm_rm (code
[5]);
5654 disp
= *(gint8
*)(code
+ 6);
5655 //printf ("B: [%%r%d+0x%x]\n", reg, disp);
5657 else if ((code
[5] == 0xff) && (amd64_modrm_reg (code
[6]) == 0x2) && (amd64_modrm_mod (code
[6]) == 0x0)) {
5659 * This is a interface call: should check the above code can't catch it earlier
5660 * 8b 40 30 mov 0x30(%eax),%eax
5661 * ff 10 call *(%eax)
5663 if (IS_REX (code
[4]))
5665 reg
= amd64_modrm_rm (code
[6]);
5669 g_assert_not_reached ();
5671 reg
+= amd64_rex_b (rex
);
5673 /* R11 is clobbered by the trampoline code */
5674 g_assert (reg
!= AMD64_R11
);
5676 *displacement
= disp
;
5681 mono_arch_get_vcall_slot_addr (guint8
* code
, gpointer
*regs
)
5685 vt
= mono_arch_get_vcall_slot (code
, regs
, &displacement
);
5688 return (gpointer
*)((char*)vt
+ displacement
);
}

int
mono_arch_get_this_arg_reg (MonoMethodSignature *sig, MonoGenericSharingContext *gsctx, guint8 *code)
{
	int this_reg = AMD64_ARG_REG1;

	if (MONO_TYPE_ISSTRUCT (sig->ret)) {
		CallInfo *cinfo;

		if (!gsctx && code)
			gsctx = mono_get_generic_context_from_code (code);

		cinfo = get_call_info (gsctx, NULL, sig, FALSE);

		if (cinfo->ret.storage != ArgValuetypeInReg)
			this_reg = AMD64_ARG_REG2;
		g_free (cinfo);
	}

	return this_reg;
}

gpointer
mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, gssize *regs, guint8 *code)
{
	return (gpointer)regs [mono_arch_get_this_arg_reg (sig, gsctx, code)];
}
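
/*
 * Editor's note: when a method returns a structure that does not fit in
 * registers, the caller passes a hidden pointer to the return buffer in the
 * first argument register, which pushes `this` over to the second one; that is
 * the case mono_arch_get_this_arg_reg detects through cinfo->ret.storage.
 */
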
#define MAX_ARCH_DELEGATE_PARAMS 10
5721 mono_arch_get_delegate_invoke_impl (MonoMethodSignature
*sig
, gboolean has_target
)
5723 guint8
*code
, *start
;
5726 if (sig
->param_count
> MAX_ARCH_DELEGATE_PARAMS
)
5729 /* FIXME: Support more cases */
5730 if (MONO_TYPE_ISSTRUCT (sig
->ret
))
5734 static guint8
* cached
= NULL
;
5739 start
= code
= mono_global_codeman_reserve (64);
5741 /* Replace the this argument with the target */
5742 amd64_mov_reg_reg (code
, AMD64_RAX
, AMD64_ARG_REG1
, 8);
5743 amd64_mov_reg_membase (code
, AMD64_ARG_REG1
, AMD64_RAX
, G_STRUCT_OFFSET (MonoDelegate
, target
), 8);
5744 amd64_jump_membase (code
, AMD64_RAX
, G_STRUCT_OFFSET (MonoDelegate
, method_ptr
));
5746 g_assert ((code
- start
) < 64);
5748 mono_debug_add_delegate_trampoline (start
, code
- start
);
5750 mono_memory_barrier ();
5754 static guint8
* cache
[MAX_ARCH_DELEGATE_PARAMS
+ 1] = {NULL
};
5755 for (i
= 0; i
< sig
->param_count
; ++i
)
5756 if (!mono_is_regsize_var (sig
->params
[i
]))
5758 if (sig
->param_count
> 4)
5761 code
= cache
[sig
->param_count
];
5765 start
= code
= mono_global_codeman_reserve (64);
5767 if (sig
->param_count
== 0) {
5768 amd64_jump_membase (code
, AMD64_ARG_REG1
, G_STRUCT_OFFSET (MonoDelegate
, method_ptr
));
5770 /* We have to shift the arguments left */
5771 amd64_mov_reg_reg (code
, AMD64_RAX
, AMD64_ARG_REG1
, 8);
5772 for (i
= 0; i
< sig
->param_count
; ++i
) {
5773 #ifdef PLATFORM_WIN32
5775 amd64_mov_reg_reg (code
, param_regs
[i
], param_regs
[i
+ 1], 8);
5777 amd64_mov_reg_membase (code
, param_regs
[i
], AMD64_RSP
, 0x28, 8);
5779 amd64_mov_reg_reg (code
, param_regs
[i
], param_regs
[i
+ 1], 8);
5783 amd64_jump_membase (code
, AMD64_RAX
, G_STRUCT_OFFSET (MonoDelegate
, method_ptr
));
5785 g_assert ((code
- start
) < 64);
5787 mono_debug_add_delegate_trampoline (start
, code
- start
);
5789 mono_memory_barrier ();
5791 cache
[sig
->param_count
] = start
;
/*
 * Support for fast access to the thread-local lmf structure using the GS
 * segment register on NPTL + kernel 2.6.x.
 */

static gboolean tls_offset_inited = FALSE;

void
mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
{
	if (!tls_offset_inited) {
#ifdef PLATFORM_WIN32
		/*
		 * We need to init this multiple times, since when we are first called, the key might not
		 * be initialized yet.
		 */
		appdomain_tls_offset = mono_domain_get_tls_key ();
		lmf_tls_offset = mono_get_jit_tls_key ();
		thread_tls_offset = mono_thread_get_tls_key ();
		lmf_addr_tls_offset = mono_get_jit_tls_key ();

		/* Only 64 tls entries can be accessed using inline code */
		if (appdomain_tls_offset >= 64)
			appdomain_tls_offset = -1;
		if (lmf_tls_offset >= 64)
			lmf_tls_offset = -1;
		if (thread_tls_offset >= 64)
			thread_tls_offset = -1;
#else
		tls_offset_inited = TRUE;
#ifdef MONO_XEN_OPT
		optimize_for_xen = access ("/proc/xen", F_OK) == 0;
#endif
		appdomain_tls_offset = mono_domain_get_tls_offset ();
		lmf_tls_offset = mono_get_lmf_tls_offset ();
		lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
		thread_tls_offset = mono_thread_get_tls_offset ();
#endif
	}
}

void
mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
{
}

#ifdef MONO_ARCH_HAVE_IMT

#define CMP_SIZE (6 + 1)
#define CMP_REG_REG_SIZE (4 + 1)
#define BR_SMALL_SIZE 2
#define BR_LARGE_SIZE 6
#define MOV_REG_IMM_SIZE 10
#define MOV_REG_IMM_32BIT_SIZE 6
#define JUMP_REG_SIZE (2 + 1)

static int
imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
{
	int i, distance = 0;
	for (i = start; i < target; ++i)
		distance += imt_entries [i]->chunk_size;
	return distance;
}
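
/*
 * Editor's note: the *_SIZE constants above are conservative upper bounds on the
 * number of bytes each comparison/branch/jump pattern can take. The IMT thunk
 * builder first adds them up per entry to size the thunk allocation, then
 * asserts after emission that no entry outgrew its chunk_size estimate.
 */
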
5863 * LOCKING: called with the domain lock held
5866 mono_arch_build_imt_thunk (MonoVTable
*vtable
, MonoDomain
*domain
, MonoIMTCheckItem
**imt_entries
, int count
,
5867 gpointer fail_tramp
)
5871 guint8
*code
, *start
;
5872 gboolean vtable_is_32bit
= ((gsize
)(vtable
) == (gsize
)(int)(gsize
)(vtable
));
5874 for (i
= 0; i
< count
; ++i
) {
5875 MonoIMTCheckItem
*item
= imt_entries
[i
];
5876 if (item
->is_equals
) {
5877 if (item
->check_target_idx
) {
5878 if (!item
->compare_done
) {
5879 if (amd64_is_imm32 (item
->key
))
5880 item
->chunk_size
+= CMP_SIZE
;
5882 item
->chunk_size
+= MOV_REG_IMM_SIZE
+ CMP_REG_REG_SIZE
;
5884 if (item
->has_target_code
) {
5885 item
->chunk_size
+= MOV_REG_IMM_SIZE
;
5887 if (vtable_is_32bit
)
5888 item
->chunk_size
+= MOV_REG_IMM_32BIT_SIZE
;
5890 item
->chunk_size
+= MOV_REG_IMM_SIZE
;
5892 item
->chunk_size
+= BR_SMALL_SIZE
+ JUMP_REG_SIZE
;
5895 item
->chunk_size
+= MOV_REG_IMM_SIZE
* 3 + CMP_REG_REG_SIZE
+
5896 BR_SMALL_SIZE
+ JUMP_REG_SIZE
* 2;
5898 if (vtable_is_32bit
)
5899 item
->chunk_size
+= MOV_REG_IMM_32BIT_SIZE
;
5901 item
->chunk_size
+= MOV_REG_IMM_SIZE
;
5902 item
->chunk_size
+= JUMP_REG_SIZE
;
5903 /* with assert below:
5904 * item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
5909 if (amd64_is_imm32 (item
->key
))
5910 item
->chunk_size
+= CMP_SIZE
;
5912 item
->chunk_size
+= MOV_REG_IMM_SIZE
+ CMP_REG_REG_SIZE
;
5913 item
->chunk_size
+= BR_LARGE_SIZE
;
5914 imt_entries
[item
->check_target_idx
]->compare_done
= TRUE
;
5916 size
+= item
->chunk_size
;
5919 code
= mono_method_alloc_generic_virtual_thunk (domain
, size
);
5921 code
= mono_domain_code_reserve (domain
, size
);
5923 for (i
= 0; i
< count
; ++i
) {
5924 MonoIMTCheckItem
*item
= imt_entries
[i
];
5925 item
->code_target
= code
;
5926 if (item
->is_equals
) {
5927 if (item
->check_target_idx
) {
5928 if (!item
->compare_done
) {
5929 if (amd64_is_imm32 (item
->key
))
5930 amd64_alu_reg_imm (code
, X86_CMP
, MONO_ARCH_IMT_REG
, (guint32
)(gssize
)item
->key
);
5932 amd64_mov_reg_imm (code
, AMD64_R10
, item
->key
);
5933 amd64_alu_reg_reg (code
, X86_CMP
, MONO_ARCH_IMT_REG
, AMD64_R10
);
5936 item
->jmp_code
= code
;
5937 amd64_branch8 (code
, X86_CC_NE
, 0, FALSE
);
5938 /* See the comment below about R10 */
5939 if (item
->has_target_code
) {
5940 amd64_mov_reg_imm (code
, AMD64_R10
, item
->value
.target_code
);
5941 amd64_jump_reg (code
, AMD64_R10
);
5943 amd64_mov_reg_imm (code
, AMD64_R10
, & (vtable
->vtable
[item
->value
.vtable_slot
]));
5944 amd64_jump_membase (code
, AMD64_R10
, 0);
5948 if (amd64_is_imm32 (item
->key
))
5949 amd64_alu_reg_imm (code
, X86_CMP
, MONO_ARCH_IMT_REG
, (guint32
)(gssize
)item
->key
);
5951 amd64_mov_reg_imm (code
, AMD64_R10
, item
->key
);
5952 amd64_alu_reg_reg (code
, X86_CMP
, MONO_ARCH_IMT_REG
, AMD64_R10
);
5954 item
->jmp_code
= code
;
5955 amd64_branch8 (code
, X86_CC_NE
, 0, FALSE
);
5956 if (item
->has_target_code
) {
5957 amd64_mov_reg_imm (code
, AMD64_R10
, item
->value
.target_code
);
5958 amd64_jump_reg (code
, AMD64_R10
);
5961 amd64_mov_reg_imm (code
, AMD64_R10
, & (vtable
->vtable
[item
->value
.vtable_slot
]));
5962 amd64_jump_membase (code
, AMD64_R10
, 0);
5964 amd64_patch (item
->jmp_code
, code
);
5965 amd64_mov_reg_imm (code
, AMD64_R10
, fail_tramp
);
5966 amd64_jump_reg (code
, AMD64_R10
);
5967 item
->jmp_code
= NULL
;
5970 /* enable the commented code to assert on wrong method */
5972 if (amd64_is_imm32 (item
->key
))
5973 amd64_alu_reg_imm (code
, X86_CMP
, MONO_ARCH_IMT_REG
, (guint32
)(gssize
)item
->key
);
5975 amd64_mov_reg_imm (code
, AMD64_R10
, item
->key
);
5976 amd64_alu_reg_reg (code
, X86_CMP
, MONO_ARCH_IMT_REG
, AMD64_R10
);
5978 item
->jmp_code
= code
;
5979 amd64_branch8 (code
, X86_CC_NE
, 0, FALSE
);
5980 /* See the comment below about R10 */
5981 amd64_mov_reg_imm (code
, AMD64_R10
, & (vtable
->vtable
[item
->value
.vtable_slot
]));
5982 amd64_jump_membase (code
, AMD64_R10
, 0);
5983 amd64_patch (item
->jmp_code
, code
);
5984 amd64_breakpoint (code
);
5985 item
->jmp_code
= NULL
;
5987 /* We're using R10 here because R11
5988 needs to be preserved. R10 needs
5989 to be preserved for calls which
5990 require a runtime generic context,
5991 but interface calls don't. */
5992 amd64_mov_reg_imm (code
, AMD64_R10
, & (vtable
->vtable
[item
->value
.vtable_slot
]));
5993 amd64_jump_membase (code
, AMD64_R10
, 0);
5998 if (amd64_is_imm32 (item
->key
))
5999 amd64_alu_reg_imm (code
, X86_CMP
, MONO_ARCH_IMT_REG
, (guint32
)(gssize
)item
->key
);
6001 amd64_mov_reg_imm (code
, AMD64_R10
, item
->key
);
6002 amd64_alu_reg_reg (code
, X86_CMP
, MONO_ARCH_IMT_REG
, AMD64_R10
);
6004 item
->jmp_code
= code
;
6005 if (x86_is_imm8 (imt_branch_distance (imt_entries
, i
, item
->check_target_idx
)))
6006 x86_branch8 (code
, X86_CC_GE
, 0, FALSE
);
6008 x86_branch32 (code
, X86_CC_GE
, 0, FALSE
);
6010 g_assert (code
- item
->code_target
<= item
->chunk_size
);
6012 /* patch the branches to get to the target items */
6013 for (i
= 0; i
< count
; ++i
) {
6014 MonoIMTCheckItem
*item
= imt_entries
[i
];
6015 if (item
->jmp_code
) {
6016 if (item
->check_target_idx
) {
6017 amd64_patch (item
->jmp_code
, imt_entries
[item
->check_target_idx
]->code_target
);
6023 mono_stats
.imt_thunks_size
+= code
- start
;
6024 g_assert (code
- start
<= size
);
	return start;
}

MonoMethod*
mono_arch_find_imt_method (gpointer *regs, guint8 *code)
{
	return regs [MONO_ARCH_IMT_REG];
}

MonoObject*
mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
{
	return mono_arch_get_this_arg_from_call (gsctx, mono_method_signature (method), (gssize*)regs, NULL);
}

void
mono_arch_emit_imt_argument (MonoCompile *cfg, MonoCallInst *call, MonoInst *imt_arg)
{
	/* Done by the implementation of the CALL_MEMBASE opcodes */
}
#endif

MonoVTable*
mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
{
	return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
}
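
/*
 * Editor's note: the helpers above only work because the trampolines capture the
 * call-site register state: the IMT method travels in MONO_ARCH_IMT_REG and the
 * static-call vtable/rgctx in MONO_ARCH_RGCTX_REG, so recovering them is a plain
 * indexed load from the saved register array.
 */
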
6055 mono_arch_emit_inst_for_method (MonoCompile
*cfg
, MonoMethod
*cmethod
, MonoMethodSignature
*fsig
, MonoInst
**args
)
6057 MonoInst
*ins
= NULL
;
6060 if (cmethod
->klass
== mono_defaults
.math_class
) {
6061 if (strcmp (cmethod
->name
, "Sin") == 0) {
6063 } else if (strcmp (cmethod
->name
, "Cos") == 0) {
6065 } else if (strcmp (cmethod
->name
, "Sqrt") == 0) {
6067 } else if (strcmp (cmethod
->name
, "Abs") == 0 && fsig
->params
[0]->type
== MONO_TYPE_R8
) {
6072 MONO_INST_NEW (cfg
, ins
, opcode
);
6073 ins
->type
= STACK_R8
;
6074 ins
->dreg
= mono_alloc_freg (cfg
);
6075 ins
->sreg1
= args
[0]->dreg
;
6076 MONO_ADD_INS (cfg
->cbb
, ins
);
6080 if (cfg
->opt
& MONO_OPT_CMOV
) {
6081 if (strcmp (cmethod
->name
, "Min") == 0) {
6082 if (fsig
->params
[0]->type
== MONO_TYPE_I4
)
6084 if (fsig
->params
[0]->type
== MONO_TYPE_U4
)
6085 opcode
= OP_IMIN_UN
;
6086 else if (fsig
->params
[0]->type
== MONO_TYPE_I8
)
6088 else if (fsig
->params
[0]->type
== MONO_TYPE_U8
)
6089 opcode
= OP_LMIN_UN
;
6090 } else if (strcmp (cmethod
->name
, "Max") == 0) {
6091 if (fsig
->params
[0]->type
== MONO_TYPE_I4
)
6093 if (fsig
->params
[0]->type
== MONO_TYPE_U4
)
6094 opcode
= OP_IMAX_UN
;
6095 else if (fsig
->params
[0]->type
== MONO_TYPE_I8
)
6097 else if (fsig
->params
[0]->type
== MONO_TYPE_U8
)
6098 opcode
= OP_LMAX_UN
;
6103 MONO_INST_NEW (cfg
, ins
, opcode
);
6104 ins
->type
= fsig
->params
[0]->type
== MONO_TYPE_I4
? STACK_I4
: STACK_I8
;
6105 ins
->dreg
= mono_alloc_ireg (cfg
);
6106 ins
->sreg1
= args
[0]->dreg
;
6107 ins
->sreg2
= args
[1]->dreg
;
6108 MONO_ADD_INS (cfg
->cbb
, ins
);
6112 /* OP_FREM is not IEEE compatible */
6113 else if (strcmp (cmethod
->name
, "IEEERemainder") == 0) {
6114 MONO_INST_NEW (cfg
, ins
, OP_FREM
);
6115 ins
->inst_i0
= args
[0];
6116 ins
->inst_i1
= args
[1];
6122 * Can't implement CompareExchange methods this way since they have
void
mono_arch_print_tree (MonoInst *tree, int arity)
{
}

MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
{
	MonoInst* ins;

	if (appdomain_tls_offset == -1)
		return NULL;

	MONO_INST_NEW (cfg, ins, OP_TLS_GET);
	ins->inst_offset = appdomain_tls_offset;
	return ins;
}

MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
{
	MonoInst* ins;

	if (thread_tls_offset == -1)
		return NULL;

	MONO_INST_NEW (cfg, ins, OP_TLS_GET);
	ins->inst_offset = thread_tls_offset;
	return ins;
}
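
/*
 * Editor's note: both intrinsics above simply wrap the current appdomain/thread
 * TLS slot in an OP_TLS_GET instruction, and bail out with NULL when the
 * corresponding TLS offset could not be resolved (or is outside the
 * inline-addressable range), in which case the callers fall back to the generic,
 * slower code path.
 */
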
#define _CTX_REG(ctx,fld,i) ((gpointer)((&ctx->fld)[i]))

gpointer
mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
{
	switch (reg) {
	case AMD64_RCX: return (gpointer)ctx->rcx;
	case AMD64_RDX: return (gpointer)ctx->rdx;
	case AMD64_RBX: return (gpointer)ctx->rbx;
	case AMD64_RBP: return (gpointer)ctx->rbp;
	case AMD64_RSP: return (gpointer)ctx->rsp;
	default:
		if (reg < 12)
			return _CTX_REG (ctx, rax, reg);
		else if (reg < 16)
			return _CTX_REG (ctx, r12, reg - 12);
		else
			g_assert_not_reached ();