Make sure x86 ATOMIC_CAS doesn't overwrite its own operands.
[mono-debugger.git] / mono / mini / mini-x86.c
/*
 * mini-x86.c: x86 backend for the Mono code generator
 *
 * Authors:
 *   Paolo Molaro (lupus@ximian.com)
 *   Dietmar Maurer (dietmar@ximian.com)
 *   Patrik Torstensson
 *
 * (C) 2003 Ximian, Inc.
 */
#include "mini.h"
#include <string.h>
#include <math.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <mono/metadata/appdomain.h>
#include <mono/metadata/debug-helpers.h>
#include <mono/metadata/threads.h>
#include <mono/metadata/profiler-private.h>
#include <mono/metadata/mono-debug.h>
#include <mono/utils/mono-math.h>
#include <mono/utils/mono-counters.h>

#include "trace.h"
#include "mini-x86.h"
#include "cpu-x86.h"
#include "ir-emit.h"
/* On windows, these hold the key returned by TlsAlloc () */
static gint lmf_tls_offset = -1;
static gint lmf_addr_tls_offset = -1;
static gint appdomain_tls_offset = -1;
static gint thread_tls_offset = -1;

#ifdef MONO_XEN_OPT
static gboolean optimize_for_xen = TRUE;
#else
#define optimize_for_xen 0
#endif

#ifdef PLATFORM_WIN32
static gboolean is_win32 = TRUE;
#else
static gboolean is_win32 = FALSE;
#endif

/* This mutex protects architecture specific caches */
#define mono_mini_arch_lock() EnterCriticalSection (&mini_arch_mutex)
#define mono_mini_arch_unlock() LeaveCriticalSection (&mini_arch_mutex)
static CRITICAL_SECTION mini_arch_mutex;

#define ALIGN_TO(val,align) ((((guint64)val) + ((align) - 1)) & ~((align) - 1))
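/*
 * A quick sanity sketch of ALIGN_TO (not used by the JIT): it rounds up to a
 * power-of-two boundary, e.g. ALIGN_TO (13, 8) == 16 while ALIGN_TO (16, 8)
 * stays 16. The function name below is hypothetical, for illustration only.
 */
static G_GNUC_UNUSED guint32
example_align_to (void)
{
	/* (13 + 7) & ~7 == 16; already-aligned values are unchanged */
	return ALIGN_TO (13, 8);
}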
#define ARGS_OFFSET 8

#ifdef PLATFORM_WIN32
/* Under windows, the default pinvoke calling convention is stdcall */
#define CALLCONV_IS_STDCALL(sig) ((((sig)->call_convention) == MONO_CALL_STDCALL) || ((sig)->pinvoke && ((sig)->call_convention) == MONO_CALL_DEFAULT))
#else
#define CALLCONV_IS_STDCALL(sig) (((sig)->call_convention) == MONO_CALL_STDCALL)
#endif

MonoBreakpointInfo
mono_breakpoint_info [MONO_BREAKPOINT_ARRAY_SIZE];
const char*
mono_arch_regname (int reg)
{
	switch (reg) {
	case X86_EAX: return "%eax";
	case X86_EBX: return "%ebx";
	case X86_ECX: return "%ecx";
	case X86_EDX: return "%edx";
	case X86_ESP: return "%esp";
	case X86_EBP: return "%ebp";
	case X86_EDI: return "%edi";
	case X86_ESI: return "%esi";
	}
	return "unknown";
}
const char*
mono_arch_fregname (int reg)
{
	switch (reg) {
	case 0:
		return "%fr0";
	case 1:
		return "%fr1";
	case 2:
		return "%fr2";
	case 3:
		return "%fr3";
	case 4:
		return "%fr4";
	case 5:
		return "%fr5";
	case 6:
		return "%fr6";
	case 7:
		return "%fr7";
	default:
		return "unknown";
	}
}
const char *
mono_arch_xregname (int reg)
{
	switch (reg) {
	case 0:
		return "%xmm0";
	case 1:
		return "%xmm1";
	case 2:
		return "%xmm2";
	case 3:
		return "%xmm3";
	case 4:
		return "%xmm4";
	case 5:
		return "%xmm5";
	case 6:
		return "%xmm6";
	case 7:
		return "%xmm7";
	default:
		return "unknown";
	}
}
typedef enum {
	ArgInIReg,
	ArgInFloatSSEReg,
	ArgInDoubleSSEReg,
	ArgOnStack,
	ArgValuetypeInReg,
	ArgOnFloatFpStack,
	ArgOnDoubleFpStack,
	ArgNone
} ArgStorage;

typedef struct {
	gint16 offset;
	gint8  reg;
	ArgStorage storage;

	/* Only if storage == ArgValuetypeInReg */
	ArgStorage pair_storage [2];
	gint8 pair_regs [2];
} ArgInfo;

typedef struct {
	int nargs;
	guint32 stack_usage;
	guint32 reg_usage;
	guint32 freg_usage;
	gboolean need_stack_align;
	guint32 stack_align_amount;
	ArgInfo ret;
	ArgInfo sig_cookie;
	ArgInfo args [1];
} CallInfo;
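/*
 * Illustrative sketch, not called anywhere: the shape of the CallInfo that
 * get_call_info () below computes for a cdecl signature such as "int f (int x)".
 * On x86 every argument ends up ArgOnStack and the integer result comes back
 * in EAX. The function name is hypothetical.
 */
static G_GNUC_UNUSED void
example_call_info_shape (CallInfo *cinfo)
{
	cinfo->ret.storage = ArgInIReg;
	cinfo->ret.reg = X86_EAX;
	cinfo->args [0].storage = ArgOnStack;
	cinfo->args [0].offset = 0;
	cinfo->stack_usage = 4;	/* one gpointer-sized stack slot */
}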
#define PARAM_REGS 0

#define FLOAT_PARAM_REGS 0

static X86_Reg_No param_regs [] = { 0 };

#if defined(PLATFORM_WIN32) || defined(__APPLE__) || defined(__FreeBSD__)
#define SMALL_STRUCTS_IN_REGS
static X86_Reg_No return_regs [] = { X86_EAX, X86_EDX };
#endif
static void inline
add_general (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	if (*gr >= PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += sizeof (gpointer);
	}
	else {
		ainfo->storage = ArgInIReg;
		ainfo->reg = param_regs [*gr];
		(*gr) ++;
	}
}

static void inline
add_general_pair (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo)
{
	ainfo->offset = *stack_size;

	g_assert (PARAM_REGS == 0);

	ainfo->storage = ArgOnStack;
	(*stack_size) += sizeof (gpointer) * 2;
}

static void inline
add_float (guint32 *gr, guint32 *stack_size, ArgInfo *ainfo, gboolean is_double)
{
	ainfo->offset = *stack_size;

	if (*gr >= FLOAT_PARAM_REGS) {
		ainfo->storage = ArgOnStack;
		(*stack_size) += is_double ? 8 : 4;
	}
	else {
		/* A double register */
		if (is_double)
			ainfo->storage = ArgInDoubleSSEReg;
		else
			ainfo->storage = ArgInFloatSSEReg;
		ainfo->reg = *gr;
		(*gr) += 1;
	}
}
static void
add_valuetype (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig, ArgInfo *ainfo, MonoType *type,
	       gboolean is_return,
	       guint32 *gr, guint32 *fr, guint32 *stack_size)
{
	guint32 size;
	MonoClass *klass;

	klass = mono_class_from_mono_type (type);
	size = mini_type_stack_size_full (gsctx, &klass->byval_arg, NULL, sig->pinvoke);

#ifdef SMALL_STRUCTS_IN_REGS
	if (sig->pinvoke && is_return) {
		MonoMarshalType *info;

		/*
		 * The exact rules are not very well documented; the code below seems
		 * to work with the code generated by gcc 3.3.3 -mno-cygwin.
		 */
		info = mono_marshal_load_type_info (klass);
		g_assert (info);

		ainfo->pair_storage [0] = ainfo->pair_storage [1] = ArgNone;

		/* Special case structs with only a float member */
		if ((info->native_size == 8) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnDoubleFpStack;
			return;
		}
		if ((info->native_size == 4) && (info->num_fields == 1) && (info->fields [0].field->type->type == MONO_TYPE_R4)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgOnFloatFpStack;
			return;
		}
		if ((info->native_size == 1) || (info->native_size == 2) || (info->native_size == 4) || (info->native_size == 8)) {
			ainfo->storage = ArgValuetypeInReg;
			ainfo->pair_storage [0] = ArgInIReg;
			ainfo->pair_regs [0] = return_regs [0];
			if (info->native_size > 4) {
				ainfo->pair_storage [1] = ArgInIReg;
				ainfo->pair_regs [1] = return_regs [1];
			}
			return;
		}
	}
#endif

	ainfo->offset = *stack_size;
	ainfo->storage = ArgOnStack;
	*stack_size += ALIGN_TO (size, sizeof (gpointer));
}
/*
 * get_call_info:
 *
 * Obtain information about a call according to the calling convention.
 * For x86 ELF, see the "System V Application Binary Interface Intel386
 * Architecture Processor Supplement, Fourth Edition" document for more
 * information.
 * For x86 win32, see ???.
 */
static CallInfo*
get_call_info (MonoGenericSharingContext *gsctx, MonoMemPool *mp, MonoMethodSignature *sig, gboolean is_pinvoke)
{
	guint32 i, gr, fr;
	MonoType *ret_type;
	int n = sig->hasthis + sig->param_count;
	guint32 stack_size = 0;
	CallInfo *cinfo;

	if (mp)
		cinfo = mono_mempool_alloc0 (mp, sizeof (CallInfo) + (sizeof (ArgInfo) * n));
	else
		cinfo = g_malloc0 (sizeof (CallInfo) + (sizeof (ArgInfo) * n));

	gr = 0;
	fr = 0;

	/* return value */
	{
		ret_type = mini_type_get_underlying_type (gsctx, sig->ret);
		switch (ret_type->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
		case MONO_TYPE_STRING:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			cinfo->ret.storage = ArgInIReg;
			cinfo->ret.reg = X86_EAX;
			break;
		case MONO_TYPE_R4:
			cinfo->ret.storage = ArgOnFloatFpStack;
			break;
		case MONO_TYPE_R8:
			cinfo->ret.storage = ArgOnDoubleFpStack;
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->ret)) {
				cinfo->ret.storage = ArgInIReg;
				cinfo->ret.reg = X86_EAX;
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE: {
			guint32 tmp_gr = 0, tmp_fr = 0, tmp_stacksize = 0;

			add_valuetype (gsctx, sig, &cinfo->ret, sig->ret, TRUE, &tmp_gr, &tmp_fr, &tmp_stacksize);
			if (cinfo->ret.storage == ArgOnStack)
				/* The caller passes the address where the value is stored */
				add_general (&gr, &stack_size, &cinfo->ret);
			break;
		}
		case MONO_TYPE_TYPEDBYREF:
			/* Same as a valuetype with size 24 */
			add_general (&gr, &stack_size, &cinfo->ret);
			break;
		case MONO_TYPE_VOID:
			cinfo->ret.storage = ArgNone;
			break;
		default:
			g_error ("Can't handle as return value 0x%x", sig->ret->type);
		}
	}

	/* this */
	if (sig->hasthis)
		add_general (&gr, &stack_size, cinfo->args + 0);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == 0)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	for (i = 0; i < sig->param_count; ++i) {
		ArgInfo *ainfo = &cinfo->args [sig->hasthis + i];
		MonoType *ptype;

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sig->sentinelpos)) {
			/* We always pass the sig cookie on the stack for simplicity */
			/*
			 * Prevent implicit arguments + the sig cookie from being passed
			 * in registers.
			 */
			gr = PARAM_REGS;
			fr = FLOAT_PARAM_REGS;

			/* Emit the signature cookie just before the implicit arguments */
			add_general (&gr, &stack_size, &cinfo->sig_cookie);
		}

		if (sig->params [i]->byref) {
			add_general (&gr, &stack_size, ainfo);
			continue;
		}
		ptype = mini_type_get_underlying_type (gsctx, sig->params [i]);
		switch (ptype->type) {
		case MONO_TYPE_BOOLEAN:
		case MONO_TYPE_I1:
		case MONO_TYPE_U1:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I2:
		case MONO_TYPE_U2:
		case MONO_TYPE_CHAR:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I4:
		case MONO_TYPE_U4:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_I:
		case MONO_TYPE_U:
		case MONO_TYPE_PTR:
		case MONO_TYPE_FNPTR:
		case MONO_TYPE_CLASS:
		case MONO_TYPE_OBJECT:
		case MONO_TYPE_STRING:
		case MONO_TYPE_SZARRAY:
		case MONO_TYPE_ARRAY:
			add_general (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_GENERICINST:
			if (!mono_type_generic_inst_is_valuetype (sig->params [i])) {
				add_general (&gr, &stack_size, ainfo);
				break;
			}
			/* Fall through */
		case MONO_TYPE_VALUETYPE:
			add_valuetype (gsctx, sig, ainfo, sig->params [i], FALSE, &gr, &fr, &stack_size);
			break;
		case MONO_TYPE_TYPEDBYREF:
			stack_size += sizeof (MonoTypedRef);
			ainfo->storage = ArgOnStack;
			break;
		case MONO_TYPE_U8:
		case MONO_TYPE_I8:
			add_general_pair (&gr, &stack_size, ainfo);
			break;
		case MONO_TYPE_R4:
			add_float (&fr, &stack_size, ainfo, FALSE);
			break;
		case MONO_TYPE_R8:
			add_float (&fr, &stack_size, ainfo, TRUE);
			break;
		default:
			g_error ("unexpected type 0x%x", ptype->type);
			g_assert_not_reached ();
		}
	}

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n > 0) && (sig->sentinelpos == sig->param_count)) {
		gr = PARAM_REGS;
		fr = FLOAT_PARAM_REGS;

		/* Emit the signature cookie just before the implicit arguments */
		add_general (&gr, &stack_size, &cinfo->sig_cookie);
	}

	if (mono_do_x86_stack_align && (stack_size % MONO_ARCH_FRAME_ALIGNMENT) != 0) {
		cinfo->need_stack_align = TRUE;
		cinfo->stack_align_amount = MONO_ARCH_FRAME_ALIGNMENT - (stack_size % MONO_ARCH_FRAME_ALIGNMENT);
		stack_size += cinfo->stack_align_amount;
	}

	cinfo->stack_usage = stack_size;
	cinfo->reg_usage = gr;
	cinfo->freg_usage = fr;
	return cinfo;
}
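/*
 * Worked example (an illustrative sketch, not used by the runtime): for a
 * cdecl signature like "int f (int a, gint64 b, double c)", the accounting
 * in get_call_info () above assigns a = offset 0, b = offset 4 (two slots,
 * via add_general_pair), c = offset 12 (via add_float), giving 20 bytes of
 * outgoing argument area before any frame alignment padding. The helper
 * name below is hypothetical.
 */
static G_GNUC_UNUSED guint32
example_cdecl_stack_usage (void)
{
	guint32 stack_size = 0;

	stack_size += sizeof (gpointer);	/* int a: offset 0 */
	stack_size += sizeof (gpointer) * 2;	/* gint64 b: offset 4 */
	stack_size += 8;			/* double c: offset 12 */

	return stack_size;	/* 20 */
}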
/*
 * mono_arch_get_argument_info:
 * @csig: a method signature
 * @param_count: the number of parameters to consider
 * @arg_info: an array to store the result infos
 *
 * Gathers information on parameters such as size, alignment and
 * padding. arg_info should be large enough to hold param_count + 1 entries.
 *
 * Returns the size of the argument area on the stack.
 */
int
mono_arch_get_argument_info (MonoMethodSignature *csig, int param_count, MonoJitArgumentInfo *arg_info)
{
	int k, args_size = 0;
	int size, pad;
	guint32 align;
	int offset = 8;
	CallInfo *cinfo;

	cinfo = get_call_info (NULL, NULL, csig, FALSE);

	if (MONO_TYPE_ISSTRUCT (csig->ret) && (cinfo->ret.storage == ArgOnStack)) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].offset = offset;

	if (csig->hasthis) {
		args_size += sizeof (gpointer);
		offset += 4;
	}

	arg_info [0].size = args_size;

	for (k = 0; k < param_count; k++) {
		size = mini_type_stack_size_full (NULL, csig->params [k], &align, csig->pinvoke);

		/* ignore alignment for now */
		align = 1;

		args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
		arg_info [k].pad = pad;
		args_size += size;
		arg_info [k + 1].pad = 0;
		arg_info [k + 1].size = size;
		offset += pad;
		arg_info [k + 1].offset = offset;
		offset += size;
	}

	if (mono_do_x86_stack_align && !CALLCONV_IS_STDCALL (csig))
		align = MONO_ARCH_FRAME_ALIGNMENT;
	else
		align = 4;
	args_size += pad = (align - (args_size & (align - 1))) & (align - 1);
	arg_info [k].pad = pad;

	g_free (cinfo);

	return args_size;
}
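/*
 * Hypothetical usage sketch of mono_arch_get_argument_info (): the caller
 * provides param_count + 1 entries, where entry 0 describes the fixed part
 * and entry k + 1 describes parameter k. Assumes a valid MonoMethodSignature.
 */
static G_GNUC_UNUSED int
example_argument_area_size (MonoMethodSignature *csig)
{
	MonoJitArgumentInfo *arg_info = g_new0 (MonoJitArgumentInfo, csig->param_count + 1);
	int args_size = mono_arch_get_argument_info (csig, csig->param_count, arg_info);

	g_free (arg_info);
	return args_size;
}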
static const guchar cpuid_impl [] = {
	0x55,			/* push %ebp */
	0x89, 0xe5,		/* mov %esp,%ebp */
	0x53,			/* push %ebx */
	0x8b, 0x45, 0x08,	/* mov 0x8(%ebp),%eax */
	0x0f, 0xa2,		/* cpuid */
	0x50,			/* push %eax */
	0x8b, 0x45, 0x10,	/* mov 0x10(%ebp),%eax */
	0x89, 0x18,		/* mov %ebx,(%eax) */
	0x8b, 0x45, 0x14,	/* mov 0x14(%ebp),%eax */
	0x89, 0x08,		/* mov %ecx,(%eax) */
	0x8b, 0x45, 0x18,	/* mov 0x18(%ebp),%eax */
	0x89, 0x10,		/* mov %edx,(%eax) */
	0x58,			/* pop %eax */
	0x8b, 0x55, 0x0c,	/* mov 0xc(%ebp),%edx */
	0x89, 0x02,		/* mov %eax,(%edx) */
	0x5b,			/* pop %ebx */
	0xc9,			/* leave */
	0xc3,			/* ret */
};

typedef void (*CpuidFunc) (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx);
static int
cpuid (int id, int* p_eax, int* p_ebx, int* p_ecx, int* p_edx)
{
	int have_cpuid = 0;
#ifndef _MSC_VER
	__asm__  __volatile__ (
		"pushfl\n"
		"popl %%eax\n"
		"movl %%eax, %%edx\n"
		"xorl $0x200000, %%eax\n"
		"pushl %%eax\n"
		"popfl\n"
		"pushfl\n"
		"popl %%eax\n"
		"xorl %%edx, %%eax\n"
		"andl $0x200000, %%eax\n"
		"movl %%eax, %0"
		: "=r" (have_cpuid)
		:
		: "%eax", "%edx"
	);
#else
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 0x200000
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		and eax, 0x200000
		mov have_cpuid, eax
	}
#endif
	if (have_cpuid) {
		/* Have to use the code manager to get around WinXP DEP */
		static CpuidFunc func = NULL;
		void *ptr;
		if (!func) {
			ptr = mono_global_codeman_reserve (sizeof (cpuid_impl));
			memcpy (ptr, cpuid_impl, sizeof (cpuid_impl));
			func = (CpuidFunc)ptr;
		}
		func (id, p_eax, p_ebx, p_ecx, p_edx);

		/*
		 * We use this approach because of issues with gcc and pic code, see:
		 * http://gcc.gnu.org/cgi-bin/gnatsweb.pl?cmd=view%20audit-trail&database=gcc&pr=7329
		__asm__ __volatile__ ("cpuid"
			: "=a" (*p_eax), "=b" (*p_ebx), "=c" (*p_ecx), "=d" (*p_edx)
			: "a" (id));
		*/
		return 1;
	}
	return 0;
}
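/*
 * Sketch of how the cpuid () helper above is meant to be consumed; this
 * mirrors the SSE2 test done in mono_arch_cpu_optimizazions () below.
 * The function name is hypothetical, for illustration only.
 */
static G_GNUC_UNUSED gboolean
example_cpu_has_sse2 (void)
{
	int eax, ebx, ecx, edx;

	/* leaf 1 = feature flags; EDX bit 26 = SSE2 */
	return cpuid (1, &eax, &ebx, &ecx, &edx) && (edx & (1 << 26)) != 0;
}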
/*
 * Initialize the cpu to execute managed code.
 */
void
mono_arch_cpu_init (void)
{
	/* spec compliance requires running with double precision */
#ifndef _MSC_VER
	guint16 fpcw;

	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
	fpcw &= ~X86_FPCW_PRECC_MASK;
	fpcw |= X86_FPCW_PREC_DOUBLE;
	__asm__  __volatile__ ("fldcw %0\n": : "m" (fpcw));
	__asm__  __volatile__ ("fnstcw %0\n": "=m" (fpcw));
#else
	_control87 (_PC_53, MCW_PC);
#endif
}
/*
 * Initialize architecture specific code.
 */
void
mono_arch_init (void)
{
	InitializeCriticalSection (&mini_arch_mutex);
}

/*
 * Cleanup architecture specific code.
 */
void
mono_arch_cleanup (void)
{
	DeleteCriticalSection (&mini_arch_mutex);
}
/*
 * This function returns the optimizations supported on this cpu.
 */
guint32
mono_arch_cpu_optimizazions (guint32 *exclude_mask)
{
	int eax, ebx, ecx, edx;
	guint32 opts = 0;

	*exclude_mask = 0;
	/* Feature Flags function, flags returned in EDX. */
	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 15)) {
			opts |= MONO_OPT_CMOV;
			if (edx & 1)
				opts |= MONO_OPT_FCMOV;
			else
				*exclude_mask |= MONO_OPT_FCMOV;
		} else
			*exclude_mask |= MONO_OPT_CMOV;
		if (edx & (1 << 26))
			opts |= MONO_OPT_SSE2;
		else
			*exclude_mask |= MONO_OPT_SSE2;

#ifdef MONO_ARCH_SIMD_INTRINSICS
		/* SIMD intrinsics require at least SSE2. */
		if (!(opts & MONO_OPT_SSE2))
			*exclude_mask |= MONO_OPT_SIMD;
#endif
	}
	return opts;
}
/*
 * This function tests which SSE versions the cpu supports.
 *
 * Returns a bitmask corresponding to all supported versions.
 *
 * TODO detect other versions like SSE4a.
 */
guint32
mono_arch_cpu_enumerate_simd_versions (void)
{
	int eax, ebx, ecx, edx;
	guint32 sse_opts = 0;

	if (cpuid (1, &eax, &ebx, &ecx, &edx)) {
		if (edx & (1 << 25))
			sse_opts |= 1 << SIMD_VERSION_SSE1;
		if (edx & (1 << 26))
			sse_opts |= 1 << SIMD_VERSION_SSE2;
		if (ecx & (1 << 0))
			sse_opts |= 1 << SIMD_VERSION_SSE3;
		if (ecx & (1 << 9))
			sse_opts |= 1 << SIMD_VERSION_SSSE3;
		if (ecx & (1 << 19))
			sse_opts |= 1 << SIMD_VERSION_SSE41;
		if (ecx & (1 << 20))
			sse_opts |= 1 << SIMD_VERSION_SSE42;
	}
	return sse_opts;
}
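/*
 * Sketch (hypothetical helper, not used by the runtime): decoding the
 * bitmask returned by mono_arch_cpu_enumerate_simd_versions () above.
 */
static G_GNUC_UNUSED gboolean
example_simd_version_supported (int version)
{
	guint32 mask = mono_arch_cpu_enumerate_simd_versions ();

	/* e.g. version == SIMD_VERSION_SSE2 */
	return (mask & (1 << version)) != 0;
}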
/*
 * Determine whether the trap whose info is in SIGINFO is caused by
 * integer overflow.
 */
gboolean
mono_arch_is_int_overflow (void *sigctx, void *info)
{
	MonoContext ctx;
	guint8* ip;

	mono_arch_sigctx_to_monoctx (sigctx, &ctx);

	ip = (guint8*)ctx.eip;

	if ((ip [0] == 0xf7) && (x86_modrm_mod (ip [1]) == 0x3) && (x86_modrm_reg (ip [1]) == 0x7)) {
		gint32 reg;

		/* idiv REG */
		switch (x86_modrm_rm (ip [1])) {
		case X86_EAX:
			reg = ctx.eax;
			break;
		case X86_ECX:
			reg = ctx.ecx;
			break;
		case X86_EDX:
			reg = ctx.edx;
			break;
		case X86_EBX:
			reg = ctx.ebx;
			break;
		case X86_ESI:
			reg = ctx.esi;
			break;
		case X86_EDI:
			reg = ctx.edi;
			break;
		default:
			g_assert_not_reached ();
			reg = -1;
		}

		if (reg == -1)
			return TRUE;
	}

	return FALSE;
}
GList *
mono_arch_get_allocatable_int_vars (MonoCompile *cfg)
{
	GList *vars = NULL;
	int i;

	for (i = 0; i < cfg->num_varinfo; i++) {
		MonoInst *ins = cfg->varinfo [i];
		MonoMethodVar *vmv = MONO_VARINFO (cfg, i);

		/* unused vars */
		if (vmv->range.first_use.abs_pos >= vmv->range.last_use.abs_pos)
			continue;

		if ((ins->flags & (MONO_INST_IS_DEAD|MONO_INST_VOLATILE|MONO_INST_INDIRECT)) ||
		    (ins->opcode != OP_LOCAL && ins->opcode != OP_ARG))
			continue;

		/* we don't allocate I1 to registers because there is no simple way to sign extend
		 * 8bit quantities in caller saved registers on x86 */
		if (mono_is_regsize_var (ins->inst_vtype) && (ins->inst_vtype->type != MONO_TYPE_I1)) {
			g_assert (MONO_VARINFO (cfg, i)->reg == -1);
			g_assert (i == vmv->idx);
			vars = g_list_prepend (vars, vmv);
		}
	}

	vars = mono_varlist_sort (cfg, vars, 0);

	return vars;
}
GList *
mono_arch_get_global_int_regs (MonoCompile *cfg)
{
	GList *regs = NULL;

	/* we can use 3 registers for global allocation */
	regs = g_list_prepend (regs, (gpointer)X86_EBX);
	regs = g_list_prepend (regs, (gpointer)X86_ESI);
	regs = g_list_prepend (regs, (gpointer)X86_EDI);

	return regs;
}
/*
 * mono_arch_regalloc_cost:
 *
 * Return the cost, in number of memory references, of the action of
 * allocating the variable VMV into a register during global register
 * allocation.
 */
guint32
mono_arch_regalloc_cost (MonoCompile *cfg, MonoMethodVar *vmv)
{
	MonoInst *ins = cfg->varinfo [vmv->idx];

	if (cfg->method->save_lmf)
		/* The register is already saved */
		return (ins->opcode == OP_ARG) ? 1 : 0;
	else
		/* push+pop+possible load if it is an argument */
		return (ins->opcode == OP_ARG) ? 3 : 2;
}
static void
set_needs_stack_frame (MonoCompile *cfg, gboolean flag)
{
	static int inited = FALSE;
	static int count = 0;

	if (cfg->arch.need_stack_frame_inited) {
		g_assert (cfg->arch.need_stack_frame == flag);
		return;
	}

	cfg->arch.need_stack_frame = flag;
	cfg->arch.need_stack_frame_inited = TRUE;

	if (flag)
		return;

	if (!inited) {
		mono_counters_register ("Could eliminate stack frame", MONO_COUNTER_INT|MONO_COUNTER_JIT, &count);
		inited = TRUE;
	}
	++count;

	//g_print ("will eliminate %s.%s.%s\n", cfg->method->klass->name_space, cfg->method->klass->name, cfg->method->name);
}
static gboolean
needs_stack_frame (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	gboolean result = FALSE;

	if (cfg->arch.need_stack_frame_inited)
		return cfg->arch.need_stack_frame;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	if (cfg->disable_omit_fp)
		result = TRUE;
	else if (cfg->flags & MONO_CFG_HAS_ALLOCA)
		result = TRUE;
	else if (cfg->method->save_lmf)
		result = TRUE;
	else if (cfg->stack_offset)
		result = TRUE;
	else if (cfg->param_area)
		result = TRUE;
	else if (cfg->flags & (MONO_CFG_HAS_CALLS | MONO_CFG_HAS_ALLOCA | MONO_CFG_HAS_TAIL))
		result = TRUE;
	else if (header->num_clauses)
		result = TRUE;
	else if (sig->param_count + sig->hasthis)
		result = TRUE;
	else if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		result = TRUE;
	else if ((mono_jit_trace_calls != NULL && mono_trace_eval (cfg->method)) ||
		 (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE))
		result = TRUE;

	set_needs_stack_frame (cfg, result);

	return cfg->arch.need_stack_frame;
}
/*
 * Set var information according to the calling convention. X86 version.
 * The locals var stuff should most likely be split in another method.
 */
void
mono_arch_allocate_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	MonoMethodHeader *header;
	MonoInst *inst;
	guint32 locals_stack_size, locals_stack_align;
	int i, offset;
	gint32 *offsets;
	CallInfo *cinfo;

	header = mono_method_get_header (cfg->method);
	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	cfg->frame_reg = X86_EBP;
	offset = 0;

	/* Reserve space to save LMF and caller saved registers */

	if (cfg->method->save_lmf) {
		offset += sizeof (MonoLMF);
	} else {
		if (cfg->used_int_regs & (1 << X86_EBX)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_EDI)) {
			offset += 4;
		}

		if (cfg->used_int_regs & (1 << X86_ESI)) {
			offset += 4;
		}
	}

	switch (cinfo->ret.storage) {
	case ArgValuetypeInReg:
		/* Allocate a local to hold the result, the epilog will copy it to the correct place */
		offset += 8;
		cfg->ret->opcode = OP_REGOFFSET;
		cfg->ret->inst_basereg = X86_EBP;
		cfg->ret->inst_offset = - offset;
		break;
	default:
		break;
	}

	/* Allocate locals */
	offsets = mono_allocate_stack_slots (cfg, &locals_stack_size, &locals_stack_align);
	if (locals_stack_align) {
		offset += (locals_stack_align - 1);
		offset &= ~(locals_stack_align - 1);
	}
	/*
	 * EBP is at alignment 8 % MONO_ARCH_FRAME_ALIGNMENT, so if we
	 * have locals larger than 8 bytes we need to make sure that
	 * they have the appropriate offset.
	 */
	if (MONO_ARCH_FRAME_ALIGNMENT > 8 && locals_stack_align > 8)
		offset += MONO_ARCH_FRAME_ALIGNMENT - sizeof (gpointer) * 2;
	for (i = cfg->locals_start; i < cfg->num_varinfo; i++) {
		if (offsets [i] != -1) {
			MonoInst *inst = cfg->varinfo [i];
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
			inst->inst_offset = - (offset + offsets [i]);
			//printf ("allocated local %d to ", i); mono_print_tree_nl (inst);
		}
	}
	offset += locals_stack_size;


	/*
	 * Allocate arguments+return value
	 */

	switch (cinfo->ret.storage) {
	case ArgOnStack:
		if (MONO_TYPE_ISSTRUCT (sig->ret)) {
			/*
			 * In the new IR, the cfg->vret_addr variable represents the
			 * vtype return value.
			 */
			cfg->vret_addr->opcode = OP_REGOFFSET;
			cfg->vret_addr->inst_basereg = cfg->frame_reg;
			cfg->vret_addr->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
			if (G_UNLIKELY (cfg->verbose_level > 1)) {
				printf ("vret_addr =");
				mono_print_ins (cfg->vret_addr);
			}
		} else {
			cfg->ret->opcode = OP_REGOFFSET;
			cfg->ret->inst_basereg = X86_EBP;
			cfg->ret->inst_offset = cinfo->ret.offset + ARGS_OFFSET;
		}
		break;
	case ArgValuetypeInReg:
		break;
	case ArgInIReg:
		cfg->ret->opcode = OP_REGVAR;
		cfg->ret->inst_c0 = cinfo->ret.reg;
		cfg->ret->dreg = cinfo->ret.reg;
		break;
	case ArgNone:
	case ArgOnFloatFpStack:
	case ArgOnDoubleFpStack:
		break;
	default:
		g_assert_not_reached ();
	}

	if (sig->call_convention == MONO_CALL_VARARG) {
		g_assert (cinfo->sig_cookie.storage == ArgOnStack);
		cfg->sig_cookie = cinfo->sig_cookie.offset + ARGS_OFFSET;
	}

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = &cinfo->args [i];
		inst = cfg->args [i];
		if (inst->opcode != OP_REGVAR) {
			inst->opcode = OP_REGOFFSET;
			inst->inst_basereg = X86_EBP;
		}
		inst->inst_offset = ainfo->offset + ARGS_OFFSET;
	}

	cfg->stack_offset = offset;
}
void
mono_arch_create_vars (MonoCompile *cfg)
{
	MonoMethodSignature *sig;
	CallInfo *cinfo;

	sig = mono_method_signature (cfg->method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (cinfo->ret.storage == ArgValuetypeInReg)
		cfg->ret_var_is_local = TRUE;
	if ((cinfo->ret.storage != ArgValuetypeInReg) && MONO_TYPE_ISSTRUCT (sig->ret)) {
		cfg->vret_addr = mono_compile_create_var (cfg, &mono_defaults.int_class->byval_arg, OP_ARG);
	}
}
/*
 * It is expensive to adjust esp for each individual fp argument pushed on the stack
 * so we try to do it just once when we have multiple fp arguments in a row.
 * We don't use this mechanism generally because for int arguments the generated code
 * is slightly bigger and new generation cpus optimize away the dependency chains
 * created by push instructions on the esp value.
 * fp_arg_setup is the first argument in the execution sequence where the esp register
 * is modified.
 */
static G_GNUC_UNUSED int
collect_fp_stack_space (MonoMethodSignature *sig, int start_arg, int *fp_arg_setup)
{
	int fp_space = 0;
	MonoType *t;

	for (; start_arg < sig->param_count; ++start_arg) {
		t = mini_type_get_underlying_type (NULL, sig->params [start_arg]);
		if (!t->byref && t->type == MONO_TYPE_R8) {
			fp_space += sizeof (double);
			*fp_arg_setup = start_arg;
		} else {
			break;
		}
	}
	return fp_space;
}
static void
emit_sig_cookie (MonoCompile *cfg, MonoCallInst *call, CallInfo *cinfo)
{
	MonoMethodSignature *tmp_sig;

	/* FIXME: Add support for signature tokens to AOT */
	cfg->disable_aot = TRUE;

	/*
	 * mono_ArgIterator_Setup assumes the signature cookie is
	 * passed first and all the arguments which were before it are
	 * passed on the stack after the signature. So compensate by
	 * passing a different signature.
	 */
	tmp_sig = mono_metadata_signature_dup (call->signature);
	tmp_sig->param_count -= call->signature->sentinelpos;
	tmp_sig->sentinelpos = 0;
	memcpy (tmp_sig->params, call->signature->params + call->signature->sentinelpos, tmp_sig->param_count * sizeof (MonoType*));

	MONO_EMIT_NEW_BIALU_IMM (cfg, OP_X86_PUSH_IMM, -1, -1, tmp_sig);
}
void
mono_arch_emit_call (MonoCompile *cfg, MonoCallInst *call)
{
	MonoInst *arg, *in;
	MonoMethodSignature *sig;
	int i, n;
	CallInfo *cinfo;
	int sentinelpos = 0;

	sig = call->signature;
	n = sig->param_count + sig->hasthis;

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG))
		sentinelpos = sig->sentinelpos + (sig->hasthis ? 1 : 0);

	if (cinfo->need_stack_align) {
		MONO_INST_NEW (cfg, arg, OP_SUB_IMM);
		arg->dreg = X86_ESP;
		arg->sreg1 = X86_ESP;
		arg->inst_imm = cinfo->stack_align_amount;
		MONO_ADD_INS (cfg->cbb, arg);
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			if (cinfo->ret.pair_storage [0] == ArgInIReg && cinfo->ret.pair_storage [1] == ArgNone) {
				/*
				 * Tell the JIT to use a more efficient calling convention: call using
				 * OP_CALL, compute the result location after the call, and save the
				 * result there.
				 */
				call->vret_in_reg = TRUE;
			} else {
				/*
				 * The valuetype is in EAX:EDX after the call, needs to be copied to
				 * the stack. Save the address here, so the call instruction can
				 * access it.
				 */
				MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
				vtarg->sreg1 = call->vret_var->dreg;
				MONO_ADD_INS (cfg->cbb, vtarg);
			}
		}
	}

	/* Handle the case where there are no implicit arguments */
	if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (n == sentinelpos)) {
		emit_sig_cookie (cfg, call, cinfo);
	}

	/* Arguments are pushed in the reverse order */
	for (i = n - 1; i >= 0; i --) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *t;

		if (i >= sig->hasthis)
			t = sig->params [i - sig->hasthis];
		else
			t = &mono_defaults.int_class->byval_arg;
		t = mini_type_get_underlying_type (cfg->generic_sharing_context, t);

		MONO_INST_NEW (cfg, arg, OP_X86_PUSH);

		in = call->args [i];
		arg->cil_code = in->cil_code;
		arg->sreg1 = in->dreg;
		arg->type = in->type;

		g_assert (in->dreg != -1);

		if ((i >= sig->hasthis) && (MONO_TYPE_ISSTRUCT(t))) {
			guint32 align;
			guint32 size;

			g_assert (in->klass);

			if (t->type == MONO_TYPE_TYPEDBYREF) {
				size = sizeof (MonoTypedRef);
				align = sizeof (gpointer);
			}
			else {
				size = mini_type_stack_size_full (cfg->generic_sharing_context, &in->klass->byval_arg, &align, sig->pinvoke);
			}

			if (size > 0) {
				arg->opcode = OP_OUTARG_VT;
				arg->sreg1 = in->dreg;
				arg->klass = in->klass;
				arg->backend.size = size;

				MONO_ADD_INS (cfg->cbb, arg);
			}
		}
		else {
			switch (ainfo->storage) {
			case ArgOnStack:
				arg->opcode = OP_X86_PUSH;
				if (!t->byref) {
					if (t->type == MONO_TYPE_R4) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 4);
						arg->opcode = OP_STORER4_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_R8) {
						MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, 8);
						arg->opcode = OP_STORER8_MEMBASE_REG;
						arg->inst_destbasereg = X86_ESP;
						arg->inst_offset = 0;
					} else if (t->type == MONO_TYPE_I8 || t->type == MONO_TYPE_U8) {
						arg->sreg1 ++;
						MONO_EMIT_NEW_UNALU (cfg, OP_X86_PUSH, -1, in->dreg + 2);
					}
				}
				break;
			default:
				g_assert_not_reached ();
			}

			MONO_ADD_INS (cfg->cbb, arg);
		}

		if (!sig->pinvoke && (sig->call_convention == MONO_CALL_VARARG) && (i == sentinelpos)) {
			/* Emit the signature cookie just before the implicit arguments */
			emit_sig_cookie (cfg, call, cinfo);
		}
	}

	if (sig->ret && MONO_TYPE_ISSTRUCT (sig->ret)) {
		MonoInst *vtarg;

		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Already done */
		}
		else if (cinfo->ret.storage == ArgInIReg) {
			NOT_IMPLEMENTED;
			/* The return address is passed in a register */
			MONO_INST_NEW (cfg, vtarg, OP_MOVE);
			vtarg->sreg1 = call->inst.dreg;
			vtarg->dreg = mono_alloc_ireg (cfg);
			MONO_ADD_INS (cfg->cbb, vtarg);

			mono_call_inst_add_outarg_reg (cfg, call, vtarg->dreg, cinfo->ret.reg, FALSE);
		} else {
			MonoInst *vtarg;
			MONO_INST_NEW (cfg, vtarg, OP_X86_PUSH);
			vtarg->type = STACK_MP;
			vtarg->sreg1 = call->vret_var->dreg;
			MONO_ADD_INS (cfg->cbb, vtarg);
		}

		/* if the function returns a struct, the called method already does a ret $0x4 */
		cinfo->stack_usage -= 4;
	}

	call->stack_usage = cinfo->stack_usage;
}
void
mono_arch_emit_outarg_vt (MonoCompile *cfg, MonoInst *ins, MonoInst *src)
{
	MonoInst *arg;
	int size = ins->backend.size;

	if (size <= 4) {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_MEMBASE);
		arg->sreg1 = src->dreg;

		MONO_ADD_INS (cfg->cbb, arg);
	} else if (size <= 20) {
		MONO_EMIT_NEW_BIALU_IMM (cfg, OP_SUB_IMM, X86_ESP, X86_ESP, ALIGN_TO (size, 4));
		mini_emit_memcpy (cfg, X86_ESP, 0, src->dreg, 0, size, 4);
	} else {
		MONO_INST_NEW (cfg, arg, OP_X86_PUSH_OBJ);
		arg->inst_basereg = src->dreg;
		arg->inst_offset = 0;
		arg->inst_imm = size;

		MONO_ADD_INS (cfg->cbb, arg);
	}
}
void
mono_arch_emit_setret (MonoCompile *cfg, MonoMethod *method, MonoInst *val)
{
	MonoType *ret = mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret);

	if (!ret->byref) {
		if (ret->type == MONO_TYPE_R4) {
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_R8) {
			/* Nothing to do */
			return;
		} else if (ret->type == MONO_TYPE_I8 || ret->type == MONO_TYPE_U8) {
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EAX, val->dreg + 1);
			MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, X86_EDX, val->dreg + 2);
			return;
		}
	}

	MONO_EMIT_NEW_UNALU (cfg, OP_MOVE, cfg->ret->dreg, val->dreg);
}
/*
 * Allow tracing to work with this interface (with an optional argument)
 */
void*
mono_arch_instrument_prolog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;

	g_assert (MONO_ARCH_FRAME_ALIGNMENT >= 8);
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 8);

	/* if some args are passed in registers, we need to save them here */
	x86_push_reg (code, X86_EBP);

	if (cfg->compile_aot) {
		x86_push_imm (code, cfg->method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, cfg->method);
		x86_push_imm (code, cfg->method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT);

	return code;
}
enum {
	SAVE_NONE,
	SAVE_STRUCT,
	SAVE_EAX,
	SAVE_EAX_EDX,
	SAVE_FP
};
void*
mono_arch_instrument_epilog (MonoCompile *cfg, void *func, void *p, gboolean enable_arguments)
{
	guchar *code = p;
	int arg_size = 0, save_mode = SAVE_NONE;
	MonoMethod *method = cfg->method;

	switch (mini_type_get_underlying_type (cfg->generic_sharing_context, mono_method_signature (method)->ret)->type) {
	case MONO_TYPE_VOID:
		/* special case string .ctor icall */
		if (strcmp (".ctor", method->name) == 0 && method->klass == mono_defaults.string_class)
			save_mode = SAVE_EAX;
		else
			save_mode = SAVE_NONE;
		break;
	case MONO_TYPE_I8:
	case MONO_TYPE_U8:
		save_mode = SAVE_EAX_EDX;
		break;
	case MONO_TYPE_R4:
	case MONO_TYPE_R8:
		save_mode = SAVE_FP;
		break;
	case MONO_TYPE_GENERICINST:
		if (!mono_type_generic_inst_is_valuetype (mono_method_signature (method)->ret)) {
			save_mode = SAVE_EAX;
			break;
		}
		/* Fall through */
	case MONO_TYPE_VALUETYPE:
		save_mode = SAVE_STRUCT;
		break;
	default:
		save_mode = SAVE_EAX;
		break;
	}

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_push_reg (code, X86_EDX);
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EDX);
			x86_push_reg (code, X86_EAX);
			arg_size = 8;
		}
		break;
	case SAVE_EAX:
		x86_push_reg (code, X86_EAX);
		if (enable_arguments) {
			x86_push_reg (code, X86_EAX);
			arg_size = 4;
		}
		break;
	case SAVE_FP:
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		if (enable_arguments) {
			x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
			x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
			arg_size = 8;
		}
		break;
	case SAVE_STRUCT:
		if (enable_arguments) {
			x86_push_membase (code, X86_EBP, 8);
			arg_size = 4;
		}
		break;
	case SAVE_NONE:
	default:
		break;
	}

	if (cfg->compile_aot) {
		x86_push_imm (code, method);
		x86_mov_reg_imm (code, X86_EAX, func);
		x86_call_reg (code, X86_EAX);
	} else {
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_METHODCONST, method);
		x86_push_imm (code, method);
		mono_add_patch_info (cfg, code-cfg->native_code, MONO_PATCH_INFO_ABS, func);
		x86_call_code (code, 0);
	}
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, arg_size + 4);

	switch (save_mode) {
	case SAVE_EAX_EDX:
		x86_pop_reg (code, X86_EAX);
		x86_pop_reg (code, X86_EDX);
		break;
	case SAVE_EAX:
		x86_pop_reg (code, X86_EAX);
		break;
	case SAVE_FP:
		x86_fld_membase (code, X86_ESP, 0, TRUE);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		break;
	case SAVE_NONE:
	default:
		break;
	}

	return code;
}
#define EMIT_COND_BRANCH(ins,cond,sign) \
if (ins->flags & MONO_INST_BRLABEL) { \
	if (ins->inst_i0->inst_c0) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_i0->inst_c0, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_LABEL, ins->inst_i0); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_i0->inst_c1 - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
} else { \
	if (ins->inst_true_bb->native_offset) { \
		x86_branch (code, cond, cfg->native_code + ins->inst_true_bb->native_offset, sign); \
	} else { \
		mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_true_bb); \
		if ((cfg->opt & MONO_OPT_BRANCH) && \
		    x86_is_imm8 (ins->inst_true_bb->max_offset - cpos)) \
			x86_branch8 (code, cond, 0, sign); \
		else \
			x86_branch32 (code, cond, 0, sign); \
	} \
}
/*
 * Emit an exception if the condition fails and,
 * if possible, branch directly to the target.
 */
#define EMIT_COND_SYSTEM_EXCEPTION(cond,signed,exc_name) \
	do { \
		MonoInst *tins = mono_branch_optimize_exception_target (cfg, bb, exc_name); \
		if (tins == NULL) { \
			mono_add_patch_info (cfg, code - cfg->native_code, \
					MONO_PATCH_INFO_EXC, exc_name); \
			x86_branch32 (code, cond, 0, signed); \
		} else { \
			EMIT_COND_BRANCH (tins, cond, signed); \
		} \
	} while (0);
#define EMIT_FPCOMPARE(code) do { \
	x86_fcompp (code); \
	x86_fnstsw (code); \
} while (0);
static guint8*
emit_call (MonoCompile *cfg, guint8 *code, guint32 patch_type, gconstpointer data)
{
	mono_add_patch_info (cfg, code - cfg->native_code, patch_type, data);
	x86_call_code (code, 0);

	return code;
}

#define INST_IGNORES_CFLAGS(opcode) (!(((opcode) == OP_ADC) || ((opcode) == OP_IADC) || ((opcode) == OP_ADC_IMM) || ((opcode) == OP_IADC_IMM) || ((opcode) == OP_SBB) || ((opcode) == OP_ISBB) || ((opcode) == OP_SBB_IMM) || ((opcode) == OP_ISBB_IMM)))
/*
 * mono_peephole_pass_1:
 *
 * Perform peephole opts which should/can be performed before local regalloc
 */
void
mono_arch_peephole_pass_1 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		MonoInst *last_ins = ins->prev;

		switch (ins->opcode) {
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				/*
				 * X86_LEA is like ADD, but doesn't have the
				 * sreg1==dreg restriction.
				 */
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_SUB_IMM:
		case OP_ISUB_IMM:
			if ((ins->sreg1 < MONO_MAX_IREGS) && (ins->dreg >= MONO_MAX_IREGS)) {
				ins->opcode = OP_X86_LEA_MEMBASE;
				ins->inst_basereg = ins->sreg1;
				ins->inst_imm = -ins->inst_imm;
			} else if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		case OP_COMPARE_IMM:
		case OP_ICOMPARE_IMM:
			/* OP_COMPARE_IMM (reg, 0)
			 * -->
			 * OP_X86_TEST_NULL (reg)
			 */
			if (!ins->inst_imm)
				ins->opcode = OP_X86_TEST_NULL;
			break;
		case OP_X86_COMPARE_MEMBASE_IMM:
			/*
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_X86_COMPARE_MEMBASE_IMM offset(basereg), imm
			 * -->
			 * OP_STORE_MEMBASE_REG reg, offset(basereg)
			 * OP_COMPARE_IMM reg, imm
			 *
			 * Note: if imm = 0 then OP_COMPARE_IMM is replaced with OP_X86_TEST_NULL
			 */
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_COMPARE_IMM;
				ins->sreg1 = last_ins->sreg1;

				/* check if we can remove cmp reg,0 with test null */
				if (!ins->inst_imm)
					ins->opcode = OP_X86_TEST_NULL;
			}

			break;
		case OP_X86_PUSH_MEMBASE:
			if (last_ins && (last_ins->opcode == OP_STOREI4_MEMBASE_REG ||
			                 last_ins->opcode == OP_STORE_MEMBASE_REG) &&
			    ins->inst_basereg == last_ins->inst_destbasereg &&
			    ins->inst_offset == last_ins->inst_offset) {
				ins->opcode = OP_X86_PUSH;
				ins->sreg1 = last_ins->sreg1;
			}
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
void
mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *n;

	MONO_BB_FOR_EACH_INS_SAFE (bb, n, ins) {
		switch (ins->opcode) {
		case OP_ICONST:
			/* reg = 0 -> XOR (reg, reg) */
			/* XOR sets cflags on x86, so we can't always do it */
			if (ins->inst_c0 == 0 && (!ins->next || (ins->next && INST_IGNORES_CFLAGS (ins->next->opcode)))) {
				MonoInst *ins2;

				ins->opcode = OP_IXOR;
				ins->sreg1 = ins->dreg;
				ins->sreg2 = ins->dreg;

				/*
				 * Convert succeeding STORE_MEMBASE_IMM 0 ins to STORE_MEMBASE_REG
				 * since it takes 3 bytes instead of 7.
				 */
				for (ins2 = ins->next; ins2; ins2 = ins2->next) {
					if ((ins2->opcode == OP_STORE_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STORE_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI4_MEMBASE_IMM) && (ins2->inst_imm == 0)) {
						ins2->opcode = OP_STOREI4_MEMBASE_REG;
						ins2->sreg1 = ins->dreg;
					}
					else if ((ins2->opcode == OP_STOREI1_MEMBASE_IMM) || (ins2->opcode == OP_STOREI2_MEMBASE_IMM)) {
						/* Continue iteration */
					}
					else
						break;
				}
			}
			break;
		case OP_IADD_IMM:
		case OP_ADD_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_INC_REG;
			break;
		case OP_ISUB_IMM:
		case OP_SUB_IMM:
			if ((ins->inst_imm == 1) && (ins->dreg == ins->sreg1))
				ins->opcode = OP_X86_DEC_REG;
			break;
		}

		mono_peephole_ins (bb, ins);
	}
}
/*
 * mono_arch_lowering_pass:
 *
 * Converts complex opcodes into simpler ones so that each IR instruction
 * corresponds to one machine instruction.
 */
void
mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins, *next;

	/*
	 * FIXME: Need to add more instructions, but the current machine
	 * description can't model some parts of the composite instructions like
	 * cdq.
	 */
	MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) {
		switch (ins->opcode) {
		case OP_IREM_IMM:
		case OP_IDIV_IMM:
		case OP_IDIV_UN_IMM:
		case OP_IREM_UN_IMM:
			/*
			 * Keep the cases where we could generate optimized code, otherwise convert
			 * to the non-imm variant.
			 */
			if ((ins->opcode == OP_IREM_IMM) && mono_is_power_of_two (ins->inst_imm) >= 0)
				break;
			mono_decompose_op_imm (cfg, bb, ins);
			break;
		default:
			break;
		}
	}

	bb->max_vreg = cfg->next_vreg;
}
static const int
branch_cc_table [] = {
	X86_CC_EQ, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_NE, X86_CC_GE, X86_CC_GT, X86_CC_LE, X86_CC_LT,
	X86_CC_O, X86_CC_NO, X86_CC_C, X86_CC_NC
};

/* Maps CMP_... constants to X86_CC_... constants */
static const int
cc_table [] = {
	X86_CC_EQ, X86_CC_NE, X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT,
	X86_CC_LE, X86_CC_GE, X86_CC_LT, X86_CC_GT
};

static const int
cc_signed_table [] = {
	TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
	FALSE, FALSE, FALSE, FALSE
};
static unsigned char*
emit_float_to_int (MonoCompile *cfg, guchar *code, int dreg, int size, gboolean is_signed)
{
#define XMM_TEMP_REG 0
	/* This SSE2 optimization must not be done with OPT_SIMD in place, as it clobbers xmm0. */
	/* The xmm pass decomposes OP_FCONV_ ops anyway. */
	if (cfg->opt & MONO_OPT_SSE2 && size < 8 && !(cfg->opt & MONO_OPT_SIMD)) {
		/* optimize by assigning a local var for this use so we avoid
		 * the stack manipulations */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
		x86_movsd_reg_membase (code, XMM_TEMP_REG, X86_ESP, 0);
		x86_cvttsd2si (code, dreg, XMM_TEMP_REG);
		x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
		if (size == 1)
			x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
		else if (size == 2)
			x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
		return code;
	}
	x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
	x86_fnstcw_membase(code, X86_ESP, 0);
	x86_mov_reg_membase (code, dreg, X86_ESP, 0, 2);
	x86_alu_reg_imm (code, X86_OR, dreg, 0xc00);
	x86_mov_membase_reg (code, X86_ESP, 2, dreg, 2);
	x86_fldcw_membase (code, X86_ESP, 2);
	if (size == 8) {
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
		x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
		x86_pop_reg (code, dreg);
		/* FIXME: need the high register
		 * x86_pop_reg (code, dreg_high);
		 */
	} else {
		x86_push_reg (code, X86_EAX); // SP = SP - 4
		x86_fist_pop_membase (code, X86_ESP, 0, FALSE);
		x86_pop_reg (code, dreg);
	}
	x86_fldcw_membase (code, X86_ESP, 0);
	x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);

	if (size == 1)
		x86_widen_reg (code, dreg, dreg, is_signed, FALSE);
	else if (size == 2)
		x86_widen_reg (code, dreg, dreg, is_signed, TRUE);
	return code;
}
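/*
 * Why emit_float_to_int () above ORs 0xc00 into the x87 control word: bits
 * 10-11 are the rounding-control field, and setting both selects "round
 * toward zero", which matches the truncating semantics of the conv.*
 * opcodes and of a C cast; the FPU default is round-to-nearest. A plain C
 * sketch of the intended result, with a hypothetical function name:
 */
static G_GNUC_UNUSED gint32
example_truncating_conversion (double d)
{
	return (gint32)d;	/* 1.9 -> 1, -1.9 -> -1 */
}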
static unsigned char*
mono_emit_stack_alloc (guchar *code, MonoInst* tree)
{
	int sreg = tree->sreg1;
	int need_touch = FALSE;

#if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
	need_touch = TRUE;
#endif

	if (need_touch) {
		guint8* br[5];

		/*
		 * Under Windows:
		 * If requested stack size is larger than one page,
		 * perform stack-touch operation
		 */
		/*
		 * Generate stack probe code.
		 * Under Windows, it is necessary to allocate one page at a time,
		 * "touching" stack after each successful sub-allocation. This is
		 * because of the way stack growth is implemented - there is a
		 * guard page before the lowest stack page that is currently committed.
		 * Stack normally grows sequentially so OS traps access to the
		 * guard page and commits more pages when needed.
		 */
		x86_test_reg_imm (code, sreg, ~0xFFF);
		br[0] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);

		br[2] = code; /* loop */
		x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
		x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);

		/*
		 * By the end of the loop, sreg2 is smaller than 0x1000, so the init routine
		 * that follows only initializes the last part of the area.
		 */
		/* Same as the init code below with size==0x1000 */
		if (tree->flags & MONO_INST_INIT) {
			x86_push_reg (code, X86_EAX);
			x86_push_reg (code, X86_ECX);
			x86_push_reg (code, X86_EDI);
			x86_mov_reg_imm (code, X86_ECX, (0x1000 >> 2));
			x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);
			x86_lea_membase (code, X86_EDI, X86_ESP, 12);
			x86_cld (code);
			x86_prefix (code, X86_REP_PREFIX);
			x86_stosl (code);
			x86_pop_reg (code, X86_EDI);
			x86_pop_reg (code, X86_ECX);
			x86_pop_reg (code, X86_EAX);
		}

		x86_alu_reg_imm (code, X86_SUB, sreg, 0x1000);
		x86_alu_reg_imm (code, X86_CMP, sreg, 0x1000);
		br[3] = code; x86_branch8 (code, X86_CC_AE, 0, FALSE);
		x86_patch (br[3], br[2]);
		x86_test_reg_reg (code, sreg, sreg);
		br[4] = code; x86_branch8 (code, X86_CC_Z, 0, FALSE);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);

		br[1] = code; x86_jump8 (code, 0);

		x86_patch (br[0], code);
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, sreg);
		x86_patch (br[1], code);
		x86_patch (br[4], code);
	}
	else
		x86_alu_reg_reg (code, X86_SUB, X86_ESP, tree->sreg1);

	if (tree->flags & MONO_INST_INIT) {
		int offset = 0;
		if (tree->dreg != X86_EAX && sreg != X86_EAX) {
			x86_push_reg (code, X86_EAX);
			offset += 4;
		}
		if (tree->dreg != X86_ECX && sreg != X86_ECX) {
			x86_push_reg (code, X86_ECX);
			offset += 4;
		}
		if (tree->dreg != X86_EDI && sreg != X86_EDI) {
			x86_push_reg (code, X86_EDI);
			offset += 4;
		}

		x86_shift_reg_imm (code, X86_SHR, sreg, 2);
		if (sreg != X86_ECX)
			x86_mov_reg_reg (code, X86_ECX, sreg, 4);
		x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EAX);

		x86_lea_membase (code, X86_EDI, X86_ESP, offset);
		x86_cld (code);
		x86_prefix (code, X86_REP_PREFIX);
		x86_stosl (code);

		if (tree->dreg != X86_EDI && sreg != X86_EDI)
			x86_pop_reg (code, X86_EDI);
		if (tree->dreg != X86_ECX && sreg != X86_ECX)
			x86_pop_reg (code, X86_ECX);
		if (tree->dreg != X86_EAX && sreg != X86_EAX)
			x86_pop_reg (code, X86_EAX);
	}
	return code;
}
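/*
 * Plain C sketch of the page-at-a-time probe loop emitted above, assuming
 * a 4 KB page; PAGE_SIZE_X86 and the function name are hypothetical and
 * exist only for illustration.
 */
#define PAGE_SIZE_X86 0x1000
static G_GNUC_UNUSED void
example_stack_probe (volatile guint8 *sp, guint32 size)
{
	while (size >= PAGE_SIZE_X86) {
		sp -= PAGE_SIZE_X86;
		*sp = 0;	/* touch the page so the OS commits it and moves the guard page down */
		size -= PAGE_SIZE_X86;
	}
	sp -= size;	/* the remainder needs no probe: it is within one page */
}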
static guint8*
emit_move_return_value (MonoCompile *cfg, MonoInst *ins, guint8 *code)
{
	CallInfo *cinfo;
	int quad;

	/* Move return value to the target register */
	switch (ins->opcode) {
	case OP_CALL:
	case OP_CALL_REG:
	case OP_CALL_MEMBASE:
		if (ins->dreg != X86_EAX)
			x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
		break;
	case OP_VCALL:
	case OP_VCALL_REG:
	case OP_VCALL_MEMBASE:
	case OP_VCALL2:
	case OP_VCALL2_REG:
	case OP_VCALL2_MEMBASE:
		cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, ((MonoCallInst*)ins)->signature, FALSE);
		if (cinfo->ret.storage == ArgValuetypeInReg) {
			/* Pop the destination address from the stack */
			x86_pop_reg (code, X86_ECX);

			for (quad = 0; quad < 2; quad ++) {
				switch (cinfo->ret.pair_storage [quad]) {
				case ArgInIReg:
					g_assert (cinfo->ret.pair_regs [quad] != X86_ECX);
					x86_mov_membase_reg (code, X86_ECX, (quad * sizeof (gpointer)), cinfo->ret.pair_regs [quad], sizeof (gpointer));
					break;
				case ArgNone:
					break;
				default:
					g_assert_not_reached ();
				}
			}
		}
		break;
	default:
		break;
	}

	return code;
}
/*
 * mono_x86_emit_tls_get:
 * @code: buffer to store code to
 * @dreg: hard register where to place the result
 * @tls_offset: offset info
 *
 * mono_x86_emit_tls_get emits in @code the native code that puts in
 * the dreg register the item in the thread local storage identified
 * by tls_offset.
 *
 * Returns: a pointer to the end of the stored code
 */
guint8*
mono_x86_emit_tls_get (guint8* code, int dreg, int tls_offset)
{
#ifdef PLATFORM_WIN32
	/*
	 * See the Under the Hood article in the May 1996 issue of Microsoft Systems
	 * Journal and/or a disassembly of the TlsGet () function.
	 */
	g_assert (tls_offset < 64);
	x86_prefix (code, X86_FS_PREFIX);
	x86_mov_reg_mem (code, dreg, 0x18, 4);
	/* Dunno what this does but TlsGetValue () contains it */
	x86_alu_membase_imm (code, X86_AND, dreg, 0x34, 0);
	x86_mov_reg_membase (code, dreg, dreg, 3600 + (tls_offset * 4), 4);
#else
	if (optimize_for_xen) {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, 0, 4);
		x86_mov_reg_membase (code, dreg, dreg, tls_offset, 4);
	} else {
		x86_prefix (code, X86_GS_PREFIX);
		x86_mov_reg_mem (code, dreg, tls_offset, 4);
	}
#endif
	return code;
}
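/*
 * Hypothetical usage sketch: on Linux in the non-Xen case, the call below
 * makes mono_x86_emit_tls_get () emit a single mov of %gs:tls_offset into
 * the destination register, i.e. one instruction per TLS read. The offset 0
 * is a placeholder, not a real TLS slot.
 */
static G_GNUC_UNUSED guint8*
example_emit_tls_load (guint8 *code)
{
	return mono_x86_emit_tls_get (code, X86_EAX, 0 /* placeholder tls_offset */);
}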
/*
 * emit_load_volatile_arguments:
 *
 * Load volatile arguments from the stack to the original input registers.
 * Required before a tail call.
 */
static guint8*
emit_load_volatile_arguments (MonoCompile *cfg, guint8 *code)
{
	MonoMethod *method = cfg->method;
	MonoMethodSignature *sig;
	MonoInst *inst;
	CallInfo *cinfo;
	guint32 i;

	/* FIXME: Generate intermediate code instead */

	sig = mono_method_signature (method);

	cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);

	/* This is the opposite of the code in emit_prolog */

	for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
		ArgInfo *ainfo = cinfo->args + i;
		MonoType *arg_type;
		inst = cfg->args [i];

		if (sig->hasthis && (i == 0))
			arg_type = &mono_defaults.object_class->byval_arg;
		else
			arg_type = sig->params [i - sig->hasthis];

		/*
		 * On x86, the arguments are either in their original stack locations, or in
		 * global regs.
		 */
		if (inst->opcode == OP_REGVAR) {
			g_assert (ainfo->storage == ArgOnStack);

			x86_mov_membase_reg (code, X86_EBP, inst->inst_offset, inst->dreg, 4);
		}
	}

	return code;
}
#define REAL_PRINT_REG(text,reg) \
mono_assert (reg >= 0); \
x86_push_reg (code, X86_EAX); \
x86_push_reg (code, X86_EDX); \
x86_push_reg (code, X86_ECX); \
x86_push_reg (code, reg); \
x86_push_imm (code, reg); \
x86_push_imm (code, text " %d %p\n"); \
x86_mov_reg_imm (code, X86_EAX, printf); \
x86_call_reg (code, X86_EAX); \
x86_alu_reg_imm (code, X86_ADD, X86_ESP, 3*4); \
x86_pop_reg (code, X86_ECX); \
x86_pop_reg (code, X86_EDX); \
x86_pop_reg (code, X86_EAX);
/* benchmark and set based on cpu */
#define LOOP_ALIGNMENT 8
#define bb_is_loop_start(bb) ((bb)->loop_body_start && (bb)->nesting)

#ifndef DISABLE_JIT
void
mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
{
	MonoInst *ins;
	MonoCallInst *call;
	guint offset;
	guint8 *code = cfg->native_code + cfg->code_len;
	int max_len, cpos;

	if (cfg->opt & MONO_OPT_LOOP) {
		int pad, align = LOOP_ALIGNMENT;
		/* set alignment depending on cpu */
		if (bb_is_loop_start (bb) && (pad = (cfg->code_len & (align - 1)))) {
			pad = align - pad;
			/*g_print ("adding %d pad at %x to loop in %s\n", pad, cfg->code_len, cfg->method->name);*/
			x86_padding (code, pad);
			cfg->code_len += pad;
			bb->native_offset = cfg->code_len;
		}
	}

	if (cfg->verbose_level > 2)
		g_print ("Basic block %d starting at offset 0x%x\n", bb->block_num, bb->native_offset);

	cpos = bb->max_offset;

	if (cfg->prof_options & MONO_PROFILE_COVERAGE) {
		MonoProfileCoverageInfo *cov = cfg->coverage_info;
		g_assert (!cfg->compile_aot);
		cpos += 6;

		cov->data [bb->dfn].cil_code = bb->cil_code;
		/* this is not thread safe, but good enough */
		x86_inc_mem (code, &cov->data [bb->dfn].count);
	}

	offset = code - cfg->native_code;

	mono_debug_open_block (cfg, bb, offset);

	MONO_BB_FOR_EACH_INS (bb, ins) {
		offset = code - cfg->native_code;

		max_len = ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];

		if (G_UNLIKELY (offset > (cfg->code_size - max_len - 16))) {
			cfg->code_size *= 2;
			cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
			code = cfg->native_code + offset;
			mono_jit_stats.code_reallocs++;
		}

		if (cfg->debug_info)
			mono_debug_record_line_number (cfg, ins, offset);
2073 switch (ins->opcode) {
2074 case OP_BIGMUL:
2075 x86_mul_reg (code, ins->sreg2, TRUE);
2076 break;
2077 case OP_BIGMUL_UN:
2078 x86_mul_reg (code, ins->sreg2, FALSE);
2079 break;
2080 case OP_X86_SETEQ_MEMBASE:
2081 case OP_X86_SETNE_MEMBASE:
2082 x86_set_membase (code, ins->opcode == OP_X86_SETEQ_MEMBASE ? X86_CC_EQ : X86_CC_NE,
2083 ins->inst_basereg, ins->inst_offset, TRUE);
2084 break;
2085 case OP_STOREI1_MEMBASE_IMM:
2086 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 1);
2087 break;
2088 case OP_STOREI2_MEMBASE_IMM:
2089 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 2);
2090 break;
2091 case OP_STORE_MEMBASE_IMM:
2092 case OP_STOREI4_MEMBASE_IMM:
2093 x86_mov_membase_imm (code, ins->inst_destbasereg, ins->inst_offset, ins->inst_imm, 4);
2094 break;
2095 case OP_STOREI1_MEMBASE_REG:
2096 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 1);
2097 break;
2098 case OP_STOREI2_MEMBASE_REG:
2099 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 2);
2100 break;
2101 case OP_STORE_MEMBASE_REG:
2102 case OP_STOREI4_MEMBASE_REG:
2103 x86_mov_membase_reg (code, ins->inst_destbasereg, ins->inst_offset, ins->sreg1, 4);
2104 break;
2105 case OP_STORE_MEM_IMM:
2106 x86_mov_mem_imm (code, ins->inst_p0, ins->inst_c0, 4);
2107 break;
2108 case OP_LOADU4_MEM:
2109 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2110 break;
2111 case OP_LOAD_MEM:
2112 case OP_LOADI4_MEM:
2113 /* These are created by the cprop pass so they use inst_imm as the source */
2114 x86_mov_reg_mem (code, ins->dreg, ins->inst_imm, 4);
2115 break;
2116 case OP_LOADU1_MEM:
2117 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, FALSE);
2118 break;
2119 case OP_LOADU2_MEM:
2120 x86_widen_mem (code, ins->dreg, ins->inst_imm, FALSE, TRUE);
2121 break;
2122 case OP_LOAD_MEMBASE:
2123 case OP_LOADI4_MEMBASE:
2124 case OP_LOADU4_MEMBASE:
2125 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, 4);
2126 break;
2127 case OP_LOADU1_MEMBASE:
2128 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, FALSE);
2129 break;
2130 case OP_LOADI1_MEMBASE:
2131 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, FALSE);
2132 break;
2133 case OP_LOADU2_MEMBASE:
2134 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, FALSE, TRUE);
2135 break;
2136 case OP_LOADI2_MEMBASE:
2137 x86_widen_membase (code, ins->dreg, ins->inst_basereg, ins->inst_offset, TRUE, TRUE);
2138 break;
2139 case OP_ICONV_TO_I1:
2140 case OP_SEXT_I1:
2141 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, FALSE);
2142 break;
2143 case OP_ICONV_TO_I2:
2144 case OP_SEXT_I2:
2145 x86_widen_reg (code, ins->dreg, ins->sreg1, TRUE, TRUE);
2146 break;
2147 case OP_ICONV_TO_U1:
2148 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, FALSE);
2149 break;
2150 case OP_ICONV_TO_U2:
2151 x86_widen_reg (code, ins->dreg, ins->sreg1, FALSE, TRUE);
2152 break;
2153 case OP_COMPARE:
2154 case OP_ICOMPARE:
2155 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
2156 break;
2157 case OP_COMPARE_IMM:
2158 case OP_ICOMPARE_IMM:
2159 x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
2160 break;
2161 case OP_X86_COMPARE_MEMBASE_REG:
2162 x86_alu_membase_reg (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2163 break;
2164 case OP_X86_COMPARE_MEMBASE_IMM:
2165 x86_alu_membase_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2166 break;
2167 case OP_X86_COMPARE_MEMBASE8_IMM:
2168 x86_alu_membase8_imm (code, X86_CMP, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2169 break;
2170 case OP_X86_COMPARE_REG_MEMBASE:
2171 x86_alu_reg_membase (code, X86_CMP, ins->sreg1, ins->sreg2, ins->inst_offset);
2172 break;
2173 case OP_X86_COMPARE_MEM_IMM:
2174 x86_alu_mem_imm (code, X86_CMP, ins->inst_offset, ins->inst_imm);
2175 break;
2176 case OP_X86_TEST_NULL:
2177 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
2178 break;
2179 case OP_X86_ADD_MEMBASE_IMM:
2180 x86_alu_membase_imm (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2181 break;
2182 case OP_X86_ADD_REG_MEMBASE:
2183 x86_alu_reg_membase (code, X86_ADD, ins->sreg1, ins->sreg2, ins->inst_offset);
2184 break;
2185 case OP_X86_SUB_MEMBASE_IMM:
2186 x86_alu_membase_imm (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2187 break;
2188 case OP_X86_SUB_REG_MEMBASE:
2189 x86_alu_reg_membase (code, X86_SUB, ins->sreg1, ins->sreg2, ins->inst_offset);
2190 break;
2191 case OP_X86_AND_MEMBASE_IMM:
2192 x86_alu_membase_imm (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2193 break;
2194 case OP_X86_OR_MEMBASE_IMM:
2195 x86_alu_membase_imm (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2196 break;
2197 case OP_X86_XOR_MEMBASE_IMM:
2198 x86_alu_membase_imm (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->inst_imm);
2199 break;
2200 case OP_X86_ADD_MEMBASE_REG:
2201 x86_alu_membase_reg (code, X86_ADD, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2202 break;
2203 case OP_X86_SUB_MEMBASE_REG:
2204 x86_alu_membase_reg (code, X86_SUB, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2205 break;
2206 case OP_X86_AND_MEMBASE_REG:
2207 x86_alu_membase_reg (code, X86_AND, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2208 break;
2209 case OP_X86_OR_MEMBASE_REG:
2210 x86_alu_membase_reg (code, X86_OR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2211 break;
2212 case OP_X86_XOR_MEMBASE_REG:
2213 x86_alu_membase_reg (code, X86_XOR, ins->inst_basereg, ins->inst_offset, ins->sreg2);
2214 break;
2215 case OP_X86_INC_MEMBASE:
2216 x86_inc_membase (code, ins->inst_basereg, ins->inst_offset);
2217 break;
2218 case OP_X86_INC_REG:
2219 x86_inc_reg (code, ins->dreg);
2220 break;
2221 case OP_X86_DEC_MEMBASE:
2222 x86_dec_membase (code, ins->inst_basereg, ins->inst_offset);
2223 break;
2224 case OP_X86_DEC_REG:
2225 x86_dec_reg (code, ins->dreg);
2226 break;
2227 case OP_X86_MUL_REG_MEMBASE:
2228 x86_imul_reg_membase (code, ins->sreg1, ins->sreg2, ins->inst_offset);
2229 break;
2230 case OP_X86_AND_REG_MEMBASE:
2231 x86_alu_reg_membase (code, X86_AND, ins->sreg1, ins->sreg2, ins->inst_offset);
2232 break;
2233 case OP_X86_OR_REG_MEMBASE:
2234 x86_alu_reg_membase (code, X86_OR, ins->sreg1, ins->sreg2, ins->inst_offset);
2235 break;
2236 case OP_X86_XOR_REG_MEMBASE:
2237 x86_alu_reg_membase (code, X86_XOR, ins->sreg1, ins->sreg2, ins->inst_offset);
2238 break;
2239 case OP_BREAK:
2240 x86_breakpoint (code);
2241 break;
2242 case OP_RELAXED_NOP:
2243 x86_prefix (code, X86_REP_PREFIX);
2244 x86_nop (code);
2245 break;
2246 case OP_HARD_NOP:
2247 x86_nop (code);
2248 break;
2249 case OP_NOP:
2250 case OP_DUMMY_USE:
2251 case OP_DUMMY_STORE:
2252 case OP_NOT_REACHED:
2253 case OP_NOT_NULL:
2254 break;
2255 case OP_ADDCC:
2256 case OP_IADDCC:
2257 case OP_IADD:
2258 x86_alu_reg_reg (code, X86_ADD, ins->sreg1, ins->sreg2);
2259 break;
2260 case OP_ADC:
2261 case OP_IADC:
2262 x86_alu_reg_reg (code, X86_ADC, ins->sreg1, ins->sreg2);
2263 break;
2264 case OP_ADDCC_IMM:
2265 case OP_ADD_IMM:
2266 case OP_IADD_IMM:
2267 x86_alu_reg_imm (code, X86_ADD, ins->dreg, ins->inst_imm);
2268 break;
2269 case OP_ADC_IMM:
2270 case OP_IADC_IMM:
2271 x86_alu_reg_imm (code, X86_ADC, ins->dreg, ins->inst_imm);
2272 break;
2273 case OP_SUBCC:
2274 case OP_ISUBCC:
2275 case OP_ISUB:
2276 x86_alu_reg_reg (code, X86_SUB, ins->sreg1, ins->sreg2);
2277 break;
2278 case OP_SBB:
2279 case OP_ISBB:
2280 x86_alu_reg_reg (code, X86_SBB, ins->sreg1, ins->sreg2);
2281 break;
2282 case OP_SUBCC_IMM:
2283 case OP_SUB_IMM:
2284 case OP_ISUB_IMM:
2285 x86_alu_reg_imm (code, X86_SUB, ins->dreg, ins->inst_imm);
2286 break;
2287 case OP_SBB_IMM:
2288 case OP_ISBB_IMM:
2289 x86_alu_reg_imm (code, X86_SBB, ins->dreg, ins->inst_imm);
2290 break;
2291 case OP_IAND:
2292 x86_alu_reg_reg (code, X86_AND, ins->sreg1, ins->sreg2);
2293 break;
2294 case OP_AND_IMM:
2295 case OP_IAND_IMM:
2296 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_imm);
2297 break;
2298 case OP_IDIV:
2299 case OP_IREM:
2301 * The code is the same for div/rem; the allocator will allocate dreg
2302 * to EAX/EDX as appropriate.
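/*
 * For reference (a summary of the hardware semantics, not extra emitted
 * code): cdq sign-extends EAX into EDX:EAX, and idiv r/m32 then divides
 * EDX:EAX by its operand, leaving the quotient in EAX and the remainder
 * in EDX. This is why the sreg2 == EDX case below has to divide by a
 * stack copy of the operand.
 */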
2304 if (ins->sreg2 == X86_EDX) {
2305 /* cdq clobbers this */
2306 x86_push_reg (code, ins->sreg2);
2307 x86_cdq (code);
2308 x86_div_membase (code, X86_ESP, 0, TRUE);
2309 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2310 } else {
2311 x86_cdq (code);
2312 x86_div_reg (code, ins->sreg2, TRUE);
2314 break;
2315 case OP_IDIV_UN:
2316 case OP_IREM_UN:
2317 if (ins->sreg2 == X86_EDX) {
2318 x86_push_reg (code, ins->sreg2);
2319 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2320 x86_div_membase (code, X86_ESP, 0, FALSE);
2321 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2322 } else {
2323 x86_alu_reg_reg (code, X86_XOR, X86_EDX, X86_EDX);
2324 x86_div_reg (code, ins->sreg2, FALSE);
2326 break;
2327 case OP_DIV_IMM:
2328 x86_mov_reg_imm (code, ins->sreg2, ins->inst_imm);
2329 x86_cdq (code);
2330 x86_div_reg (code, ins->sreg2, TRUE);
2331 break;
2332 case OP_IREM_IMM: {
2333 int power = mono_is_power_of_two (ins->inst_imm);
2335 g_assert (ins->sreg1 == X86_EAX);
2336 g_assert (ins->dreg == X86_EAX);
2337 g_assert (power >= 0);
2339 if (power == 1) {
2340 /* Based on http://compilers.iecc.com/comparch/article/93-04-079 */
2341 x86_cdq (code);
2342 x86_alu_reg_imm (code, X86_AND, X86_EAX, 1);
2344 * If the dividend is >= 0, this does nothing. If it is negative, it
2345 * transforms %eax=0 into %eax=0, and %eax=1 into %eax=-1.
2347 x86_alu_reg_reg (code, X86_XOR, X86_EAX, X86_EDX);
2348 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
2349 } else {
2350 /* Based on gcc code */
2352 /* Add compensation for negative dividends */
2353 x86_cdq (code);
2354 x86_shift_reg_imm (code, X86_SHR, X86_EDX, 32 - power);
2355 x86_alu_reg_reg (code, X86_ADD, X86_EAX, X86_EDX);
2356 /* Compute remainder */
2357 x86_alu_reg_imm (code, X86_AND, X86_EAX, (1 << power) - 1);
2358 /* Remove compensation */
2359 x86_alu_reg_reg (code, X86_SUB, X86_EAX, X86_EDX);
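/*
 * The general case above, sketched as plain C (illustrative only; it
 * mirrors the instructions emitted above, with 'eax' as the dividend):
 *
 *   mask = (1 << power) - 1;
 *   comp = eax < 0 ? mask : 0;           // cdq + shr edx, 32 - power
 *   eax  = ((eax + comp) & mask) - comp;
 *
 * E.g. eax = -5, power = 2: comp = 3, (-5 + 3) & 3 = 2, 2 - 3 = -1,
 * matching C's -5 % 4 == -1.
 */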
2361 break;
2363 case OP_IOR:
2364 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
2365 break;
2366 case OP_OR_IMM:
2367 case OP_IOR_IMM:
2368 x86_alu_reg_imm (code, X86_OR, ins->sreg1, ins->inst_imm);
2369 break;
2370 case OP_IXOR:
2371 x86_alu_reg_reg (code, X86_XOR, ins->sreg1, ins->sreg2);
2372 break;
2373 case OP_XOR_IMM:
2374 case OP_IXOR_IMM:
2375 x86_alu_reg_imm (code, X86_XOR, ins->sreg1, ins->inst_imm);
2376 break;
2377 case OP_ISHL:
2378 g_assert (ins->sreg2 == X86_ECX);
2379 x86_shift_reg (code, X86_SHL, ins->dreg);
2380 break;
2381 case OP_ISHR:
2382 g_assert (ins->sreg2 == X86_ECX);
2383 x86_shift_reg (code, X86_SAR, ins->dreg);
2384 break;
2385 case OP_SHR_IMM:
2386 case OP_ISHR_IMM:
2387 x86_shift_reg_imm (code, X86_SAR, ins->dreg, ins->inst_imm);
2388 break;
2389 case OP_SHR_UN_IMM:
2390 case OP_ISHR_UN_IMM:
2391 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_imm);
2392 break;
2393 case OP_ISHR_UN:
2394 g_assert (ins->sreg2 == X86_ECX);
2395 x86_shift_reg (code, X86_SHR, ins->dreg);
2396 break;
2397 case OP_SHL_IMM:
2398 case OP_ISHL_IMM:
2399 x86_shift_reg_imm (code, X86_SHL, ins->dreg, ins->inst_imm);
2400 break;
2401 case OP_LSHL: {
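/*
 * 64-bit shift left with the count in ECX (sreg1 = low word,
 * backend.reg3 = high word). shld feeds bits from the low word into the
 * high word for counts below 32. For counts 32-63 (bit 5 of ECX set),
 * the shl emitted below has already shifted the low word by
 * count & 31 = count - 32 (the hardware masks the count), so the fixup
 * path just moves it into the high word and clears the low word.
 */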
2402 guint8 *jump_to_end;
2404 /* handle shifts below 32 bits */
2405 x86_shld_reg (code, ins->backend.reg3, ins->sreg1);
2406 x86_shift_reg (code, X86_SHL, ins->sreg1);
2408 x86_test_reg_imm (code, X86_ECX, 32);
2409 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
2411 /* handle shifts over 31 bits */
2412 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2413 x86_clear_reg (code, ins->sreg1);
2415 x86_patch (jump_to_end, code);
2417 break;
2418 case OP_LSHR: {
2419 guint8 *jump_to_end;
2421 /* handle shifts below 32 bits */
2422 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2423 x86_shift_reg (code, X86_SAR, ins->backend.reg3);
2425 x86_test_reg_imm (code, X86_ECX, 32);
2426 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2428 /* handle shifts over 31 bits */
2429 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2430 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 31);
2432 x86_patch (jump_to_end, code);
2434 break;
2435 case OP_LSHR_UN: {
2436 guint8 *jump_to_end;
2438 /* handle shifts below 32 bits */
2439 x86_shrd_reg (code, ins->sreg1, ins->backend.reg3);
2440 x86_shift_reg (code, X86_SHR, ins->backend.reg3);
2442 x86_test_reg_imm (code, X86_ECX, 32);
2443 jump_to_end = code; x86_branch8 (code, X86_CC_EQ, 0, FALSE);
2445 /* handle shifts over 31 bits */
2446 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2447 x86_clear_reg (code, ins->backend.reg3);
2449 x86_patch (jump_to_end, code);
2451 break;
2452 case OP_LSHL_IMM:
2453 if (ins->inst_imm >= 32) {
2454 x86_mov_reg_reg (code, ins->backend.reg3, ins->sreg1, 4);
2455 x86_clear_reg (code, ins->sreg1);
2456 x86_shift_reg_imm (code, X86_SHL, ins->backend.reg3, ins->inst_imm - 32);
2457 } else {
2458 x86_shld_reg_imm (code, ins->backend.reg3, ins->sreg1, ins->inst_imm);
2459 x86_shift_reg_imm (code, X86_SHL, ins->sreg1, ins->inst_imm);
2461 break;
2462 case OP_LSHR_IMM:
2463 if (ins->inst_imm >= 32) {
2464 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2465 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, 0x1f);
2466 x86_shift_reg_imm (code, X86_SAR, ins->sreg1, ins->inst_imm - 32);
2467 } else {
2468 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2469 x86_shift_reg_imm (code, X86_SAR, ins->backend.reg3, ins->inst_imm);
2471 break;
2472 case OP_LSHR_UN_IMM:
2473 if (ins->inst_imm >= 32) {
2474 x86_mov_reg_reg (code, ins->sreg1, ins->backend.reg3, 4);
2475 x86_clear_reg (code, ins->backend.reg3);
2476 x86_shift_reg_imm (code, X86_SHR, ins->sreg1, ins->inst_imm - 32);
2477 } else {
2478 x86_shrd_reg_imm (code, ins->sreg1, ins->backend.reg3, ins->inst_imm);
2479 x86_shift_reg_imm (code, X86_SHR, ins->backend.reg3, ins->inst_imm);
2481 break;
2482 case OP_INOT:
2483 x86_not_reg (code, ins->sreg1);
2484 break;
2485 case OP_INEG:
2486 x86_neg_reg (code, ins->sreg1);
2487 break;
2489 case OP_IMUL:
2490 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2491 break;
2492 case OP_MUL_IMM:
2493 case OP_IMUL_IMM:
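/*
 * Strength reduction: LEA with a scaled index computes r + r * 2^k for
 * k = 1..3, so constants of the form (1 + 2^k) * 2^m, or products of
 * two such factors (25 = 5 * 5, 100 = 5 * 5 * 4), can be built from one
 * or two LEAs plus an ADD or SHL instead of a full IMUL.
 */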
2494 switch (ins->inst_imm) {
2495 case 2:
2496 /* MOV r1, r2 */
2497 /* ADD r1, r1 */
2498 if (ins->dreg != ins->sreg1)
2499 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2500 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2501 break;
2502 case 3:
2503 /* LEA r1, [r2 + r2*2] */
2504 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2505 break;
2506 case 5:
2507 /* LEA r1, [r2 + r2*4] */
2508 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2509 break;
2510 case 6:
2511 /* LEA r1, [r2 + r2*2] */
2512 /* ADD r1, r1 */
2513 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2514 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2515 break;
2516 case 9:
2517 /* LEA r1, [r2 + r2*8] */
2518 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 3);
2519 break;
2520 case 10:
2521 /* LEA r1, [r2 + r2*4] */
2522 /* ADD r1, r1 */
2523 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2524 x86_alu_reg_reg (code, X86_ADD, ins->dreg, ins->dreg);
2525 break;
2526 case 12:
2527 /* LEA r1, [r2 + r2*2] */
2528 /* SHL r1, 2 */
2529 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 1);
2530 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2531 break;
2532 case 25:
2533 /* LEA r1, [r2 + r2*4] */
2534 /* LEA r1, [r1 + r1*4] */
2535 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2536 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2537 break;
2538 case 100:
2539 /* LEA r1, [r2 + r2*4] */
2540 /* SHL r1, 2 */
2541 /* LEA r1, [r1 + r1*4] */
2542 x86_lea_memindex (code, ins->dreg, ins->sreg1, 0, ins->sreg1, 2);
2543 x86_shift_reg_imm (code, X86_SHL, ins->dreg, 2);
2544 x86_lea_memindex (code, ins->dreg, ins->dreg, 0, ins->dreg, 2);
2545 break;
2546 default:
2547 x86_imul_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_imm);
2548 break;
2550 break;
2551 case OP_IMUL_OVF:
2552 x86_imul_reg_reg (code, ins->sreg1, ins->sreg2);
2553 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2554 break;
2555 case OP_IMUL_OVF_UN: {
2556 /* the mul operation and the exception check should most likely be split */
2557 int non_eax_reg, saved_eax = FALSE, saved_edx = FALSE;
2558 /*g_assert (ins->sreg2 == X86_EAX);
2559 g_assert (ins->dreg == X86_EAX);*/
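/*
 * Background for the shuffling below: mul r/m32 always multiplies by
 * EAX and writes the 64-bit product to EDX:EAX, setting CF/OF exactly
 * when the high half (EDX) is non-zero, which is the unsigned overflow
 * condition tested afterwards. So one operand has to be moved into EAX,
 * and the previous contents of EAX/EDX preserved where still live.
 */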
2560 if (ins->sreg2 == X86_EAX) {
2561 non_eax_reg = ins->sreg1;
2562 } else if (ins->sreg1 == X86_EAX) {
2563 non_eax_reg = ins->sreg2;
2564 } else {
2565 /* no need to save since we're going to store to it anyway */
2566 if (ins->dreg != X86_EAX) {
2567 saved_eax = TRUE;
2568 x86_push_reg (code, X86_EAX);
2570 x86_mov_reg_reg (code, X86_EAX, ins->sreg1, 4);
2571 non_eax_reg = ins->sreg2;
2573 if (ins->dreg == X86_EDX) {
2574 if (!saved_eax) {
2575 saved_eax = TRUE;
2576 x86_push_reg (code, X86_EAX);
2578 } else if (ins->dreg != X86_EAX) {
2579 saved_edx = TRUE;
2580 x86_push_reg (code, X86_EDX);
2582 x86_mul_reg (code, non_eax_reg, FALSE);
2583 /* save before the check since pop and mov don't change the flags */
2584 if (ins->dreg != X86_EAX)
2585 x86_mov_reg_reg (code, ins->dreg, X86_EAX, 4);
2586 if (saved_edx)
2587 x86_pop_reg (code, X86_EDX);
2588 if (saved_eax)
2589 x86_pop_reg (code, X86_EAX);
2590 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_O, FALSE, "OverflowException");
2591 break;
2593 case OP_ICONST:
2594 x86_mov_reg_imm (code, ins->dreg, ins->inst_c0);
2595 break;
2596 case OP_AOTCONST:
2597 g_assert_not_reached ();
2598 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2599 x86_mov_reg_imm (code, ins->dreg, 0);
2600 break;
2601 case OP_JUMP_TABLE:
2602 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_i1, ins->inst_p0);
2603 x86_mov_reg_imm (code, ins->dreg, 0);
2604 break;
2605 case OP_LOAD_GOTADDR:
2606 x86_call_imm (code, 0);
2608 * The patch needs to point to the pop, since the GOT offset needs
2609 * to be added to that address.
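/*
 * The emitted sequence is the usual position-independent "get PC"
 * thunk (a sketch of what the calls above and below produce):
 *
 *   call next      ; pushes the address of the pop
 *   next: pop dreg ; dreg = EIP of the pop
 *   add dreg, <GOT offset, patched in later>
 */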
2611 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_GOT_OFFSET, NULL);
2612 x86_pop_reg (code, ins->dreg);
2613 x86_alu_reg_imm (code, X86_ADD, ins->dreg, 0xf0f0f0f0);
2614 break;
2615 case OP_GOT_ENTRY:
2616 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2617 x86_mov_reg_membase (code, ins->dreg, ins->inst_basereg, 0xf0f0f0f0, 4);
2618 break;
2619 case OP_X86_PUSH_GOT_ENTRY:
2620 mono_add_patch_info (cfg, offset, (MonoJumpInfoType)ins->inst_right->inst_i1, ins->inst_right->inst_p0);
2621 x86_push_membase (code, ins->inst_basereg, 0xf0f0f0f0);
2622 break;
2623 case OP_MOVE:
2624 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
2625 break;
2626 case OP_JMP: {
2628 * Note: this 'frame destruction' logic is useful for tail calls, too.
2629 * Keep in sync with the code in emit_epilog.
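/*
 * Tail-call sequence: reload volatile arguments, restore the
 * callee-saved registers from their frame slots, tear the frame down
 * with leave, then jmp (not call) to the target so that it returns
 * directly to our caller.
 */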
2631 int pos = 0;
2633 /* FIXME: no tracing support... */
2634 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
2635 code = mono_arch_instrument_epilog (cfg, mono_profiler_method_leave, code, FALSE);
2636 /* reset offset to make max_len work */
2637 offset = code - cfg->native_code;
2639 g_assert (!cfg->method->save_lmf);
2641 code = emit_load_volatile_arguments (cfg, code);
2643 if (cfg->used_int_regs & (1 << X86_EBX))
2644 pos -= 4;
2645 if (cfg->used_int_regs & (1 << X86_EDI))
2646 pos -= 4;
2647 if (cfg->used_int_regs & (1 << X86_ESI))
2648 pos -= 4;
2649 if (pos)
2650 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
2652 if (cfg->used_int_regs & (1 << X86_ESI))
2653 x86_pop_reg (code, X86_ESI);
2654 if (cfg->used_int_regs & (1 << X86_EDI))
2655 x86_pop_reg (code, X86_EDI);
2656 if (cfg->used_int_regs & (1 << X86_EBX))
2657 x86_pop_reg (code, X86_EBX);
2659 /* restore ESP/EBP */
2660 x86_leave (code);
2661 offset = code - cfg->native_code;
2662 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_METHOD_JUMP, ins->inst_p0);
2663 x86_jump32 (code, 0);
2665 cfg->disable_aot = TRUE;
2666 break;
2668 case OP_CHECK_THIS:
2669 /* ensure ins->sreg1 is not NULL
2670 * note that cmp DWORD PTR [eax], eax is one byte shorter than
2671 * cmp DWORD PTR [eax], 0
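* (encodings for reference: "cmp [eax], eax" is 39 00, two bytes, while
* "cmp DWORD PTR [eax], 0" is 83 38 00, three bytes)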
2673 x86_alu_membase_reg (code, X86_CMP, ins->sreg1, 0, ins->sreg1);
2674 break;
2675 case OP_ARGLIST: {
2676 int hreg = ins->sreg1 == X86_EAX? X86_ECX: X86_EAX;
2677 x86_push_reg (code, hreg);
2678 x86_lea_membase (code, hreg, X86_EBP, cfg->sig_cookie);
2679 x86_mov_membase_reg (code, ins->sreg1, 0, hreg, 4);
2680 x86_pop_reg (code, hreg);
2681 break;
2683 case OP_FCALL:
2684 case OP_LCALL:
2685 case OP_VCALL:
2686 case OP_VCALL2:
2687 case OP_VOIDCALL:
2688 case OP_CALL:
2689 call = (MonoCallInst*)ins;
2690 if (ins->flags & MONO_INST_HAS_METHOD)
2691 code = emit_call (cfg, code, MONO_PATCH_INFO_METHOD, call->method);
2692 else
2693 code = emit_call (cfg, code, MONO_PATCH_INFO_ABS, call->fptr);
2694 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2695 /* a pop is one byte, while an add reg, imm is 3. So if there are 4 or 8
2696 * bytes to pop, we want to use pops. GCC does this (note it won't happen
2697 * for P4 or i686 because gcc will avoid using pop/push at all), but we aren't
2698 * smart enough to do that optimization yet.
2700 * It turns out that on my P4, doing two pops for 8 bytes on the stack makes
2701 * the mcs bootstrap slow down. However, doing 1 pop for 4 bytes creates a small
2702 * but noticeable speedup (most likely from locality benefits). People with other processors should
2703 * check on theirs to see what happens.
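* (encodings for reference: "pop ecx" is the single byte 59, while
* "add esp, 4" is 83 C4 04, three bytes)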
2705 if (call->stack_usage == 4) {
2706 /* we want to use registers that won't get used soon, so use
2707 * ecx, as eax will get allocated first. edx is used by long calls,
2708 * so we can't use that.
2711 x86_pop_reg (code, X86_ECX);
2712 } else {
2713 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2716 code = emit_move_return_value (cfg, ins, code);
2717 break;
2718 case OP_FCALL_REG:
2719 case OP_LCALL_REG:
2720 case OP_VCALL_REG:
2721 case OP_VCALL2_REG:
2722 case OP_VOIDCALL_REG:
2723 case OP_CALL_REG:
2724 call = (MonoCallInst*)ins;
2725 x86_call_reg (code, ins->sreg1);
2726 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2727 if (call->stack_usage == 4)
2728 x86_pop_reg (code, X86_ECX);
2729 else
2730 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2732 code = emit_move_return_value (cfg, ins, code);
2733 break;
2734 case OP_FCALL_MEMBASE:
2735 case OP_LCALL_MEMBASE:
2736 case OP_VCALL_MEMBASE:
2737 case OP_VCALL2_MEMBASE:
2738 case OP_VOIDCALL_MEMBASE:
2739 case OP_CALL_MEMBASE:
2740 call = (MonoCallInst*)ins;
2741 x86_call_membase (code, ins->sreg1, ins->inst_offset);
2742 if (call->stack_usage && !CALLCONV_IS_STDCALL (call->signature)) {
2743 if (call->stack_usage == 4)
2744 x86_pop_reg (code, X86_ECX);
2745 else
2746 x86_alu_reg_imm (code, X86_ADD, X86_ESP, call->stack_usage);
2748 code = emit_move_return_value (cfg, ins, code);
2749 break;
2750 case OP_X86_PUSH:
2751 x86_push_reg (code, ins->sreg1);
2752 break;
2753 case OP_X86_PUSH_IMM:
2754 x86_push_imm (code, ins->inst_imm);
2755 break;
2756 case OP_X86_PUSH_MEMBASE:
2757 x86_push_membase (code, ins->inst_basereg, ins->inst_offset);
2758 break;
2759 case OP_X86_PUSH_OBJ:
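/*
 * Push a value type: reserve inst_imm bytes on the stack, save
 * EDI/ESI/ECX (the registers rep movsd needs), point ESI at the source
 * and EDI at the reserved area (ESP + 12 skips the three saved
 * registers), then copy inst_imm / 4 dwords with rep movsd.
 */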
2760 x86_alu_reg_imm (code, X86_SUB, X86_ESP, ins->inst_imm);
2761 x86_push_reg (code, X86_EDI);
2762 x86_push_reg (code, X86_ESI);
2763 x86_push_reg (code, X86_ECX);
2764 if (ins->inst_offset)
2765 x86_lea_membase (code, X86_ESI, ins->inst_basereg, ins->inst_offset);
2766 else
2767 x86_mov_reg_reg (code, X86_ESI, ins->inst_basereg, 4);
2768 x86_lea_membase (code, X86_EDI, X86_ESP, 12);
2769 x86_mov_reg_imm (code, X86_ECX, (ins->inst_imm >> 2));
2770 x86_cld (code);
2771 x86_prefix (code, X86_REP_PREFIX);
2772 x86_movsd (code);
2773 x86_pop_reg (code, X86_ECX);
2774 x86_pop_reg (code, X86_ESI);
2775 x86_pop_reg (code, X86_EDI);
2776 break;
2777 case OP_X86_LEA:
2778 x86_lea_memindex (code, ins->dreg, ins->sreg1, ins->inst_imm, ins->sreg2, ins->backend.shift_amount);
2779 break;
2780 case OP_X86_LEA_MEMBASE:
2781 x86_lea_membase (code, ins->dreg, ins->sreg1, ins->inst_imm);
2782 break;
2783 case OP_X86_XCHG:
2784 x86_xchg_reg_reg (code, ins->sreg1, ins->sreg2, 4);
2785 break;
2786 case OP_LOCALLOC:
2787 /* keep alignment */
2788 x86_alu_reg_imm (code, X86_ADD, ins->sreg1, MONO_ARCH_LOCALLOC_ALIGNMENT - 1);
2789 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ~(MONO_ARCH_LOCALLOC_ALIGNMENT - 1));
2790 code = mono_emit_stack_alloc (code, ins);
2791 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2792 break;
2793 case OP_LOCALLOC_IMM: {
2794 guint32 size = ins->inst_imm;
2795 size = (size + (MONO_ARCH_FRAME_ALIGNMENT - 1)) & ~ (MONO_ARCH_FRAME_ALIGNMENT - 1);
2797 if (ins->flags & MONO_INST_INIT) {
2798 /* FIXME: Optimize this */
2799 x86_mov_reg_imm (code, ins->dreg, size);
2800 ins->sreg1 = ins->dreg;
2802 code = mono_emit_stack_alloc (code, ins);
2803 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2804 } else {
2805 x86_alu_reg_imm (code, X86_SUB, X86_ESP, size);
2806 x86_mov_reg_reg (code, ins->dreg, X86_ESP, 4);
2808 break;
2810 case OP_THROW: {
2811 x86_push_reg (code, ins->sreg1);
2812 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2813 (gpointer)"mono_arch_throw_exception");
2814 break;
2816 case OP_RETHROW: {
2817 x86_push_reg (code, ins->sreg1);
2818 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD,
2819 (gpointer)"mono_arch_rethrow_exception");
2820 break;
2822 case OP_CALL_HANDLER:
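/*
 * Keep the stack aligned across the call into the handler: the call
 * pushes a 4-byte return address, so reserving
 * MONO_ARCH_FRAME_ALIGNMENT - 4 bytes beforehand leaves ESP aligned
 * inside the handler.
 */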
2823 x86_alu_reg_imm (code, X86_SUB, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2824 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2825 x86_call_imm (code, 0);
2826 x86_alu_reg_imm (code, X86_ADD, X86_ESP, MONO_ARCH_FRAME_ALIGNMENT - 4);
2827 break;
2828 case OP_START_HANDLER: {
2829 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2830 x86_mov_membase_reg (code, spvar->inst_basereg, spvar->inst_offset, X86_ESP, 4);
2831 break;
2833 case OP_ENDFINALLY: {
2834 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2835 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2836 x86_ret (code);
2837 break;
2839 case OP_ENDFILTER: {
2840 MonoInst *spvar = mono_find_spvar_for_region (cfg, bb->region);
2841 x86_mov_reg_membase (code, X86_ESP, spvar->inst_basereg, spvar->inst_offset, 4);
2842 /* The local allocator will put the result into EAX */
2843 x86_ret (code);
2844 break;
2847 case OP_LABEL:
2848 ins->inst_c0 = code - cfg->native_code;
2849 break;
2850 case OP_BR:
2851 if (ins->flags & MONO_INST_BRLABEL) {
2852 if (ins->inst_i0->inst_c0) {
2853 x86_jump_code (code, cfg->native_code + ins->inst_i0->inst_c0);
2854 } else {
2855 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_LABEL, ins->inst_i0);
2856 if ((cfg->opt & MONO_OPT_BRANCH) &&
2857 x86_is_imm8 (ins->inst_i0->inst_c1 - cpos))
2858 x86_jump8 (code, 0);
2859 else
2860 x86_jump32 (code, 0);
2862 } else {
2863 if (ins->inst_target_bb->native_offset) {
2864 x86_jump_code (code, cfg->native_code + ins->inst_target_bb->native_offset);
2865 } else {
2866 mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_target_bb);
2867 if ((cfg->opt & MONO_OPT_BRANCH) &&
2868 x86_is_imm8 (ins->inst_target_bb->max_offset - cpos))
2869 x86_jump8 (code, 0);
2870 else
2871 x86_jump32 (code, 0);
2874 break;
2875 case OP_BR_REG:
2876 x86_jump_reg (code, ins->sreg1);
2877 break;
2878 case OP_CEQ:
2879 case OP_CLT:
2880 case OP_CLT_UN:
2881 case OP_CGT:
2882 case OP_CGT_UN:
2883 case OP_CNE:
2884 case OP_ICEQ:
2885 case OP_ICLT:
2886 case OP_ICLT_UN:
2887 case OP_ICGT:
2888 case OP_ICGT_UN:
2889 x86_set_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2890 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
2891 break;
2892 case OP_COND_EXC_EQ:
2893 case OP_COND_EXC_NE_UN:
2894 case OP_COND_EXC_LT:
2895 case OP_COND_EXC_LT_UN:
2896 case OP_COND_EXC_GT:
2897 case OP_COND_EXC_GT_UN:
2898 case OP_COND_EXC_GE:
2899 case OP_COND_EXC_GE_UN:
2900 case OP_COND_EXC_LE:
2901 case OP_COND_EXC_LE_UN:
2902 case OP_COND_EXC_IEQ:
2903 case OP_COND_EXC_INE_UN:
2904 case OP_COND_EXC_ILT:
2905 case OP_COND_EXC_ILT_UN:
2906 case OP_COND_EXC_IGT:
2907 case OP_COND_EXC_IGT_UN:
2908 case OP_COND_EXC_IGE:
2909 case OP_COND_EXC_IGE_UN:
2910 case OP_COND_EXC_ILE:
2911 case OP_COND_EXC_ILE_UN:
2912 EMIT_COND_SYSTEM_EXCEPTION (cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->inst_p1);
2913 break;
2914 case OP_COND_EXC_OV:
2915 case OP_COND_EXC_NO:
2916 case OP_COND_EXC_C:
2917 case OP_COND_EXC_NC:
2918 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_EQ], (ins->opcode < OP_COND_EXC_NE_UN), ins->inst_p1);
2919 break;
2920 case OP_COND_EXC_IOV:
2921 case OP_COND_EXC_INO:
2922 case OP_COND_EXC_IC:
2923 case OP_COND_EXC_INC:
2924 EMIT_COND_SYSTEM_EXCEPTION (branch_cc_table [ins->opcode - OP_COND_EXC_IEQ], (ins->opcode < OP_COND_EXC_INE_UN), ins->inst_p1);
2925 break;
2926 case OP_IBEQ:
2927 case OP_IBNE_UN:
2928 case OP_IBLT:
2929 case OP_IBLT_UN:
2930 case OP_IBGT:
2931 case OP_IBGT_UN:
2932 case OP_IBGE:
2933 case OP_IBGE_UN:
2934 case OP_IBLE:
2935 case OP_IBLE_UN:
2936 EMIT_COND_BRANCH (ins, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)]);
2937 break;
2939 case OP_CMOV_IEQ:
2940 case OP_CMOV_IGE:
2941 case OP_CMOV_IGT:
2942 case OP_CMOV_ILE:
2943 case OP_CMOV_ILT:
2944 case OP_CMOV_INE_UN:
2945 case OP_CMOV_IGE_UN:
2946 case OP_CMOV_IGT_UN:
2947 case OP_CMOV_ILE_UN:
2948 case OP_CMOV_ILT_UN:
2949 g_assert (ins->dreg == ins->sreg1);
2950 x86_cmov_reg (code, cc_table [mono_opcode_to_cond (ins->opcode)], cc_signed_table [mono_opcode_to_cond (ins->opcode)], ins->dreg, ins->sreg2);
2951 break;
2953 /* floating point opcodes */
2954 case OP_R8CONST: {
2955 double d = *(double *)ins->inst_p0;
2957 if ((d == 0.0) && (mono_signbit (d) == 0)) {
2958 x86_fldz (code);
2959 } else if (d == 1.0) {
2960 x86_fld1 (code);
2961 } else {
2962 if (cfg->compile_aot) {
2963 guint32 *val = (guint32*)&d;
2964 x86_push_imm (code, val [1]);
2965 x86_push_imm (code, val [0]);
2966 x86_fld_membase (code, X86_ESP, 0, TRUE);
2967 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
2969 else {
2970 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R8, ins->inst_p0);
2971 x86_fld (code, NULL, TRUE);
2974 break;
2976 case OP_R4CONST: {
2977 float f = *(float *)ins->inst_p0;
2979 if ((f == 0.0) && (mono_signbit (f) == 0)) {
2980 x86_fldz (code);
2981 } else if (f == 1.0) {
2982 x86_fld1 (code);
2983 } else {
2984 if (cfg->compile_aot) {
2985 guint32 val = *(guint32*)&f;
2986 x86_push_imm (code, val);
2987 x86_fld_membase (code, X86_ESP, 0, FALSE);
2988 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
2990 else {
2991 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_R4, ins->inst_p0);
2992 x86_fld (code, NULL, FALSE);
2995 break;
2997 case OP_STORER8_MEMBASE_REG:
2998 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, TRUE, TRUE);
2999 break;
3000 case OP_LOADR8_SPILL_MEMBASE:
3001 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3002 x86_fxch (code, 1);
3003 break;
3004 case OP_LOADR8_MEMBASE:
3005 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3006 break;
3007 case OP_STORER4_MEMBASE_REG:
3008 x86_fst_membase (code, ins->inst_destbasereg, ins->inst_offset, FALSE, TRUE);
3009 break;
3010 case OP_LOADR4_MEMBASE:
3011 x86_fld_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3012 break;
3013 case OP_ICONV_TO_R4:
3014 x86_push_reg (code, ins->sreg1);
3015 x86_fild_membase (code, X86_ESP, 0, FALSE);
3016 /* Change precision */
3017 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3018 x86_fld_membase (code, X86_ESP, 0, FALSE);
3019 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3020 break;
3021 case OP_ICONV_TO_R8:
3022 x86_push_reg (code, ins->sreg1);
3023 x86_fild_membase (code, X86_ESP, 0, FALSE);
3024 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3025 break;
3026 case OP_ICONV_TO_R_UN:
3027 x86_push_imm (code, 0);
3028 x86_push_reg (code, ins->sreg1);
3029 x86_fild_membase (code, X86_ESP, 0, TRUE);
3030 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3031 break;
3032 case OP_X86_FP_LOAD_I8:
3033 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, TRUE);
3034 break;
3035 case OP_X86_FP_LOAD_I4:
3036 x86_fild_membase (code, ins->inst_basereg, ins->inst_offset, FALSE);
3037 break;
3038 case OP_FCONV_TO_R4:
3039 /* Change precision */
3040 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3041 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3042 x86_fld_membase (code, X86_ESP, 0, FALSE);
3043 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3044 break;
3045 case OP_FCONV_TO_I1:
3046 code = emit_float_to_int (cfg, code, ins->dreg, 1, TRUE);
3047 break;
3048 case OP_FCONV_TO_U1:
3049 code = emit_float_to_int (cfg, code, ins->dreg, 1, FALSE);
3050 break;
3051 case OP_FCONV_TO_I2:
3052 code = emit_float_to_int (cfg, code, ins->dreg, 2, TRUE);
3053 break;
3054 case OP_FCONV_TO_U2:
3055 code = emit_float_to_int (cfg, code, ins->dreg, 2, FALSE);
3056 break;
3057 case OP_FCONV_TO_I4:
3058 case OP_FCONV_TO_I:
3059 code = emit_float_to_int (cfg, code, ins->dreg, 4, TRUE);
3060 break;
3061 case OP_FCONV_TO_I8:
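/*
 * fist rounds according to the FPU control word, so the code below
 * saves the control word, ORs in 0xc00 (rounding-control bits = round
 * toward zero) to get C-style truncation, converts, and then restores
 * the original control word.
 */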
3062 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 4);
3063 x86_fnstcw_membase(code, X86_ESP, 0);
3064 x86_mov_reg_membase (code, ins->dreg, X86_ESP, 0, 2);
3065 x86_alu_reg_imm (code, X86_OR, ins->dreg, 0xc00);
3066 x86_mov_membase_reg (code, X86_ESP, 2, ins->dreg, 2);
3067 x86_fldcw_membase (code, X86_ESP, 2);
3068 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
3069 x86_fist_pop_membase (code, X86_ESP, 0, TRUE);
3070 x86_pop_reg (code, ins->dreg);
3071 x86_pop_reg (code, ins->backend.reg3);
3072 x86_fldcw_membase (code, X86_ESP, 0);
3073 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
3074 break;
3075 case OP_LCONV_TO_R8_2:
3076 x86_push_reg (code, ins->sreg2);
3077 x86_push_reg (code, ins->sreg1);
3078 x86_fild_membase (code, X86_ESP, 0, TRUE);
3079 /* Change precision */
3080 x86_fst_membase (code, X86_ESP, 0, TRUE, TRUE);
3081 x86_fld_membase (code, X86_ESP, 0, TRUE);
3082 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3083 break;
3084 case OP_LCONV_TO_R4_2:
3085 x86_push_reg (code, ins->sreg2);
3086 x86_push_reg (code, ins->sreg1);
3087 x86_fild_membase (code, X86_ESP, 0, TRUE);
3088 /* Change precision */
3089 x86_fst_membase (code, X86_ESP, 0, FALSE, TRUE);
3090 x86_fld_membase (code, X86_ESP, 0, FALSE);
3091 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3092 break;
3093 case OP_LCONV_TO_R_UN:
3094 case OP_LCONV_TO_R_UN_2: {
3095 static guint8 mn[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x40 };
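/*
 * mn is 2^64 encoded as an x87 80-bit extended double (little endian:
 * significand 0x8000000000000000, biased exponent 0x403f). Adding it to
 * a value that fild loaded as a negative signed 64-bit integer yields
 * the intended unsigned magnitude.
 */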
3096 guint8 *br;
3098 /* load 64bit integer to FP stack */
3099 x86_push_reg (code, ins->sreg2);
3100 x86_push_reg (code, ins->sreg1);
3101 x86_fild_membase (code, X86_ESP, 0, TRUE);
3103 /* test if lreg is negative */
3104 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3105 br = code; x86_branch8 (code, X86_CC_GEZ, 0, TRUE);
3107 /* add correction constant mn */
3108 x86_fld80_mem (code, mn);
3109 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3111 x86_patch (br, code);
3113 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 8);
3115 break;
3117 case OP_LCONV_TO_OVF_I:
3118 case OP_LCONV_TO_OVF_I4_2: {
3119 guint8 *br [3], *label [1];
3120 MonoInst *tins;
3123 * Valid ints: 0xFFFFFFFF:0x80000000 to 0x00000000:0x7FFFFFFF
3125 x86_test_reg_reg (code, ins->sreg1, ins->sreg1);
3127 /* If the low word top bit is set, see if we are negative */
3128 br [0] = code; x86_branch8 (code, X86_CC_LT, 0, TRUE);
3129 /* We are not negative (no top bit set); check that our top word is zero */
3130 x86_test_reg_reg (code, ins->sreg2, ins->sreg2);
3131 br [1] = code; x86_branch8 (code, X86_CC_EQ, 0, TRUE);
3132 label [0] = code;
3134 /* throw exception */
3135 tins = mono_branch_optimize_exception_target (cfg, bb, "OverflowException");
3136 if (tins) {
3137 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_BB, tins->inst_true_bb);
3138 if ((cfg->opt & MONO_OPT_BRANCH) && x86_is_imm8 (tins->inst_true_bb->max_offset - cpos))
3139 x86_jump8 (code, 0);
3140 else
3141 x86_jump32 (code, 0);
3142 } else {
3143 mono_add_patch_info (cfg, code - cfg->native_code, MONO_PATCH_INFO_EXC, "OverflowException");
3144 x86_jump32 (code, 0);
3148 x86_patch (br [0], code);
3149 /* our top bit is set, check that the top word is 0xffffffff */
3150 x86_alu_reg_imm (code, X86_CMP, ins->sreg2, 0xffffffff);
3152 x86_patch (br [1], code);
3153 /* nope, emit exception */
3154 br [2] = code; x86_branch8 (code, X86_CC_NE, 0, TRUE);
3155 x86_patch (br [2], label [0]);
3157 if (ins->dreg != ins->sreg1)
3158 x86_mov_reg_reg (code, ins->dreg, ins->sreg1, 4);
3159 break;
3161 case OP_FMOVE:
3162 /* Not needed on the fp stack */
3163 break;
3164 case OP_FADD:
3165 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3166 break;
3167 case OP_FSUB:
3168 x86_fp_op_reg (code, X86_FSUB, 1, TRUE);
3169 break;
3170 case OP_FMUL:
3171 x86_fp_op_reg (code, X86_FMUL, 1, TRUE);
3172 break;
3173 case OP_FDIV:
3174 x86_fp_op_reg (code, X86_FDIV, 1, TRUE);
3175 break;
3176 case OP_FNEG:
3177 x86_fchs (code);
3178 break;
3179 case OP_SIN:
3180 x86_fsin (code);
3181 x86_fldz (code);
3182 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3183 break;
3184 case OP_COS:
3185 x86_fcos (code);
3186 x86_fldz (code);
3187 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3188 break;
3189 case OP_ABS:
3190 x86_fabs (code);
3191 break;
3192 case OP_TAN: {
3194 * it really doesn't make sense to inline all this code,
3195 * it's here just to show that things may not be as simple
3196 * as they appear.
3198 guchar *check_pos, *end_tan, *pop_jump;
3199 x86_push_reg (code, X86_EAX);
3200 x86_fptan (code);
3201 x86_fnstsw (code);
3202 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3203 check_pos = code;
3204 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3205 x86_fstp (code, 0); /* pop the 1.0 */
3206 end_tan = code;
3207 x86_jump8 (code, 0);
3208 x86_fldpi (code);
3209 x86_fp_op (code, X86_FADD, 0);
3210 x86_fxch (code, 1);
3211 x86_fprem1 (code);
3212 x86_fstsw (code);
3213 x86_test_reg_imm (code, X86_EAX, X86_FP_C2);
3214 pop_jump = code;
3215 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3216 x86_fstp (code, 1);
3217 x86_fptan (code);
3218 x86_patch (pop_jump, code);
3219 x86_fstp (code, 0); /* pop the 1.0 */
3220 x86_patch (check_pos, code);
3221 x86_patch (end_tan, code);
3222 x86_fldz (code);
3223 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3224 x86_pop_reg (code, X86_EAX);
3225 break;
3227 case OP_ATAN:
3228 x86_fld1 (code);
3229 x86_fpatan (code);
3230 x86_fldz (code);
3231 x86_fp_op_reg (code, X86_FADD, 1, TRUE);
3232 break;
3233 case OP_SQRT:
3234 x86_fsqrt (code);
3235 break;
3236 case OP_ROUND:
3237 x86_frndint (code);
3238 break;
3239 case OP_IMIN:
3240 g_assert (cfg->opt & MONO_OPT_CMOV);
3241 g_assert (ins->dreg == ins->sreg1);
3242 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3243 x86_cmov_reg (code, X86_CC_GT, TRUE, ins->dreg, ins->sreg2);
3244 break;
3245 case OP_IMIN_UN:
3246 g_assert (cfg->opt & MONO_OPT_CMOV);
3247 g_assert (ins->dreg == ins->sreg1);
3248 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3249 x86_cmov_reg (code, X86_CC_GT, FALSE, ins->dreg, ins->sreg2);
3250 break;
3251 case OP_IMAX:
3252 g_assert (cfg->opt & MONO_OPT_CMOV);
3253 g_assert (ins->dreg == ins->sreg1);
3254 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3255 x86_cmov_reg (code, X86_CC_LT, TRUE, ins->dreg, ins->sreg2);
3256 break;
3257 case OP_IMAX_UN:
3258 g_assert (cfg->opt & MONO_OPT_CMOV);
3259 g_assert (ins->dreg == ins->sreg1);
3260 x86_alu_reg_reg (code, X86_CMP, ins->sreg1, ins->sreg2);
3261 x86_cmov_reg (code, X86_CC_LT, FALSE, ins->dreg, ins->sreg2);
3262 break;
3263 case OP_X86_FPOP:
3264 x86_fstp (code, 0);
3265 break;
3266 case OP_X86_FXCH:
3267 x86_fxch (code, ins->inst_imm);
3268 break;
3269 case OP_FREM: {
3270 guint8 *l1, *l2;
3272 x86_push_reg (code, X86_EAX);
3273 /* we need to exchange ST(0) with ST(1) */
3274 x86_fxch (code, 1);
3276 /* this requires a loop, because fprem sometimes
3277 * returns a partial remainder */
3278 l1 = code;
3279 /* looks like MS is using fprem instead of the IEEE compatible fprem1 */
3280 /* x86_fprem1 (code); */
3281 x86_fprem (code);
3282 x86_fnstsw (code);
3283 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_C2);
3284 l2 = code + 2;
3285 x86_branch8 (code, X86_CC_NE, l1 - l2, FALSE);
3287 /* pop result */
3288 x86_fstp (code, 1);
3290 x86_pop_reg (code, X86_EAX);
3291 break;
3293 case OP_FCOMPARE:
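/*
 * Flag conventions for the FP compares: with FCMOV support, fcomip
 * compares ST(0) with ST(1) and sets the integer flags directly
 * (ZF = equal, CF = below, PF = unordered/NaN). Without it,
 * EMIT_FPCOMPARE leaves the x87 condition codes C0/C2/C3 in AX via
 * fnstsw, which the OP_FC* and OP_FB* cases below test by hand.
 */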
3294 if (cfg->opt & MONO_OPT_FCMOV) {
3295 x86_fcomip (code, 1);
3296 x86_fstp (code, 0);
3297 break;
3299 /* this overwrites EAX */
3300 EMIT_FPCOMPARE(code);
3301 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3302 break;
3303 case OP_FCEQ:
3304 if (cfg->opt & MONO_OPT_FCMOV) {
3305 /* zeroing the register at the start results in
3306 * shorter and faster code (we can also remove the widening op)
3308 guchar *unordered_check;
3309 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3310 x86_fcomip (code, 1);
3311 x86_fstp (code, 0);
3312 unordered_check = code;
3313 x86_branch8 (code, X86_CC_P, 0, FALSE);
3314 x86_set_reg (code, X86_CC_EQ, ins->dreg, FALSE);
3315 x86_patch (unordered_check, code);
3316 break;
3318 if (ins->dreg != X86_EAX)
3319 x86_push_reg (code, X86_EAX);
3321 EMIT_FPCOMPARE(code);
3322 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3323 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3324 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3325 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3327 if (ins->dreg != X86_EAX)
3328 x86_pop_reg (code, X86_EAX);
3329 break;
3330 case OP_FCLT:
3331 case OP_FCLT_UN:
3332 if (cfg->opt & MONO_OPT_FCMOV) {
3333 /* zeroing the register at the start results in
3334 * shorter and faster code (we can also remove the widening op)
3336 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3337 x86_fcomip (code, 1);
3338 x86_fstp (code, 0);
3339 if (ins->opcode == OP_FCLT_UN) {
3340 guchar *unordered_check = code;
3341 guchar *jump_to_end;
3342 x86_branch8 (code, X86_CC_P, 0, FALSE);
3343 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3344 jump_to_end = code;
3345 x86_jump8 (code, 0);
3346 x86_patch (unordered_check, code);
3347 x86_inc_reg (code, ins->dreg);
3348 x86_patch (jump_to_end, code);
3349 } else {
3350 x86_set_reg (code, X86_CC_GT, ins->dreg, FALSE);
3352 break;
3354 if (ins->dreg != X86_EAX)
3355 x86_push_reg (code, X86_EAX);
3357 EMIT_FPCOMPARE(code);
3358 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3359 if (ins->opcode == OP_FCLT_UN) {
3360 guchar *is_not_zero_check, *end_jump;
3361 is_not_zero_check = code;
3362 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3363 end_jump = code;
3364 x86_jump8 (code, 0);
3365 x86_patch (is_not_zero_check, code);
3366 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3368 x86_patch (end_jump, code);
3370 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3371 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3373 if (ins->dreg != X86_EAX)
3374 x86_pop_reg (code, X86_EAX);
3375 break;
3376 case OP_FCGT:
3377 case OP_FCGT_UN:
3378 if (cfg->opt & MONO_OPT_FCMOV) {
3379 /* zeroing the register at the start results in
3380 * shorter and faster code (we can also remove the widening op)
3382 guchar *unordered_check;
3383 x86_alu_reg_reg (code, X86_XOR, ins->dreg, ins->dreg);
3384 x86_fcomip (code, 1);
3385 x86_fstp (code, 0);
3386 if (ins->opcode == OP_FCGT) {
3387 unordered_check = code;
3388 x86_branch8 (code, X86_CC_P, 0, FALSE);
3389 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3390 x86_patch (unordered_check, code);
3391 } else {
3392 x86_set_reg (code, X86_CC_LT, ins->dreg, FALSE);
3394 break;
3396 if (ins->dreg != X86_EAX)
3397 x86_push_reg (code, X86_EAX);
3399 EMIT_FPCOMPARE(code);
3400 x86_alu_reg_imm (code, X86_AND, X86_EAX, X86_FP_CC_MASK);
3401 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3402 if (ins->opcode == OP_FCGT_UN) {
3403 guchar *is_not_zero_check, *end_jump;
3404 is_not_zero_check = code;
3405 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3406 end_jump = code;
3407 x86_jump8 (code, 0);
3408 x86_patch (is_not_zero_check, code);
3409 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3411 x86_patch (end_jump, code);
3413 x86_set_reg (code, X86_CC_EQ, ins->dreg, TRUE);
3414 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
3416 if (ins->dreg != X86_EAX)
3417 x86_pop_reg (code, X86_EAX);
3418 break;
3419 case OP_FBEQ:
3420 if (cfg->opt & MONO_OPT_FCMOV) {
3421 guchar *jump = code;
3422 x86_branch8 (code, X86_CC_P, 0, TRUE);
3423 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3424 x86_patch (jump, code);
3425 break;
3427 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0x4000);
3428 EMIT_COND_BRANCH (ins, X86_CC_EQ, TRUE);
3429 break;
3430 case OP_FBNE_UN:
3431 /* Branch if C013 != 100 */
3432 if (cfg->opt & MONO_OPT_FCMOV) {
3433 /* branch if !ZF or (PF|CF) */
3434 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3435 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3436 EMIT_COND_BRANCH (ins, X86_CC_B, FALSE);
3437 break;
3439 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3440 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3441 break;
3442 case OP_FBLT:
3443 if (cfg->opt & MONO_OPT_FCMOV) {
3444 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3445 break;
3447 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3448 break;
3449 case OP_FBLT_UN:
3450 if (cfg->opt & MONO_OPT_FCMOV) {
3451 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3452 EMIT_COND_BRANCH (ins, X86_CC_GT, FALSE);
3453 break;
3455 if (ins->opcode == OP_FBLT_UN) {
3456 guchar *is_not_zero_check, *end_jump;
3457 is_not_zero_check = code;
3458 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3459 end_jump = code;
3460 x86_jump8 (code, 0);
3461 x86_patch (is_not_zero_check, code);
3462 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3464 x86_patch (end_jump, code);
3466 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3467 break;
3468 case OP_FBGT:
3469 case OP_FBGT_UN:
3470 if (cfg->opt & MONO_OPT_FCMOV) {
3471 if (ins->opcode == OP_FBGT) {
3472 guchar *br1;
3474 /* skip branch if C1=1 */
3475 br1 = code;
3476 x86_branch8 (code, X86_CC_P, 0, FALSE);
3477 /* branch if (C0 | C3) = 1 */
3478 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3479 x86_patch (br1, code);
3480 } else {
3481 EMIT_COND_BRANCH (ins, X86_CC_LT, FALSE);
3483 break;
3485 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3486 if (ins->opcode == OP_FBGT_UN) {
3487 guchar *is_not_zero_check, *end_jump;
3488 is_not_zero_check = code;
3489 x86_branch8 (code, X86_CC_NZ, 0, TRUE);
3490 end_jump = code;
3491 x86_jump8 (code, 0);
3492 x86_patch (is_not_zero_check, code);
3493 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_CC_MASK);
3495 x86_patch (end_jump, code);
3497 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3498 break;
3499 case OP_FBGE:
3500 /* Branch if C013 == 100 or 001 */
3501 if (cfg->opt & MONO_OPT_FCMOV) {
3502 guchar *br1;
3504 /* skip branch if C1=1 */
3505 br1 = code;
3506 x86_branch8 (code, X86_CC_P, 0, FALSE);
3507 /* branch if (C0 | C3) = 1 */
3508 EMIT_COND_BRANCH (ins, X86_CC_BE, FALSE);
3509 x86_patch (br1, code);
3510 break;
3512 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3513 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3514 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C3);
3515 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3516 break;
3517 case OP_FBGE_UN:
3518 /* Branch if C013 == 000 */
3519 if (cfg->opt & MONO_OPT_FCMOV) {
3520 EMIT_COND_BRANCH (ins, X86_CC_LE, FALSE);
3521 break;
3523 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3524 break;
3525 case OP_FBLE:
3526 /* Branch if C013=000 or 100 */
3527 if (cfg->opt & MONO_OPT_FCMOV) {
3528 guchar *br1;
3530 /* skip branch if C1=1 */
3531 br1 = code;
3532 x86_branch8 (code, X86_CC_P, 0, FALSE);
3533 /* branch if C0=0 */
3534 EMIT_COND_BRANCH (ins, X86_CC_NB, FALSE);
3535 x86_patch (br1, code);
3536 break;
3538 x86_alu_reg_imm (code, X86_AND, X86_EAX, (X86_FP_C0|X86_FP_C1));
3539 x86_alu_reg_imm (code, X86_CMP, X86_EAX, 0);
3540 EMIT_COND_BRANCH (ins, X86_CC_EQ, FALSE);
3541 break;
3542 case OP_FBLE_UN:
3543 /* Branch if C013 != 001 */
3544 if (cfg->opt & MONO_OPT_FCMOV) {
3545 EMIT_COND_BRANCH (ins, X86_CC_P, FALSE);
3546 EMIT_COND_BRANCH (ins, X86_CC_GE, FALSE);
3547 break;
3549 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3550 EMIT_COND_BRANCH (ins, X86_CC_NE, FALSE);
3551 break;
3552 case OP_CKFINITE: {
3553 guchar *br1;
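/*
 * fxam classifies ST(0) into the C0/C2/C3 condition codes. Masking the
 * status word with 0x4100 keeps C3 (0x4000) and C0 (0x0100); a masked
 * value equal to C0 alone means NaN or infinity, the case that must
 * throw ArithmeticException.
 */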
3554 x86_push_reg (code, X86_EAX);
3555 x86_fxam (code);
3556 x86_fnstsw (code);
3557 x86_alu_reg_imm (code, X86_AND, X86_EAX, 0x4100);
3558 x86_alu_reg_imm (code, X86_CMP, X86_EAX, X86_FP_C0);
3559 x86_pop_reg (code, X86_EAX);
3561 /* Have to clean up the fp stack before throwing the exception */
3562 br1 = code;
3563 x86_branch8 (code, X86_CC_NE, 0, FALSE);
3565 x86_fstp (code, 0);
3566 EMIT_COND_SYSTEM_EXCEPTION (X86_CC_EQ, FALSE, "ArithmeticException");
3568 x86_patch (br1, code);
3569 break;
3571 case OP_TLS_GET: {
3572 code = mono_x86_emit_tls_get (code, ins->dreg, ins->inst_offset);
3573 break;
3575 case OP_MEMORY_BARRIER: {
3576 /* Not needed on x86 */
3577 break;
3579 case OP_ATOMIC_ADD_I4: {
3580 int dreg = ins->dreg;
3582 if (dreg == ins->inst_basereg) {
3583 x86_push_reg (code, ins->sreg2);
3584 dreg = ins->sreg2;
3587 if (dreg != ins->sreg2)
3588 x86_mov_reg_reg (code, ins->dreg, ins->sreg2, 4);
3590 x86_prefix (code, X86_LOCK_PREFIX);
3591 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
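/*
 * lock xadd is an atomic fetch-and-add: the register and the memory
 * operand are exchanged and their sum is stored back to memory, so dreg
 * ends up holding the value the location had before the addition.
 */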
3593 if (dreg != ins->dreg) {
3594 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3595 x86_pop_reg (code, dreg);
3598 break;
3600 case OP_ATOMIC_ADD_NEW_I4: {
3601 int dreg = ins->dreg;
3603 /* hack: limit in regalloc, dreg != sreg1 && dreg != sreg2 */
3604 if (ins->sreg2 == dreg) {
3605 if (dreg == X86_EBX) {
3606 dreg = X86_EDI;
3607 if (ins->inst_basereg == X86_EDI)
3608 dreg = X86_ESI;
3609 } else {
3610 dreg = X86_EBX;
3611 if (ins->inst_basereg == X86_EBX)
3612 dreg = X86_EDI;
3614 } else if (ins->inst_basereg == dreg) {
3615 if (dreg == X86_EBX) {
3616 dreg = X86_EDI;
3617 if (ins->sreg2 == X86_EDI)
3618 dreg = X86_ESI;
3619 } else {
3620 dreg = X86_EBX;
3621 if (ins->sreg2 == X86_EBX)
3622 dreg = X86_EDI;
3626 if (dreg != ins->dreg) {
3627 x86_push_reg (code, dreg);
3630 x86_mov_reg_reg (code, dreg, ins->sreg2, 4);
3631 x86_prefix (code, X86_LOCK_PREFIX);
3632 x86_xadd_membase_reg (code, ins->inst_basereg, ins->inst_offset, dreg, 4);
3633 /* dreg contains the old value; add the sreg2 value to produce the new one */
3634 x86_alu_reg_reg (code, X86_ADD, dreg, ins->sreg2);
3636 if (ins->dreg != dreg) {
3637 x86_mov_reg_reg (code, ins->dreg, dreg, 4);
3638 x86_pop_reg (code, dreg);
3641 break;
3643 case OP_ATOMIC_EXCHANGE_I4:
3644 case OP_ATOMIC_CAS_IMM_I4: {
3645 guchar *br[2];
3646 int sreg2 = ins->sreg2;
3647 int breg = ins->inst_basereg;
3649 /* cmpxchg uses eax as the comparand; we need to make sure we can use it,
3650 * a hack to overcome limits in the x86 reg allocator
3651 * (req: dreg == eax and sreg2 != eax and breg != eax)
3653 g_assert (ins->dreg == X86_EAX);
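/*
 * lock cmpxchg [mem], reg compares EAX with [mem]: if equal, it stores
 * reg to [mem] and sets ZF; otherwise it loads [mem] into EAX and
 * clears ZF. The exchange path below therefore loops (mov EAX, [mem];
 * lock cmpxchg; jne back) until no other thread raced the store.
 */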
3655 /* We need the EAX reg for the cmpxchg */
3656 if (ins->sreg2 == X86_EAX) {
3657 sreg2 = (breg == X86_EDX) ? X86_EBX : X86_EDX;
3658 x86_push_reg (code, sreg2);
3659 x86_mov_reg_reg (code, sreg2, X86_EAX, 4);
3662 if (breg == X86_EAX) {
3663 breg = (sreg2 == X86_ESI) ? X86_EDI : X86_ESI;
3664 x86_push_reg (code, breg);
3665 x86_mov_reg_reg (code, breg, X86_EAX, 4);
3668 if (ins->opcode == OP_ATOMIC_CAS_IMM_I4) {
3669 x86_mov_reg_imm (code, X86_EAX, ins->backend.data);
3671 x86_prefix (code, X86_LOCK_PREFIX);
3672 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3673 } else {
3674 x86_mov_reg_membase (code, X86_EAX, breg, ins->inst_offset, 4);
3676 br [0] = code; x86_prefix (code, X86_LOCK_PREFIX);
3677 x86_cmpxchg_membase_reg (code, breg, ins->inst_offset, sreg2);
3678 br [1] = code; x86_branch8 (code, X86_CC_NE, -1, FALSE);
3679 x86_patch (br [1], br [0]);
3682 if (breg != ins->inst_basereg)
3683 x86_pop_reg (code, breg);
3685 if (ins->sreg2 != sreg2)
3686 x86_pop_reg (code, sreg2);
3688 break;
3690 #ifdef MONO_ARCH_SIMD_INTRINSICS
3691 case OP_ADDPS:
3692 x86_sse_alu_ps_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3693 break;
3694 case OP_DIVPS:
3695 x86_sse_alu_ps_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3696 break;
3697 case OP_MULPS:
3698 x86_sse_alu_ps_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3699 break;
3700 case OP_SUBPS:
3701 x86_sse_alu_ps_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3702 break;
3703 case OP_MAXPS:
3704 x86_sse_alu_ps_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3705 break;
3706 case OP_MINPS:
3707 x86_sse_alu_ps_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3708 break;
3709 case OP_COMPPS:
3710 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3711 x86_sse_alu_ps_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3712 break;
3713 case OP_ANDPS:
3714 x86_sse_alu_ps_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3715 break;
3716 case OP_ANDNPS:
3717 x86_sse_alu_ps_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3718 break;
3719 case OP_ORPS:
3720 x86_sse_alu_ps_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3721 break;
3722 case OP_XORPS:
3723 x86_sse_alu_ps_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3724 break;
3725 case OP_SQRTPS:
3726 x86_sse_alu_ps_reg_reg (code, X86_SSE_SQRT, ins->dreg, ins->sreg1);
3727 break;
3728 case OP_RSQRTPS:
3729 x86_sse_alu_ps_reg_reg (code, X86_SSE_RSQRT, ins->dreg, ins->sreg1);
3730 break;
3731 case OP_RCPPS:
3732 x86_sse_alu_ps_reg_reg (code, X86_SSE_RCP, ins->dreg, ins->sreg1);
3733 break;
3734 case OP_ADDSUBPS:
3735 x86_sse_alu_sd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3736 break;
3737 case OP_HADDPS:
3738 x86_sse_alu_sd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3739 break;
3740 case OP_HSUBPS:
3741 x86_sse_alu_sd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3742 break;
3743 case OP_DUPPS_HIGH:
3744 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSHDUP, ins->dreg, ins->sreg1);
3745 break;
3746 case OP_DUPPS_LOW:
3747 x86_sse_alu_ss_reg_reg (code, X86_SSE_MOVSLDUP, ins->dreg, ins->sreg1);
3748 break;
3750 case OP_PSHUFLEW_HIGH:
3751 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3752 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 1);
3753 break;
3754 case OP_PSHUFLEW_LOW:
3755 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3756 x86_pshufw_reg_reg (code, ins->dreg, ins->sreg1, ins->inst_c0, 0);
3757 break;
3758 case OP_PSHUFLED:
3759 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF);
3760 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->sreg1, ins->inst_c0);
3761 break;
3763 case OP_ADDPD:
3764 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADD, ins->sreg1, ins->sreg2);
3765 break;
3766 case OP_DIVPD:
3767 x86_sse_alu_pd_reg_reg (code, X86_SSE_DIV, ins->sreg1, ins->sreg2);
3768 break;
3769 case OP_MULPD:
3770 x86_sse_alu_pd_reg_reg (code, X86_SSE_MUL, ins->sreg1, ins->sreg2);
3771 break;
3772 case OP_SUBPD:
3773 x86_sse_alu_pd_reg_reg (code, X86_SSE_SUB, ins->sreg1, ins->sreg2);
3774 break;
3775 case OP_MAXPD:
3776 x86_sse_alu_pd_reg_reg (code, X86_SSE_MAX, ins->sreg1, ins->sreg2);
3777 break;
3778 case OP_MINPD:
3779 x86_sse_alu_pd_reg_reg (code, X86_SSE_MIN, ins->sreg1, ins->sreg2);
3780 break;
3781 case OP_COMPPD:
3782 g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7);
3783 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_COMP, ins->sreg1, ins->sreg2, ins->inst_c0);
3784 break;
3785 case OP_ANDPD:
3786 x86_sse_alu_pd_reg_reg (code, X86_SSE_AND, ins->sreg1, ins->sreg2);
3787 break;
3788 case OP_ANDNPD:
3789 x86_sse_alu_pd_reg_reg (code, X86_SSE_ANDN, ins->sreg1, ins->sreg2);
3790 break;
3791 case OP_ORPD:
3792 x86_sse_alu_pd_reg_reg (code, X86_SSE_OR, ins->sreg1, ins->sreg2);
3793 break;
3794 case OP_XORPD:
3795 x86_sse_alu_pd_reg_reg (code, X86_SSE_XOR, ins->sreg1, ins->sreg2);
3796 break;
3797 case OP_ADDSUBPD:
3798 x86_sse_alu_pd_reg_reg (code, X86_SSE_ADDSUB, ins->sreg1, ins->sreg2);
3799 break;
3800 case OP_HADDPD:
3801 x86_sse_alu_pd_reg_reg (code, X86_SSE_HADD, ins->sreg1, ins->sreg2);
3802 break;
3803 case OP_HSUBPD:
3804 x86_sse_alu_pd_reg_reg (code, X86_SSE_HSUB, ins->sreg1, ins->sreg2);
3805 break;
3806 case OP_DUPPD:
3807 x86_sse_alu_sd_reg_reg (code, X86_SSE_MOVDDUP, ins->dreg, ins->sreg1);
3808 break;
3810 case OP_EXTRACT_MASK:
3811 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMOVMSKB, ins->dreg, ins->sreg1);
3812 break;
3814 case OP_PAND:
3815 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAND, ins->sreg1, ins->sreg2);
3816 break;
3817 case OP_POR:
3818 x86_sse_alu_pd_reg_reg (code, X86_SSE_POR, ins->sreg1, ins->sreg2);
3819 break;
3820 case OP_PXOR:
3821 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->sreg1, ins->sreg2);
3822 break;
3824 case OP_PADDB:
3825 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDB, ins->sreg1, ins->sreg2);
3826 break;
3827 case OP_PADDW:
3828 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDW, ins->sreg1, ins->sreg2);
3829 break;
3830 case OP_PADDD:
3831 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDD, ins->sreg1, ins->sreg2);
3832 break;
3833 case OP_PADDQ:
3834 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDQ, ins->sreg1, ins->sreg2);
3835 break;
3837 case OP_PSUBB:
3838 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBB, ins->sreg1, ins->sreg2);
3839 break;
3840 case OP_PSUBW:
3841 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBW, ins->sreg1, ins->sreg2);
3842 break;
3843 case OP_PSUBD:
3844 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBD, ins->sreg1, ins->sreg2);
3845 break;
3846 case OP_PSUBQ:
3847 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBQ, ins->sreg1, ins->sreg2);
3848 break;
3850 case OP_PMAXB_UN:
3851 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXUB, ins->sreg1, ins->sreg2);
3852 break;
3853 case OP_PMAXW_UN:
3854 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUW, ins->sreg1, ins->sreg2);
3855 break;
3856 case OP_PMAXD_UN:
3857 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXUD, ins->sreg1, ins->sreg2);
3858 break;
3860 case OP_PMAXB:
3861 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSB, ins->sreg1, ins->sreg2);
3862 break;
3863 case OP_PMAXW:
3864 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMAXSW, ins->sreg1, ins->sreg2);
3865 break;
3866 case OP_PMAXD:
3867 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMAXSD, ins->sreg1, ins->sreg2);
3868 break;
3870 case OP_PAVGB_UN:
3871 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGB, ins->sreg1, ins->sreg2);
3872 break;
3873 case OP_PAVGW_UN:
3874 x86_sse_alu_pd_reg_reg (code, X86_SSE_PAVGW, ins->sreg1, ins->sreg2);
3875 break;
3877 case OP_PMINB_UN:
3878 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINUB, ins->sreg1, ins->sreg2);
3879 break;
3880 case OP_PMINW_UN:
3881 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUW, ins->sreg1, ins->sreg2);
3882 break;
3883 case OP_PMIND_UN:
3884 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINUD, ins->sreg1, ins->sreg2);
3885 break;
3887 case OP_PMINB:
3888 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSB, ins->sreg1, ins->sreg2);
3889 break;
3890 case OP_PMINW:
3891 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMINSW, ins->sreg1, ins->sreg2);
3892 break;
3893 case OP_PMIND:
3894 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMINSD, ins->sreg1, ins->sreg2);
3895 break;
3897 case OP_PCMPEQB:
3898 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQB, ins->sreg1, ins->sreg2);
3899 break;
3900 case OP_PCMPEQW:
3901 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQW, ins->sreg1, ins->sreg2);
3902 break;
3903 case OP_PCMPEQD:
3904 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPEQD, ins->sreg1, ins->sreg2);
3905 break;
3906 case OP_PCMPEQQ:
3907 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPEQQ, ins->sreg1, ins->sreg2);
3908 break;
3910 case OP_PCMPGTB:
3911 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTB, ins->sreg1, ins->sreg2);
3912 break;
3913 case OP_PCMPGTW:
3914 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTW, ins->sreg1, ins->sreg2);
3915 break;
3916 case OP_PCMPGTD:
3917 x86_sse_alu_pd_reg_reg (code, X86_SSE_PCMPGTD, ins->sreg1, ins->sreg2);
3918 break;
3919 case OP_PCMPGTQ:
3920 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PCMPGTQ, ins->sreg1, ins->sreg2);
3921 break;
3923 case OP_PSUM_ABS_DIFF:
3924 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSADBW, ins->sreg1, ins->sreg2);
3925 break;
3927 case OP_UNPACK_LOWB:
3928 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLBW, ins->sreg1, ins->sreg2);
3929 break;
3930 case OP_UNPACK_LOWW:
3931 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLWD, ins->sreg1, ins->sreg2);
3932 break;
3933 case OP_UNPACK_LOWD:
3934 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLDQ, ins->sreg1, ins->sreg2);
3935 break;
3936 case OP_UNPACK_LOWQ:
3937 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKLQDQ, ins->sreg1, ins->sreg2);
3938 break;
3939 case OP_UNPACK_LOWPS:
3940 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
3941 break;
3942 case OP_UNPACK_LOWPD:
3943 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKL, ins->sreg1, ins->sreg2);
3944 break;
3946 case OP_UNPACK_HIGHB:
3947 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHBW, ins->sreg1, ins->sreg2);
3948 break;
3949 case OP_UNPACK_HIGHW:
3950 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHWD, ins->sreg1, ins->sreg2);
3951 break;
3952 case OP_UNPACK_HIGHD:
3953 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHDQ, ins->sreg1, ins->sreg2);
3954 break;
3955 case OP_UNPACK_HIGHQ:
3956 x86_sse_alu_pd_reg_reg (code, X86_SSE_PUNPCKHQDQ, ins->sreg1, ins->sreg2);
3957 break;
3958 case OP_UNPACK_HIGHPS:
3959 x86_sse_alu_ps_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
3960 break;
3961 case OP_UNPACK_HIGHPD:
3962 x86_sse_alu_pd_reg_reg (code, X86_SSE_UNPCKH, ins->sreg1, ins->sreg2);
3963 break;
3965 case OP_PACKW:
3966 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSWB, ins->sreg1, ins->sreg2);
3967 break;
3968 case OP_PACKD:
3969 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKSSDW, ins->sreg1, ins->sreg2);
3970 break;
3971 case OP_PACKW_UN:
3972 x86_sse_alu_pd_reg_reg (code, X86_SSE_PACKUSWB, ins->sreg1, ins->sreg2);
3973 break;
3974 case OP_PACKD_UN:
3975 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PACKUSDW, ins->sreg1, ins->sreg2);
3976 break;
3978 case OP_PADDB_SAT_UN:
3979 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSB, ins->sreg1, ins->sreg2);
3980 break;
3981 case OP_PSUBB_SAT_UN:
3982 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSB, ins->sreg1, ins->sreg2);
3983 break;
3984 case OP_PADDW_SAT_UN:
3985 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDUSW, ins->sreg1, ins->sreg2);
3986 break;
3987 case OP_PSUBW_SAT_UN:
3988 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBUSW, ins->sreg1, ins->sreg2);
3989 break;
3991 case OP_PADDB_SAT:
3992 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSB, ins->sreg1, ins->sreg2);
3993 break;
3994 case OP_PSUBB_SAT:
3995 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSB, ins->sreg1, ins->sreg2);
3996 break;
3997 case OP_PADDW_SAT:
3998 x86_sse_alu_pd_reg_reg (code, X86_SSE_PADDSW, ins->sreg1, ins->sreg2);
3999 break;
4000 case OP_PSUBW_SAT:
4001 x86_sse_alu_pd_reg_reg (code, X86_SSE_PSUBSW, ins->sreg1, ins->sreg2);
4002 break;
4004 case OP_PMULW:
4005 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULLW, ins->sreg1, ins->sreg2);
4006 break;
4007 case OP_PMULD:
4008 x86_sse_alu_sse41_reg_reg (code, X86_SSE_PMULLD, ins->sreg1, ins->sreg2);
4009 break;
4010 case OP_PMULQ:
4011 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULUDQ, ins->sreg1, ins->sreg2);
4012 break;
4013 case OP_PMULW_HIGH_UN:
4014 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHUW, ins->sreg1, ins->sreg2);
4015 break;
4016 case OP_PMULW_HIGH:
4017 x86_sse_alu_pd_reg_reg (code, X86_SSE_PMULHW, ins->sreg1, ins->sreg2);
4018 break;
4020 case OP_PSHRW:
4021 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4022 break;
4023 case OP_PSHRW_REG:
4024 x86_sse_shift_reg_reg (code, X86_SSE_PSRLW_REG, ins->dreg, ins->sreg2);
4025 break;
4027 case OP_PSARW:
4028 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4029 break;
4030 case OP_PSARW_REG:
4031 x86_sse_shift_reg_reg (code, X86_SSE_PSRAW_REG, ins->dreg, ins->sreg2);
4032 break;
4034 case OP_PSHLW:
4035 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTW, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4036 break;
4037 case OP_PSHLW_REG:
4038 x86_sse_shift_reg_reg (code, X86_SSE_PSLLW_REG, ins->dreg, ins->sreg2);
4039 break;
4041 case OP_PSHRD:
4042 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4043 break;
4044 case OP_PSHRD_REG:
4045 x86_sse_shift_reg_reg (code, X86_SSE_PSRLD_REG, ins->dreg, ins->sreg2);
4046 break;
4048 case OP_PSARD:
4049 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SAR, ins->dreg, ins->inst_imm);
4050 break;
4051 case OP_PSARD_REG:
4052 x86_sse_shift_reg_reg (code, X86_SSE_PSRAD_REG, ins->dreg, ins->sreg2);
4053 break;
4055 case OP_PSHLD:
4056 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTD, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4057 break;
4058 case OP_PSHLD_REG:
4059 x86_sse_shift_reg_reg (code, X86_SSE_PSLLD_REG, ins->dreg, ins->sreg2);
4060 break;
4062 case OP_PSHRQ:
4063 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHR, ins->dreg, ins->inst_imm);
4064 break;
4065 case OP_PSHRQ_REG:
4066 x86_sse_shift_reg_reg (code, X86_SSE_PSRLQ_REG, ins->dreg, ins->sreg2);
4067 break;
4069 case OP_PSHLQ:
4070 x86_sse_shift_reg_imm (code, X86_SSE_PSHIFTQ, X86_SSE_SHL, ins->dreg, ins->inst_imm);
4071 break;
4072 case OP_PSHLQ_REG:
4073 x86_sse_shift_reg_reg (code, X86_SSE_PSLLQ_REG, ins->dreg, ins->sreg2);
4074 break;
4076 case OP_ICONV_TO_X:
4077 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4078 break;
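/* The scalar extracts below all go through MOVD: the low dword of the xreg is
 * copied to a GP reg; for the narrow variants the requested byte/word is then
 * shifted down and sign- or zero-extended as the opcode requires. */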
4079 case OP_EXTRACT_I4:
4080 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4081 break;
4082 case OP_EXTRACT_I1:
4083 case OP_EXTRACT_U1:
4084 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4085 if (ins->inst_c0)
4086 x86_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8);
4087 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I1, FALSE);
4088 break;
4089 case OP_EXTRACT_I2:
4090 case OP_EXTRACT_U2:
4091 x86_movd_reg_xreg (code, ins->dreg, ins->sreg1);
4092 if (ins->inst_c0)
4093 x86_shift_reg_imm (code, X86_SHR, ins->dreg, 16);
4094 x86_widen_reg (code, ins->dreg, ins->dreg, ins->opcode == OP_EXTRACT_I2, TRUE);
4095 break;
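/* R8 values live on the x87 stack: store the requested half of the xreg to
 * the spill slot (MOVSD for the low half, MOVHPD for the high one) and FLD it
 * from there. */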
4096 case OP_EXTRACT_R8:
4097 if (ins->inst_c0)
4098 x86_sse_alu_pd_membase_reg (code, X86_SSE_MOVHPD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4099 else
4100 x86_sse_alu_sd_membase_reg (code, X86_SSE_MOVSD_MEMBASE_REG, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1);
4101 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE);
4102 break;
4104 case OP_INSERT_I2:
4105 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->sreg1, ins->sreg2, ins->inst_c0);
4106 break;
4107 case OP_EXTRACTX_U2:
4108 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PEXTRW, ins->dreg, ins->sreg1, ins->inst_c0);
4109 break;
4110 case OP_INSERTX_U1_SLOW:
4111 /* sreg1 is the extracted ireg (scratch)
4112  * sreg2 is the to-be-inserted ireg (scratch)
4113  * dreg is the xreg to receive the value */
4115 /*clear the bits from the extracted word*/
4116 x86_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00);
4117 /*shift the value to insert if needed*/
4118 if (ins->inst_c0 & 1)
4119 x86_shift_reg_imm (code, X86_SHL, ins->sreg2, 8);
4120 /*join them together*/
4121 x86_alu_reg_reg (code, X86_OR, ins->sreg1, ins->sreg2);
4122 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, ins->inst_c0 / 2);
4123 break;
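/* SSE2 has no PINSRD, so a 32 bit insert is emulated with two PINSRW ops: the
 * low word goes into lane inst_c0 * 2 and, after shifting sreg2 (a scratch
 * reg) right by 16, the high word into lane inst_c0 * 2 + 1. */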
4124 case OP_INSERTX_I4_SLOW:
4125 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2);
4126 x86_shift_reg_imm (code, X86_SHR, ins->sreg2, 16);
4127 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1);
4128 break;
4130 case OP_INSERTX_R4_SLOW:
4131 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4132 /*TODO if inst_c0 == 0 use movss*/
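/* The float was spilled with FST above; insert it as two 16 bit halves read
 * straight from the spill slot. */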
4133 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 0, ins->inst_c0 * 2);
4134 x86_sse_alu_pd_reg_membase_imm (code, X86_SSE_PINSRW, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset + 2, ins->inst_c0 * 2 + 1);
4135 break;
4136 case OP_INSERTX_R8_SLOW:
4137 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4138 if (ins->inst_c0)
4139 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVHPD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4140 else
4141 x86_sse_alu_pd_reg_membase (code, X86_SSE_MOVSD_REG_MEMBASE, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4142 break;
4144 case OP_STOREX_MEMBASE_REG:
4145 case OP_STOREX_MEMBASE:
4146 x86_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4147 break;
4148 case OP_LOADX_MEMBASE:
4149 x86_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4150 break;
4151 case OP_LOADX_ALIGNED_MEMBASE:
4152 x86_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset);
4153 break;
4154 case OP_STOREX_ALIGNED_MEMBASE_REG:
4155 x86_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1);
4156 break;
4157 case OP_STOREX_NTA_MEMBASE_REG:
4158 x86_sse_alu_reg_membase (code, X86_SSE_MOVNTPS, ins->dreg, ins->sreg1, ins->inst_offset);
4159 break;
4160 case OP_PREFETCH_MEMBASE:
4161 x86_sse_alu_reg_membase (code, X86_SSE_PREFETCH, ins->backend.arg_info, ins->sreg1, ins->inst_offset);
4163 break;
4164 case OP_XMOVE:
4165 /*FIXME the peephole pass should have killed this*/
4166 if (ins->dreg != ins->sreg1)
4167 x86_movaps_reg_reg (code, ins->dreg, ins->sreg1);
4168 break;
4169 case OP_XZERO:
4170 x86_sse_alu_pd_reg_reg (code, X86_SSE_PXOR, ins->dreg, ins->dreg);
4171 break;
4172 case OP_ICONV_TO_R8_RAW:
4173 x86_mov_membase_reg (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, ins->sreg1, 4);
4174 x86_fld_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE);
4175 break;
4177 case OP_FCONV_TO_R8_X:
4178 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4179 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4180 break;
4182 case OP_XCONV_R8_TO_I4:
4183 x86_cvttsd2si (code, ins->dreg, ins->sreg1);
4184 switch (ins->backend.source_opcode) {
4185 case OP_FCONV_TO_I1:
4186 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE);
4187 break;
4188 case OP_FCONV_TO_U1:
4189 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE);
4190 break;
4191 case OP_FCONV_TO_I2:
4192 x86_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE);
4193 break;
4194 case OP_FCONV_TO_U2:
4195 x86_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE);
4196 break;
4198 break;
4200 case OP_EXPAND_I1:
4201 /*FIXME this causes a partial register stall, maybe it would not be that bad to use shift + mask + or*/
4202 /*The +4 is to get a mov ?h, ?l over the same reg.*/
4203 x86_mov_reg_reg (code, ins->sreg1 + 4, ins->sreg1, 1);
4204 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4205 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4206 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4207 break;
4208 case OP_EXPAND_I2:
4209 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 0);
4210 x86_sse_alu_pd_reg_reg_imm (code, X86_SSE_PINSRW, ins->dreg, ins->sreg1, 1);
4211 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4212 break;
4213 case OP_EXPAND_I4:
4214 x86_movd_xreg_reg (code, ins->dreg, ins->sreg1);
4215 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4216 break;
4217 case OP_EXPAND_R4:
4218 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, FALSE, TRUE);
4219 x86_movd_xreg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
4220 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0);
4221 break;
4222 case OP_EXPAND_R8:
4223 x86_fst_membase (code, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset, TRUE, TRUE);
4224 x86_movsd_reg_membase (code, ins->dreg, ins->backend.spill_var->inst_basereg, ins->backend.spill_var->inst_offset);
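/* 0x44 swizzles (X,Y,X,Y), duplicating the low qword across the xreg */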
4225 x86_sse_shift_reg_imm (code, X86_SSE_PSHUFD, ins->dreg, ins->dreg, 0x44);
4226 break;
4227 #endif
4228 case OP_LIVERANGE_START: {
4229 if (cfg->verbose_level > 1)
4230 printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4231 MONO_VARINFO (cfg, ins->inst_c0)->live_range_start = code - cfg->native_code;
4232 break;
4234 case OP_LIVERANGE_END: {
4235 if (cfg->verbose_level > 1)
4236 printf ("R%d END=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code));
4237 MONO_VARINFO (cfg, ins->inst_c0)->live_range_end = code - cfg->native_code;
4238 break;
4240 default:
4241 g_warning ("unknown opcode %s\n", mono_inst_name (ins->opcode));
4242 g_assert_not_reached ();
4245 if (G_UNLIKELY ((code - cfg->native_code - offset) > max_len)) {
4246 g_warning ("wrong maximal instruction length of instruction %s (expected %d, got %d)",
4247 mono_inst_name (ins->opcode), max_len, code - cfg->native_code - offset);
4248 g_assert_not_reached ();
4251 cpos += max_len;
4254 cfg->code_len = code - cfg->native_code;
4257 #endif /* DISABLE_JIT */
4259 void
4260 mono_arch_register_lowlevel_calls (void)
4264 void
4265 mono_arch_patch_code (MonoMethod *method, MonoDomain *domain, guint8 *code, MonoJumpInfo *ji, gboolean run_cctors)
4267 MonoJumpInfo *patch_info;
4268 gboolean compile_aot = !run_cctors;
4270 for (patch_info = ji; patch_info; patch_info = patch_info->next) {
4271 unsigned char *ip = patch_info->ip.i + code;
4272 const unsigned char *target;
4274 target = mono_resolve_patch_target (method, domain, code, patch_info, run_cctors);
4276 if (compile_aot) {
4277 switch (patch_info->type) {
4278 case MONO_PATCH_INFO_BB:
4279 case MONO_PATCH_INFO_LABEL:
4280 break;
4281 default:
4282 /* No need to patch these */
4283 continue;
4287 switch (patch_info->type) {
4288 case MONO_PATCH_INFO_IP:
4289 *((gconstpointer *)(ip)) = target;
4290 break;
4291 case MONO_PATCH_INFO_CLASS_INIT: {
4292 guint8 *code = ip;
4293 /* Might already have been changed to a nop */
4294 x86_call_code (code, 0);
4295 x86_patch (ip, target);
4296 break;
4298 case MONO_PATCH_INFO_ABS:
4299 case MONO_PATCH_INFO_METHOD:
4300 case MONO_PATCH_INFO_METHOD_JUMP:
4301 case MONO_PATCH_INFO_INTERNAL_METHOD:
4302 case MONO_PATCH_INFO_BB:
4303 case MONO_PATCH_INFO_LABEL:
4304 case MONO_PATCH_INFO_RGCTX_FETCH:
4305 case MONO_PATCH_INFO_GENERIC_CLASS_INIT:
4306 case MONO_PATCH_INFO_MONITOR_ENTER:
4307 case MONO_PATCH_INFO_MONITOR_EXIT:
4308 x86_patch (ip, target);
4309 break;
4310 case MONO_PATCH_INFO_NONE:
4311 break;
4312 default: {
4313 guint32 offset = mono_arch_get_patch_offset (ip);
4314 *((gconstpointer *)(ip + offset)) = target;
4315 break;
4321 guint8 *
4322 mono_arch_emit_prolog (MonoCompile *cfg)
4324 MonoMethod *method = cfg->method;
4325 MonoBasicBlock *bb;
4326 MonoMethodSignature *sig;
4327 MonoInst *inst;
4328 int alloc_size, pos, max_offset, i, cfa_offset;
4329 guint8 *code;
4330 gboolean need_stack_frame;
4332 cfg->code_size = MAX (mono_method_get_header (method)->code_size * 4, 10240);
4334 if (cfg->prof_options & MONO_PROFILE_ENTER_LEAVE)
4335 cfg->code_size += 512;
4337 code = cfg->native_code = g_malloc (cfg->code_size);
4339 /* Offset between ESP and the CFA */
4340 cfa_offset = 0;
4342 // CFA = sp + 4
4343 cfa_offset = sizeof (gpointer);
4344 mono_emit_unwind_op_def_cfa (cfg, code, X86_ESP, sizeof (gpointer));
4345 // IP saved at CFA - 4
4346 /* There is no IP reg on x86 */
4347 mono_emit_unwind_op_offset (cfg, code, X86_NREG, -cfa_offset);
4349 need_stack_frame = needs_stack_frame (cfg);
4351 if (need_stack_frame) {
4352 x86_push_reg (code, X86_EBP);
4353 cfa_offset += sizeof (gpointer);
4354 mono_emit_unwind_op_def_cfa_offset (cfg, code, cfa_offset);
4355 mono_emit_unwind_op_offset (cfg, code, X86_EBP, - cfa_offset);
4356 x86_mov_reg_reg (code, X86_EBP, X86_ESP, 4);
4357 mono_emit_unwind_op_def_cfa_reg (cfg, code, X86_EBP);
4360 alloc_size = cfg->stack_offset;
4361 pos = 0;
4363 if (method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED) {
4364 /* Might need to attach the thread to the JIT or change the domain for the callback */
4365 if (appdomain_tls_offset != -1 && lmf_tls_offset != -1) {
4366 guint8 *buf, *no_domain_branch;
4368 code = mono_x86_emit_tls_get (code, X86_EAX, appdomain_tls_offset);
4369 x86_alu_reg_imm (code, X86_CMP, X86_EAX, GPOINTER_TO_UINT (cfg->domain));
4370 no_domain_branch = code;
4371 x86_branch8 (code, X86_CC_NE, 0, 0);
4372 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_tls_offset);
4373 x86_test_reg_reg (code, X86_EAX, X86_EAX);
4374 buf = code;
4375 x86_branch8 (code, X86_CC_NE, 0, 0);
4376 x86_patch (no_domain_branch, code);
4377 x86_push_imm (code, cfg->domain);
4378 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4379 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4380 x86_patch (buf, code);
4381 #ifdef PLATFORM_WIN32
4382 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4383 /* FIXME: Add a separate key for LMF to avoid this */
4384 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4385 #endif
4387 else {
4388 g_assert (!cfg->compile_aot);
4389 x86_push_imm (code, cfg->domain);
4390 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_jit_thread_attach");
4391 x86_alu_reg_imm (code, X86_ADD, X86_ESP, 4);
4395 if (method->save_lmf) {
4396 pos += sizeof (MonoLMF);
4398 /* save the current IP */
4399 mono_add_patch_info (cfg, code + 1 - cfg->native_code, MONO_PATCH_INFO_IP, NULL);
4400 x86_push_imm_template (code);
4401 cfa_offset += sizeof (gpointer);
4403 /* save all caller saved regs */
4404 x86_push_reg (code, X86_EBP);
4405 cfa_offset += sizeof (gpointer);
4406 x86_push_reg (code, X86_ESI);
4407 cfa_offset += sizeof (gpointer);
4408 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4409 x86_push_reg (code, X86_EDI);
4410 cfa_offset += sizeof (gpointer);
4411 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4412 x86_push_reg (code, X86_EBX);
4413 cfa_offset += sizeof (gpointer);
4414 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
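/* Together with the words reserved below, the pushes above lay out a MonoLMF
 * in place on the stack; what follows just links it into the per-thread LMF
 * list, either directly through TLS or via the address from mono_get_lmf_addr. */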
4416 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4418 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4419 * through the mono_lmf_addr TLS variable.
4421 /* %eax = previous_lmf */
4422 x86_prefix (code, X86_GS_PREFIX);
4423 x86_mov_reg_mem (code, X86_EAX, lmf_tls_offset, 4);
4424 /* skip esp + method_info + lmf */
4425 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 12);
4426 /* push previous_lmf */
4427 x86_push_reg (code, X86_EAX);
4428 /* new lmf = ESP */
4429 x86_prefix (code, X86_GS_PREFIX);
4430 x86_mov_mem_reg (code, lmf_tls_offset, X86_ESP, 4);
4431 } else {
4432 /* get the address of lmf for the current thread */
4434 * This is performance critical so we try to use some tricks to make
4435 * it fast.
4438 if (lmf_addr_tls_offset != -1) {
4439 /* Load lmf quickly using the GS register */
4440 code = mono_x86_emit_tls_get (code, X86_EAX, lmf_addr_tls_offset);
4441 #ifdef PLATFORM_WIN32
4442 /* The TLS key actually contains a pointer to the MonoJitTlsData structure */
4443 /* FIXME: Add a separate key for LMF to avoid this */
4444 x86_alu_reg_imm (code, X86_ADD, X86_EAX, G_STRUCT_OFFSET (MonoJitTlsData, lmf));
4445 #endif
4446 } else {
4447 code = emit_call (cfg, code, MONO_PATCH_INFO_INTERNAL_METHOD, (gpointer)"mono_get_lmf_addr");
4450 /* Skip esp + method info */
4451 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 8);
4453 /* push lmf */
4454 x86_push_reg (code, X86_EAX);
4455 /* push *lmf (previous_lmf) */
4456 x86_push_membase (code, X86_EAX, 0);
4457 /* *(lmf) = ESP */
4458 x86_mov_membase_reg (code, X86_EAX, 0, X86_ESP, 4);
4460 } else {
4462 if (cfg->used_int_regs & (1 << X86_EBX)) {
4463 x86_push_reg (code, X86_EBX);
4464 pos += 4;
4465 cfa_offset += sizeof (gpointer);
4466 mono_emit_unwind_op_offset (cfg, code, X86_EBX, - cfa_offset);
4469 if (cfg->used_int_regs & (1 << X86_EDI)) {
4470 x86_push_reg (code, X86_EDI);
4471 pos += 4;
4472 cfa_offset += sizeof (gpointer);
4473 mono_emit_unwind_op_offset (cfg, code, X86_EDI, - cfa_offset);
4476 if (cfg->used_int_regs & (1 << X86_ESI)) {
4477 x86_push_reg (code, X86_ESI);
4478 pos += 4;
4479 cfa_offset += sizeof (gpointer);
4480 mono_emit_unwind_op_offset (cfg, code, X86_ESI, - cfa_offset);
4484 alloc_size -= pos;
4486 /* the original alloc_size is already aligned: there is %ebp and retip pushed, so realign */
4487 if (mono_do_x86_stack_align && need_stack_frame) {
4488 int tot = alloc_size + pos + 4; /* ret ip */
4489 if (need_stack_frame)
4490 tot += 4; /* ebp */
4491 tot &= MONO_ARCH_FRAME_ALIGNMENT - 1;
4492 if (tot)
4493 alloc_size += MONO_ARCH_FRAME_ALIGNMENT - tot;
4496 if (alloc_size) {
4497 /* See mono_emit_stack_alloc */
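/* Probe the stack one page at a time: each 0x1000 byte step is touched with a
 * test so the guard page is hit in order, instead of faulting past it with a
 * single large ESP adjustment. */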
4498 #if defined(PLATFORM_WIN32) || defined(MONO_ARCH_SIGSEGV_ON_ALTSTACK)
4499 guint32 remaining_size = alloc_size;
4500 while (remaining_size >= 0x1000) {
4501 x86_alu_reg_imm (code, X86_SUB, X86_ESP, 0x1000);
4502 x86_test_membase_reg (code, X86_ESP, 0, X86_ESP);
4503 remaining_size -= 0x1000;
4505 if (remaining_size)
4506 x86_alu_reg_imm (code, X86_SUB, X86_ESP, remaining_size);
4507 #else
4508 x86_alu_reg_imm (code, X86_SUB, X86_ESP, alloc_size);
4509 #endif
4511 g_assert (need_stack_frame);
4514 if (cfg->method->wrapper_type == MONO_WRAPPER_NATIVE_TO_MANAGED ||
4515 cfg->method->wrapper_type == MONO_WRAPPER_RUNTIME_INVOKE) {
4516 x86_alu_reg_imm (code, X86_AND, X86_ESP, -MONO_ARCH_FRAME_ALIGNMENT);
4519 #if DEBUG_STACK_ALIGNMENT
4520 /* check the stack is aligned */
4521 if (need_stack_frame && method->wrapper_type == MONO_WRAPPER_NONE) {
4522 x86_mov_reg_reg (code, X86_ECX, X86_ESP, 4);
4523 x86_alu_reg_imm (code, X86_AND, X86_ECX, MONO_ARCH_FRAME_ALIGNMENT - 1);
4524 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4525 x86_branch_disp (code, X86_CC_EQ, 3, FALSE);
4526 x86_breakpoint (code);
4528 #endif
4530 /* compute max_offset in order to use short forward jumps */
4531 max_offset = 0;
4532 if (cfg->opt & MONO_OPT_BRANCH) {
4533 for (bb = cfg->bb_entry; bb; bb = bb->next_bb) {
4534 MonoInst *ins;
4535 bb->max_offset = max_offset;
4537 if (cfg->prof_options & MONO_PROFILE_COVERAGE)
4538 max_offset += 6;
4539 /* max alignment for loops */
4540 if ((cfg->opt & MONO_OPT_LOOP) && bb_is_loop_start (bb))
4541 max_offset += LOOP_ALIGNMENT;
4543 MONO_BB_FOR_EACH_INS (bb, ins) {
4544 if (ins->opcode == OP_LABEL)
4545 ins->inst_c1 = max_offset;
4547 max_offset += ((guint8 *)ins_get_spec (ins->opcode))[MONO_INST_LEN];
4552 /* store runtime generic context */
4553 if (cfg->rgctx_var) {
4554 g_assert (cfg->rgctx_var->opcode == OP_REGOFFSET && cfg->rgctx_var->inst_basereg == X86_EBP);
4556 x86_mov_membase_reg (code, X86_EBP, cfg->rgctx_var->inst_offset, MONO_ARCH_RGCTX_REG, 4);
4559 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4560 code = mono_arch_instrument_prolog (cfg, mono_trace_enter_method, code, TRUE);
4562 /* load arguments allocated to register from the stack */
4563 sig = mono_method_signature (method);
4564 pos = 0;
4566 for (i = 0; i < sig->param_count + sig->hasthis; ++i) {
4567 inst = cfg->args [pos];
4568 if (inst->opcode == OP_REGVAR) {
4569 g_assert (need_stack_frame);
4570 x86_mov_reg_membase (code, inst->dreg, X86_EBP, inst->inst_offset, 4);
4571 if (cfg->verbose_level > 2)
4572 g_print ("Argument %d assigned to register %s\n", pos, mono_arch_regname (inst->dreg));
4574 pos++;
4577 cfg->code_len = code - cfg->native_code;
4579 g_assert (cfg->code_len < cfg->code_size);
4581 return code;
4584 void
4585 mono_arch_emit_epilog (MonoCompile *cfg)
4587 MonoMethod *method = cfg->method;
4588 MonoMethodSignature *sig = mono_method_signature (method);
4589 int quad, pos;
4590 guint32 stack_to_pop;
4591 guint8 *code;
4592 int max_epilog_size = 16;
4593 CallInfo *cinfo;
4594 gboolean need_stack_frame = needs_stack_frame (cfg);
4596 if (cfg->method->save_lmf)
4597 max_epilog_size += 128;
4599 while (cfg->code_len + max_epilog_size > (cfg->code_size - 16)) {
4600 cfg->code_size *= 2;
4601 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4602 mono_jit_stats.code_reallocs++;
4605 code = cfg->native_code + cfg->code_len;
4607 if (mono_jit_trace_calls != NULL && mono_trace_eval (method))
4608 code = mono_arch_instrument_epilog (cfg, mono_trace_leave_method, code, TRUE);
4610 /* the code restoring the registers must be kept in sync with OP_JMP */
4611 pos = 0;
4613 if (method->save_lmf) {
4614 gint32 prev_lmf_reg;
4615 gint32 lmf_offset = -sizeof (MonoLMF);
4617 /* check if we need to restore protection of the stack after a stack overflow */
4618 if (mono_get_jit_tls_offset () != -1) {
4619 guint8 *patch;
4620 code = mono_x86_emit_tls_get (code, X86_ECX, mono_get_jit_tls_offset ());
4621 /* we load the value in a separate instruction: this mechanism may be
4622 * used later as a safer way to do thread interruption
4624 x86_mov_reg_membase (code, X86_ECX, X86_ECX, G_STRUCT_OFFSET (MonoJitTlsData, restore_stack_prot), 4);
4625 x86_alu_reg_imm (code, X86_CMP, X86_ECX, 0);
4626 patch = code;
4627 x86_branch8 (code, X86_CC_Z, 0, FALSE);
4628 /* note that the call trampoline will preserve eax/edx */
4629 x86_call_reg (code, X86_ECX);
4630 x86_patch (patch, code);
4631 } else {
4632 /* FIXME: maybe save the jit tls in the prolog */
4634 if ((lmf_tls_offset != -1) && !is_win32 && !optimize_for_xen) {
4636 * Optimized version which uses the mono_lmf TLS variable instead of indirection
4637 * through the mono_lmf_addr TLS variable.
4639 /* reg = previous_lmf */
4640 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4642 /* lmf = previous_lmf */
4643 x86_prefix (code, X86_GS_PREFIX);
4644 x86_mov_mem_reg (code, lmf_tls_offset, X86_ECX, 4);
4645 } else {
4646 /* Find a spare register */
4647 switch (mini_type_get_underlying_type (cfg->generic_sharing_context, sig->ret)->type) {
4648 case MONO_TYPE_I8:
4649 case MONO_TYPE_U8:
4650 prev_lmf_reg = X86_EDI;
4651 cfg->used_int_regs |= (1 << X86_EDI);
4652 break;
4653 default:
4654 prev_lmf_reg = X86_EDX;
4655 break;
4658 /* reg = previous_lmf */
4659 x86_mov_reg_membase (code, prev_lmf_reg, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, previous_lmf), 4);
4661 /* ecx = lmf */
4662 x86_mov_reg_membase (code, X86_ECX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, lmf_addr), 4);
4664 /* *(lmf) = previous_lmf */
4665 x86_mov_membase_reg (code, X86_ECX, 0, prev_lmf_reg, 4);
4668 /* restore caller saved regs */
4669 if (cfg->used_int_regs & (1 << X86_EBX)) {
4670 x86_mov_reg_membase (code, X86_EBX, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, ebx), 4);
4673 if (cfg->used_int_regs & (1 << X86_EDI)) {
4674 x86_mov_reg_membase (code, X86_EDI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, edi), 4);
4676 if (cfg->used_int_regs & (1 << X86_ESI)) {
4677 x86_mov_reg_membase (code, X86_ESI, X86_EBP, lmf_offset + G_STRUCT_OFFSET (MonoLMF, esi), 4);
4680 /* EBP is restored by LEAVE */
4681 } else {
4682 if (cfg->used_int_regs & (1 << X86_EBX)) {
4683 pos -= 4;
4685 if (cfg->used_int_regs & (1 << X86_EDI)) {
4686 pos -= 4;
4688 if (cfg->used_int_regs & (1 << X86_ESI)) {
4689 pos -= 4;
4692 if (pos) {
4693 g_assert (need_stack_frame);
4694 x86_lea_membase (code, X86_ESP, X86_EBP, pos);
4697 if (cfg->used_int_regs & (1 << X86_ESI)) {
4698 x86_pop_reg (code, X86_ESI);
4700 if (cfg->used_int_regs & (1 << X86_EDI)) {
4701 x86_pop_reg (code, X86_EDI);
4703 if (cfg->used_int_regs & (1 << X86_EBX)) {
4704 x86_pop_reg (code, X86_EBX);
4708 /* Load returned vtypes into registers if needed */
4709 cinfo = get_call_info (cfg->generic_sharing_context, cfg->mempool, sig, FALSE);
4710 if (cinfo->ret.storage == ArgValuetypeInReg) {
4711 for (quad = 0; quad < 2; quad ++) {
4712 switch (cinfo->ret.pair_storage [quad]) {
4713 case ArgInIReg:
4714 x86_mov_reg_membase (code, cinfo->ret.pair_regs [quad], cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), 4);
4715 break;
4716 case ArgOnFloatFpStack:
4717 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), FALSE);
4718 break;
4719 case ArgOnDoubleFpStack:
4720 x86_fld_membase (code, cfg->ret->inst_basereg, cfg->ret->inst_offset + (quad * sizeof (gpointer)), TRUE);
4721 break;
4722 case ArgNone:
4723 break;
4724 default:
4725 g_assert_not_reached ();
4730 if (need_stack_frame)
4731 x86_leave (code);
4733 if (CALLCONV_IS_STDCALL (sig)) {
4734 MonoJitArgumentInfo *arg_info = alloca (sizeof (MonoJitArgumentInfo) * (sig->param_count + 1));
4736 stack_to_pop = mono_arch_get_argument_info (sig, sig->param_count, arg_info);
4737 } else if (MONO_TYPE_ISSTRUCT (mono_method_signature (cfg->method)->ret) && (cinfo->ret.storage == ArgOnStack))
4738 stack_to_pop = 4;
4739 else
4740 stack_to_pop = 0;
4742 if (stack_to_pop) {
4743 g_assert (need_stack_frame);
4744 x86_ret_imm (code, stack_to_pop);
4745 } else {
4746 x86_ret (code);
4749 cfg->code_len = code - cfg->native_code;
4751 g_assert (cfg->code_len < cfg->code_size);
4754 void
4755 mono_arch_emit_exceptions (MonoCompile *cfg)
4757 MonoJumpInfo *patch_info;
4758 int nthrows, i;
4759 guint8 *code;
4760 MonoClass *exc_classes [16];
4761 guint8 *exc_throw_start [16], *exc_throw_end [16];
4762 guint32 code_size;
4763 int exc_count = 0;
4765 /* Compute needed space */
4766 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4767 if (patch_info->type == MONO_PATCH_INFO_EXC)
4768 exc_count++;
4772 * make sure we have enough space for exceptions
4773 * 16 is the size of two push_imm instructions and a call
4775 if (cfg->compile_aot)
4776 code_size = exc_count * 32;
4777 else
4778 code_size = exc_count * 16;
4780 while (cfg->code_len + code_size > (cfg->code_size - 16)) {
4781 cfg->code_size *= 2;
4782 cfg->native_code = g_realloc (cfg->native_code, cfg->code_size);
4783 mono_jit_stats.code_reallocs++;
4786 code = cfg->native_code + cfg->code_len;
4788 nthrows = 0;
4789 for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
4790 switch (patch_info->type) {
4791 case MONO_PATCH_INFO_EXC: {
4792 MonoClass *exc_class;
4793 guint8 *buf, *buf2;
4794 guint32 throw_ip;
4796 x86_patch (patch_info->ip.i + cfg->native_code, code);
4798 exc_class = mono_class_from_name (mono_defaults.corlib, "System", patch_info->data.name);
4799 g_assert (exc_class);
4800 throw_ip = patch_info->ip.i;
4802 /* Find a throw sequence for the same exception class */
4803 for (i = 0; i < nthrows; ++i)
4804 if (exc_classes [i] == exc_class)
4805 break;
4806 if (i < nthrows) {
4807 x86_push_imm (code, (exc_throw_end [i] - cfg->native_code) - throw_ip);
4808 x86_jump_code (code, exc_throw_start [i]);
4809 patch_info->type = MONO_PATCH_INFO_NONE;
4811 else {
4812 guint32 size;
4814 /* Compute size of code following the push <OFFSET> */
4815 size = 5 + 5;
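/* push imm32 (5 bytes) for the type token plus call rel32 (5 bytes) */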
4817 if ((code - cfg->native_code) - throw_ip < 126 - size) {
4818 /* Use the shorter form */
4819 buf = buf2 = code;
4820 x86_push_imm (code, 0);
4822 else {
4823 buf = code;
4824 x86_push_imm (code, 0xf0f0f0f0);
4825 buf2 = code;
4828 if (nthrows < 16) {
4829 exc_classes [nthrows] = exc_class;
4830 exc_throw_start [nthrows] = code;
4833 x86_push_imm (code, exc_class->type_token - MONO_TOKEN_TYPE_DEF);
4834 patch_info->data.name = "mono_arch_throw_corlib_exception";
4835 patch_info->type = MONO_PATCH_INFO_INTERNAL_METHOD;
4836 patch_info->ip.i = code - cfg->native_code;
4837 x86_call_code (code, 0);
4838 x86_push_imm (buf, (code - cfg->native_code) - throw_ip);
4839 while (buf < buf2)
4840 x86_nop (buf);
4842 if (nthrows < 16) {
4843 exc_throw_end [nthrows] = code;
4844 nthrows ++;
4847 break;
4849 default:
4850 /* do nothing */
4851 break;
4855 cfg->code_len = code - cfg->native_code;
4857 g_assert (cfg->code_len < cfg->code_size);
4860 void
4861 mono_arch_flush_icache (guint8 *code, gint size)
4863 /* not needed */
4866 void
4867 mono_arch_flush_register_windows (void)
4871 gboolean
4872 mono_arch_is_inst_imm (gint64 imm)
4874 return TRUE;
4878 * Support for fast access to the thread-local lmf structure using the GS
4879 * segment register on NPTL + kernel 2.6.x.
4882 static gboolean tls_offset_inited = FALSE;
4884 void
4885 mono_arch_setup_jit_tls_data (MonoJitTlsData *tls)
4887 if (!tls_offset_inited) {
4888 if (!getenv ("MONO_NO_TLS")) {
4889 #ifdef PLATFORM_WIN32
4891 * We need to init this multiple times, since when we are first called, the key might not
4892 * be initialized yet.
4894 appdomain_tls_offset = mono_domain_get_tls_key ();
4895 lmf_tls_offset = mono_get_jit_tls_key ();
4896 thread_tls_offset = mono_thread_get_tls_key ();
4898 /* Only 64 tls entries can be accessed using inline code */
4899 if (appdomain_tls_offset >= 64)
4900 appdomain_tls_offset = -1;
4901 if (lmf_tls_offset >= 64)
4902 lmf_tls_offset = -1;
4903 if (thread_tls_offset >= 64)
4904 thread_tls_offset = -1;
4905 #else
4906 #if MONO_XEN_OPT
4907 optimize_for_xen = access ("/proc/xen", F_OK) == 0;
4908 #endif
4909 tls_offset_inited = TRUE;
4910 appdomain_tls_offset = mono_domain_get_tls_offset ();
4911 lmf_tls_offset = mono_get_lmf_tls_offset ();
4912 lmf_addr_tls_offset = mono_get_lmf_addr_tls_offset ();
4913 thread_tls_offset = mono_thread_get_tls_offset ();
4914 #endif
4919 void
4920 mono_arch_free_jit_tls_data (MonoJitTlsData *tls)
4924 #ifdef MONO_ARCH_HAVE_IMT
4926 // Linear handler, the bsearch head compare is shorter
4927 //[2 + 4] x86_alu_reg_imm (code, X86_CMP, ins->sreg1, ins->inst_imm);
4928 //[1 + 1] x86_branch8(inst,cond,imm,is_signed)
4929 // x86_patch(ins,target)
4930 //[1 + 5] x86_jump_mem(inst,mem)
4932 #define CMP_SIZE 6
4933 #define BR_SMALL_SIZE 2
4934 #define BR_LARGE_SIZE 5
4935 #define JUMP_IMM_SIZE 6
4936 #define ENABLE_WRONG_METHOD_CHECK 0
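/* E.g. an equals entry that still needs its own compare costs
 * CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE = 6 + 2 + 6 = 14 bytes. */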
4938 static int
4939 imt_branch_distance (MonoIMTCheckItem **imt_entries, int start, int target)
4941 int i, distance = 0;
4942 for (i = start; i < target; ++i)
4943 distance += imt_entries [i]->chunk_size;
4944 return distance;
4948 * LOCKING: called with the domain lock held
4950 gpointer
4951 mono_arch_build_imt_thunk (MonoVTable *vtable, MonoDomain *domain, MonoIMTCheckItem **imt_entries, int count,
4952 gpointer fail_tramp)
4954 int i;
4955 int size = 0;
4956 guint8 *code, *start;
4958 for (i = 0; i < count; ++i) {
4959 MonoIMTCheckItem *item = imt_entries [i];
4960 if (item->is_equals) {
4961 if (item->check_target_idx) {
4962 if (!item->compare_done)
4963 item->chunk_size += CMP_SIZE;
4964 item->chunk_size += BR_SMALL_SIZE + JUMP_IMM_SIZE;
4965 } else {
4966 if (fail_tramp) {
4967 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + JUMP_IMM_SIZE * 2;
4968 } else {
4969 item->chunk_size += JUMP_IMM_SIZE;
4970 #if ENABLE_WRONG_METHOD_CHECK
4971 item->chunk_size += CMP_SIZE + BR_SMALL_SIZE + 1;
4972 #endif
4975 } else {
4976 item->chunk_size += CMP_SIZE + BR_LARGE_SIZE;
4977 imt_entries [item->check_target_idx]->compare_done = TRUE;
4979 size += item->chunk_size;
4981 if (fail_tramp)
4982 code = mono_method_alloc_generic_virtual_thunk (domain, size);
4983 else
4984 code = mono_domain_code_reserve (domain, size);
4985 start = code;
4986 for (i = 0; i < count; ++i) {
4987 MonoIMTCheckItem *item = imt_entries [i];
4988 item->code_target = code;
4989 if (item->is_equals) {
4990 if (item->check_target_idx) {
4991 if (!item->compare_done)
4992 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
4993 item->jmp_code = code;
4994 x86_branch8 (code, X86_CC_NE, 0, FALSE);
4995 if (item->has_target_code)
4996 x86_jump_code (code, item->value.target_code);
4997 else
4998 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
4999 } else {
5000 if (fail_tramp) {
5001 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5002 item->jmp_code = code;
5003 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5004 if (item->has_target_code)
5005 x86_jump_code (code, item->value.target_code);
5006 else
5007 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5008 x86_patch (item->jmp_code, code);
5009 x86_jump_code (code, fail_tramp);
5010 item->jmp_code = NULL;
5011 } else {
5012 /* enable the commented code to assert on wrong method */
5013 #if ENABLE_WRONG_METHOD_CHECK
5014 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5015 item->jmp_code = code;
5016 x86_branch8 (code, X86_CC_NE, 0, FALSE);
5017 #endif
5018 if (item->has_target_code)
5019 x86_jump_code (code, item->value.target_code);
5020 else
5021 x86_jump_mem (code, & (vtable->vtable [item->value.vtable_slot]));
5022 #if ENABLE_WRONG_METHOD_CHECK
5023 x86_patch (item->jmp_code, code);
5024 x86_breakpoint (code);
5025 item->jmp_code = NULL;
5026 #endif
5029 } else {
5030 x86_alu_reg_imm (code, X86_CMP, MONO_ARCH_IMT_REG, (guint32)item->key);
5031 item->jmp_code = code;
5032 if (x86_is_imm8 (imt_branch_distance (imt_entries, i, item->check_target_idx)))
5033 x86_branch8 (code, X86_CC_GE, 0, FALSE);
5034 else
5035 x86_branch32 (code, X86_CC_GE, 0, FALSE);
5038 /* patch the branches to get to the target items */
5039 for (i = 0; i < count; ++i) {
5040 MonoIMTCheckItem *item = imt_entries [i];
5041 if (item->jmp_code) {
5042 if (item->check_target_idx) {
5043 x86_patch (item->jmp_code, imt_entries [item->check_target_idx]->code_target);
5048 if (!fail_tramp)
5049 mono_stats.imt_thunks_size += code - start;
5050 g_assert (code - start <= size);
5051 return start;
5054 MonoMethod*
5055 mono_arch_find_imt_method (gpointer *regs, guint8 *code)
5057 return (MonoMethod*) regs [MONO_ARCH_IMT_REG];
5060 MonoObject*
5061 mono_arch_find_this_argument (gpointer *regs, MonoMethod *method, MonoGenericSharingContext *gsctx)
5063 MonoMethodSignature *sig = mono_method_signature (method);
5064 CallInfo *cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5065 int this_argument_offset;
5066 MonoObject *this_argument;
5069 * this is the offset of the this arg from esp as saved at the start of
5070 * mono_arch_create_trampoline_code () in tramp-x86.c.
5072 this_argument_offset = 5;
5073 if (MONO_TYPE_ISSTRUCT (sig->ret) && (cinfo->ret.storage == ArgOnStack))
5074 this_argument_offset++;
5076 this_argument = * (MonoObject**) (((guint8*) regs [X86_ESP]) + this_argument_offset * sizeof (gpointer));
5078 g_free (cinfo);
5079 return this_argument;
5081 #endif
5083 MonoVTable*
5084 mono_arch_find_static_call_vtable (gpointer *regs, guint8 *code)
5086 return (MonoVTable*) regs [MONO_ARCH_RGCTX_REG];
5089 MonoInst*
5090 mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args)
5092 MonoInst *ins = NULL;
5093 int opcode = 0;
5095 if (cmethod->klass == mono_defaults.math_class) {
5096 if (strcmp (cmethod->name, "Sin") == 0) {
5097 opcode = OP_SIN;
5098 } else if (strcmp (cmethod->name, "Cos") == 0) {
5099 opcode = OP_COS;
5100 } else if (strcmp (cmethod->name, "Tan") == 0) {
5101 opcode = OP_TAN;
5102 } else if (strcmp (cmethod->name, "Atan") == 0) {
5103 opcode = OP_ATAN;
5104 } else if (strcmp (cmethod->name, "Sqrt") == 0) {
5105 opcode = OP_SQRT;
5106 } else if (strcmp (cmethod->name, "Abs") == 0 && fsig->params [0]->type == MONO_TYPE_R8) {
5107 opcode = OP_ABS;
5108 } else if (strcmp (cmethod->name, "Round") == 0 && fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) {
5109 opcode = OP_ROUND;
5112 if (opcode) {
5113 MONO_INST_NEW (cfg, ins, opcode);
5114 ins->type = STACK_R8;
5115 ins->dreg = mono_alloc_freg (cfg);
5116 ins->sreg1 = args [0]->dreg;
5117 MONO_ADD_INS (cfg->cbb, ins);
5120 if (cfg->opt & MONO_OPT_CMOV) {
5121 int opcode = 0;
5123 if (strcmp (cmethod->name, "Min") == 0) {
5124 if (fsig->params [0]->type == MONO_TYPE_I4)
5125 opcode = OP_IMIN;
5126 } else if (strcmp (cmethod->name, "Max") == 0) {
5127 if (fsig->params [0]->type == MONO_TYPE_I4)
5128 opcode = OP_IMAX;
5131 if (opcode) {
5132 MONO_INST_NEW (cfg, ins, opcode);
5133 ins->type = STACK_I4;
5134 ins->dreg = mono_alloc_ireg (cfg);
5135 ins->sreg1 = args [0]->dreg;
5136 ins->sreg2 = args [1]->dreg;
5137 MONO_ADD_INS (cfg->cbb, ins);
5141 #if 0
5142 /* OP_FREM is not IEEE compatible */
5143 else if (strcmp (cmethod->name, "IEEERemainder") == 0) {
5144 MONO_INST_NEW (cfg, ins, OP_FREM);
5145 ins->inst_i0 = args [0];
5146 ins->inst_i1 = args [1];
5148 #endif
5151 return ins;
5154 gboolean
5155 mono_arch_print_tree (MonoInst *tree, int arity)
5157 return 0;
5160 MonoInst* mono_arch_get_domain_intrinsic (MonoCompile* cfg)
5162 MonoInst* ins;
5164 return NULL;
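/* Note that the unconditional return above disables this intrinsic: the
 * TLS-based lookup below is currently unreachable. */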
5166 if (appdomain_tls_offset == -1)
5167 return NULL;
5169 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5170 ins->inst_offset = appdomain_tls_offset;
5171 return ins;
5174 MonoInst* mono_arch_get_thread_intrinsic (MonoCompile* cfg)
5176 MonoInst* ins;
5178 if (thread_tls_offset == -1)
5179 return NULL;
5181 MONO_INST_NEW (cfg, ins, OP_TLS_GET);
5182 ins->inst_offset = thread_tls_offset;
5183 return ins;
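/*
 * mono_arch_get_patch_offset:
 *
 *   Return the offset within the instruction at @code of the immediate or
 * displacement that a patch target gets written to.
 */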
5186 guint32
5187 mono_arch_get_patch_offset (guint8 *code)
5189 if ((code [0] == 0x8b) && (x86_modrm_mod (code [1]) == 0x2))
5190 return 2;
5191 else if ((code [0] == 0xba))
5192 return 1;
5193 else if ((code [0] == 0x68))
5194 /* push IMM */
5195 return 1;
5196 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x6))
5197 /* push <OFFSET>(<REG>) */
5198 return 2;
5199 else if ((code [0] == 0xff) && (x86_modrm_reg (code [1]) == 0x2))
5200 /* call *<OFFSET>(<REG>) */
5201 return 2;
5202 else if ((code [0] == 0xdd) || (code [0] == 0xd9))
5203 /* fldl <ADDR> */
5204 return 2;
5205 else if ((code [0] == 0x58) && (code [1] == 0x05))
5206 /* pop %eax; add <OFFSET>, %eax */
5207 return 2;
5208 else if ((code [0] >= 0x58) && (code [0] <= 0x58 + X86_NREG) && (code [1] == 0x81))
5209 /* pop <REG>; add <OFFSET>, <REG> */
5210 return 3;
5211 else if ((code [0] >= 0xb8) && (code [0] < 0xb8 + 8))
5212 /* mov <REG>, imm */
5213 return 1;
5214 else {
5215 g_assert_not_reached ();
5216 return -1;
5221 * mono_breakpoint_clean_code:
5223 * Copy @size bytes from @code - @offset to the buffer @buf. If the debugger inserted software
5224 * breakpoints in the original code, they are removed in the copy.
5226 * Returns TRUE if no sw breakpoint was present.
5228 gboolean
5229 mono_breakpoint_clean_code (guint8 *method_start, guint8 *code, int offset, guint8 *buf, int size)
5231 int i;
5232 gboolean can_write = TRUE;
5234 * If method_start is non-NULL we need to perform bound checks: since we access memory
5235 * at code - offset, we could go before the start of the method and end up in a different
5236 * page of memory that is not mapped, or read incorrect data anyway. We zero-fill the
5237 * missing bytes instead.
5239 if (!method_start || code - offset >= method_start) {
5240 memcpy (buf, code - offset, size);
5241 } else {
5242 int diff = code - method_start;
5243 memset (buf, 0, size);
5244 memcpy (buf + offset - diff, method_start, diff + size - offset);
5246 code -= offset;
5247 for (i = 0; i < MONO_BREAKPOINT_ARRAY_SIZE; ++i) {
5248 int idx = mono_breakpoint_info_index [i];
5249 guint8 *ptr;
5250 if (idx < 1)
5251 continue;
5252 ptr = mono_breakpoint_info [idx].address;
5253 if (ptr >= code && ptr < code + size) {
5254 guint8 saved_byte = mono_breakpoint_info [idx].saved_byte;
5255 can_write = FALSE;
5256 /*g_print ("patching %p with 0x%02x (was: 0x%02x)\n", ptr, saved_byte, buf [ptr - code]);*/
5257 buf [ptr - code] = saved_byte;
5260 return can_write;
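/*
 * mono_arch_get_vcall_slot:
 *
 *   Decode the call instruction just before @code and return the value of the
 * register the call went through, storing the offset of the vtable slot in
 * @displacement. Returns NULL for direct (call rel32) calls.
 */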
5263 gpointer
5264 mono_arch_get_vcall_slot (guint8 *code, gpointer *regs, int *displacement)
5266 guint8 buf [8];
5267 guint8 reg = 0;
5268 gint32 disp = 0;
5270 mono_breakpoint_clean_code (NULL, code, 8, buf, sizeof (buf));
5271 code = buf + 8;
5273 *displacement = 0;
5275 /* go to the start of the call instruction
5277 * address_byte = (m << 6) | (o << 3) | reg
5278 * call opcode: 0xff address_byte displacement
5279 * 0xff m=1,o=2 imm8
5280 * 0xff m=2,o=2 imm32
5282 code -= 6;
5285 * A given byte sequence can match more than one case here, so we have to be
5286 * really careful about the ordering of the cases. Longer sequences
5287 * come first.
5288 * Some of the rules are only needed because the imm in the mov could
5289 * match the code [2] == 0xe8 case below.
5292 if ((code [-2] == 0x8b) && (x86_modrm_mod (code [-1]) == 0x2) && (code [4] == 0xff) && (x86_modrm_reg (code [5]) == 0x2) && (x86_modrm_mod (code [5]) == 0x0)) {
5294 * This is an interface call
5295 * 8b 80 0c e8 ff ff mov 0xffffe80c(%eax),%eax
5296 * ff 10 call *(%eax)
5298 reg = x86_modrm_rm (code [5]);
5299 disp = 0;
5300 #ifdef MONO_ARCH_HAVE_IMT
5301 } else if ((code [-2] == 0xba) && (code [3] == 0xff) && (x86_modrm_mod (code [4]) == 1) && (x86_modrm_reg (code [4]) == 2) && ((signed char)code [5] < 0)) {
5302 /* IMT-based interface calls: with MONO_ARCH_IMT_REG == edx
5303 * ba 14 f8 28 08 mov $0x828f814,%edx
5304 * ff 50 fc call *0xfffffffc(%eax)
5306 reg = code [4] & 0x07;
5307 disp = (signed char)code [5];
5308 #endif
5309 } else if ((code [-2] >= 0xb8) && (code [-2] < 0xb8 + 8) && (code [3] == 0xff) && (x86_modrm_reg (code [4]) == 0x2) && (x86_modrm_mod (code [4]) == 0x1)) {
5311 * ba e8 e8 e8 e8 mov $0xe8e8e8e8,%edx
5312 * ff 50 60 call *0x60(%eax)
5314 reg = x86_modrm_rm (code [4]);
5315 disp = *(gint8*)(code + 5);
5316 } else if ((code [1] != 0xe8) && (code [3] == 0xff) && ((code [4] & 0x18) == 0x10) && ((code [4] >> 6) == 1)) {
5317 reg = code [4] & 0x07;
5318 disp = (signed char)code [5];
5319 } else {
5320 if ((code [0] == 0xff) && ((code [1] & 0x18) == 0x10) && ((code [1] >> 6) == 2)) {
5321 reg = code [1] & 0x07;
5322 disp = *((gint32*)(code + 2));
5323 } else if ((code [1] == 0xe8)) {
5324 return NULL;
5325 } else if ((code [4] == 0xff) && (((code [5] >> 6) & 0x3) == 0) && (((code [5] >> 3) & 0x7) == 2)) {
5327 * This is an interface call
5328 * 8b 40 30 mov 0x30(%eax),%eax
5329 * ff 10 call *(%eax)
5331 disp = 0;
5332 reg = code [5] & 0x07;
5334 else
5335 return NULL;
5338 *displacement = disp;
5339 return regs [reg];
5342 gpointer*
5343 mono_arch_get_vcall_slot_addr (guint8 *code, gpointer *regs)
5345 gpointer vt;
5346 int displacement;
5347 vt = mono_arch_get_vcall_slot (code, regs, &displacement);
5348 if (!vt)
5349 return NULL;
5350 return (gpointer*)((char*)vt + displacement);
5353 gpointer
5354 mono_arch_get_this_arg_from_call (MonoGenericSharingContext *gsctx, MonoMethodSignature *sig,
5355 gssize *regs, guint8 *code)
5357 guint32 esp = regs [X86_ESP];
5358 CallInfo *cinfo = NULL;
5359 gpointer res;
5360 int offset;
5363 * Avoid expensive calls to get_generic_context_from_code () + get_call_info
5364 * if possible.
5366 if (MONO_TYPE_ISSTRUCT (sig->ret)) {
5367 if (!gsctx && code)
5368 gsctx = mono_get_generic_context_from_code (code);
5369 cinfo = get_call_info (gsctx, NULL, sig, FALSE);
5371 offset = cinfo->args [0].offset;
5372 } else {
5373 offset = 0;
5377 * The stack looks like:
5378 * <other args>
5379 * <this=delegate>
5380 * <possible vtype return address>
5381 * <return addr>
5382 * <4 pointers pushed by mono_arch_create_trampoline_code ()>
5384 res = (((MonoObject**)esp) [5 + (offset / 4)]);
5385 if (cinfo)
5386 g_free (cinfo);
5387 return res;
5390 #define MAX_ARCH_DELEGATE_PARAMS 10
5392 gpointer
5393 mono_arch_get_delegate_invoke_impl (MonoMethodSignature *sig, gboolean has_target)
5395 guint8 *code, *start;
5397 if (sig->param_count > MAX_ARCH_DELEGATE_PARAMS)
5398 return NULL;
5400 /* FIXME: Support more cases */
5401 if (MONO_TYPE_ISSTRUCT (sig->ret))
5402 return NULL;
5405 * The stack contains:
5406 * <delegate>
5407 * <return addr>
5410 if (has_target) {
5411 static guint8* cached = NULL;
5412 if (cached)
5413 return cached;
5415 start = code = mono_global_codeman_reserve (64);
5417 /* Replace the this argument with the target */
5418 x86_mov_reg_membase (code, X86_EAX, X86_ESP, 4, 4);
5419 x86_mov_reg_membase (code, X86_ECX, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, target), 4);
5420 x86_mov_membase_reg (code, X86_ESP, 4, X86_ECX, 4);
5421 x86_jump_membase (code, X86_EAX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5423 g_assert ((code - start) < 64);
5425 mono_debug_add_delegate_trampoline (start, code - start);
5427 mono_memory_barrier ();
5429 cached = start;
5430 } else {
5431 static guint8* cache [MAX_ARCH_DELEGATE_PARAMS + 1] = {NULL};
5432 int i = 0;
5433 /* 8 for mov_reg and jump, plus 8 for each parameter */
5434 int code_reserve = 8 + (sig->param_count * 8);
5436 for (i = 0; i < sig->param_count; ++i)
5437 if (!mono_is_regsize_var (sig->params [i]))
5438 return NULL;
5440 code = cache [sig->param_count];
5441 if (code)
5442 return code;
5445 * The stack contains:
5446 * <args in reverse order>
5447 * <delegate>
5448 * <return addr>
5450 * and we need:
5451 * <args in reverse order>
5452 * <return addr>
5454 * without unbalancing the stack.
5455 * So we move each arg up a spot in the stack (overwriting the un-needed 'this' arg),
5456 * leaving the original spot of the first arg as a placeholder in the stack so
5457 * that when the callee pops the stack everything works.
5460 start = code = mono_global_codeman_reserve (code_reserve);
5462 /* store delegate for access to method_ptr */
5463 x86_mov_reg_membase (code, X86_ECX, X86_ESP, 4, 4);
5465 /* move args up */
5466 for (i = 0; i < sig->param_count; ++i) {
5467 x86_mov_reg_membase (code, X86_EAX, X86_ESP, (i+2)*4, 4);
5468 x86_mov_membase_reg (code, X86_ESP, (i+1)*4, X86_EAX, 4);
5471 x86_jump_membase (code, X86_ECX, G_STRUCT_OFFSET (MonoDelegate, method_ptr));
5473 g_assert ((code - start) < code_reserve);
5475 mono_debug_add_delegate_trampoline (start, code - start);
5477 mono_memory_barrier ();
5479 cache [sig->param_count] = start;
5482 return start;
5485 gpointer
5486 mono_arch_context_get_int_reg (MonoContext *ctx, int reg)
5488 switch (reg) {
5489 case X86_EAX: return (gpointer)ctx->eax;
5490 case X86_EBX: return (gpointer)ctx->ebx;
5491 case X86_ECX: return (gpointer)ctx->ecx;
5492 case X86_EDX: return (gpointer)ctx->edx;
5493 case X86_ESP: return (gpointer)ctx->esp;
5494 case X86_EBP: return (gpointer)ctx->ebp;
5495 case X86_ESI: return (gpointer)ctx->esi;
5496 case X86_EDI: return (gpointer)ctx->edi;
5497 default: g_assert_not_reached ();
5501 #ifdef MONO_ARCH_SIMD_INTRINSICS
5503 static MonoInst*
5504 get_float_to_x_spill_area (MonoCompile *cfg)
5506 if (!cfg->fconv_to_r8_x_var) {
5507 cfg->fconv_to_r8_x_var = mono_compile_create_var (cfg, &mono_defaults.double_class->byval_arg, OP_LOCAL);
5508 cfg->fconv_to_r8_x_var->flags |= MONO_INST_VOLATILE; /*FIXME, use the don't regalloc flag*/
5510 return cfg->fconv_to_r8_x_var;
5514 * Convert all fconv ops that MONO_OPT_SSE2 would get wrong.
5516 void
5517 mono_arch_decompose_opts (MonoCompile *cfg, MonoInst *ins)
5519 MonoInst *fconv;
5520 int dreg, src_opcode;
5522 if (!(cfg->opt & MONO_OPT_SSE2) || !(cfg->opt & MONO_OPT_SIMD))
5523 return;
5525 switch (src_opcode = ins->opcode) {
5526 case OP_FCONV_TO_I1:
5527 case OP_FCONV_TO_U1:
5528 case OP_FCONV_TO_I2:
5529 case OP_FCONV_TO_U2:
5530 case OP_FCONV_TO_I4:
5531 case OP_FCONV_TO_I:
5532 break;
5533 default:
5534 return;
5537 /* dreg is the IREG and sreg1 is the FREG */
5538 MONO_INST_NEW (cfg, fconv, OP_FCONV_TO_R8_X);
5539 fconv->klass = NULL; /*FIXME, what can I use here as the Mono.Simd lib might not be loaded yet*/
5540 fconv->sreg1 = ins->sreg1;
5541 fconv->dreg = mono_alloc_ireg (cfg);
5542 fconv->type = STACK_VTYPE;
5543 fconv->backend.spill_var = get_float_to_x_spill_area (cfg);
5545 mono_bblock_insert_before_ins (cfg->cbb, ins, fconv);
5547 dreg = ins->dreg;
5548 NULLIFY_INS (ins);
5549 ins->opcode = OP_XCONV_R8_TO_I4;
5551 ins->klass = mono_defaults.int32_class;
5552 ins->sreg1 = fconv->dreg;
5553 ins->dreg = dreg;
5554 ins->type = STACK_I4;
5555 ins->backend.source_opcode = src_opcode;
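/* The original fconv is thus split in two: OP_FCONV_TO_R8_X moves the x87
 * value into an xreg through the spill area, and OP_XCONV_R8_TO_I4 performs
 * the actual cvttsd2si plus whatever widening the source opcode asked for. */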
5558 void
5559 mono_arch_decompose_long_opts (MonoCompile *cfg, MonoInst *long_ins)
5561 MonoInst *ins;
5562 int vreg;
5563 if (!(cfg->opt & MONO_OPT_SIMD))
5564 return;
5566 /*TODO move this to simd-intrinsic.c once we support sse 4.1 dword extractors since we need the runtime caps info */
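/* A 64 bit value lives in a pair of 32 bit vregs (dreg + 1 is the low dword,
 * dreg + 2 the high one), so the I8 ops below are rewritten as pairs of the
 * corresponding I4 ops, with PSHUFLED shuffles routing the wanted dword into
 * lane 0 first. */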
5567 switch (long_ins->opcode) {
5568 case OP_EXTRACT_I8:
5569 vreg = long_ins->sreg1;
5571 if (long_ins->inst_c0) {
5572 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5573 ins->klass = long_ins->klass;
5574 ins->sreg1 = long_ins->sreg1;
5575 ins->inst_c0 = 2;
5576 ins->type = STACK_VTYPE;
5577 ins->dreg = vreg = alloc_ireg (cfg);
5578 MONO_ADD_INS (cfg->cbb, ins);
5581 MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
5582 ins->klass = mono_defaults.int32_class;
5583 ins->sreg1 = vreg;
5584 ins->type = STACK_I4;
5585 ins->dreg = long_ins->dreg + 1;
5586 MONO_ADD_INS (cfg->cbb, ins);
5588 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5589 ins->klass = long_ins->klass;
5590 ins->sreg1 = long_ins->sreg1;
5591 ins->inst_c0 = long_ins->inst_c0 ? 3 : 1;
5592 ins->type = STACK_VTYPE;
5593 ins->dreg = vreg = alloc_ireg (cfg);
5594 MONO_ADD_INS (cfg->cbb, ins);
5596 MONO_INST_NEW (cfg, ins, OP_EXTRACT_I4);
5597 ins->klass = mono_defaults.int32_class;
5598 ins->sreg1 = vreg;
5599 ins->type = STACK_I4;
5600 ins->dreg = long_ins->dreg + 2;
5601 MONO_ADD_INS (cfg->cbb, ins);
5603 long_ins->opcode = OP_NOP;
5604 break;
5605 case OP_INSERTX_I8_SLOW:
5606 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5607 ins->dreg = long_ins->dreg;
5608 ins->sreg1 = long_ins->dreg;
5609 ins->sreg2 = long_ins->sreg2 + 1;
5610 ins->inst_c0 = long_ins->inst_c0 * 2;
5611 MONO_ADD_INS (cfg->cbb, ins);
5613 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5614 ins->dreg = long_ins->dreg;
5615 ins->sreg1 = long_ins->dreg;
5616 ins->sreg2 = long_ins->sreg2 + 2;
5617 ins->inst_c0 = long_ins->inst_c0 * 2 + 1;
5618 MONO_ADD_INS (cfg->cbb, ins);
5620 long_ins->opcode = OP_NOP;
5621 break;
5622 case OP_EXPAND_I8:
5623 MONO_INST_NEW (cfg, ins, OP_ICONV_TO_X);
5624 ins->dreg = long_ins->dreg;
5625 ins->sreg1 = long_ins->sreg1 + 1;
5626 ins->klass = long_ins->klass;
5627 ins->type = STACK_VTYPE;
5628 MONO_ADD_INS (cfg->cbb, ins);
5630 MONO_INST_NEW (cfg, ins, OP_INSERTX_I4_SLOW);
5631 ins->dreg = long_ins->dreg;
5632 ins->sreg1 = long_ins->dreg;
5633 ins->sreg2 = long_ins->sreg1 + 2;
5634 ins->inst_c0 = 1;
5635 ins->klass = long_ins->klass;
5636 ins->type = STACK_VTYPE;
5637 MONO_ADD_INS (cfg->cbb, ins);
5639 MONO_INST_NEW (cfg, ins, OP_PSHUFLED);
5640 ins->dreg = long_ins->dreg;
5641 ins->sreg1 = long_ins->dreg;
5642 ins->inst_c0 = 0x44; /*Magic number for swizzling (X,Y,X,Y)*/
5643 ins->klass = long_ins->klass;
5644 ins->type = STACK_VTYPE;
5645 MONO_ADD_INS (cfg->cbb, ins);
5647 long_ins->opcode = OP_NOP;
5648 break;
5651 #endif