Improve mips code generator
[sljit.git] / sljit_src / sljitNativeX86_32.c
blob755f90bb7ad81e24004407e099d8b6c109d541df
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 /* x86 32-bit arch dependent functions. */
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
/* Emits a single opcode byte followed immediately by a native-word-sized
   immediate (opcode + 32-bit imm on this target). Returns SLJIT_SUCCESS,
   or propagates an allocation failure via FAIL_IF. */
33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
35 sljit_u8 *inst;
/* The leading "1 +" follows the ensure_buf/INC_SIZE buffer protocol used
   throughout this file; the payload is 1 opcode byte + sizeof(sljit_sw). */
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(1 + sizeof(sljit_sw));
40 *inst++ = opcode;
/* The immediate may land on an unaligned address inside the code buffer,
   hence the unaligned store helper. */
41 sljit_unaligned_store_sw(inst, imm);
42 return SLJIT_SUCCESS;
/* Core x86-32 instruction encoder. Computes the full instruction length
   (prefixes + opcode + ModRM + optional SIB + displacement + immediate),
   reserves buffer space, then encodes the addressing form for operand `b`
   and any immediate `a`. Returns a pointer into the buffer where the
   caller writes the opcode byte(s), or NULL on allocation failure
   (PTR_FAIL_IF). The low nibble of `size` is the opcode length; the upper
   bits are EX86_* flags. */
45 /* Size contains the flags as well. */
46 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
47 /* The register or immediate operand. */
48 sljit_s32 a, sljit_sw imma,
49 /* The general operand (not immediate). */
50 sljit_s32 b, sljit_sw immb)
52 sljit_u8 *inst;
53 sljit_u8 *buf_ptr;
54 sljit_u8 reg_map_b;
55 sljit_uw flags = size;
56 sljit_uw inst_size;
/* Sanity checks: the flag combinations below are mutually exclusive. */
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
/* At most one mandatory prefix (F2/F3/66) may be requested. */
66 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
67 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
68 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
/* Low nibble of `size` = number of opcode bytes the caller will emit. */
70 size &= 0xf;
71 inst_size = size;
/* Account for mandatory prefix bytes. */
73 if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
74 inst_size++;
75 if (flags & EX86_PREF_66)
76 inst_size++;
/* Size of the memory/register operand encoding (ModRM, SIB, disp). */
78 /* Calculate size of b. */
79 inst_size += 1; /* mod r/m byte. */
80 if (b & SLJIT_MEM) {
/* No base register: absolute address, 32-bit displacement only. */
81 if (!(b & REG_MASK))
82 inst_size += sizeof(sljit_sw);
83 else if (immb != 0 && !(b & OFFS_REG_MASK)) {
84 /* Immediate operand. */
85 if (immb <= 127 && immb >= -128)
86 inst_size += sizeof(sljit_s8);
87 else
88 inst_size += sizeof(sljit_sw);
/* reg_map value 5 is EBP: [ebp] cannot be encoded with mod=00, so a
   zero disp8 is always required. */
90 else if (reg_map[b & REG_MASK] == 5)
91 inst_size += sizeof(sljit_s8);
/* ESP as base always needs a SIB byte; fold it into the index form. */
93 if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
94 b |= TO_OFFS_REG(SLJIT_SP);
96 if (b & OFFS_REG_MASK)
97 inst_size += 1; /* SIB byte. */
/* Size of the immediate operand `a`, if present. */
100 /* Calculate size of a. */
101 if (a & SLJIT_IMM) {
102 if (flags & EX86_BIN_INS) {
/* Sign-extended imm8 form (opcode 0x83) when the value fits. */
103 if (imma <= 127 && imma >= -128) {
104 inst_size += 1;
105 flags |= EX86_BYTE_ARG;
106 } else
107 inst_size += 4;
109 else if (flags & EX86_SHIFT_INS) {
/* Shift counts are taken modulo 32; count == 1 has a dedicated opcode
   with no immediate byte. */
110 imma &= 0x1f;
111 if (imma != 1) {
112 inst_size++;
113 flags |= EX86_BYTE_ARG;
115 } else if (flags & EX86_BYTE_ARG)
116 inst_size++;
117 else if (flags & EX86_HALF_ARG)
118 inst_size += sizeof(short);
119 else
120 inst_size += sizeof(sljit_sw);
122 else
/* Register-count shifts must use the fixed CL register. */
123 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
125 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
126 PTR_FAIL_IF(!inst);
128 /* Encoding the byte. */
129 INC_SIZE(inst_size);
/* Emit mandatory prefixes first. */
130 if (flags & EX86_PREF_F2)
131 *inst++ = 0xf2;
132 if (flags & EX86_PREF_F3)
133 *inst++ = 0xf3;
134 if (flags & EX86_PREF_66)
135 *inst++ = 0x66;
/* buf_ptr points past the (caller-written) opcode bytes, at the ModRM slot. */
137 buf_ptr = inst + size;
139 /* Encode mod/rm byte. */
140 if (!(flags & EX86_SHIFT_INS)) {
/* Binary group instructions with an immediate: pick 0x83 (imm8) or
   0x81 (imm32) based on the size decision made above. */
141 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
142 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
/* The ModRM reg field: 0 for immediates (group opcode digit is set by
   the caller), otherwise the mapped (or raw, for SSE2) register. */
144 if (a & SLJIT_IMM)
145 *buf_ptr = 0;
146 else if (!(flags & EX86_SSE2_OP1))
147 *buf_ptr = U8(reg_map[a] << 3);
148 else
149 *buf_ptr = U8(a << 3);
151 else {
/* Shift group: select the opcode variant (by-1, by-imm8, or by-CL). */
152 if (a & SLJIT_IMM) {
153 if (imma == 1)
154 *inst = GROUP_SHIFT_1;
155 else
156 *inst = GROUP_SHIFT_N;
157 } else
158 *inst = GROUP_SHIFT_CL;
159 *buf_ptr = 0;
/* Now encode operand b: register-direct, [base(+index)(+disp)], or
   absolute address. */
161 if (!(b & SLJIT_MEM)) {
163 *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
164 buf_ptr++;
165 } else if (b & REG_MASK) {
166 reg_map_b = reg_map[b & REG_MASK];
168 if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) {
/* mod bits: 01 = disp8, 10 = disp32 (EBP base always needs a disp). */
169 if (immb != 0 || reg_map_b == 5) {
170 if (immb <= 127 && immb >= -128)
171 *buf_ptr |= 0x40;
172 else
173 *buf_ptr |= 0x80;
/* rm = base register directly, or 100b to escape into a SIB byte. */
176 if (!(b & OFFS_REG_MASK))
177 *buf_ptr++ |= reg_map_b;
178 else {
179 *buf_ptr++ |= 0x04;
180 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
183 if (immb != 0 || reg_map_b == 5) {
184 if (immb <= 127 && immb >= -128)
185 *buf_ptr++ = U8(immb); /* 8 bit displacement. */
186 else {
187 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
188 buf_ptr += sizeof(sljit_sw);
192 else {
/* Scaled-index form: SIB with immb as the scale (log2) in bits 6-7. */
193 *buf_ptr++ |= 0x04;
194 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
196 else {
/* No base register: mod=00, rm=101b means disp32 absolute. */
198 *buf_ptr++ |= 0x05;
199 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
200 buf_ptr += sizeof(sljit_sw);
/* Finally append the immediate operand in its chosen width. */
203 if (a & SLJIT_IMM) {
204 if (flags & EX86_BYTE_ARG)
205 *buf_ptr = U8(imma);
206 else if (flags & EX86_HALF_ARG)
207 sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
208 else if (!(flags & EX86_SHIFT_INS))
209 sljit_unaligned_store_sw(buf_ptr, imma);
/* Shift instructions already wrote their opcode above, so the caller's
   write position is advanced past it. */
212 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
215 /* --------------------------------------------------------------------- */
216 /* Enter / return */
217 /* --------------------------------------------------------------------- */
/* Emits the long (rel32) form of a jump/call at code generation time:
   E9 for unconditional jumps, E8 for calls, 0F 8x for conditional jumps.
   jump->addr is advanced past the opcode so it addresses the 32-bit
   displacement slot. If the target is a not-yet-resolved label, the
   PATCH_MW flag defers the displacement write to the patch phase;
   otherwise the PC-relative displacement is stored immediately. */
219 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
221 sljit_uw type = jump->flags >> TYPE_SHIFT;
223 if (type == SLJIT_JUMP) {
224 *code_ptr++ = JMP_i32;
225 jump->addr++;
227 else if (type >= SLJIT_FAST_CALL) {
228 *code_ptr++ = CALL_i32;
229 jump->addr++;
231 else {
/* Conditional jumps need the two-byte 0F 8x opcode. */
232 *code_ptr++ = GROUP_0F;
233 *code_ptr++ = get_jump_code(type);
234 jump->addr += 2;
237 if (jump->flags & JUMP_LABEL)
238 jump->flags |= PATCH_MW;
239 else
/* rel32 is relative to the end of the instruction (addr + 4), adjusted
   by the executable mapping offset. */
240 sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
241 code_ptr += 4;
243 return code_ptr;
/* Status bits collected by sljit_emit_enter while scanning the argument
   list; they record deferred register moves to perform once the frame
   layout is known (see their uses below in sljit_emit_enter). */
/* R2 (edx) is needed as a word argument, so it cannot be repurposed. */
246 #define ENTER_R2_USED 0x00001
/* R2 holds a value that must survive into a saved register. */
247 #define ENTER_R2_SAVED 0x00002
/* Deferred move: R2 -> saved register. */
248 #define ENTER_R2_TO_S 0x00004
/* Deferred move: R2 -> R0. */
249 #define ENTER_R2_TO_R0 0x00008
/* Deferred move: R1 -> saved register. */
250 #define ENTER_R1_TO_S 0x00010
/* 4th word argument kept in TMP_REG1, later stored to the R4 stack slot. */
251 #define ENTER_TMP_TO_R4 0x00020
/* 4th word argument kept in TMP_REG1, later stored to a saved-reg slot. */
252 #define ENTER_TMP_TO_S 0x00040
/* Emits the function prologue: scans the argument list, computes the
   stack frame layout (args_size, scratches_offset, locals_offset,
   local_size), pushes callee-saved registers, allocates locals (with
   page-by-page stack probing on Windows), and moves incoming arguments
   into their assigned registers or frame slots. Two passes are made over
   arg_types: the first only measures sizes and records deferred moves in
   `status`; the second performs the moves. */
254 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
255 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
256 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
258 sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
259 sljit_s32 size, locals_offset, args_size, types, status;
260 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
261 sljit_u8 *inst;
262 #ifdef _WIN32
/* Frame offset where R2 was spilled during large-frame probing; -1 when unused. */
263 sljit_s32 r2_offset = -1;
264 #endif
266 CHECK_ERROR();
267 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
268 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
270 /* Emit ENDBR32 at function entry if needed. */
271 FAIL_IF(emit_endbranch(compiler));
273 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
/* --- Pass 1: measure argument sizes and collect status flags. ---
   args_size starts one word in to skip the return address slot. */
275 arg_types >>= SLJIT_ARG_SHIFT;
276 types = arg_types;
277 word_arg_count = 0;
278 saved_arg_count = 0;
279 float_arg_count = 0;
280 args_size = SSIZE_OF(sw);
281 status = 0;
282 while (types) {
283 switch (types & SLJIT_ARG_MASK) {
284 case SLJIT_ARG_TYPE_F64:
285 float_arg_count++;
/* Second argument 0/1 selects double/single in emit_sse2_load
   (pattern consistent across this file) — TODO confirm upstream. */
286 FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
287 args_size += SSIZE_OF(f64);
288 break;
289 case SLJIT_ARG_TYPE_F32:
290 float_arg_count++;
291 FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
292 args_size += SSIZE_OF(f32);
293 break;
294 default:
295 word_arg_count++;
297 if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
298 saved_arg_count++;
/* Fastcall: the first two word arguments arrive in registers and
   occupy no stack space. */
300 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
301 if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL))
302 break;
303 #endif /* SLJIT_X86_32_FASTCALL */
/* A 4th word argument cannot live in a register; keep it in TMP_REG1
   and strip it from arg_types so pass 2 skips it. */
305 if (word_arg_count == 4) {
306 if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
307 status |= ENTER_TMP_TO_R4;
308 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
309 } else if (saved_arg_count == 4) {
310 status |= ENTER_TMP_TO_S;
311 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
315 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
316 if (word_arg_count == 3 && ((types & SLJIT_ARG_TYPE_SCRATCH_REG) || (saved_arg_count + kept_saveds_count > 3)))
317 status |= ENTER_R2_USED;
318 #endif /* SLJIT_X86_32_FASTCALL */
320 args_size += SSIZE_OF(sw);
321 break;
323 types >>= SLJIT_ARG_SHIFT;
/* Undo the initial return-address word to get pure argument size. */
326 args_size -= SSIZE_OF(sw);
327 compiler->args_size = args_size;
/* --- Frame layout: reserve the low end of the frame for temporaries
   and outgoing call arguments. --- */
329 /* [esp+0] for saving temporaries and function calls. */
330 locals_offset = 2 * SSIZE_OF(sw);
332 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
333 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
334 locals_offset = 4 * SSIZE_OF(sw);
335 #else
336 if (scratches >= 3)
337 locals_offset = 4 * SSIZE_OF(sw);
338 #endif
340 compiler->scratches_offset = locals_offset;
/* Spill slots for virtual scratch/saved registers beyond the 3+3 that
   map to real registers. */
342 if (scratches > 3)
343 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
345 if (saveds > 3)
346 locals_offset += (saveds - 3) * SSIZE_OF(sw);
348 compiler->locals_offset = locals_offset;
/* --- Prologue: push TMP_REG1 and the callee-saved registers actually
   in use (skipping SLJIT_ENTER_KEEP ones already saved by the caller). --- */
350 size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
351 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
352 FAIL_IF(!inst);
354 INC_SIZE((sljit_uw)size);
355 PUSH_REG(reg_map[TMP_REG1]);
356 if (saveds > 2 || scratches > 9)
357 PUSH_REG(reg_map[SLJIT_S2]);
358 if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
359 PUSH_REG(reg_map[SLJIT_S1]);
360 if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
361 PUSH_REG(reg_map[SLJIT_S0]);
/* size now becomes the byte size of the pushed-register area. */
363 size *= SSIZE_OF(sw);
/* Load the deferred 4th word argument before esp moves again. */
365 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
366 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);
368 size += SSIZE_OF(sw);
/* Fastcall callees pop their stack arguments, so those words count
   toward the frame for alignment purposes. */
370 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
371 if (!(options & SLJIT_ENTER_CDECL))
372 size += args_size;
373 #endif
/* Round the total frame to 16 bytes. */
375 local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
376 compiler->local_size = local_size;
378 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
379 if (!(options & SLJIT_ENTER_CDECL))
380 size -= args_size;
381 #endif
/* --- Pass 2: move incoming arguments to their destinations. ---
   args_size is now the esp-relative offset of the first stack argument. */
383 word_arg_count = 0;
384 saved_arg_count = kept_saveds_count;
385 args_size = size;
386 while (arg_types) {
387 switch (arg_types & SLJIT_ARG_MASK) {
388 case SLJIT_ARG_TYPE_F64:
389 args_size += SSIZE_OF(f64);
390 break;
391 case SLJIT_ARG_TYPE_F32:
392 args_size += SSIZE_OF(f32);
393 break;
394 default:
395 word_arg_count++;
396 SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));
398 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG))
399 saved_arg_count++;
/* Fastcall: argument 1 is already in R2 (edx-family), argument 2 in
   R1; only record/perform the register-to-register moves. */
401 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
402 if (!(options & SLJIT_ENTER_CDECL)) {
403 if (word_arg_count == 1) {
404 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
405 if (!(status & ENTER_R2_USED))
406 status |= ENTER_R2_TO_R0;
407 else
408 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
409 break;
412 status |= ENTER_R2_SAVED;
414 if (!(status & ENTER_R2_USED))
415 status |= ENTER_R2_TO_S;
416 else
417 EMIT_MOV(compiler, (SLJIT_S0 + 1) - saved_arg_count, 0, SLJIT_R2, 0);
418 break;
421 if (word_arg_count == 2) {
422 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG))
423 status |= ENTER_R1_TO_S;
424 break;
427 #endif /* SLJIT_X86_32_FASTCALL */
429 if ((arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) || saved_arg_count > 3) {
430 #ifdef _WIN32
/* Large Windows frames clobber R2 during probing; reload it later
   from this offset instead of loading it now. */
431 if (word_arg_count == 3 && local_size > 4 * 4096)
432 r2_offset = local_size + args_size;
433 else
434 #endif
435 EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
437 } else
438 EMIT_MOV(compiler, (SLJIT_S0 + 1) - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
440 args_size += SSIZE_OF(sw);
441 break;
443 arg_types >>= SLJIT_ARG_SHIFT;
/* Perform the deferred fastcall register moves. */
446 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
447 if (!(options & SLJIT_ENTER_CDECL)) {
448 if (status & ENTER_R2_TO_S) {
449 SLJIT_ASSERT(status & ENTER_R2_SAVED);
450 EMIT_MOV(compiler, SLJIT_S0 - kept_saveds_count, 0, SLJIT_R2, 0);
451 } else if (status & ENTER_R2_TO_R0)
452 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
454 size = kept_saveds_count;
455 if (status & ENTER_R2_SAVED)
456 size++;
458 if ((status & ENTER_R1_TO_S) && size < 3)
459 EMIT_MOV(compiler, SLJIT_S0 - size, 0, SLJIT_R1, 0);
461 #endif /* SLJIT_X86_32_FASTCALL */
463 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
/* --- Allocate locals. On Windows, frames above 4K must touch each
   page going down (stack probing / guard page protocol). --- */
465 #ifdef _WIN32
466 SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);
468 if (local_size > 4096) {
469 if (local_size <= 4 * 4096) {
/* Up to 4 pages: probe each page with an OR [esp-n*4096], 0. */
470 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
472 if (local_size > 2 * 4096)
473 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
474 if (local_size > 3 * 4096)
475 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
477 else {
/* Larger frames: loop R2 = page count times, probing and dropping
   esp one page per iteration (LOOP with rel8 = -16 repeats the
   probe+sub pair). */
478 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);
480 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
481 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
483 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
484 FAIL_IF(!inst);
486 INC_SIZE(2);
487 inst[0] = LOOP_i8;
488 inst[1] = (sljit_u8)-16;
/* Only the sub-page remainder is left to allocate. */
489 local_size &= 0xfff;
493 if (local_size > 0) {
494 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
495 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
/* Reload the 3rd word argument spilled around the probe loop. */
498 if (r2_offset != -1)
499 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
501 #else /* !_WIN32 */
503 SLJIT_ASSERT(local_size > 0);
505 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
507 #endif /* _WIN32 */
/* Store saved arguments beyond the 3 real saved registers into their
   frame slots (walking registers upward from SLJIT_R3). */
509 locals_offset -= SSIZE_OF(sw);
510 kept_saveds_count = SLJIT_R3 - kept_saveds_count;
512 while (saved_arg_count > 3) {
513 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), locals_offset, kept_saveds_count, 0);
514 kept_saveds_count++;
515 locals_offset -= SSIZE_OF(sw);
516 saved_arg_count--;
/* Finally park the deferred 4th word argument from TMP_REG1. */
519 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
520 size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : locals_offset;
521 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
524 return SLJIT_SUCCESS;
/* Records the same frame-layout values (args_size, scratches_offset,
   locals_offset, local_size) that sljit_emit_enter would compute for
   these parameters, without emitting any code. Must mirror the layout
   math in sljit_emit_enter exactly. */
527 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
528 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
529 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
531 sljit_s32 args_size, locals_offset;
532 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
533 sljit_s32 word_arg_count = 0;
534 #endif
536 CHECK_ERROR();
537 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
538 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
/* Sum the stack footprint of the incoming arguments. Under fastcall
   the first two word arguments are passed in registers. */
540 arg_types >>= SLJIT_ARG_SHIFT;
541 args_size = 0;
542 while (arg_types) {
543 switch (arg_types & SLJIT_ARG_MASK) {
544 case SLJIT_ARG_TYPE_F64:
545 args_size += SSIZE_OF(f64);
546 break;
547 case SLJIT_ARG_TYPE_F32:
548 args_size += SSIZE_OF(f32);
549 break;
550 default:
551 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
552 if (word_arg_count >= 2)
553 args_size += SSIZE_OF(sw);
554 word_arg_count++;
555 #else
556 args_size += SSIZE_OF(sw);
557 #endif
558 break;
560 arg_types >>= SLJIT_ARG_SHIFT;
563 compiler->args_size = args_size;
/* Frame layout — identical computation to sljit_emit_enter. */
565 /* [esp+0] for saving temporaries and function calls. */
566 locals_offset = 2 * SSIZE_OF(sw);
568 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
569 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
570 locals_offset = 4 * SSIZE_OF(sw);
571 #else
572 if (scratches >= 3)
573 locals_offset = 4 * SSIZE_OF(sw);
574 #endif
576 compiler->scratches_offset = locals_offset;
578 if (scratches > 3)
579 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
581 if (saveds > 3)
582 locals_offset += (saveds - 3) * SSIZE_OF(sw);
584 compiler->locals_offset = locals_offset;
/* `saveds` is reused here as the byte size of the pushed-register area
   plus the return address (the leading 2 covers TMP_REG1 + return). */
586 saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
588 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
589 if (!(options & SLJIT_ENTER_CDECL))
590 saveds += args_size;
591 #endif
/* 16-byte align the total frame, as sljit_emit_enter does. */
593 compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
594 return SLJIT_SUCCESS;
/* Emits the epilogue register pops, restoring the callee-saved registers
   in the exact reverse order of the pushes in sljit_emit_enter (the
   conditions must stay in sync with that function). */
597 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
599 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
600 sljit_uw size;
601 sljit_u8 *inst;
/* One byte per POP: TMP_REG1 plus each saved register actually pushed. */
603 size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
604 (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);
605 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
606 FAIL_IF(!inst);
608 INC_SIZE(size);
610 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
611 POP_REG(reg_map[SLJIT_S0]);
612 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
613 POP_REG(reg_map[SLJIT_S1]);
614 if (compiler->saveds > 2 || compiler->scratches > 9)
615 POP_REG(reg_map[SLJIT_S2]);
616 POP_REG(reg_map[TMP_REG1]);
618 return SLJIT_SUCCESS;
/* Emits a void return: deallocates the locals, restores the saved
   registers, then returns. Fastcall functions with stack arguments use
   RET imm16 so the callee pops its own arguments; otherwise a plain RET. */
621 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
623 sljit_uw size;
624 sljit_u8 *inst;
626 CHECK_ERROR();
627 CHECK(check_sljit_emit_return_void(compiler));
629 SLJIT_ASSERT(compiler->args_size >= 0);
630 SLJIT_ASSERT(compiler->local_size > 0);
/* Release the local area, then pop the saved registers. */
632 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
634 FAIL_IF(emit_stack_frame_release(compiler));
/* RET is 1 byte; RET imm16 is 3 bytes. */
636 size = 1;
637 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
638 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL))
639 size = 3;
640 #endif
641 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
642 FAIL_IF(!inst);
644 INC_SIZE(size);
646 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
647 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) {
/* Callee pops args_size bytes of arguments (fastcall convention). */
648 RET_I16(U8(compiler->args_size));
649 return SLJIT_SUCCESS;
651 #endif
653 RET();
654 return SLJIT_SUCCESS;
657 /* --------------------------------------------------------------------- */
658 /* Call / return instructions */
659 /* --------------------------------------------------------------------- */
661 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
/* Computes the stack bytes needed for an outgoing fastcall: the first
   two word arguments travel in registers, everything else (and all
   float arguments) goes on the stack. Optionally reports the word
   argument count through word_arg_count_ptr. */
663 static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
665 sljit_sw stack_size = 0;
666 sljit_s32 word_arg_count = 0;
/* Skip the return-type slot. */
668 arg_types >>= SLJIT_ARG_SHIFT;
670 while (arg_types) {
671 switch (arg_types & SLJIT_ARG_MASK) {
672 case SLJIT_ARG_TYPE_F64:
673 stack_size += SSIZE_OF(f64);
674 break;
675 case SLJIT_ARG_TYPE_F32:
676 stack_size += SSIZE_OF(f32);
677 break;
678 default:
679 word_arg_count++;
/* Word arguments 1 and 2 are register-passed under fastcall. */
680 if (word_arg_count > 2)
681 stack_size += SSIZE_OF(sw);
682 break;
685 arg_types >>= SLJIT_ARG_SHIFT;
688 if (word_arg_count_ptr)
689 *word_arg_count_ptr = word_arg_count;
691 return stack_size;
/* Places the outgoing arguments for a fastcall: stack arguments are
   stored below esp (after adjusting it), register arguments end up in
   their convention-mandated registers. When swap_args is set, R0 and R2
   are exchanged (XCHG) instead of copied, because the call target itself
   currently lives in one of them. */
694 static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
695 sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
697 sljit_u8 *inst;
698 sljit_s32 float_arg_count;
/* Exactly one stack word and it is the 3rd word argument: a single
   PUSH is shorter than sub esp + mov. */
700 if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
701 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
702 FAIL_IF(!inst);
703 INC_SIZE(1);
704 PUSH_REG(reg_map[SLJIT_R2]);
706 else if (stack_size > 0) {
/* The 4th word argument is spilled in the scratches area; fetch it
   before esp moves. */
707 if (word_arg_count >= 4)
708 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
710 BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
/* Walk the argument list again, storing stack-passed values. */
712 stack_size = 0;
713 arg_types >>= SLJIT_ARG_SHIFT;
714 word_arg_count = 0;
715 float_arg_count = 0;
716 while (arg_types) {
717 switch (arg_types & SLJIT_ARG_MASK) {
718 case SLJIT_ARG_TYPE_F64:
719 float_arg_count++;
720 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
721 stack_size += SSIZE_OF(f64);
722 break;
723 case SLJIT_ARG_TYPE_F32:
724 float_arg_count++;
725 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
726 stack_size += SSIZE_OF(f32);
727 break;
728 default:
729 word_arg_count++;
/* Only word args 3 (from R2) and 4 (from TMP_REG1) live on the
   stack; 1 and 2 stay in registers. */
730 if (word_arg_count == 3) {
731 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
732 stack_size += SSIZE_OF(sw);
734 else if (word_arg_count == 4) {
735 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
736 stack_size += SSIZE_OF(sw);
738 break;
741 arg_types >>= SLJIT_ARG_SHIFT;
/* Put word argument 1 into its fastcall register. */
745 if (word_arg_count > 0) {
746 if (swap_args) {
/* One-byte XCHG eax, reg — swaps R0 with R2. */
747 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
748 FAIL_IF(!inst);
749 INC_SIZE(1);
751 *inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
753 else {
/* MOV r2, r0 (two-byte reg-to-reg form). */
754 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
755 FAIL_IF(!inst);
756 INC_SIZE(2);
758 *inst++ = MOV_r_rm;
759 *inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
763 return SLJIT_SUCCESS;
766 #endif
/* Computes the extra stack bytes needed for an outgoing cdecl call: all
   arguments are stack-passed. The area below scratches_offset is already
   reserved by the frame for outgoing calls, so only the overflow beyond
   it is allocated, rounded up to 16 bytes. Optionally reports the word
   argument count. */
768 static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
770 sljit_sw stack_size = 0;
771 sljit_s32 word_arg_count = 0;
/* Skip the return-type slot. */
773 arg_types >>= SLJIT_ARG_SHIFT;
775 while (arg_types) {
776 switch (arg_types & SLJIT_ARG_MASK) {
777 case SLJIT_ARG_TYPE_F64:
778 stack_size += SSIZE_OF(f64);
779 break;
780 case SLJIT_ARG_TYPE_F32:
781 stack_size += SSIZE_OF(f32);
782 break;
783 default:
784 word_arg_count++;
785 stack_size += SSIZE_OF(sw);
786 break;
789 arg_types >>= SLJIT_ARG_SHIFT;
792 if (word_arg_count_ptr)
793 *word_arg_count_ptr = word_arg_count;
/* Fits in the pre-reserved outgoing-call area: no esp adjustment. */
795 if (stack_size <= compiler->scratches_offset)
796 return 0;
798 return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf);
/* Stores the outgoing arguments for a cdecl call at [esp+0] upward:
   floats from their SSE registers, word arguments from R0..R2 and (for
   the 4th) from TMP_REG1, which is preloaded from the caller's spill
   slot before esp is adjusted. */
801 static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
802 sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
804 sljit_s32 float_arg_count = 0;
805 sljit_u8 *inst;
/* Fetch the spilled 4th word argument before the esp move below. */
807 if (word_arg_count >= 4)
808 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
810 if (stack_size > 0)
811 BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
/* Second walk: store each argument at its slot. */
813 stack_size = 0;
814 word_arg_count = 0;
815 arg_types >>= SLJIT_ARG_SHIFT;
817 while (arg_types) {
818 switch (arg_types & SLJIT_ARG_MASK) {
819 case SLJIT_ARG_TYPE_F64:
820 float_arg_count++;
821 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
822 stack_size += SSIZE_OF(f64);
823 break;
824 case SLJIT_ARG_TYPE_F32:
825 float_arg_count++;
826 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
827 stack_size += SSIZE_OF(f32);
828 break;
829 default:
830 word_arg_count++;
/* Word args 1..3 come straight from R0..R2; the 4th from TMP_REG1. */
831 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
832 stack_size += SSIZE_OF(sw);
833 break;
836 arg_types >>= SLJIT_ARG_SHIFT;
839 return SLJIT_SUCCESS;
/* Cleans up after a call: pops the argument area and, for float return
   types, transfers the x87 st(0) return value into an SSE register by
   storing it to [esp] with FSTP and reloading it with an SSE2 load. */
842 static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
843 sljit_s32 arg_types, sljit_s32 stack_size)
845 sljit_u8 *inst;
846 sljit_s32 single;
848 if (stack_size > 0)
849 BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);
/* Non-float return types need no further work. */
851 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
852 return SLJIT_SUCCESS;
854 single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);
/* FSTP m32/m64 [esp]: opcode, then ModRM with the /3 digit and an
   rm of 100b (SIB follows), then SIB base = esp, no index. */
856 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
857 FAIL_IF(!inst);
858 INC_SIZE(3);
859 inst[0] = single ? FSTPS : FSTPD;
860 inst[1] = (0x03 << 3) | 0x04;
861 inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];
/* Reload the stored value into FR0 via SSE2. */
863 return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
866 static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
867 sljit_s32 *extra_space, sljit_s32 arg_types,
868 sljit_s32 src, sljit_sw srcw)
870 sljit_sw args_size, prev_args_size, saved_regs_size;
871 sljit_sw types, word_arg_count, float_arg_count;
872 sljit_sw stack_size, prev_stack_size, min_size, offset;
873 sljit_sw word_arg4_offset;
874 sljit_u8 r2_offset = 0;
875 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
876 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
877 sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
878 #endif
879 sljit_u8* inst;
881 ADJUST_LOCAL_OFFSET(src, srcw);
882 CHECK_EXTRA_REGS(src, srcw, (void)0);
884 saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
885 + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);
887 word_arg_count = 0;
888 float_arg_count = 0;
889 arg_types >>= SLJIT_ARG_SHIFT;
890 types = 0;
891 args_size = 0;
893 while (arg_types != 0) {
894 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
896 switch (arg_types & SLJIT_ARG_MASK) {
897 case SLJIT_ARG_TYPE_F64:
898 args_size += SSIZE_OF(f64);
899 float_arg_count++;
900 break;
901 case SLJIT_ARG_TYPE_F32:
902 args_size += SSIZE_OF(f32);
903 float_arg_count++;
904 break;
905 default:
906 word_arg_count++;
907 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
908 if (!fast_call || word_arg_count > 2)
909 args_size += SSIZE_OF(sw);
910 #else
911 args_size += SSIZE_OF(sw);
912 #endif
913 break;
915 arg_types >>= SLJIT_ARG_SHIFT;
918 if (args_size <= compiler->args_size
919 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
920 && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call)
921 #endif /* SLJIT_X86_32_FASTCALL */
922 && 1) {
923 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
924 *extra_space = fast_call ? 0 : args_size;
925 prev_args_size = compiler->args_size;
926 stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
927 #else /* !SLJIT_X86_32_FASTCALL */
928 *extra_space = 0;
929 stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
930 #endif /* SLJIT_X86_32_FASTCALL */
932 offset = stack_size + compiler->local_size;
934 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
935 if (word_arg_count >= 1) {
936 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
937 r2_offset = sizeof(sljit_sw);
939 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
942 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
943 if (!(compiler->options & SLJIT_ENTER_CDECL)) {
944 if (!fast_call)
945 offset -= SSIZE_OF(sw);
947 if (word_arg_count >= 3) {
948 word_arg4_offset = SSIZE_OF(sw);
950 if (word_arg_count + float_arg_count >= 4) {
951 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
952 if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
953 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
956 /* In cdecl mode, at least one more word value must
957 * be present on the stack before the return address. */
958 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0);
961 if (fast_call) {
962 if (args_size < prev_args_size) {
963 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw));
964 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
966 } else if (prev_args_size > 0) {
967 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size);
968 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
971 #endif /* SLJIT_X86_32_FASTCALL */
973 while (types != 0) {
974 switch (types & SLJIT_ARG_MASK) {
975 case SLJIT_ARG_TYPE_F64:
976 offset -= SSIZE_OF(f64);
977 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
978 float_arg_count--;
979 break;
980 case SLJIT_ARG_TYPE_F32:
981 offset -= SSIZE_OF(f32);
982 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
983 float_arg_count--;
984 break;
985 default:
986 switch (word_arg_count) {
987 case 1:
988 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
989 if (fast_call) {
990 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
991 break;
993 #endif
994 offset -= SSIZE_OF(sw);
995 if (r2_offset != 0) {
996 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
997 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
998 } else
999 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1000 break;
1001 case 2:
1002 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1003 if (fast_call)
1004 break;
1005 #endif
1006 offset -= SSIZE_OF(sw);
1007 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1008 break;
1009 case 3:
1010 offset -= SSIZE_OF(sw);
1011 break;
1012 case 4:
1013 offset -= SSIZE_OF(sw);
1014 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
1015 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1016 break;
1018 word_arg_count--;
1019 break;
1021 types >>= SLJIT_ARG_SHIFT;
1024 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
1025 FAIL_IF(emit_stack_frame_release(compiler));
1027 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1028 if (args_size < prev_args_size)
1029 BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0);
1030 #endif
1032 return SLJIT_SUCCESS;
1035 stack_size = args_size + SSIZE_OF(sw);
1037 if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
1038 r2_offset = SSIZE_OF(sw);
1039 stack_size += SSIZE_OF(sw);
1042 if (word_arg_count >= 3)
1043 stack_size += SSIZE_OF(sw);
1045 prev_args_size = 0;
1046 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1047 if (!(compiler->options & SLJIT_ENTER_CDECL))
1048 prev_args_size = compiler->args_size;
1049 #endif
1051 prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
1052 min_size = prev_stack_size + compiler->local_size;
1054 word_arg4_offset = compiler->scratches_offset;
1056 if (stack_size > min_size) {
1057 BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
1058 if (src == SLJIT_MEM1(SLJIT_SP))
1059 srcw += stack_size - min_size;
1060 word_arg4_offset += stack_size - min_size;
1062 else
1063 stack_size = min_size;
1065 if (word_arg_count >= 3) {
1066 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
1068 if (word_arg_count >= 4)
1069 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
1072 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
1073 if (word_arg_count >= 1) {
1074 SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
1075 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
1077 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
1080 /* Restore saved registers. */
1081 offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
1082 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1084 if (compiler->saveds > 2 || compiler->scratches > 9) {
1085 offset -= SSIZE_OF(sw);
1086 EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1088 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
1089 offset -= SSIZE_OF(sw);
1090 EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1092 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
1093 offset -= SSIZE_OF(sw);
1094 EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
1097 /* Copy fourth argument and return address. */
1098 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1099 if (fast_call) {
1100 offset = stack_size;
1101 *extra_space = 0;
1103 if (word_arg_count >= 4 && prev_args_size == 0) {
1104 offset -= SSIZE_OF(sw);
1105 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1106 FAIL_IF(!inst);
1107 *inst = XCHG_r_rm;
1109 SLJIT_ASSERT(args_size != prev_args_size);
1110 } else {
1111 if (word_arg_count >= 4) {
1112 offset -= SSIZE_OF(sw);
1113 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1116 if (args_size != prev_args_size)
1117 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1120 if (args_size != prev_args_size)
1121 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
1122 } else {
1123 #endif /* SLJIT_X86_32_FASTCALL */
1124 offset = stack_size - SSIZE_OF(sw);
1125 *extra_space = args_size;
1127 if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
1128 offset -= SSIZE_OF(sw);
1129 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1130 FAIL_IF(!inst);
1131 *inst = XCHG_r_rm;
1133 SLJIT_ASSERT(prev_args_size > 0);
1134 } else {
1135 if (word_arg_count >= 4) {
1136 offset -= SSIZE_OF(sw);
1137 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1140 if (prev_args_size > 0)
1141 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1144 /* Copy return address. */
1145 if (prev_args_size > 0)
1146 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
1147 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1149 #endif /* SLJIT_X86_32_FASTCALL */
1151 while (types != 0) {
1152 switch (types & SLJIT_ARG_MASK) {
1153 case SLJIT_ARG_TYPE_F64:
1154 offset -= SSIZE_OF(f64);
1155 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1156 float_arg_count--;
1157 break;
1158 case SLJIT_ARG_TYPE_F32:
1159 offset -= SSIZE_OF(f32);
1160 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1161 float_arg_count--;
1162 break;
1163 default:
1164 switch (word_arg_count) {
1165 case 1:
1166 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1167 if (fast_call) {
1168 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
1169 break;
1171 #endif
1172 offset -= SSIZE_OF(sw);
1173 if (r2_offset != 0) {
1174 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
1175 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1176 } else
1177 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1178 break;
1179 case 2:
1180 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1181 if (fast_call)
1182 break;
1183 #endif
1184 offset -= SSIZE_OF(sw);
1185 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1186 break;
1187 case 3:
1188 offset -= SSIZE_OF(sw);
1189 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
1190 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1191 break;
1193 word_arg_count--;
1194 break;
1196 types >>= SLJIT_ARG_SHIFT;
1199 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1200 /* Skip return address. */
1201 if (fast_call)
1202 offset -= SSIZE_OF(sw);
1203 #endif
1205 SLJIT_ASSERT(offset >= 0);
1207 if (offset == 0)
1208 return SLJIT_SUCCESS;
1210 BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
1211 return SLJIT_SUCCESS;
1214 static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
1216 /* Called when stack consumption cannot be reduced to 0. */
1217 sljit_u8 *inst;
1219 BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
1221 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1222 FAIL_IF(!inst);
1223 INC_SIZE(1);
1224 RET();
1226 return SLJIT_SUCCESS;
1229 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
1230 sljit_s32 arg_types)
1232 struct sljit_jump *jump;
1233 sljit_sw stack_size = 0;
1234 sljit_s32 word_arg_count;
1236 CHECK_ERROR_PTR();
1237 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
1239 if (type & SLJIT_CALL_RETURN) {
1240 stack_size = type;
1241 PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));
1243 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1244 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1245 compiler->skip_checks = 1;
1246 #endif
1248 if (stack_size == 0) {
1249 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
1250 return sljit_emit_jump(compiler, type);
1253 jump = sljit_emit_jump(compiler, type);
1254 PTR_FAIL_IF(jump == NULL);
1256 PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
1257 return jump;
1260 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1261 if ((type & 0xff) == SLJIT_CALL) {
1262 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
1263 PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));
1265 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1266 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1267 compiler->skip_checks = 1;
1268 #endif
1270 jump = sljit_emit_jump(compiler, type);
1271 PTR_FAIL_IF(jump == NULL);
1273 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
1274 return jump;
1276 #endif
1278 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
1279 PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
1281 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1282 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1283 compiler->skip_checks = 1;
1284 #endif
1286 jump = sljit_emit_jump(compiler, type);
1287 PTR_FAIL_IF(jump == NULL);
1289 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
1290 return jump;
1293 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
1294 sljit_s32 arg_types,
1295 sljit_s32 src, sljit_sw srcw)
1297 sljit_sw stack_size = 0;
1298 sljit_s32 word_arg_count;
1299 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1300 sljit_s32 swap_args;
1301 #endif
1303 CHECK_ERROR();
1304 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
1306 if (type & SLJIT_CALL_RETURN) {
1307 stack_size = type;
1308 FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));
1310 if (!(src & SLJIT_IMM)) {
1311 src = SLJIT_R0;
1312 srcw = 0;
1315 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1316 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1317 compiler->skip_checks = 1;
1318 #endif
1320 if (stack_size == 0)
1321 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1323 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1324 return emit_tail_call_end(compiler, stack_size);
1327 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1328 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);
1330 if ((type & 0xff) == SLJIT_CALL) {
1331 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
1332 swap_args = 0;
1334 if (word_arg_count > 0) {
1335 if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
1336 swap_args = 1;
1337 if (((src & REG_MASK) | 0x2) == SLJIT_R2)
1338 src ^= 0x2;
1339 if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
1340 src ^= TO_OFFS_REG(0x2);
1344 FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));
1346 compiler->scratches_offset += stack_size;
1347 compiler->locals_offset += stack_size;
1349 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1350 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1351 compiler->skip_checks = 1;
1352 #endif
1353 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1355 compiler->scratches_offset -= stack_size;
1356 compiler->locals_offset -= stack_size;
1358 return post_call_with_args(compiler, arg_types, 0);
1360 #endif
1362 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
1363 FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
1365 compiler->scratches_offset += stack_size;
1366 compiler->locals_offset += stack_size;
1368 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1369 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1370 compiler->skip_checks = 1;
1371 #endif
1372 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1374 compiler->scratches_offset -= stack_size;
1375 compiler->locals_offset -= stack_size;
1377 return post_call_with_args(compiler, arg_types, stack_size);
1380 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1382 sljit_u8 *inst;
1384 CHECK_ERROR();
1385 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1386 ADJUST_LOCAL_OFFSET(dst, dstw);
1388 CHECK_EXTRA_REGS(dst, dstw, (void)0);
1390 if (FAST_IS_REG(dst)) {
1391 /* Unused dest is possible here. */
1392 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1393 FAIL_IF(!inst);
1395 INC_SIZE(1);
1396 POP_REG(reg_map[dst]);
1397 return SLJIT_SUCCESS;
1400 /* Memory. */
1401 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1402 FAIL_IF(!inst);
1403 *inst++ = POP_rm;
1404 return SLJIT_SUCCESS;
1407 static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1409 sljit_u8 *inst;
1411 CHECK_EXTRA_REGS(src, srcw, (void)0);
1413 if (FAST_IS_REG(src)) {
1414 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
1415 FAIL_IF(!inst);
1417 INC_SIZE(1 + 1);
1418 PUSH_REG(reg_map[src]);
1420 else {
1421 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
1422 FAIL_IF(!inst);
1423 *inst++ = GROUP_FF;
1424 *inst |= PUSH_rm;
1426 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1427 FAIL_IF(!inst);
1428 INC_SIZE(1);
1431 RET();
1432 return SLJIT_SUCCESS;
1435 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1437 sljit_sw size;
1439 /* Don't adjust shadow stack if it isn't enabled. */
1440 if (!cpu_has_shadow_stack())
1441 return SLJIT_SUCCESS;
1443 SLJIT_ASSERT(compiler->args_size >= 0);
1444 SLJIT_ASSERT(compiler->local_size > 0);
1446 size = compiler->local_size;
1447 size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1448 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1450 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);