/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
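
/* emit_x86_instruction reserves room for a whole instruction: it emits the optional
   legacy prefixes (0xf2 / 0xf3 / 0x66), leaves `size` opcode bytes for the caller to
   fill in through the returned pointer, and encodes the ModR/M byte, the optional
   SIB byte, the displacement and the immediate operand. */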
/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}
		else if (reg_map[b & REG_MASK] == 5)
			inst_size += sizeof(sljit_s8);

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if (b & OFFS_REG_MASK)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
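
	/* inst points at the opcode position (written later by the caller);
	   buf_ptr points at the ModR/M byte that follows the size-byte opcode. */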
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(a << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}
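
	/* ModR/M layout: mod (bits 7-6), reg (bits 5-3), rm (bits 2-0). The reg field
	   was filled above from operand a; below the addressing mode of b is selected:
	   MOD_REG for a register operand, mod = 0x40 / 0x80 for an 8 / 32 bit
	   displacement, rm = 0x04 to request a SIB byte and rm = 0x05 for an absolute
	   address. A base register mapped to 5 (ebp) always needs a displacement. */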
	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) {
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/* Enter / return */
/* --------------------------------------------------------------------- */
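
/* Far jumps are encoded as JMP rel32 / CALL rel32, or as the two byte 0x0f Jcc
   opcode followed by a 32 bit displacement. The displacement is relative to the end
   of the instruction (jump->addr + 4); jumps to a not yet resolved label are marked
   with PATCH_MW and patched after code generation. */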
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}
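
/* The prologue below loads the incoming floating point arguments into SSE
   registers, pushes the saved registers, copies the word arguments into the saved
   (S) registers and finally allocates the local area. */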
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, float_arg_count, args_size, types;
	sljit_uw size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	types = arg_types;
	word_arg_count = 0;
	float_arg_count = 0;
	args_size = SSIZE_OF(sw);
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count > 2)
				args_size += SSIZE_OF(sw);
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	args_size -= SSIZE_OF(sw);
	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * SSIZE_OF(sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * SSIZE_OF(sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);
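
	/* Layout of the area below the locals: stack_tmp_size bytes at [esp+0] are
	   reserved for temporaries and outgoing call arguments, the memory backed
	   scratch registers (scratches above the three register ones) follow, the
	   memory backed saved registers start at saveds_offset, and the user locals
	   start at locals_offset (8 byte aligned when SLJIT_F64_ALIGNMENT is used). */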

	size = (sljit_uw)(1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + (sljit_s32)(size * sizeof(sljit_sw)));

	word_arg_count = 0;
	args_size = (sljit_s32)((size + 1) * sizeof(sljit_sw));
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			if (word_arg_count <= 3) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (word_arg_count <= 2)
					break;
#endif
				EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
			}
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (word_arg_count > 0)
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
	if (word_arg_count > 1)
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0);
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif

	compiler->local_size = local_size;
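
	/* Windows commits stack pages lazily through a guard page, so when more than
	   one page of locals is allocated each 4096 byte step has to be touched in
	   order; larger allocations use a short backwards JNE loop below. */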
#ifdef _WIN32
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}
	}
#endif

	SLJIT_ASSERT(local_size > 0);

#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + SSIZE_OF(sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, SSIZE_OF(sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(SSIZE_OF(f64) - 1));

		if (word_arg_count == 4)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, SLJIT_R0, 0);
	}
#endif
	FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	if (word_arg_count == 4)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args_size;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 word_arg_count = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	arg_types >>= SLJIT_ARG_SHIFT;
	args_size = 0;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count >= 2)
				args_size += SSIZE_OF(sw);
			word_arg_count++;
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * SSIZE_OF(sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * SSIZE_OF(sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif
	return SLJIT_SUCCESS;
}
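
/* Releases the register part of the frame: pops the registers saved by
   sljit_emit_enter in reverse push order. The local area must already have been
   freed by the caller. */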
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	FAIL_IF(emit_stack_frame_release(compiler));

	size = 1;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		size = 3;
#endif
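
	/* A plain near RET is a single 0xc3 byte; when fastcall arguments were passed
	   on the stack the callee removes them with the three byte RET imm16 form. */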
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0) {
		RET_I16(U8(compiler->args_size));
		return SLJIT_SUCCESS;
	}
#endif

	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/* Call / return instructions */
/* --------------------------------------------------------------------- */
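
/* Two calling conventions are handled here: when SLJIT_X86_32_FASTCALL is enabled,
   SLJIT_CALL passes the first two word arguments in registers (SLJIT_R2/ecx and
   SLJIT_R1/edx) and the callee removes the remaining stack arguments; otherwise
   plain cdecl is used, where every argument is pushed on the stack and the caller
   removes them after the call. */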
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)

static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}

static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_ARG_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += SSIZE_OF(sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += SSIZE_OF(sw);
				}
				break;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
		}
	}

	return SLJIT_SUCCESS;
}

#endif
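
/* cdecl helpers: every argument is passed on the stack and the caller frees it
   after the call. The stack_tmp_size bytes reserved at [esp+0] by sljit_emit_enter
   are reused first, so only the excess has to be allocated; on macOS the allocation
   is rounded to a multiple of 16 to preserve the stack alignment required at call
   sites. */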
static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}

static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
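
/* Cleans up after a call: frees the extra argument area, then moves a floating
   point return value, which x86-32 callees return on the x87 stack (ST0), into
   SLJIT_FR0 by storing it to [esp] with FSTPS/FSTPD and reloading it with an SSE
   load. */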
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
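
/* Tail calls: the outgoing arguments are written over the current frame, the saved
   registers are restored and the frame is released before control is transferred,
   so the callee returns directly to this function's caller. *extra_space receives
   the number of argument bytes that still occupy the stack; when it is not zero the
   caller emits a normal call followed by emit_tail_call_end, which frees those
   bytes and returns. */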
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, prev_args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw base_reg, word_arg4_offset;
	sljit_u8 r2_offset = 0;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
#endif
	sljit_u8* inst;

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);

	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (!fast_call || word_arg_count > 2)
				args_size += SSIZE_OF(sw);
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		*extra_space = fast_call ? 0 : args_size;
		prev_args_size = compiler->args_size;
		stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
#else /* !SLJIT_X86_32_FASTCALL */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
#endif /* SLJIT_X86_32_FASTCALL */

#if !defined(__APPLE__)
		if (compiler->options & SLJIT_F64_ALIGNMENT) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
			offset = stack_size;
			base_reg = SLJIT_MEM1(TMP_REG1);
		} else {
#endif /* !__APPLE__ */
			offset = stack_size + compiler->local_size;
			base_reg = SLJIT_MEM1(SLJIT_SP);
#if !defined(__APPLE__)
		}
#endif /* !__APPLE__ */

		if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (!fast_call)
			offset -= SSIZE_OF(sw);

		if (word_arg_count >= 3) {
			word_arg4_offset = SSIZE_OF(sw);

			if (word_arg_count + float_arg_count >= 4) {
				word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
				if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
					word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
			}

			/* In cdecl mode, at least one more word value must
			 * be present on the stack before the return address. */
			EMIT_MOV(compiler, base_reg, offset - word_arg4_offset, SLJIT_R2, 0);
		}

		if (fast_call) {
			if (args_size < prev_args_size) {
				EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size - SSIZE_OF(sw));
				EMIT_MOV(compiler, base_reg, offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
			}
		} else if (prev_args_size > 0) {
			EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size);
			EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
		}
#endif /* SLJIT_X86_32_FASTCALL */

		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
					if (fast_call) {
						EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
						break;
					}
#endif
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, base_reg, offset, SLJIT_R0, 0);
					break;
				case 2:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
					if (fast_call)
						break;
#endif
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, base_reg, offset, SLJIT_R1, 0);
					break;
				case 3:
					offset -= SSIZE_OF(sw);
					break;
				case 4:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));
					EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

#if !defined(__APPLE__)
		if (compiler->options & SLJIT_F64_ALIGNMENT) {
			EMIT_MOV(compiler, SLJIT_SP, 0, TMP_REG1, 0);
		} else {
#endif /* !__APPLE__ */
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#if !defined(__APPLE__)
		}
#endif /* !__APPLE__ */
		FAIL_IF(emit_stack_frame_release(compiler));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (args_size < prev_args_size)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, prev_args_size - args_size));
#endif

		return SLJIT_SUCCESS;
	}

	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	prev_args_size = compiler->args_size;
#else
	prev_args_size = 0;
#endif

	prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	base_reg = SLJIT_MEM1(SLJIT_SP);
	word_arg4_offset = compiler->saveds_offset - SSIZE_OF(sw);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT) {
		min_size += 2 * SSIZE_OF(sw);

		if (stack_size < min_size)
			stack_size = min_size;

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, stack_size - prev_stack_size));

		inst = emit_x86_instruction(compiler, 1, SLJIT_SP, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;

		if (src == SLJIT_MEM1(SLJIT_SP))
			src = SLJIT_MEM1(TMP_REG1);
		base_reg = SLJIT_MEM1(TMP_REG1);
	} else {
#endif /* !__APPLE__ */
		if (stack_size > min_size) {
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size - min_size));
			if (src == SLJIT_MEM1(SLJIT_SP))
				srcw += stack_size - min_size;
			word_arg4_offset += stack_size - min_size;
		}
		else
			stack_size = min_size;
#if !defined(__APPLE__)
	}
#endif /* !__APPLE__ */

	if (word_arg_count >= 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, word_arg4_offset);
	}

	if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	if (compiler->saveds > 2 || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if (compiler->saveds > 1 || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if (compiler->saveds > 0 || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (fast_call) {
		offset = stack_size;
		*extra_space = 0;

		if (word_arg_count >= 4 && prev_args_size == 0) {
			offset -= SSIZE_OF(sw);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
			FAIL_IF(!inst);
			*inst = XCHG_r_rm;

			SLJIT_ASSERT(args_size != prev_args_size);
		} else {
			if (word_arg_count >= 4) {
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
			}

			if (args_size != prev_args_size)
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
		}

		if (args_size != prev_args_size)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
	} else {
#endif /* SLJIT_X86_32_FASTCALL */
		offset = stack_size - SSIZE_OF(sw);
		*extra_space = args_size;

		if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
			offset -= SSIZE_OF(sw);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
			FAIL_IF(!inst);
			*inst = XCHG_r_rm;

			SLJIT_ASSERT(prev_args_size > 0);
		} else {
			if (word_arg_count >= 4) {
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
			}

			if (prev_args_size > 0)
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
		}

		/* Copy return address. */
		if (prev_args_size > 0)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	}
#endif /* SLJIT_X86_32_FASTCALL */

	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (fast_call) {
					EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
					break;
				}
#endif
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (fast_call)
					break;
#endif
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Skip return address. */
	if (fast_call)
		offset -= SSIZE_OF(sw);
#endif

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, offset);
}

static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, extra_space));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_TAIL_CALL) {
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0) {
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
			return sljit_emit_jump(compiler, type);
		}

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_TAIL_CALL) {
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		if (!(src & SLJIT_IMM)) {
			src = SLJIT_R0;
			srcw = 0;
		}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}
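
/* SLJIT_FAST_CALL convention: the CALL instruction leaves the return address on
   the top of the stack. sljit_emit_fast_enter pops it into dst, and
   emit_fast_return pushes it (or the stored copy) back before executing RET. */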
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
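
/* When shadow stacks (Intel CET) are enabled, returning over skipped frames must
   keep the shadow stack in sync: the code below locates the return address that
   will actually be used and passes its stack location to adjust_shadow_stack. */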
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_sw size, saved_size;
	sljit_s32 has_f64_aligment;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
	has_f64_aligment = 0;
#endif

	size = compiler->local_size;
	saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);

	if (has_f64_aligment) {
		/* mov TMP_REG1, [esp + local_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
		/* mov TMP_REG1, [TMP_REG1 + saved_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
		/* Move return address to [esp]. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
		size = 0;
	} else
		size += saved_size;

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}