Fix x86 mem address encoding
[sljit.git] / sljit_src / sljitNativeX86_32.c
blob771a59df6d73c8ac226b76699f6c6fde58b06c0a
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 /* x86 32-bit arch dependent functions. */
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
35 sljit_u8 *inst;
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(1 + sizeof(sljit_sw));
40 *inst++ = opcode;
41 sljit_unaligned_store_sw(inst, imm);
42 return SLJIT_SUCCESS;
45 /* Size contains the flags as well. */
46 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
47 /* The register or immediate operand. */
48 sljit_s32 a, sljit_sw imma,
49 /* The general operand (not immediate). */
50 sljit_s32 b, sljit_sw immb)
52 sljit_u8 *inst;
53 sljit_u8 *buf_ptr;
54 sljit_u8 reg_map_b;
55 sljit_uw flags = size;
56 sljit_uw inst_size;
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
66 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
67 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
68 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
70 size &= 0xf;
71 inst_size = size;
73 if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
74 inst_size++;
75 if (flags & EX86_PREF_66)
76 inst_size++;
78 /* Calculate size of b. */
79 inst_size += 1; /* mod r/m byte. */
80 if (b & SLJIT_MEM) {
81 if (!(b & REG_MASK))
82 inst_size += sizeof(sljit_sw);
83 else {
84 if (immb != 0 && !(b & OFFS_REG_MASK)) {
85 /* Immediate operand. */
86 if (immb <= 127 && immb >= -128)
87 inst_size += sizeof(sljit_s8);
88 else
89 inst_size += sizeof(sljit_sw);
91 else if (reg_map[b & REG_MASK] == 5) {
92 /* Swap registers if possible. */
93 if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
94 b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
95 else
96 inst_size += sizeof(sljit_s8);
99 if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
100 b |= TO_OFFS_REG(SLJIT_SP);
102 if (b & OFFS_REG_MASK)
103 inst_size += 1; /* SIB byte. */
107 /* Calculate size of a. */
108 if (a & SLJIT_IMM) {
109 if (flags & EX86_BIN_INS) {
110 if (imma <= 127 && imma >= -128) {
111 inst_size += 1;
112 flags |= EX86_BYTE_ARG;
113 } else
114 inst_size += 4;
116 else if (flags & EX86_SHIFT_INS) {
117 imma &= 0x1f;
118 if (imma != 1) {
119 inst_size++;
120 flags |= EX86_BYTE_ARG;
122 } else if (flags & EX86_BYTE_ARG)
123 inst_size++;
124 else if (flags & EX86_HALF_ARG)
125 inst_size += sizeof(short);
126 else
127 inst_size += sizeof(sljit_sw);
129 else
130 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
132 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
133 PTR_FAIL_IF(!inst);
135 /* Encoding the byte. */
136 INC_SIZE(inst_size);
137 if (flags & EX86_PREF_F2)
138 *inst++ = 0xf2;
139 if (flags & EX86_PREF_F3)
140 *inst++ = 0xf3;
141 if (flags & EX86_PREF_66)
142 *inst++ = 0x66;
144 buf_ptr = inst + size;
146 /* Encode mod/rm byte. */
147 if (!(flags & EX86_SHIFT_INS)) {
148 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
149 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
151 if (a & SLJIT_IMM)
152 *buf_ptr = 0;
153 else if (!(flags & EX86_SSE2_OP1))
154 *buf_ptr = U8(reg_map[a] << 3);
155 else
156 *buf_ptr = U8(a << 3);
158 else {
159 if (a & SLJIT_IMM) {
160 if (imma == 1)
161 *inst = GROUP_SHIFT_1;
162 else
163 *inst = GROUP_SHIFT_N;
164 } else
165 *inst = GROUP_SHIFT_CL;
166 *buf_ptr = 0;
169 if (!(b & SLJIT_MEM)) {
170 *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
171 buf_ptr++;
172 } else if (b & REG_MASK) {
173 reg_map_b = reg_map[b & REG_MASK];
175 if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
176 if (immb != 0 || reg_map_b == 5) {
177 if (immb <= 127 && immb >= -128)
178 *buf_ptr |= 0x40;
179 else
180 *buf_ptr |= 0x80;
183 if (!(b & OFFS_REG_MASK))
184 *buf_ptr++ |= reg_map_b;
185 else {
186 *buf_ptr++ |= 0x04;
187 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
190 if (immb != 0 || reg_map_b == 5) {
191 if (immb <= 127 && immb >= -128)
192 *buf_ptr++ = U8(immb); /* 8 bit displacement. */
193 else {
194 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
195 buf_ptr += sizeof(sljit_sw);
199 else {
200 if (reg_map_b == 5)
201 *buf_ptr |= 0x40;
203 *buf_ptr++ |= 0x04;
204 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
206 if (reg_map_b == 5)
207 *buf_ptr++ = 0;
210 else {
211 *buf_ptr++ |= 0x05;
212 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
213 buf_ptr += sizeof(sljit_sw);
216 if (a & SLJIT_IMM) {
217 if (flags & EX86_BYTE_ARG)
218 *buf_ptr = U8(imma);
219 else if (flags & EX86_HALF_ARG)
220 sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
221 else if (!(flags & EX86_SHIFT_INS))
222 sljit_unaligned_store_sw(buf_ptr, imma);
225 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
228 /* --------------------------------------------------------------------- */
229 /* Enter / return */
230 /* --------------------------------------------------------------------- */
232 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
234 sljit_uw type = jump->flags >> TYPE_SHIFT;
236 if (type == SLJIT_JUMP) {
237 *code_ptr++ = JMP_i32;
238 jump->addr++;
240 else if (type >= SLJIT_FAST_CALL) {
241 *code_ptr++ = CALL_i32;
242 jump->addr++;
244 else {
245 *code_ptr++ = GROUP_0F;
246 *code_ptr++ = get_jump_code(type);
247 jump->addr += 2;
250 if (jump->flags & JUMP_LABEL)
251 jump->flags |= PATCH_MW;
252 else
253 sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
254 code_ptr += 4;
256 return code_ptr;
259 #define ENTER_R2_USED 0x00001
260 #define ENTER_R2_SAVED 0x00002
261 #define ENTER_R2_TO_S 0x00004
262 #define ENTER_R2_TO_R0 0x00008
263 #define ENTER_R1_TO_S 0x00010
264 #define ENTER_TMP_TO_R4 0x00020
265 #define ENTER_TMP_TO_S 0x00040
267 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
268 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
269 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
271 sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
272 sljit_s32 size, locals_offset, args_size, types, status;
273 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
274 sljit_u8 *inst;
275 #ifdef _WIN32
276 sljit_s32 r2_offset = -1;
277 #endif
279 CHECK_ERROR();
280 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
281 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
283 /* Emit ENDBR32 at function entry if needed. */
284 FAIL_IF(emit_endbranch(compiler));
286 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
288 arg_types >>= SLJIT_ARG_SHIFT;
289 types = arg_types;
290 word_arg_count = 0;
291 saved_arg_count = 0;
292 float_arg_count = 0;
293 args_size = SSIZE_OF(sw);
294 status = 0;
295 while (types) {
296 switch (types & SLJIT_ARG_MASK) {
297 case SLJIT_ARG_TYPE_F64:
298 float_arg_count++;
299 FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
300 args_size += SSIZE_OF(f64);
301 break;
302 case SLJIT_ARG_TYPE_F32:
303 float_arg_count++;
304 FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
305 args_size += SSIZE_OF(f32);
306 break;
307 default:
308 word_arg_count++;
310 if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
311 saved_arg_count++;
313 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
314 if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL))
315 break;
316 #endif /* SLJIT_X86_32_FASTCALL */
318 if (word_arg_count == 4) {
319 if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
320 status |= ENTER_TMP_TO_R4;
321 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
322 } else if (saved_arg_count == 4) {
323 status |= ENTER_TMP_TO_S;
324 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
328 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
329 if (word_arg_count == 3 && ((types & SLJIT_ARG_TYPE_SCRATCH_REG) || (saved_arg_count + kept_saveds_count > 3)))
330 status |= ENTER_R2_USED;
331 #endif /* SLJIT_X86_32_FASTCALL */
333 args_size += SSIZE_OF(sw);
334 break;
336 types >>= SLJIT_ARG_SHIFT;
339 args_size -= SSIZE_OF(sw);
340 compiler->args_size = args_size;
342 /* [esp+0] for saving temporaries and function calls. */
343 locals_offset = 2 * SSIZE_OF(sw);
345 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
346 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
347 locals_offset = 4 * SSIZE_OF(sw);
348 #else
349 if (scratches >= 3)
350 locals_offset = 4 * SSIZE_OF(sw);
351 #endif
353 compiler->scratches_offset = locals_offset;
355 if (scratches > 3)
356 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
358 if (saveds > 3)
359 locals_offset += (saveds - 3) * SSIZE_OF(sw);
361 compiler->locals_offset = locals_offset;
363 size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
364 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
365 FAIL_IF(!inst);
367 INC_SIZE((sljit_uw)size);
368 PUSH_REG(reg_map[TMP_REG1]);
369 if (saveds > 2 || scratches > 9)
370 PUSH_REG(reg_map[SLJIT_S2]);
371 if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
372 PUSH_REG(reg_map[SLJIT_S1]);
373 if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
374 PUSH_REG(reg_map[SLJIT_S0]);
376 size *= SSIZE_OF(sw);
378 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
379 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);
381 size += SSIZE_OF(sw);
383 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
384 if (!(options & SLJIT_ENTER_CDECL))
385 size += args_size;
386 #endif
388 local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
389 compiler->local_size = local_size;
391 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
392 if (!(options & SLJIT_ENTER_CDECL))
393 size -= args_size;
394 #endif
396 word_arg_count = 0;
397 saved_arg_count = kept_saveds_count;
398 args_size = size;
399 while (arg_types) {
400 switch (arg_types & SLJIT_ARG_MASK) {
401 case SLJIT_ARG_TYPE_F64:
402 args_size += SSIZE_OF(f64);
403 break;
404 case SLJIT_ARG_TYPE_F32:
405 args_size += SSIZE_OF(f32);
406 break;
407 default:
408 word_arg_count++;
409 SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));
411 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG))
412 saved_arg_count++;
414 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
415 if (!(options & SLJIT_ENTER_CDECL)) {
416 if (word_arg_count == 1) {
417 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
418 if (!(status & ENTER_R2_USED))
419 status |= ENTER_R2_TO_R0;
420 else
421 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
422 break;
425 status |= ENTER_R2_SAVED;
427 if (!(status & ENTER_R2_USED))
428 status |= ENTER_R2_TO_S;
429 else
430 EMIT_MOV(compiler, (SLJIT_S0 + 1) - saved_arg_count, 0, SLJIT_R2, 0);
431 break;
434 if (word_arg_count == 2) {
435 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG))
436 status |= ENTER_R1_TO_S;
437 break;
440 #endif /* SLJIT_X86_32_FASTCALL */
442 if ((arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) || saved_arg_count > 3) {
443 #ifdef _WIN32
444 if (word_arg_count == 3 && local_size > 4 * 4096)
445 r2_offset = local_size + args_size;
446 else
447 #endif
448 EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
450 } else
451 EMIT_MOV(compiler, (SLJIT_S0 + 1) - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
453 args_size += SSIZE_OF(sw);
454 break;
456 arg_types >>= SLJIT_ARG_SHIFT;
459 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
460 if (!(options & SLJIT_ENTER_CDECL)) {
461 if (status & ENTER_R2_TO_S) {
462 SLJIT_ASSERT(status & ENTER_R2_SAVED);
463 EMIT_MOV(compiler, SLJIT_S0 - kept_saveds_count, 0, SLJIT_R2, 0);
464 } else if (status & ENTER_R2_TO_R0)
465 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
467 size = kept_saveds_count;
468 if (status & ENTER_R2_SAVED)
469 size++;
471 if ((status & ENTER_R1_TO_S) && size < 3)
472 EMIT_MOV(compiler, SLJIT_S0 - size, 0, SLJIT_R1, 0);
474 #endif /* SLJIT_X86_32_FASTCALL */
476 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
478 #ifdef _WIN32
479 SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);
481 if (local_size > 4096) {
482 if (local_size <= 4 * 4096) {
483 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
485 if (local_size > 2 * 4096)
486 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
487 if (local_size > 3 * 4096)
488 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
490 else {
491 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);
493 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
494 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
496 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
497 FAIL_IF(!inst);
499 INC_SIZE(2);
500 inst[0] = LOOP_i8;
501 inst[1] = (sljit_u8)-16;
502 local_size &= 0xfff;
506 if (local_size > 0) {
507 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
508 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
511 if (r2_offset != -1)
512 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
514 #else /* !_WIN32 */
516 SLJIT_ASSERT(local_size > 0);
518 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
520 #endif /* _WIN32 */
522 locals_offset -= SSIZE_OF(sw);
523 kept_saveds_count = SLJIT_R3 - kept_saveds_count;
525 while (saved_arg_count > 3) {
526 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), locals_offset, kept_saveds_count, 0);
527 kept_saveds_count++;
528 locals_offset -= SSIZE_OF(sw);
529 saved_arg_count--;
532 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
533 size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : locals_offset;
534 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
537 return SLJIT_SUCCESS;
540 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
541 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
542 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
544 sljit_s32 args_size, locals_offset;
545 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
546 sljit_s32 word_arg_count = 0;
547 #endif
549 CHECK_ERROR();
550 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
551 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
553 arg_types >>= SLJIT_ARG_SHIFT;
554 args_size = 0;
555 while (arg_types) {
556 switch (arg_types & SLJIT_ARG_MASK) {
557 case SLJIT_ARG_TYPE_F64:
558 args_size += SSIZE_OF(f64);
559 break;
560 case SLJIT_ARG_TYPE_F32:
561 args_size += SSIZE_OF(f32);
562 break;
563 default:
564 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
565 if (word_arg_count >= 2)
566 args_size += SSIZE_OF(sw);
567 word_arg_count++;
568 #else
569 args_size += SSIZE_OF(sw);
570 #endif
571 break;
573 arg_types >>= SLJIT_ARG_SHIFT;
576 compiler->args_size = args_size;
578 /* [esp+0] for saving temporaries and function calls. */
579 locals_offset = 2 * SSIZE_OF(sw);
581 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
582 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
583 locals_offset = 4 * SSIZE_OF(sw);
584 #else
585 if (scratches >= 3)
586 locals_offset = 4 * SSIZE_OF(sw);
587 #endif
589 compiler->scratches_offset = locals_offset;
591 if (scratches > 3)
592 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
594 if (saveds > 3)
595 locals_offset += (saveds - 3) * SSIZE_OF(sw);
597 compiler->locals_offset = locals_offset;
599 saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
601 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
602 if (!(options & SLJIT_ENTER_CDECL))
603 saveds += args_size;
604 #endif
606 compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
607 return SLJIT_SUCCESS;
610 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
612 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
613 sljit_uw size;
614 sljit_u8 *inst;
616 size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
617 (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);
618 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
619 FAIL_IF(!inst);
621 INC_SIZE(size);
623 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
624 POP_REG(reg_map[SLJIT_S0]);
625 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
626 POP_REG(reg_map[SLJIT_S1]);
627 if (compiler->saveds > 2 || compiler->scratches > 9)
628 POP_REG(reg_map[SLJIT_S2]);
629 POP_REG(reg_map[TMP_REG1]);
631 return SLJIT_SUCCESS;
634 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
636 sljit_uw size;
637 sljit_u8 *inst;
639 CHECK_ERROR();
640 CHECK(check_sljit_emit_return_void(compiler));
642 SLJIT_ASSERT(compiler->args_size >= 0);
643 SLJIT_ASSERT(compiler->local_size > 0);
645 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
647 FAIL_IF(emit_stack_frame_release(compiler));
649 size = 1;
650 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
651 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL))
652 size = 3;
653 #endif
654 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
655 FAIL_IF(!inst);
657 INC_SIZE(size);
659 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
660 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) {
661 RET_I16(U8(compiler->args_size));
662 return SLJIT_SUCCESS;
664 #endif
666 RET();
667 return SLJIT_SUCCESS;
670 /* --------------------------------------------------------------------- */
671 /* Call / return instructions */
672 /* --------------------------------------------------------------------- */
674 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
676 static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
678 sljit_sw stack_size = 0;
679 sljit_s32 word_arg_count = 0;
681 arg_types >>= SLJIT_ARG_SHIFT;
683 while (arg_types) {
684 switch (arg_types & SLJIT_ARG_MASK) {
685 case SLJIT_ARG_TYPE_F64:
686 stack_size += SSIZE_OF(f64);
687 break;
688 case SLJIT_ARG_TYPE_F32:
689 stack_size += SSIZE_OF(f32);
690 break;
691 default:
692 word_arg_count++;
693 if (word_arg_count > 2)
694 stack_size += SSIZE_OF(sw);
695 break;
698 arg_types >>= SLJIT_ARG_SHIFT;
701 if (word_arg_count_ptr)
702 *word_arg_count_ptr = word_arg_count;
704 return stack_size;
707 static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
708 sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
710 sljit_u8 *inst;
711 sljit_s32 float_arg_count;
713 if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
714 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
715 FAIL_IF(!inst);
716 INC_SIZE(1);
717 PUSH_REG(reg_map[SLJIT_R2]);
719 else if (stack_size > 0) {
720 if (word_arg_count >= 4)
721 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
723 BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
725 stack_size = 0;
726 arg_types >>= SLJIT_ARG_SHIFT;
727 word_arg_count = 0;
728 float_arg_count = 0;
729 while (arg_types) {
730 switch (arg_types & SLJIT_ARG_MASK) {
731 case SLJIT_ARG_TYPE_F64:
732 float_arg_count++;
733 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
734 stack_size += SSIZE_OF(f64);
735 break;
736 case SLJIT_ARG_TYPE_F32:
737 float_arg_count++;
738 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
739 stack_size += SSIZE_OF(f32);
740 break;
741 default:
742 word_arg_count++;
743 if (word_arg_count == 3) {
744 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
745 stack_size += SSIZE_OF(sw);
747 else if (word_arg_count == 4) {
748 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
749 stack_size += SSIZE_OF(sw);
751 break;
754 arg_types >>= SLJIT_ARG_SHIFT;
758 if (word_arg_count > 0) {
759 if (swap_args) {
760 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
761 FAIL_IF(!inst);
762 INC_SIZE(1);
764 *inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
766 else {
767 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
768 FAIL_IF(!inst);
769 INC_SIZE(2);
771 *inst++ = MOV_r_rm;
772 *inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
776 return SLJIT_SUCCESS;
779 #endif
781 static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
783 sljit_sw stack_size = 0;
784 sljit_s32 word_arg_count = 0;
786 arg_types >>= SLJIT_ARG_SHIFT;
788 while (arg_types) {
789 switch (arg_types & SLJIT_ARG_MASK) {
790 case SLJIT_ARG_TYPE_F64:
791 stack_size += SSIZE_OF(f64);
792 break;
793 case SLJIT_ARG_TYPE_F32:
794 stack_size += SSIZE_OF(f32);
795 break;
796 default:
797 word_arg_count++;
798 stack_size += SSIZE_OF(sw);
799 break;
802 arg_types >>= SLJIT_ARG_SHIFT;
805 if (word_arg_count_ptr)
806 *word_arg_count_ptr = word_arg_count;
808 if (stack_size <= compiler->scratches_offset)
809 return 0;
811 return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf);
814 static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
815 sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
817 sljit_s32 float_arg_count = 0;
818 sljit_u8 *inst;
820 if (word_arg_count >= 4)
821 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
823 if (stack_size > 0)
824 BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
826 stack_size = 0;
827 word_arg_count = 0;
828 arg_types >>= SLJIT_ARG_SHIFT;
830 while (arg_types) {
831 switch (arg_types & SLJIT_ARG_MASK) {
832 case SLJIT_ARG_TYPE_F64:
833 float_arg_count++;
834 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
835 stack_size += SSIZE_OF(f64);
836 break;
837 case SLJIT_ARG_TYPE_F32:
838 float_arg_count++;
839 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
840 stack_size += SSIZE_OF(f32);
841 break;
842 default:
843 word_arg_count++;
844 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
845 stack_size += SSIZE_OF(sw);
846 break;
849 arg_types >>= SLJIT_ARG_SHIFT;
852 return SLJIT_SUCCESS;
855 static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
856 sljit_s32 arg_types, sljit_s32 stack_size)
858 sljit_u8 *inst;
859 sljit_s32 single;
861 if (stack_size > 0)
862 BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);
864 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
865 return SLJIT_SUCCESS;
867 single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);
869 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
870 FAIL_IF(!inst);
871 INC_SIZE(3);
872 inst[0] = single ? FSTPS : FSTPD;
873 inst[1] = (0x03 << 3) | 0x04;
874 inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];
876 return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
879 static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
880 sljit_s32 *extra_space, sljit_s32 arg_types,
881 sljit_s32 src, sljit_sw srcw)
883 sljit_sw args_size, prev_args_size, saved_regs_size;
884 sljit_sw types, word_arg_count, float_arg_count;
885 sljit_sw stack_size, prev_stack_size, min_size, offset;
886 sljit_sw word_arg4_offset;
887 sljit_u8 r2_offset = 0;
888 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
889 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
890 sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
891 #endif
892 sljit_u8* inst;
894 ADJUST_LOCAL_OFFSET(src, srcw);
895 CHECK_EXTRA_REGS(src, srcw, (void)0);
897 saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
898 + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);
900 word_arg_count = 0;
901 float_arg_count = 0;
902 arg_types >>= SLJIT_ARG_SHIFT;
903 types = 0;
904 args_size = 0;
906 while (arg_types != 0) {
907 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
909 switch (arg_types & SLJIT_ARG_MASK) {
910 case SLJIT_ARG_TYPE_F64:
911 args_size += SSIZE_OF(f64);
912 float_arg_count++;
913 break;
914 case SLJIT_ARG_TYPE_F32:
915 args_size += SSIZE_OF(f32);
916 float_arg_count++;
917 break;
918 default:
919 word_arg_count++;
920 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
921 if (!fast_call || word_arg_count > 2)
922 args_size += SSIZE_OF(sw);
923 #else
924 args_size += SSIZE_OF(sw);
925 #endif
926 break;
928 arg_types >>= SLJIT_ARG_SHIFT;
931 if (args_size <= compiler->args_size
932 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
933 && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call)
934 #endif /* SLJIT_X86_32_FASTCALL */
935 && 1) {
936 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
937 *extra_space = fast_call ? 0 : args_size;
938 prev_args_size = compiler->args_size;
939 stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
940 #else /* !SLJIT_X86_32_FASTCALL */
941 *extra_space = 0;
942 stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
943 #endif /* SLJIT_X86_32_FASTCALL */
945 offset = stack_size + compiler->local_size;
947 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
948 if (word_arg_count >= 1) {
949 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
950 r2_offset = sizeof(sljit_sw);
952 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
955 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
956 if (!(compiler->options & SLJIT_ENTER_CDECL)) {
957 if (!fast_call)
958 offset -= SSIZE_OF(sw);
960 if (word_arg_count >= 3) {
961 word_arg4_offset = SSIZE_OF(sw);
963 if (word_arg_count + float_arg_count >= 4) {
964 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
965 if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
966 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
969 /* In cdecl mode, at least one more word value must
970 * be present on the stack before the return address. */
971 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0);
974 if (fast_call) {
975 if (args_size < prev_args_size) {
976 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw));
977 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
979 } else if (prev_args_size > 0) {
980 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size);
981 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
984 #endif /* SLJIT_X86_32_FASTCALL */
986 while (types != 0) {
987 switch (types & SLJIT_ARG_MASK) {
988 case SLJIT_ARG_TYPE_F64:
989 offset -= SSIZE_OF(f64);
990 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
991 float_arg_count--;
992 break;
993 case SLJIT_ARG_TYPE_F32:
994 offset -= SSIZE_OF(f32);
995 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
996 float_arg_count--;
997 break;
998 default:
999 switch (word_arg_count) {
1000 case 1:
1001 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1002 if (fast_call) {
1003 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
1004 break;
1006 #endif
1007 offset -= SSIZE_OF(sw);
1008 if (r2_offset != 0) {
1009 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
1010 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1011 } else
1012 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1013 break;
1014 case 2:
1015 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1016 if (fast_call)
1017 break;
1018 #endif
1019 offset -= SSIZE_OF(sw);
1020 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1021 break;
1022 case 3:
1023 offset -= SSIZE_OF(sw);
1024 break;
1025 case 4:
1026 offset -= SSIZE_OF(sw);
1027 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
1028 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1029 break;
1031 word_arg_count--;
1032 break;
1034 types >>= SLJIT_ARG_SHIFT;
1037 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
1038 FAIL_IF(emit_stack_frame_release(compiler));
1040 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1041 if (args_size < prev_args_size)
1042 BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0);
1043 #endif
1045 return SLJIT_SUCCESS;
1048 stack_size = args_size + SSIZE_OF(sw);
1050 if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
1051 r2_offset = SSIZE_OF(sw);
1052 stack_size += SSIZE_OF(sw);
1055 if (word_arg_count >= 3)
1056 stack_size += SSIZE_OF(sw);
1058 prev_args_size = 0;
1059 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1060 if (!(compiler->options & SLJIT_ENTER_CDECL))
1061 prev_args_size = compiler->args_size;
1062 #endif
1064 prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
1065 min_size = prev_stack_size + compiler->local_size;
1067 word_arg4_offset = compiler->scratches_offset;
1069 if (stack_size > min_size) {
1070 BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
1071 if (src == SLJIT_MEM1(SLJIT_SP))
1072 srcw += stack_size - min_size;
1073 word_arg4_offset += stack_size - min_size;
1075 else
1076 stack_size = min_size;
1078 if (word_arg_count >= 3) {
1079 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
1081 if (word_arg_count >= 4)
1082 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
1085 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
1086 if (word_arg_count >= 1) {
1087 SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
1088 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
1090 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
1093 /* Restore saved registers. */
1094 offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
1095 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1097 if (compiler->saveds > 2 || compiler->scratches > 9) {
1098 offset -= SSIZE_OF(sw);
1099 EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1101 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
1102 offset -= SSIZE_OF(sw);
1103 EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1105 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
1106 offset -= SSIZE_OF(sw);
1107 EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
1110 /* Copy fourth argument and return address. */
1111 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1112 if (fast_call) {
1113 offset = stack_size;
1114 *extra_space = 0;
1116 if (word_arg_count >= 4 && prev_args_size == 0) {
1117 offset -= SSIZE_OF(sw);
1118 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1119 FAIL_IF(!inst);
1120 *inst = XCHG_r_rm;
1122 SLJIT_ASSERT(args_size != prev_args_size);
1123 } else {
1124 if (word_arg_count >= 4) {
1125 offset -= SSIZE_OF(sw);
1126 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1129 if (args_size != prev_args_size)
1130 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1133 if (args_size != prev_args_size)
1134 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
1135 } else {
1136 #endif /* SLJIT_X86_32_FASTCALL */
1137 offset = stack_size - SSIZE_OF(sw);
1138 *extra_space = args_size;
1140 if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
1141 offset -= SSIZE_OF(sw);
1142 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1143 FAIL_IF(!inst);
1144 *inst = XCHG_r_rm;
1146 SLJIT_ASSERT(prev_args_size > 0);
1147 } else {
1148 if (word_arg_count >= 4) {
1149 offset -= SSIZE_OF(sw);
1150 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1153 if (prev_args_size > 0)
1154 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1157 /* Copy return address. */
1158 if (prev_args_size > 0)
1159 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
1160 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1162 #endif /* SLJIT_X86_32_FASTCALL */
1164 while (types != 0) {
1165 switch (types & SLJIT_ARG_MASK) {
1166 case SLJIT_ARG_TYPE_F64:
1167 offset -= SSIZE_OF(f64);
1168 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1169 float_arg_count--;
1170 break;
1171 case SLJIT_ARG_TYPE_F32:
1172 offset -= SSIZE_OF(f32);
1173 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1174 float_arg_count--;
1175 break;
1176 default:
1177 switch (word_arg_count) {
1178 case 1:
1179 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1180 if (fast_call) {
1181 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
1182 break;
1184 #endif
1185 offset -= SSIZE_OF(sw);
1186 if (r2_offset != 0) {
1187 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
1188 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1189 } else
1190 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1191 break;
1192 case 2:
1193 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1194 if (fast_call)
1195 break;
1196 #endif
1197 offset -= SSIZE_OF(sw);
1198 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1199 break;
1200 case 3:
1201 offset -= SSIZE_OF(sw);
1202 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
1203 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1204 break;
1206 word_arg_count--;
1207 break;
1209 types >>= SLJIT_ARG_SHIFT;
1212 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1213 /* Skip return address. */
1214 if (fast_call)
1215 offset -= SSIZE_OF(sw);
1216 #endif
1218 SLJIT_ASSERT(offset >= 0);
1220 if (offset == 0)
1221 return SLJIT_SUCCESS;
1223 BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
1224 return SLJIT_SUCCESS;
/* Emits the final 'add esp, imm; ret' sequence of a tail call when the
   callee consumes more argument stack space than the current frame can
   reuse, so 'extra_space' bytes must still be released before returning. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	/* Release the leftover argument area. */
	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);

	/* Emit the one-byte 'ret'; INC_SIZE/RET write through 'inst'. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}
/* Emits a call to a code address patched in later through the returned
   sljit_jump. Three paths are visible below: tail call (SLJIT_CALL_RETURN),
   x86-32 fastcall (when SLJIT_X86_32_FASTCALL is enabled and the call type
   is SLJIT_CALL), and the default cdecl-style path. Returns NULL on error. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		/* tail_call_with_args reads the call type from stack_size and
		   stores the remaining stack adjustment back into it. */
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0) {
			/* Frame fully released; the tail call degrades to a plain jump. */
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
			return sljit_emit_jump(compiler, type);
		}

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		/* Stack could not be reduced to zero: release the rest after the jump. */
		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		/* Fastcall: callee pops its arguments, so no extra adjustment (0). */
		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	/* Cdecl: the caller releases stack_size bytes of arguments afterwards. */
	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
/* Emits an indirect call through the src/srcw operand (register, memory, or
   immediate). Mirrors sljit_emit_call: tail-call path, optional fastcall
   path, and the default cdecl-style path. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		/* stack_size carries the call type in and the remaining
		   stack adjustment out (see tail_call_with_args). */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		if (!(src & SLJIT_IMM)) {
			/* tail_call_with_args moved a non-immediate target into R0. */
			src = SLJIT_R0;
			srcw = 0;
		}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		/* R2 is an argument register here (reg_map[SLJIT_R2] == 1, see
		   assert above); if the call target uses R2, swap it with R0 in
		   the addressing mode and tell c_fast_call_with_args to swap the
		   register contents to match. SLJIT_R0 == 1 and SLJIT_R2 == 3
		   differ only in bit 1, so xor with 0x2 flips between them. */
		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		/* SP is displaced by the pushed arguments while the jump is
		   emitted, so SP-relative offsets are biased and restored below. */
		compiler->scratches_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->scratches_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		/* Fastcall: callee pops its arguments, no caller adjustment (0). */
		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	/* Same SP-bias dance as the fastcall path above. */
	compiler->scratches_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->scratches_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	/* Cdecl: the caller releases stack_size bytes of arguments afterwards. */
	return post_call_with_args(compiler, arg_types, stack_size);
}
/* Pops the return address pushed by a fast (sljit internal) call into dst:
   a single 'pop reg' for registers, or 'pop r/m' for a memory destination. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
/* Pushes the return address held in src back onto the stack and emits 'ret',
   i.e. returns through an address previously saved by sljit_emit_fast_enter. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* 'push reg' + 'ret': both single-byte opcodes (1 + 1). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		/* Memory source: 'push r/m' is GROUP_FF with the PUSH_rm
		   opcode extension merged into the ModRM reg field. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		/* 'ret' goes into its own buffer chunk in this branch. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
/* Adjusts the CET shadow stack (when the CPU supports it) before a return
   that skips the current frame, by the total frame size: locals plus the
   saved registers counted below. */
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_sw size;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	/* Frame size = locals + one word (presumably the saved frame word —
	   TODO(review): confirm against emit_stack_frame_release) + extra
	   scratches above 9 + up to three saved registers, in machine words. */
	size = compiler->local_size;
	size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}