/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* x86 32-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/*  Emit instructions                                                    */
/* --------------------------------------------------------------------- */
33 static sljit_s32
emit_do_imm(struct sljit_compiler
*compiler
, sljit_u8 opcode
, sljit_sw imm
)
37 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1 + sizeof(sljit_sw
));
39 INC_SIZE(1 + sizeof(sljit_sw
));
41 sljit_unaligned_store_sw(inst
, imm
);
45 /* Size contains the flags as well. */
46 static sljit_u8
* emit_x86_instruction(struct sljit_compiler
*compiler
, sljit_uw size
,
47 /* The register or immediate operand. */
48 sljit_s32 a
, sljit_sw imma
,
49 /* The general operand (not immediate). */
50 sljit_s32 b
, sljit_sw immb
)
55 sljit_uw flags
= size
;
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags
& (EX86_BIN_INS
| EX86_SHIFT_INS
)) != (EX86_BIN_INS
| EX86_SHIFT_INS
));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags
& (EX86_BIN_INS
| EX86_SHIFT_INS
)) || (flags
& (EX86_BYTE_ARG
| EX86_HALF_ARG
)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags
& (EX86_BYTE_ARG
| EX86_HALF_ARG
)) != (EX86_BYTE_ARG
| EX86_HALF_ARG
));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a
& SLJIT_IMM
) || !(flags
& EX86_SSE2
));
66 SLJIT_ASSERT((flags
& (EX86_PREF_F2
| EX86_PREF_F3
)) != (EX86_PREF_F2
| EX86_PREF_F3
)
67 && (flags
& (EX86_PREF_F2
| EX86_PREF_66
)) != (EX86_PREF_F2
| EX86_PREF_66
)
68 && (flags
& (EX86_PREF_F3
| EX86_PREF_66
)) != (EX86_PREF_F3
| EX86_PREF_66
));
73 if (flags
& (EX86_PREF_F2
| EX86_PREF_F3
))
75 if (flags
& EX86_PREF_66
)
78 /* Calculate size of b. */
79 inst_size
+= 1; /* mod r/m byte. */
82 inst_size
+= sizeof(sljit_sw
);
84 if (immb
!= 0 && !(b
& OFFS_REG_MASK
)) {
85 /* Immediate operand. */
86 if (immb
<= 127 && immb
>= -128)
87 inst_size
+= sizeof(sljit_s8
);
89 inst_size
+= sizeof(sljit_sw
);
91 else if (reg_map
[b
& REG_MASK
] == 5) {
92 /* Swap registers if possible. */
93 if ((b
& OFFS_REG_MASK
) && (immb
& 0x3) == 0 && reg_map
[OFFS_REG(b
)] != 5)
94 b
= SLJIT_MEM
| OFFS_REG(b
) | TO_OFFS_REG(b
& REG_MASK
);
96 inst_size
+= sizeof(sljit_s8
);
99 if (reg_map
[b
& REG_MASK
] == 4 && !(b
& OFFS_REG_MASK
))
100 b
|= TO_OFFS_REG(SLJIT_SP
);
102 if (b
& OFFS_REG_MASK
)
103 inst_size
+= 1; /* SIB byte. */
107 /* Calculate size of a. */
109 if (flags
& EX86_BIN_INS
) {
110 if (imma
<= 127 && imma
>= -128) {
112 flags
|= EX86_BYTE_ARG
;
116 else if (flags
& EX86_SHIFT_INS
) {
120 flags
|= EX86_BYTE_ARG
;
122 } else if (flags
& EX86_BYTE_ARG
)
124 else if (flags
& EX86_HALF_ARG
)
125 inst_size
+= sizeof(short);
127 inst_size
+= sizeof(sljit_sw
);
130 SLJIT_ASSERT(!(flags
& EX86_SHIFT_INS
) || a
== SLJIT_PREF_SHIFT_REG
);
132 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + inst_size
);
135 /* Encoding the byte. */
137 if (flags
& EX86_PREF_F2
)
139 if (flags
& EX86_PREF_F3
)
141 if (flags
& EX86_PREF_66
)
144 buf_ptr
= inst
+ size
;
146 /* Encode mod/rm byte. */
147 if (!(flags
& EX86_SHIFT_INS
)) {
148 if ((flags
& EX86_BIN_INS
) && (a
& SLJIT_IMM
))
149 *inst
= (flags
& EX86_BYTE_ARG
) ? GROUP_BINARY_83
: GROUP_BINARY_81
;
153 else if (!(flags
& EX86_SSE2_OP1
))
154 *buf_ptr
= U8(reg_map
[a
] << 3);
156 *buf_ptr
= U8(a
<< 3);
161 *inst
= GROUP_SHIFT_1
;
163 *inst
= GROUP_SHIFT_N
;
165 *inst
= GROUP_SHIFT_CL
;
169 if (!(b
& SLJIT_MEM
)) {
170 *buf_ptr
= U8(*buf_ptr
| MOD_REG
| (!(flags
& EX86_SSE2_OP2
) ? reg_map
[b
] : b
));
172 } else if (b
& REG_MASK
) {
173 reg_map_b
= reg_map
[b
& REG_MASK
];
175 if (!(b
& OFFS_REG_MASK
) || (b
& OFFS_REG_MASK
) == TO_OFFS_REG(SLJIT_SP
)) {
176 if (immb
!= 0 || reg_map_b
== 5) {
177 if (immb
<= 127 && immb
>= -128)
183 if (!(b
& OFFS_REG_MASK
))
184 *buf_ptr
++ |= reg_map_b
;
187 *buf_ptr
++ = U8(reg_map_b
| (reg_map
[OFFS_REG(b
)] << 3));
190 if (immb
!= 0 || reg_map_b
== 5) {
191 if (immb
<= 127 && immb
>= -128)
192 *buf_ptr
++ = U8(immb
); /* 8 bit displacement. */
194 sljit_unaligned_store_sw(buf_ptr
, immb
); /* 32 bit displacement. */
195 buf_ptr
+= sizeof(sljit_sw
);
204 *buf_ptr
++ = U8(reg_map_b
| (reg_map
[OFFS_REG(b
)] << 3) | (immb
<< 6));
212 sljit_unaligned_store_sw(buf_ptr
, immb
); /* 32 bit displacement. */
213 buf_ptr
+= sizeof(sljit_sw
);
217 if (flags
& EX86_BYTE_ARG
)
219 else if (flags
& EX86_HALF_ARG
)
220 sljit_unaligned_store_s16(buf_ptr
, (sljit_s16
)imma
);
221 else if (!(flags
& EX86_SHIFT_INS
))
222 sljit_unaligned_store_sw(buf_ptr
, imma
);
225 return !(flags
& EX86_SHIFT_INS
) ? inst
: (inst
+ 1);
228 /* --------------------------------------------------------------------- */
230 /* --------------------------------------------------------------------- */
232 static sljit_u8
* generate_far_jump_code(struct sljit_jump
*jump
, sljit_u8
*code_ptr
, sljit_sw executable_offset
)
234 sljit_uw type
= jump
->flags
>> TYPE_SHIFT
;
236 if (type
== SLJIT_JUMP
) {
237 *code_ptr
++ = JMP_i32
;
240 else if (type
>= SLJIT_FAST_CALL
) {
241 *code_ptr
++ = CALL_i32
;
245 *code_ptr
++ = GROUP_0F
;
246 *code_ptr
++ = get_jump_code(type
);
250 if (jump
->flags
& JUMP_LABEL
)
251 jump
->flags
|= PATCH_MW
;
253 sljit_unaligned_store_sw(code_ptr
, (sljit_sw
)(jump
->u
.target
- (jump
->addr
+ 4) - (sljit_uw
)executable_offset
));
259 #define ENTER_R2_USED 0x00001
260 #define ENTER_R2_SAVED 0x00002
261 #define ENTER_R2_TO_S 0x00004
262 #define ENTER_R2_TO_R0 0x00008
263 #define ENTER_R1_TO_S 0x00010
264 #define ENTER_TMP_TO_R4 0x00020
265 #define ENTER_TMP_TO_S 0x00040
267 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_enter(struct sljit_compiler
*compiler
,
268 sljit_s32 options
, sljit_s32 arg_types
, sljit_s32 scratches
, sljit_s32 saveds
,
269 sljit_s32 fscratches
, sljit_s32 fsaveds
, sljit_s32 local_size
)
271 sljit_s32 word_arg_count
, saved_arg_count
, float_arg_count
;
272 sljit_s32 size
, locals_offset
, args_size
, types
, status
;
273 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(options
);
276 sljit_s32 r2_offset
= -1;
280 CHECK(check_sljit_emit_enter(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
));
281 set_emit_enter(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
);
283 /* Emit ENDBR32 at function entry if needed. */
284 FAIL_IF(emit_endbranch(compiler
));
286 SLJIT_COMPILE_ASSERT(SLJIT_FR0
== 1, float_register_index_start
);
288 arg_types
>>= SLJIT_ARG_SHIFT
;
293 args_size
= SSIZE_OF(sw
);
296 switch (types
& SLJIT_ARG_MASK
) {
297 case SLJIT_ARG_TYPE_F64
:
299 FAIL_IF(emit_sse2_load(compiler
, 0, float_arg_count
, SLJIT_MEM1(SLJIT_SP
), args_size
));
300 args_size
+= SSIZE_OF(f64
);
302 case SLJIT_ARG_TYPE_F32
:
304 FAIL_IF(emit_sse2_load(compiler
, 1, float_arg_count
, SLJIT_MEM1(SLJIT_SP
), args_size
));
305 args_size
+= SSIZE_OF(f32
);
310 if (!(types
& SLJIT_ARG_TYPE_SCRATCH_REG
))
313 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
314 if (word_arg_count
<= 2 && !(options
& SLJIT_ENTER_CDECL
))
316 #endif /* SLJIT_X86_32_FASTCALL */
318 if (word_arg_count
== 4) {
319 if (types
& SLJIT_ARG_TYPE_SCRATCH_REG
) {
320 status
|= ENTER_TMP_TO_R4
;
321 arg_types
&= ~(SLJIT_ARG_FULL_MASK
<< 3 * SLJIT_ARG_SHIFT
);
322 } else if (saved_arg_count
== 4) {
323 status
|= ENTER_TMP_TO_S
;
324 arg_types
&= ~(SLJIT_ARG_FULL_MASK
<< 3 * SLJIT_ARG_SHIFT
);
328 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
329 if (word_arg_count
== 3 && ((types
& SLJIT_ARG_TYPE_SCRATCH_REG
) || (saved_arg_count
+ kept_saveds_count
> 3)))
330 status
|= ENTER_R2_USED
;
331 #endif /* SLJIT_X86_32_FASTCALL */
333 args_size
+= SSIZE_OF(sw
);
336 types
>>= SLJIT_ARG_SHIFT
;
339 args_size
-= SSIZE_OF(sw
);
340 compiler
->args_size
= args_size
;
342 /* [esp+0] for saving temporaries and function calls. */
343 locals_offset
= 2 * SSIZE_OF(sw
);
345 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
346 if ((options
& SLJIT_ENTER_CDECL
) && scratches
>= 3)
347 locals_offset
= 4 * SSIZE_OF(sw
);
350 locals_offset
= 4 * SSIZE_OF(sw
);
353 compiler
->scratches_offset
= locals_offset
;
356 locals_offset
+= ((scratches
> (3 + 6)) ? 6 : (scratches
- 3)) * SSIZE_OF(sw
);
359 locals_offset
+= (saveds
- 3) * SSIZE_OF(sw
);
361 compiler
->locals_offset
= locals_offset
;
363 size
= 1 + (scratches
> 9 ? (scratches
- 9) : 0) + (saveds
<= 3 ? saveds
: 3) - kept_saveds_count
;
364 inst
= (sljit_u8
*)ensure_buf(compiler
, (sljit_uw
)(size
+ 1));
367 INC_SIZE((sljit_uw
)size
);
368 PUSH_REG(reg_map
[TMP_REG1
]);
369 if (saveds
> 2 || scratches
> 9)
370 PUSH_REG(reg_map
[SLJIT_S2
]);
371 if ((saveds
> 1 && kept_saveds_count
<= 1) || scratches
> 10)
372 PUSH_REG(reg_map
[SLJIT_S1
]);
373 if ((saveds
> 0 && kept_saveds_count
== 0) || scratches
> 11)
374 PUSH_REG(reg_map
[SLJIT_S0
]);
376 size
*= SSIZE_OF(sw
);
378 if (status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
))
379 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
+ size
);
381 size
+= SSIZE_OF(sw
);
383 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
384 if (!(options
& SLJIT_ENTER_CDECL
))
388 local_size
= ((locals_offset
+ local_size
+ size
+ 0xf) & ~0xf) - size
;
389 compiler
->local_size
= local_size
;
391 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
392 if (!(options
& SLJIT_ENTER_CDECL
))
397 saved_arg_count
= kept_saveds_count
;
400 switch (arg_types
& SLJIT_ARG_MASK
) {
401 case SLJIT_ARG_TYPE_F64
:
402 args_size
+= SSIZE_OF(f64
);
404 case SLJIT_ARG_TYPE_F32
:
405 args_size
+= SSIZE_OF(f32
);
409 SLJIT_ASSERT(word_arg_count
<= 3 || (word_arg_count
== 4 && !(status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
))));
411 if (!(arg_types
& SLJIT_ARG_TYPE_SCRATCH_REG
))
414 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
415 if (!(options
& SLJIT_ENTER_CDECL
)) {
416 if (word_arg_count
== 1) {
417 if (arg_types
& SLJIT_ARG_TYPE_SCRATCH_REG
) {
418 if (!(status
& ENTER_R2_USED
))
419 status
|= ENTER_R2_TO_R0
;
421 EMIT_MOV(compiler
, SLJIT_R0
, 0, SLJIT_R2
, 0);
425 status
|= ENTER_R2_SAVED
;
427 if (!(status
& ENTER_R2_USED
))
428 status
|= ENTER_R2_TO_S
;
430 EMIT_MOV(compiler
, (SLJIT_S0
+ 1) - saved_arg_count
, 0, SLJIT_R2
, 0);
434 if (word_arg_count
== 2) {
435 if (!(arg_types
& SLJIT_ARG_TYPE_SCRATCH_REG
))
436 status
|= ENTER_R1_TO_S
;
440 #endif /* SLJIT_X86_32_FASTCALL */
442 if ((arg_types
& SLJIT_ARG_TYPE_SCRATCH_REG
) || saved_arg_count
> 3) {
444 if (word_arg_count
== 3 && local_size
> 4 * 4096)
445 r2_offset
= local_size
+ args_size
;
448 EMIT_MOV(compiler
, word_arg_count
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
);
451 EMIT_MOV(compiler
, (SLJIT_S0
+ 1) - saved_arg_count
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
);
453 args_size
+= SSIZE_OF(sw
);
456 arg_types
>>= SLJIT_ARG_SHIFT
;
459 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
460 if (!(options
& SLJIT_ENTER_CDECL
)) {
461 if (status
& ENTER_R2_TO_S
) {
462 SLJIT_ASSERT(status
& ENTER_R2_SAVED
);
463 EMIT_MOV(compiler
, SLJIT_S0
- kept_saveds_count
, 0, SLJIT_R2
, 0);
464 } else if (status
& ENTER_R2_TO_R0
)
465 EMIT_MOV(compiler
, SLJIT_R0
, 0, SLJIT_R2
, 0);
467 size
= kept_saveds_count
;
468 if (status
& ENTER_R2_SAVED
)
471 if ((status
& ENTER_R1_TO_S
) && size
< 3)
472 EMIT_MOV(compiler
, SLJIT_S0
- size
, 0, SLJIT_R1
, 0);
474 #endif /* SLJIT_X86_32_FASTCALL */
476 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET
> 0);
479 SLJIT_ASSERT(r2_offset
== -1 || local_size
> 4 * 4096);
481 if (local_size
> 4096) {
482 if (local_size
<= 4 * 4096) {
483 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096);
485 if (local_size
> 2 * 4096)
486 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096 * 2);
487 if (local_size
> 3 * 4096)
488 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096 * 3);
491 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_IMM
, local_size
>> 12);
493 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096);
494 BINARY_IMM32(SUB
, 4096, SLJIT_SP
, 0);
496 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 2);
501 inst
[1] = (sljit_u8
)-16;
506 if (local_size
> 0) {
507 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -local_size
);
508 BINARY_IMM32(SUB
, local_size
, SLJIT_SP
, 0);
512 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), r2_offset
);
516 SLJIT_ASSERT(local_size
> 0);
518 BINARY_IMM32(SUB
, local_size
, SLJIT_SP
, 0);
522 locals_offset
-= SSIZE_OF(sw
);
523 kept_saveds_count
= SLJIT_R3
- kept_saveds_count
;
525 while (saved_arg_count
> 3) {
526 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), locals_offset
, kept_saveds_count
, 0);
528 locals_offset
-= SSIZE_OF(sw
);
532 if (status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
)) {
533 size
= (status
& ENTER_TMP_TO_R4
) ? compiler
->scratches_offset
: locals_offset
;
534 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), size
, TMP_REG1
, 0);
537 return SLJIT_SUCCESS
;
540 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_set_context(struct sljit_compiler
*compiler
,
541 sljit_s32 options
, sljit_s32 arg_types
, sljit_s32 scratches
, sljit_s32 saveds
,
542 sljit_s32 fscratches
, sljit_s32 fsaveds
, sljit_s32 local_size
)
544 sljit_s32 args_size
, locals_offset
;
545 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
546 sljit_s32 word_arg_count
= 0;
550 CHECK(check_sljit_set_context(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
));
551 set_set_context(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
);
553 arg_types
>>= SLJIT_ARG_SHIFT
;
556 switch (arg_types
& SLJIT_ARG_MASK
) {
557 case SLJIT_ARG_TYPE_F64
:
558 args_size
+= SSIZE_OF(f64
);
560 case SLJIT_ARG_TYPE_F32
:
561 args_size
+= SSIZE_OF(f32
);
564 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
565 if (word_arg_count
>= 2)
566 args_size
+= SSIZE_OF(sw
);
569 args_size
+= SSIZE_OF(sw
);
573 arg_types
>>= SLJIT_ARG_SHIFT
;
576 compiler
->args_size
= args_size
;
578 /* [esp+0] for saving temporaries and function calls. */
579 locals_offset
= 2 * SSIZE_OF(sw
);
581 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
582 if ((options
& SLJIT_ENTER_CDECL
) && scratches
>= 3)
583 locals_offset
= 4 * SSIZE_OF(sw
);
586 locals_offset
= 4 * SSIZE_OF(sw
);
589 compiler
->scratches_offset
= locals_offset
;
592 locals_offset
+= ((scratches
> (3 + 6)) ? 6 : (scratches
- 3)) * SSIZE_OF(sw
);
595 locals_offset
+= (saveds
- 3) * SSIZE_OF(sw
);
597 compiler
->locals_offset
= locals_offset
;
599 saveds
= (2 + (scratches
> 9 ? (scratches
- 9) : 0) + (saveds
<= 3 ? saveds
: 3) - SLJIT_KEPT_SAVEDS_COUNT(options
)) * SSIZE_OF(sw
);
601 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
602 if (!(options
& SLJIT_ENTER_CDECL
))
606 compiler
->local_size
= ((locals_offset
+ local_size
+ saveds
+ 0xf) & ~0xf) - saveds
;
607 return SLJIT_SUCCESS
;
610 static sljit_s32
emit_stack_frame_release(struct sljit_compiler
*compiler
)
612 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(compiler
->options
);
616 size
= (sljit_uw
)(1 + (compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0) +
617 (compiler
->saveds
<= 3 ? compiler
->saveds
: 3) - kept_saveds_count
);
618 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + size
);
623 if ((compiler
->saveds
> 0 && kept_saveds_count
== 0) || compiler
->scratches
> 11)
624 POP_REG(reg_map
[SLJIT_S0
]);
625 if ((compiler
->saveds
> 1 && kept_saveds_count
<= 1) || compiler
->scratches
> 10)
626 POP_REG(reg_map
[SLJIT_S1
]);
627 if (compiler
->saveds
> 2 || compiler
->scratches
> 9)
628 POP_REG(reg_map
[SLJIT_S2
]);
629 POP_REG(reg_map
[TMP_REG1
]);
631 return SLJIT_SUCCESS
;
634 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_return_void(struct sljit_compiler
*compiler
)
640 CHECK(check_sljit_emit_return_void(compiler
));
642 SLJIT_ASSERT(compiler
->args_size
>= 0);
643 SLJIT_ASSERT(compiler
->local_size
> 0);
645 BINARY_IMM32(ADD
, compiler
->local_size
, SLJIT_SP
, 0);
647 FAIL_IF(emit_stack_frame_release(compiler
));
650 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
651 if (compiler
->args_size
> 0 && !(compiler
->options
& SLJIT_ENTER_CDECL
))
654 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + size
);
659 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
660 if (compiler
->args_size
> 0 && !(compiler
->options
& SLJIT_ENTER_CDECL
)) {
661 RET_I16(U8(compiler
->args_size
));
662 return SLJIT_SUCCESS
;
667 return SLJIT_SUCCESS
;
/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */
674 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
676 static sljit_sw
c_fast_call_get_stack_size(sljit_s32 arg_types
, sljit_s32
*word_arg_count_ptr
)
678 sljit_sw stack_size
= 0;
679 sljit_s32 word_arg_count
= 0;
681 arg_types
>>= SLJIT_ARG_SHIFT
;
684 switch (arg_types
& SLJIT_ARG_MASK
) {
685 case SLJIT_ARG_TYPE_F64
:
686 stack_size
+= SSIZE_OF(f64
);
688 case SLJIT_ARG_TYPE_F32
:
689 stack_size
+= SSIZE_OF(f32
);
693 if (word_arg_count
> 2)
694 stack_size
+= SSIZE_OF(sw
);
698 arg_types
>>= SLJIT_ARG_SHIFT
;
701 if (word_arg_count_ptr
)
702 *word_arg_count_ptr
= word_arg_count
;
707 static sljit_s32
c_fast_call_with_args(struct sljit_compiler
*compiler
,
708 sljit_s32 arg_types
, sljit_sw stack_size
, sljit_s32 word_arg_count
, sljit_s32 swap_args
)
711 sljit_s32 float_arg_count
;
713 if (stack_size
== SSIZE_OF(sw
) && word_arg_count
== 3) {
714 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
717 PUSH_REG(reg_map
[SLJIT_R2
]);
719 else if (stack_size
> 0) {
720 if (word_arg_count
>= 4)
721 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
723 BINARY_IMM32(SUB
, stack_size
, SLJIT_SP
, 0);
726 arg_types
>>= SLJIT_ARG_SHIFT
;
730 switch (arg_types
& SLJIT_ARG_MASK
) {
731 case SLJIT_ARG_TYPE_F64
:
733 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
734 stack_size
+= SSIZE_OF(f64
);
736 case SLJIT_ARG_TYPE_F32
:
738 FAIL_IF(emit_sse2_store(compiler
, 1, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
739 stack_size
+= SSIZE_OF(f32
);
743 if (word_arg_count
== 3) {
744 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
, SLJIT_R2
, 0);
745 stack_size
+= SSIZE_OF(sw
);
747 else if (word_arg_count
== 4) {
748 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
, TMP_REG1
, 0);
749 stack_size
+= SSIZE_OF(sw
);
754 arg_types
>>= SLJIT_ARG_SHIFT
;
758 if (word_arg_count
> 0) {
760 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
764 *inst
++ = U8(XCHG_EAX_r
| reg_map
[SLJIT_R2
]);
767 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 2);
772 *inst
++ = U8(MOD_REG
| (reg_map
[SLJIT_R2
] << 3) | reg_map
[SLJIT_R0
]);
776 return SLJIT_SUCCESS
;
781 static sljit_s32
cdecl_call_get_stack_size(struct sljit_compiler
*compiler
, sljit_s32 arg_types
, sljit_s32
*word_arg_count_ptr
)
783 sljit_sw stack_size
= 0;
784 sljit_s32 word_arg_count
= 0;
786 arg_types
>>= SLJIT_ARG_SHIFT
;
789 switch (arg_types
& SLJIT_ARG_MASK
) {
790 case SLJIT_ARG_TYPE_F64
:
791 stack_size
+= SSIZE_OF(f64
);
793 case SLJIT_ARG_TYPE_F32
:
794 stack_size
+= SSIZE_OF(f32
);
798 stack_size
+= SSIZE_OF(sw
);
802 arg_types
>>= SLJIT_ARG_SHIFT
;
805 if (word_arg_count_ptr
)
806 *word_arg_count_ptr
= word_arg_count
;
808 if (stack_size
<= compiler
->scratches_offset
)
811 return ((stack_size
- compiler
->scratches_offset
+ 0xf) & ~0xf);
814 static sljit_s32
cdecl_call_with_args(struct sljit_compiler
*compiler
,
815 sljit_s32 arg_types
, sljit_sw stack_size
, sljit_s32 word_arg_count
)
817 sljit_s32 float_arg_count
= 0;
820 if (word_arg_count
>= 4)
821 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
824 BINARY_IMM32(SUB
, stack_size
, SLJIT_SP
, 0);
828 arg_types
>>= SLJIT_ARG_SHIFT
;
831 switch (arg_types
& SLJIT_ARG_MASK
) {
832 case SLJIT_ARG_TYPE_F64
:
834 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
835 stack_size
+= SSIZE_OF(f64
);
837 case SLJIT_ARG_TYPE_F32
:
839 FAIL_IF(emit_sse2_store(compiler
, 1, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
840 stack_size
+= SSIZE_OF(f32
);
844 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
, (word_arg_count
>= 4) ? TMP_REG1
: word_arg_count
, 0);
845 stack_size
+= SSIZE_OF(sw
);
849 arg_types
>>= SLJIT_ARG_SHIFT
;
852 return SLJIT_SUCCESS
;
855 static sljit_s32
post_call_with_args(struct sljit_compiler
*compiler
,
856 sljit_s32 arg_types
, sljit_s32 stack_size
)
862 BINARY_IMM32(ADD
, stack_size
, SLJIT_SP
, 0);
864 if ((arg_types
& SLJIT_ARG_MASK
) < SLJIT_ARG_TYPE_F64
)
865 return SLJIT_SUCCESS
;
867 single
= ((arg_types
& SLJIT_ARG_MASK
) == SLJIT_ARG_TYPE_F32
);
869 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 3);
872 inst
[0] = single
? FSTPS
: FSTPD
;
873 inst
[1] = (0x03 << 3) | 0x04;
874 inst
[2] = (0x04 << 3) | reg_map
[SLJIT_SP
];
876 return emit_sse2_load(compiler
, single
, SLJIT_FR0
, SLJIT_MEM1(SLJIT_SP
), 0);
879 static sljit_s32
tail_call_with_args(struct sljit_compiler
*compiler
,
880 sljit_s32
*extra_space
, sljit_s32 arg_types
,
881 sljit_s32 src
, sljit_sw srcw
)
883 sljit_sw args_size
, prev_args_size
, saved_regs_size
;
884 sljit_sw types
, word_arg_count
, float_arg_count
;
885 sljit_sw stack_size
, prev_stack_size
, min_size
, offset
;
886 sljit_sw word_arg4_offset
;
887 sljit_u8 r2_offset
= 0;
888 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(compiler
->options
);
889 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
890 sljit_u8 fast_call
= (*extra_space
& 0xff) == SLJIT_CALL
;
894 ADJUST_LOCAL_OFFSET(src
, srcw
);
895 CHECK_EXTRA_REGS(src
, srcw
, (void)0);
897 saved_regs_size
= (1 + (compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0)
898 + (compiler
->saveds
<= 3 ? compiler
->saveds
: 3) - kept_saveds_count
) * SSIZE_OF(sw
);
902 arg_types
>>= SLJIT_ARG_SHIFT
;
906 while (arg_types
!= 0) {
907 types
= (types
<< SLJIT_ARG_SHIFT
) | (arg_types
& SLJIT_ARG_MASK
);
909 switch (arg_types
& SLJIT_ARG_MASK
) {
910 case SLJIT_ARG_TYPE_F64
:
911 args_size
+= SSIZE_OF(f64
);
914 case SLJIT_ARG_TYPE_F32
:
915 args_size
+= SSIZE_OF(f32
);
920 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
921 if (!fast_call
|| word_arg_count
> 2)
922 args_size
+= SSIZE_OF(sw
);
924 args_size
+= SSIZE_OF(sw
);
928 arg_types
>>= SLJIT_ARG_SHIFT
;
931 if (args_size
<= compiler
->args_size
932 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
933 && (!(compiler
->options
& SLJIT_ENTER_CDECL
) || args_size
== 0 || !fast_call
)
934 #endif /* SLJIT_X86_32_FASTCALL */
936 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
937 *extra_space
= fast_call
? 0 : args_size
;
938 prev_args_size
= compiler
->args_size
;
939 stack_size
= prev_args_size
+ SSIZE_OF(sw
) + saved_regs_size
;
940 #else /* !SLJIT_X86_32_FASTCALL */
942 stack_size
= args_size
+ SSIZE_OF(sw
) + saved_regs_size
;
943 #endif /* SLJIT_X86_32_FASTCALL */
945 offset
= stack_size
+ compiler
->local_size
;
947 if (!(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
948 if (word_arg_count
>= 1) {
949 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), 0, SLJIT_R0
, 0);
950 r2_offset
= sizeof(sljit_sw
);
952 EMIT_MOV(compiler
, SLJIT_R0
, 0, src
, srcw
);
955 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
956 if (!(compiler
->options
& SLJIT_ENTER_CDECL
)) {
958 offset
-= SSIZE_OF(sw
);
960 if (word_arg_count
>= 3) {
961 word_arg4_offset
= SSIZE_OF(sw
);
963 if (word_arg_count
+ float_arg_count
>= 4) {
964 word_arg4_offset
= SSIZE_OF(sw
) + SSIZE_OF(sw
);
965 if ((types
& SLJIT_ARG_MASK
) == SLJIT_ARG_TYPE_F64
)
966 word_arg4_offset
= SSIZE_OF(sw
) + SSIZE_OF(f64
);
969 /* In cdecl mode, at least one more word value must
970 * be present on the stack before the return address. */
971 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
- word_arg4_offset
, SLJIT_R2
, 0);
975 if (args_size
< prev_args_size
) {
976 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
- prev_args_size
- SSIZE_OF(sw
));
977 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
- args_size
- SSIZE_OF(sw
), SLJIT_R2
, 0);
979 } else if (prev_args_size
> 0) {
980 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
- prev_args_size
);
981 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
984 #endif /* SLJIT_X86_32_FASTCALL */
987 switch (types
& SLJIT_ARG_MASK
) {
988 case SLJIT_ARG_TYPE_F64
:
989 offset
-= SSIZE_OF(f64
);
990 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
993 case SLJIT_ARG_TYPE_F32
:
994 offset
-= SSIZE_OF(f32
);
995 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
999 switch (word_arg_count
) {
1001 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1003 EMIT_MOV(compiler
, SLJIT_R2
, 0, r2_offset
!= 0 ? SLJIT_MEM1(SLJIT_SP
) : SLJIT_R0
, 0);
1007 offset
-= SSIZE_OF(sw
);
1008 if (r2_offset
!= 0) {
1009 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), 0);
1010 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1012 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R0
, 0);
1015 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1019 offset
-= SSIZE_OF(sw
);
1020 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R1
, 0);
1023 offset
-= SSIZE_OF(sw
);
1026 offset
-= SSIZE_OF(sw
);
1027 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
1028 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1034 types
>>= SLJIT_ARG_SHIFT
;
1037 BINARY_IMM32(ADD
, compiler
->local_size
, SLJIT_SP
, 0);
1038 FAIL_IF(emit_stack_frame_release(compiler
));
1040 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1041 if (args_size
< prev_args_size
)
1042 BINARY_IMM32(ADD
, prev_args_size
- args_size
, SLJIT_SP
, 0);
1045 return SLJIT_SUCCESS
;
1048 stack_size
= args_size
+ SSIZE_OF(sw
);
1050 if (word_arg_count
>= 1 && !(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
1051 r2_offset
= SSIZE_OF(sw
);
1052 stack_size
+= SSIZE_OF(sw
);
1055 if (word_arg_count
>= 3)
1056 stack_size
+= SSIZE_OF(sw
);
1059 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1060 if (!(compiler
->options
& SLJIT_ENTER_CDECL
))
1061 prev_args_size
= compiler
->args_size
;
1064 prev_stack_size
= prev_args_size
+ SSIZE_OF(sw
) + saved_regs_size
;
1065 min_size
= prev_stack_size
+ compiler
->local_size
;
1067 word_arg4_offset
= compiler
->scratches_offset
;
1069 if (stack_size
> min_size
) {
1070 BINARY_IMM32(SUB
, stack_size
- min_size
, SLJIT_SP
, 0);
1071 if (src
== SLJIT_MEM1(SLJIT_SP
))
1072 srcw
+= stack_size
- min_size
;
1073 word_arg4_offset
+= stack_size
- min_size
;
1076 stack_size
= min_size
;
1078 if (word_arg_count
>= 3) {
1079 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), r2_offset
, SLJIT_R2
, 0);
1081 if (word_arg_count
>= 4)
1082 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), word_arg4_offset
);
1085 if (!(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
1086 if (word_arg_count
>= 1) {
1087 SLJIT_ASSERT(r2_offset
== sizeof(sljit_sw
));
1088 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), 0, SLJIT_R0
, 0);
1090 EMIT_MOV(compiler
, SLJIT_R0
, 0, src
, srcw
);
1093 /* Restore saved registers. */
1094 offset
= stack_size
- prev_args_size
- 2 * SSIZE_OF(sw
);
1095 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1097 if (compiler
->saveds
> 2 || compiler
->scratches
> 9) {
1098 offset
-= SSIZE_OF(sw
);
1099 EMIT_MOV(compiler
, SLJIT_S2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1101 if ((compiler
->saveds
> 1 && kept_saveds_count
<= 1) || compiler
->scratches
> 10) {
1102 offset
-= SSIZE_OF(sw
);
1103 EMIT_MOV(compiler
, SLJIT_S1
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1105 if ((compiler
->saveds
> 0 && kept_saveds_count
== 0) || compiler
->scratches
> 11) {
1106 offset
-= SSIZE_OF(sw
);
1107 EMIT_MOV(compiler
, SLJIT_S0
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1110 /* Copy fourth argument and return address. */
1111 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1113 offset
= stack_size
;
1116 if (word_arg_count
>= 4 && prev_args_size
== 0) {
1117 offset
-= SSIZE_OF(sw
);
1118 inst
= emit_x86_instruction(compiler
, 1, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1122 SLJIT_ASSERT(args_size
!= prev_args_size
);
1124 if (word_arg_count
>= 4) {
1125 offset
-= SSIZE_OF(sw
);
1126 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1129 if (args_size
!= prev_args_size
)
1130 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), stack_size
- prev_args_size
- SSIZE_OF(sw
));
1133 if (args_size
!= prev_args_size
)
1134 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
- args_size
- SSIZE_OF(sw
), SLJIT_R2
, 0);
1136 #endif /* SLJIT_X86_32_FASTCALL */
1137 offset
= stack_size
- SSIZE_OF(sw
);
1138 *extra_space
= args_size
;
1140 if (word_arg_count
>= 4 && prev_args_size
== SSIZE_OF(sw
)) {
1141 offset
-= SSIZE_OF(sw
);
1142 inst
= emit_x86_instruction(compiler
, 1, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
1146 SLJIT_ASSERT(prev_args_size
> 0);
1148 if (word_arg_count
>= 4) {
1149 offset
-= SSIZE_OF(sw
);
1150 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1153 if (prev_args_size
> 0)
1154 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), stack_size
- prev_args_size
- SSIZE_OF(sw
));
1157 /* Copy return address. */
1158 if (prev_args_size
> 0)
1159 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
- SSIZE_OF(sw
), SLJIT_R2
, 0);
1160 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1162 #endif /* SLJIT_X86_32_FASTCALL */
1164 while (types
!= 0) {
1165 switch (types
& SLJIT_ARG_MASK
) {
1166 case SLJIT_ARG_TYPE_F64
:
1167 offset
-= SSIZE_OF(f64
);
1168 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
1171 case SLJIT_ARG_TYPE_F32
:
1172 offset
-= SSIZE_OF(f32
);
1173 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
1177 switch (word_arg_count
) {
1179 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1181 EMIT_MOV(compiler
, SLJIT_R2
, 0, r2_offset
!= 0 ? SLJIT_MEM1(SLJIT_SP
) : SLJIT_R0
, 0);
1185 offset
-= SSIZE_OF(sw
);
1186 if (r2_offset
!= 0) {
1187 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), 0);
1188 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1190 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R0
, 0);
1193 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1197 offset
-= SSIZE_OF(sw
);
1198 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R1
, 0);
1201 offset
-= SSIZE_OF(sw
);
1202 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), r2_offset
);
1203 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
1209 types
>>= SLJIT_ARG_SHIFT
;
1212 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1213 /* Skip return address. */
1215 offset
-= SSIZE_OF(sw
);
1218 SLJIT_ASSERT(offset
>= 0);
1221 return SLJIT_SUCCESS
;
1223 BINARY_IMM32(ADD
, offset
, SLJIT_SP
, 0);
1224 return SLJIT_SUCCESS
;
1227 static sljit_s32
emit_tail_call_end(struct sljit_compiler
*compiler
, sljit_s32 extra_space
)
1229 /* Called when stack consumption cannot be reduced to 0. */
1232 BINARY_IMM32(ADD
, extra_space
, SLJIT_SP
, 0);
1234 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
1239 return SLJIT_SUCCESS
;
1242 SLJIT_API_FUNC_ATTRIBUTE
struct sljit_jump
* sljit_emit_call(struct sljit_compiler
*compiler
, sljit_s32 type
,
1243 sljit_s32 arg_types
)
1245 struct sljit_jump
*jump
;
1246 sljit_sw stack_size
= 0;
1247 sljit_s32 word_arg_count
;
1250 CHECK_PTR(check_sljit_emit_call(compiler
, type
, arg_types
));
1252 if (type
& SLJIT_CALL_RETURN
) {
1254 PTR_FAIL_IF(tail_call_with_args(compiler
, &stack_size
, arg_types
, SLJIT_IMM
, 0));
1256 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1257 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1258 compiler
->skip_checks
= 1;
1261 if (stack_size
== 0) {
1262 type
= SLJIT_JUMP
| (type
& SLJIT_REWRITABLE_JUMP
);
1263 return sljit_emit_jump(compiler
, type
);
1266 jump
= sljit_emit_jump(compiler
, type
);
1267 PTR_FAIL_IF(jump
== NULL
);
1269 PTR_FAIL_IF(emit_tail_call_end(compiler
, stack_size
));
1273 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1274 if ((type
& 0xff) == SLJIT_CALL
) {
1275 stack_size
= c_fast_call_get_stack_size(arg_types
, &word_arg_count
);
1276 PTR_FAIL_IF(c_fast_call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
, 0));
1278 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1279 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1280 compiler
->skip_checks
= 1;
1283 jump
= sljit_emit_jump(compiler
, type
);
1284 PTR_FAIL_IF(jump
== NULL
);
1286 PTR_FAIL_IF(post_call_with_args(compiler
, arg_types
, 0));
1291 stack_size
= cdecl_call_get_stack_size(compiler
, arg_types
, &word_arg_count
);
1292 PTR_FAIL_IF(cdecl_call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
));
1294 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1295 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1296 compiler
->skip_checks
= 1;
1299 jump
= sljit_emit_jump(compiler
, type
);
1300 PTR_FAIL_IF(jump
== NULL
);
1302 PTR_FAIL_IF(post_call_with_args(compiler
, arg_types
, stack_size
));
1306 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_icall(struct sljit_compiler
*compiler
, sljit_s32 type
,
1307 sljit_s32 arg_types
,
1308 sljit_s32 src
, sljit_sw srcw
)
1310 sljit_sw stack_size
= 0;
1311 sljit_s32 word_arg_count
;
1312 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1313 sljit_s32 swap_args
;
1317 CHECK(check_sljit_emit_icall(compiler
, type
, arg_types
, src
, srcw
));
1319 if (type
& SLJIT_CALL_RETURN
) {
1321 FAIL_IF(tail_call_with_args(compiler
, &stack_size
, arg_types
, src
, srcw
));
1323 if (!(src
& SLJIT_IMM
)) {
1328 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1329 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1330 compiler
->skip_checks
= 1;
1333 if (stack_size
== 0)
1334 return sljit_emit_ijump(compiler
, SLJIT_JUMP
, src
, srcw
);
1336 FAIL_IF(sljit_emit_ijump(compiler
, type
, src
, srcw
));
1337 return emit_tail_call_end(compiler
, stack_size
);
1340 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1341 SLJIT_ASSERT(reg_map
[SLJIT_R0
] == 0 && reg_map
[SLJIT_R2
] == 1 && SLJIT_R0
== 1 && SLJIT_R2
== 3);
1343 if ((type
& 0xff) == SLJIT_CALL
) {
1344 stack_size
= c_fast_call_get_stack_size(arg_types
, &word_arg_count
);
1347 if (word_arg_count
> 0) {
1348 if ((src
& REG_MASK
) == SLJIT_R2
|| OFFS_REG(src
) == SLJIT_R2
) {
1350 if (((src
& REG_MASK
) | 0x2) == SLJIT_R2
)
1352 if ((OFFS_REG(src
) | 0x2) == SLJIT_R2
)
1353 src
^= TO_OFFS_REG(0x2);
1357 FAIL_IF(c_fast_call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
, swap_args
));
1359 compiler
->scratches_offset
+= stack_size
;
1360 compiler
->locals_offset
+= stack_size
;
1362 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1363 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1364 compiler
->skip_checks
= 1;
1366 FAIL_IF(sljit_emit_ijump(compiler
, type
, src
, srcw
));
1368 compiler
->scratches_offset
-= stack_size
;
1369 compiler
->locals_offset
-= stack_size
;
1371 return post_call_with_args(compiler
, arg_types
, 0);
1375 stack_size
= cdecl_call_get_stack_size(compiler
, arg_types
, &word_arg_count
);
1376 FAIL_IF(cdecl_call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
));
1378 compiler
->scratches_offset
+= stack_size
;
1379 compiler
->locals_offset
+= stack_size
;
1381 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1382 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1383 compiler
->skip_checks
= 1;
1385 FAIL_IF(sljit_emit_ijump(compiler
, type
, src
, srcw
));
1387 compiler
->scratches_offset
-= stack_size
;
1388 compiler
->locals_offset
-= stack_size
;
1390 return post_call_with_args(compiler
, arg_types
, stack_size
);
1393 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_fast_enter(struct sljit_compiler
*compiler
, sljit_s32 dst
, sljit_sw dstw
)
1398 CHECK(check_sljit_emit_fast_enter(compiler
, dst
, dstw
));
1399 ADJUST_LOCAL_OFFSET(dst
, dstw
);
1401 CHECK_EXTRA_REGS(dst
, dstw
, (void)0);
1403 if (FAST_IS_REG(dst
)) {
1404 /* Unused dest is possible here. */
1405 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
1409 POP_REG(reg_map
[dst
]);
1410 return SLJIT_SUCCESS
;
1414 inst
= emit_x86_instruction(compiler
, 1, 0, 0, dst
, dstw
);
1417 return SLJIT_SUCCESS
;
1420 static sljit_s32
emit_fast_return(struct sljit_compiler
*compiler
, sljit_s32 src
, sljit_sw srcw
)
1424 CHECK_EXTRA_REGS(src
, srcw
, (void)0);
1426 if (FAST_IS_REG(src
)) {
1427 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1 + 1);
1431 PUSH_REG(reg_map
[src
]);
1434 inst
= emit_x86_instruction(compiler
, 1, 0, 0, src
, srcw
);
1439 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
1445 return SLJIT_SUCCESS
;
1448 static sljit_s32
skip_frames_before_return(struct sljit_compiler
*compiler
)
1452 /* Don't adjust shadow stack if it isn't enabled. */
1453 if (!cpu_has_shadow_stack())
1454 return SLJIT_SUCCESS
;
1456 SLJIT_ASSERT(compiler
->args_size
>= 0);
1457 SLJIT_ASSERT(compiler
->local_size
> 0);
1459 size
= compiler
->local_size
;
1460 size
+= (1 + (compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0)
1461 + (compiler
->saveds
<= 3 ? compiler
->saveds
: 3)) * SSIZE_OF(sw
);
1463 return adjust_shadow_stack(compiler
, SLJIT_MEM1(SLJIT_SP
), size
);