/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */

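/* emit_do_imm emits a single one-byte opcode immediately followed by a machine
   word sized immediate (the short "opcode + imm32" forms that need no ModR/M byte). */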
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

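/* emit_x86_instruction is the generic operand encoder: it first computes how many
   bytes the prefixes, ModR/M byte, optional SIB byte, displacement and immediate
   will need, reserves that much buffer space, then emits everything except the
   opcode itself. The returned pointer addresses the reserved opcode byte(s),
   which the caller fills in afterwards. */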
/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}
		else if (reg_map[b & REG_MASK] == 5)
			inst_size += sizeof(sljit_s8);

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if (b & OFFS_REG_MASK)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(a << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) {
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */

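/* generate_far_jump_code emits the long (32-bit relative) form of a jump or call.
   The displacement is either marked for later patching (when the jump targets a
   label) or computed directly from the absolute target address. */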
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}

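/* Function prologue. The first loop only measures the incoming argument area and
   derives the frame layout (stack_tmp_size, saveds_offset, locals_offset); the
   second part pushes the saved registers, loads the word/float arguments into
   their registers, and finally allocates local_size bytes (touching each page on
   Windows so the guard page mechanism keeps working). */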
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, float_arg_count, args_size, types;
	sljit_uw size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	types = arg_types;
	word_arg_count = 0;
	float_arg_count = 0;
	args_size = SSIZE_OF(sw);
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count > 2)
				args_size += SSIZE_OF(sw);
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	args_size -= SSIZE_OF(sw);
	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * SSIZE_OF(sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * SSIZE_OF(sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);

	size = (sljit_uw)(1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + (sljit_s32)(size * sizeof(sljit_sw)));

	word_arg_count = 0;
	args_size = (sljit_s32)((size + 1) * sizeof(sljit_sw));
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			if (word_arg_count <= 3) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (word_arg_count <= 2)
					break;
#endif
				EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
			}
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (word_arg_count > 0)
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
	if (word_arg_count > 1)
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0);
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif

	compiler->local_size = local_size;

#ifdef _WIN32
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}
	}
#endif /* _WIN32 */

	SLJIT_ASSERT(local_size > 0);

#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + SSIZE_OF(sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, SSIZE_OF(sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(SSIZE_OF(f64) - 1));

		if (word_arg_count == 4)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, SLJIT_R0, 0);
	}
#endif
	FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	if (word_arg_count == 4)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - SSIZE_OF(sw), TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

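/* sljit_set_context recomputes the same argument sizes and frame offsets as
   sljit_emit_enter, but emits no code; it is used when only the context of an
   already laid out function is (re)defined. */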
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args_size;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 word_arg_count = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	arg_types >>= SLJIT_ARG_SHIFT;
	args_size = 0;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count >= 2)
				args_size += SSIZE_OF(sw);
			word_arg_count++;
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * SSIZE_OF(sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * SSIZE_OF(sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * SSIZE_OF(sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(f64) - 1) & ~(SSIZE_OF(f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + SSIZE_OF(sw) - 1) & ~(SSIZE_OF(sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

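/* Pops the registers stored by the prologue in reverse order (S0..S2, then
   TMP_REG1); the local area must already have been released by the caller. */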
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);

	return SLJIT_SUCCESS;
}

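/* Epilogue for functions without a return value: releases the local area (either
   by reloading the saved stack pointer when doubles are kept aligned, or by
   adding local_size), restores the saved registers and returns. With fastcall,
   the callee also pops its own argument area using the "ret imm16" form. */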
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	FAIL_IF(emit_stack_frame_release(compiler));

	size = 1;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		size = 3;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0) {
		RET_I16(U8(compiler->args_size));
		return SLJIT_SUCCESS;
	}
#endif

	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

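/* Helpers for calls using the fastcall-style convention: the first two word
   arguments travel in registers, so only the remaining word arguments and the
   floating point arguments are stored on the stack before the call. */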
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)

static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}

static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_ARG_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += SSIZE_OF(sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += SSIZE_OF(sw);
				}
				break;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
		}
	}

	return SLJIT_SUCCESS;
}

#endif /* SLJIT_X86_32_FASTCALL */

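/* Helpers for cdecl calls: every argument is passed on the stack and the caller
   releases the argument area afterwards (post_call_with_args), which also moves
   a floating point return value from the x87 stack into SLJIT_FR0. */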
static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}

static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}

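/* Prepares a tail call: the outgoing arguments are written over the current
   frame so the jump target can reuse the caller's return address. When the new
   argument area does not fit, the frame is enlarged instead and *extra_space
   reports how many argument bytes still have to be released after the call
   returns (see emit_tail_call_end below). */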
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, prev_args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw base_reg, word_arg4_offset;
	sljit_u8 r2_offset = 0;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
#endif
	sljit_u8 *inst;

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);

	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (!fast_call || word_arg_count > 2)
				args_size += SSIZE_OF(sw);
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		*extra_space = fast_call ? 0 : args_size;
		prev_args_size = compiler->args_size;
		stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
#else /* !SLJIT_X86_32_FASTCALL */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
#endif /* SLJIT_X86_32_FASTCALL */

#if !defined(__APPLE__)
		if (compiler->options & SLJIT_F64_ALIGNMENT) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
			offset = stack_size;
			base_reg = SLJIT_MEM1(TMP_REG1);
		} else {
#endif /* !__APPLE__ */
			offset = stack_size + compiler->local_size;
			base_reg = SLJIT_MEM1(SLJIT_SP);
#if !defined(__APPLE__)
		}
#endif /* !__APPLE__ */

		if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (!fast_call)
			offset -= SSIZE_OF(sw);

		if (word_arg_count >= 3) {
			word_arg4_offset = SSIZE_OF(sw);

			if (word_arg_count + float_arg_count >= 4) {
				word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
				if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
					word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
			}

			/* In cdecl mode, at least one more word value must
			 * be present on the stack before the return address. */
			EMIT_MOV(compiler, base_reg, offset - word_arg4_offset, SLJIT_R2, 0);
		}

		if (fast_call) {
			if (args_size < prev_args_size) {
				EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size - SSIZE_OF(sw));
				EMIT_MOV(compiler, base_reg, offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
			}
		} else if (prev_args_size > 0) {
			EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, offset - prev_args_size);
			EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
		}
#endif /* SLJIT_X86_32_FASTCALL */

		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				FAIL_IF(emit_sse2_store(compiler, 0, base_reg, offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
					if (fast_call) {
						EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
						break;
					}
#endif
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, base_reg, offset, SLJIT_R0, 0);
					break;
				case 2:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
					if (fast_call)
						break;
#endif
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, base_reg, offset, SLJIT_R1, 0);
					break;
				case 3:
					offset -= SSIZE_OF(sw);
					break;
				case 4:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - SSIZE_OF(sw));
					EMIT_MOV(compiler, base_reg, offset, SLJIT_R2, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

#if !defined(__APPLE__)
		if (compiler->options & SLJIT_F64_ALIGNMENT) {
			EMIT_MOV(compiler, SLJIT_SP, 0, TMP_REG1, 0);
		} else {
#endif /* !__APPLE__ */
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#if !defined(__APPLE__)
		}
#endif /* !__APPLE__ */
		FAIL_IF(emit_stack_frame_release(compiler));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (args_size < prev_args_size)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, prev_args_size - args_size));
#endif

		return SLJIT_SUCCESS;
	}

	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	prev_args_size = compiler->args_size;
#else
	prev_args_size = 0;
#endif

	prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	base_reg = SLJIT_MEM1(SLJIT_SP);
	word_arg4_offset = compiler->saveds_offset - SSIZE_OF(sw);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT) {
		min_size += 2 * SSIZE_OF(sw);

		if (stack_size < min_size)
			stack_size = min_size;

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size);
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, stack_size - prev_stack_size));

		inst = emit_x86_instruction(compiler, 1, SLJIT_SP, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;

		if (src == SLJIT_MEM1(SLJIT_SP))
			src = SLJIT_MEM1(TMP_REG1);
		base_reg = SLJIT_MEM1(TMP_REG1);
	} else {
#endif /* !__APPLE__ */
	if (stack_size > min_size) {
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size - min_size));
		if (src == SLJIT_MEM1(SLJIT_SP))
			srcw += stack_size - min_size;
		word_arg4_offset += stack_size - min_size;
	}
	else
		stack_size = min_size;
#if !defined(__APPLE__)
	}
#endif /* !__APPLE__ */

	if (word_arg_count >= 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, base_reg, word_arg4_offset);
	}

	if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	if (compiler->saveds > 2 || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if (compiler->saveds > 1 || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if (compiler->saveds > 0 || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (fast_call) {
		offset = stack_size;
		*extra_space = 0;

		if (word_arg_count >= 4 && prev_args_size == 0) {
			offset -= SSIZE_OF(sw);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
			FAIL_IF(!inst);
			*inst = XCHG_r_rm;

			SLJIT_ASSERT(args_size != prev_args_size);
		} else {
			if (word_arg_count >= 4) {
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
			}

			if (args_size != prev_args_size)
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
		}

		if (args_size != prev_args_size)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
	} else {
#endif /* SLJIT_X86_32_FASTCALL */
	offset = stack_size - SSIZE_OF(sw);
	*extra_space = args_size;

	if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
		offset -= SSIZE_OF(sw);
		inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;

		SLJIT_ASSERT(prev_args_size > 0);
	} else {
		if (word_arg_count >= 4) {
			offset -= SSIZE_OF(sw);
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
		}

		if (prev_args_size > 0)
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
	}

	/* Copy return address. */
	if (prev_args_size > 0)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	}
#endif /* SLJIT_X86_32_FASTCALL */

	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (fast_call) {
					EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
					break;
				}
#endif
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (fast_call)
					break;
#endif
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Skip return address. */
	if (fast_call)
		offset -= SSIZE_OF(sw);
#endif

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, offset);
}

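/* Cleanup stub used when a tail call could not be turned into a plain jump:
   releases the remaining argument area and returns to the original caller. */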
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, extra_space));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}

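/* Emits a direct call, selecting the tail call, fastcall or cdecl path based on
   the call type and argument types; the returned jump still has to be bound to
   its target by the caller. */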
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_TAIL_CALL) {
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0) {
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
			return sljit_emit_jump(compiler, type);
		}

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}

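/* Indirect (register or memory target) variant of sljit_emit_call. For fastcall
   targets the call address may have to be swapped out of the argument registers
   first, and saveds_offset / locals_offset are shifted temporarily while the
   outgoing arguments occupy the stack. */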
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_TAIL_CALL) {
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		if (!(src & SLJIT_IMM)) {
			src = SLJIT_R0;
			srcw = 0;
		}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}

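/* sljit_emit_fast_enter pops the return address pushed by a fast call into dst,
   and emit_fast_return pushes it back and returns, emulating a call/return pair
   without building a full frame. */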
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

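/* Shadow stack (CET) support: locates the slot holding the return address of the
   current frame so adjust_shadow_stack() can bring the shadow stack back in sync
   before returning. */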
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_sw size, saved_size;
	sljit_s32 has_f64_aligment;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack ())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
	has_f64_aligment = 0;
#endif

	size = compiler->local_size;
	saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);

	if (has_f64_aligment) {
		/* mov TMP_REG1, [esp + local_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
		/* mov TMP_REG1, [TMP_REG1 + saved_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
		/* Move return address to [esp]. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
		size = 0;
	} else
		size += saved_size;

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}