/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* x86 32-bit arch dependent functions. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

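/* Emit a far jump or call: a one or two byte opcode followed by a 32 bit
   displacement. The displacement is either computed directly from the target
   address or marked with PATCH_MW so it can be patched once the label
   address becomes known. */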
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_s32 type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}

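/* Function prologue: the incoming floating point arguments are loaded first,
   then the required registers are saved, the word arguments are moved into
   the saved registers and finally the local frame is allocated. */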
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, float_arg_count, args_size, size, types;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_DEF_SHIFT;

	types = arg_types;
	word_arg_count = 0;
	float_arg_count = 0;
	args_size = sizeof(sljit_sw);
	while (types) {
		switch (types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count > 2)
				args_size += sizeof(sljit_sw);
#else
			args_size += sizeof(sljit_sw);
#endif
			break;
		}
		types >>= SLJIT_DEF_SHIFT;
	}

	args_size -= sizeof(sljit_sw);
	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size * sizeof(sljit_sw));

	word_arg_count = 0;
	args_size = (size + 1) * sizeof(sljit_sw);

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			args_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count <= 3) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (word_arg_count <= 2)
					break;
#endif
				EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
			}
			args_size += sizeof(sljit_sw);
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (word_arg_count > 0)
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
	if (word_arg_count > 1)
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0);
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;

#ifdef _WIN32
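	/* Windows grows the stack through a guard page, so every 4096 byte page of
	   the new frame must be touched in order before esp moves below it. Small
	   frames are probed with a few explicit loads, larger ones with a short
	   countdown loop. */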
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif

	SLJIT_ASSERT(local_size > 0);

#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	if (word_arg_count == 4)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - sizeof(sljit_sw), TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args_size;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 word_arg_count = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	arg_types >>= SLJIT_DEF_SHIFT;

	args_size = 0;
	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			args_size += sizeof(sljit_f64);
			break;
		default:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count >= 2)
				args_size += sizeof(sljit_sw);
			word_arg_count++;
#else
			args_size += sizeof(sljit_sw);
#endif
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}

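/* Function epilogue: release the local frame, restore the saved registers and
   return. On SLJIT_X86_32_FASTCALL targets the callee also pops its own stack
   arguments, hence the RET imm16 form below. */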
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		RET_I16((sljit_u8)compiler->args_size);
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
	/* We don't support (%ebp). */
	SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5);

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if (b & OFFS_REG_MASK)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if (b & REG_MASK) {
		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                            */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
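
/* With SLJIT_X86_32_FASTCALL the first two word arguments travel in ECX and
   EDX and the callee pops its own stack arguments. These helpers compute the
   extra stack space a SLJIT_CALL needs and move the arguments into place. */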
static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += sizeof(sljit_sw);
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}

static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_DEF_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_DEF_MASK) {
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f32);
				break;
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f64);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += sizeof(sljit_sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += sizeof(sljit_sw);
				}
				break;
			}
			arg_types >>= SLJIT_DEF_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = XCHG_EAX_r | reg_map[SLJIT_R2];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
		}
	}

	return SLJIT_SUCCESS;
}

#endif

static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			stack_size += sizeof(sljit_sw);
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}

static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += sizeof(sljit_sw);
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	return SLJIT_SUCCESS;
}

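/* Undo the argument area after the call and, when the called function returns
   a float or double, move the value from the x87 top of stack (where cdecl and
   fastcall return it) into SLJIT_FR0 through a temporary store at [esp]. */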
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = POP_rm;
	return SLJIT_SUCCESS;
}

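/* Push the return address held in src back onto the stack and return through it. */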
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 size, saved_size;
	sljit_s32 has_f64_aligment;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack ())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
	has_f64_aligment = 0;
#endif

	size = compiler->local_size;
	saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw);

	if (has_f64_aligment) {
		/* mov TMP_REG1, [esp + local_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
		/* mov TMP_REG1, [TMP_REG1 + saved_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
		/* Move return address to [esp]. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
		size = 0;
	} else
		size += saved_size;

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}