/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_s32 type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}
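/* Note: "far" jumps here are the generic forms with a 32 bit relative
   displacement.  When the jump targets a label inside the generated code
   (JUMP_LABEL), the displacement is left to the later patching pass
   (PATCH_MW); otherwise the absolute target is converted right away into a
   displacement relative to the end of the instruction. */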
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	args = get_arg_count(arg_types);
	compiler->args = args;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);
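	/* Note: the layout below esp is, roughly: stack_tmp_size bytes of
	   temporary slots at [esp + 0], then the scratch registers that are not
	   kept in machine registers, then the extra saved registers (starting at
	   saveds_offset), then the user's locals starting at locals_offset. */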
	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);
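	/* Note: the moves emitted below copy the incoming arguments into S0-S2.
	   Under fastcall the first two word arguments already arrive in ECX and
	   EDX, so only the third one is fetched from the stack; otherwise all of
	   them are loaded through TMP_REG1, which was loaded with esp right after
	   the first push above. */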
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
		inst += 2;
	}
	if (args > 1) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
		inst += 2;
	}
	if (args > 2) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		inst[2] = 0x24;
		inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 2;
		inst += 3;
	}
	if (args > 1) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 3;
		inst += 3;
	}
	if (args > 2) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 4;
	}
#endif
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;
#ifdef _WIN32
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif
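	/* Note: the Windows-only block above performs stack probing: the stack
	   can only grow one guard page at a time, so every 4096 byte page between
	   the old and the new esp must be touched before esp is lowered.  Small
	   frames use a few explicit loads, large frames a short countdown loop
	   (the two byte JNE at the end jumps back over the loop body). */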
	SLJIT_ASSERT(local_size > 0);
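	/* Note: the SLJIT_F64_ALIGNMENT path below keeps esp 8 byte aligned by
	   masking its low bits with an AND, and stores the original esp (saved in
	   TMP_REG1) at [esp + local_size] so the epilogue can restore it with a
	   single load. */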
#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = get_arg_count(arg_types);

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}
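/* Note: under fastcall the callee removes its own stack arguments, which is
   why a "ret imm16" (RET_I16) is emitted when a third word argument was
   passed on the stack; in the cdecl case the caller cleans up and a one byte
   ret is enough. */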
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
	/* We don't support (%ebp). */
	SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5);
	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
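/* Note on the encoding above: an IA-32 instruction emitted here has the shape
   [prefixes][opcode][ModRM][SIB][disp8/disp32][immediate].  buf_ptr points at
   the ModRM byte: mod 11 selects a register operand, mod 01/10 add an 8 or
   32 bit displacement, and an index register (or esp as the base) requires
   the extra SIB byte, which is exactly what the size calculation above
   accounted for. */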
/* --------------------------------------------------------------------- */
/*  Call / return instructions                                            */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}
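/* Note: SLJIT_CALL maps to a fastcall-style convention on x86-32: the first
   two word sized arguments travel in ECX and EDX, while the remaining word
   arguments and every float argument go on the stack, hence the
   word_arg_count > 2 threshold above. */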
static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_DEF_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_DEF_MASK) {
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f32);
				break;
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f64);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += sizeof(sljit_sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += sizeof(sljit_sw);
				}
				break;
			}

			arg_types >>= SLJIT_DEF_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = XCHG_EAX_r | reg_map[SLJIT_R2];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
		}
	}

	return SLJIT_SUCCESS;
}
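/* Note: when the indirect call target itself lives in ECX (see
   sljit_emit_icall), swap_args is set and the single byte "xchg eax, ecx"
   above both frees ECX for the first argument and moves the target into EAX;
   otherwise a plain "mov ecx, eax" places the first argument. */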
static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}
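/* Note: only the part of the argument area that does not fit into the
   preallocated stack_tmp_size slots needs extra stack; on macOS the result is
   additionally rounded up so that esp stays 16 byte aligned at the call. */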
static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
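/* Note: cdecl and fastcall return floating point values on the x87 stack,
   while sljit keeps them in SSE registers, so the FSTPS/FSTPD sequence above
   stores st(0) to [esp] and emit_sse2_load reloads the value into SLJIT_FR0. */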
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}
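/* Note: saveds_offset and locals_offset are raised temporarily around the
   indirect jump because pushing the outgoing arguments lowers esp by
   stack_size; any esp-relative operand used while emitting the ijump (for
   example a src loaded from the stack) must see the shifted offsets. */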
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
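/* Note: emit_fast_return is the counterpart of sljit_emit_fast_enter: the
   return address saved by the fast enter is pushed back onto the stack and a
   plain ret jumps through it. */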
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 size, saved_size;
	sljit_s32 has_f64_aligment;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack ())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
	has_f64_aligment = 0;
#endif

	size = compiler->local_size;
	saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw);
	if (has_f64_aligment) {
		/* mov TMP_REG1, [esp + local_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
		/* mov TMP_REG1, [TMP_REG1 + saved_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
		/* Move return address to [esp]. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
		size = 0;
	} else
		size += saved_size;

	return adjust_shadow_stack(compiler, SLJIT_UNUSED, 0, SLJIT_SP, size);
}
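/* Note: with shadow stack (Intel CET) enabled, the CPU keeps its own copy of
   every return address.  Since the code above may skip whole stack frames,
   adjust_shadow_stack is in effect told where the real return address lives
   relative to esp (size), so the shadow stack can be unwound to match; the
   emit_endbranch call at function entry emits the ENDBR32 marker that CET
   requires at indirect branch targets. */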