/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* x86 32-bit arch dependent functions. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
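/* A sketch of the emitted bytes, assuming reg_map[SLJIT_R0] == 0 (eax) and the
   MOV_r_i32 opcode of the common x86 emitter: emit_do_imm(compiler,
   MOV_r_i32 + reg_map[SLJIT_R0], 42) appends B8 2A 00 00 00, i.e. "mov eax, 42",
   one opcode byte followed by a 32 bit immediate. */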
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_s32 type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}
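/* Near jumps and calls use a rel32 displacement measured from the end of the
   instruction; the "+ 4" above accounts for the 4 byte displacement field that
   follows jump->addr. When the target is a label (JUMP_LABEL), the displacement
   is left unset and patched later (PATCH_MW). */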
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	args = get_arg_count(arg_types);
	compiler->args = args;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
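	/* Rough frame picture implied by the offsets computed above: [esp + 0] holds
	   the stack_tmp_size scratch area used for outgoing call arguments, the slots
	   up to saveds_offset back the virtual scratch registers that have no hardware
	   register, the slots up to locals_offset back the extra saved registers, and
	   the user local area starts at locals_offset. */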
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0);
#else
	size += (args > 0 ? (2 + args * 3) : 0);
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */;
	}
#endif
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (args > 0) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
		inst += 2;
	}
	if (args > 1) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
		inst += 2;
	}
	if (args > 2) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
		inst[2] = 0x24;
		inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
	}
#else
	if (args > 0) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 2;
		inst += 3;
	}
	if (args > 1) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 3;
		inst += 3;
	}
	if (args > 2) {
		inst[0] = MOV_r_rm;
		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
		inst[2] = sizeof(sljit_sw) * 4;
	}
#endif
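	/* With SLJIT_X86_32_FASTCALL the first two word arguments arrive in ecx and
	   edx and only the third is read from the caller's stack; in the cdecl case
	   every argument is loaded relative to the entry esp, which was copied into
	   TMP_REG1 right after the first push above. */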
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;
#ifdef _WIN32
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif

	SLJIT_ASSERT(local_size > 0);
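	/* The page-by-page touches above are stack probes: Windows commits stack
	   memory one guard page at a time, so every 4096 byte page of the new frame
	   must be accessed in order before esp is moved past it. */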
#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size);
}
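/* Prologue produced above, in order: push TMP_REG1 (ebp) and the used saved
   registers, copy the incoming arguments into S0..S2, then either subtract
   local_size from esp or, when SLJIT_F64_ALIGNMENT is requested, additionally
   align esp to 8 bytes and remember the original esp at [esp + local_size]. */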
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->args = get_arg_count(arg_types);

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
	SLJIT_ASSERT(compiler->args >= 0);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		size += 2;
#else
	if (compiler->args > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args > 2)
		RET_I16(sizeof(sljit_sw));
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}
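/* Under SLJIT_X86_32_FASTCALL a function that received more than two word
   arguments returns with "ret imm16" (RET_I16) so the callee also pops the
   stack-passed argument, as the fastcall convention requires; otherwise a
   plain near ret is emitted. */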
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */
/* Size contains the flags as well. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;
	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if ((b & OFFS_REG_MASK) != SLJIT_UNUSED)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);
	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
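/* The returned pointer addresses the opcode slot that the caller still has to
   fill in; for shift instructions the group opcode is already written, so the
   pointer is advanced to the mod r/m byte and the caller ORs the shift kind
   into its /reg field. */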
/* --------------------------------------------------------------------- */
/*  Call / return instructions                                            */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}
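/* Fastcall passes the first two word arguments in ecx and edx, so only word
   arguments beyond the second consume stack space here; float arguments always
   go on the stack. */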
static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_DEF_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_DEF_MASK) {
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f32);
				break;
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f64);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += sizeof(sljit_sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += sizeof(sljit_sw);
				}
				break;
			}

			arg_types >>= SLJIT_DEF_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = XCHG_EAX_r | reg_map[SLJIT_R2];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
		}
	}

	return SLJIT_SUCCESS;
}
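/* The final move/xchg places the first word argument into ecx (SLJIT_R2) as
   fastcall requires; xchg is used when swap_args is set because the call target
   itself lives in ecx and a plain mov would overwrite it. */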
#endif

static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}
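/* sljit_emit_enter already reserved stack_tmp_size bytes at [esp + 0] for
   outgoing arguments, so only the amount exceeding that area has to be
   allocated before a cdecl call (rounded up to 16 bytes on Apple targets,
   which require an aligned stack at call sites). */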
static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
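/* Both x86-32 calling conventions return floating point values in st(0); the
   three bytes emitted above encode "fstp dword/qword ptr [esp]" (FSTPS/FSTPD
   followed by a mod r/m and SIB byte selecting esp), after which the value is
   reloaded into SLJIT_FR0 with an SSE2 load. */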
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}
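/* saveds_offset and locals_offset are temporarily increased while the stack
   pointer is lowered for the outgoing arguments, so that any esp-relative
   operand used by sljit_emit_ijump (for example a memory call target) still
   resolves to the right slot. */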
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
	}

	RET();

	return SLJIT_SUCCESS;
}