2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 /* x86 32-bit arch dependent functions. */
29 /* --------------------------------------------------------------------- */
31 /* --------------------------------------------------------------------- */
/* Emits a one-byte opcode followed by a machine-word immediate:
   reserves 1 (buffer tag) + 1 (opcode) + sizeof(sljit_sw) bytes and
   stores `imm` with an unaligned store.
   NOTE(review): this extract is lossy -- the opening brace, the `inst`
   declaration, the FAIL_IF/opcode-store lines and the return statement
   are missing; reconcile against the full sljitNativeX86_32.c. */
33 static sljit_s32
emit_do_imm(struct sljit_compiler
*compiler
, sljit_u8 opcode
, sljit_sw imm
)
37 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1 + sizeof(sljit_sw
));
39 INC_SIZE(1 + sizeof(sljit_sw
));
41 sljit_unaligned_store_sw(inst
, imm
);
/* Central x86-32 instruction encoder: computes the total encoded length
   (prefixes, opcode, ModR/M, optional SIB, displacement, immediate),
   reserves that many bytes, then emits prefixes, the ModR/M/SIB bytes
   for operand `b`, and any trailing immediate `imma`.  Returns a pointer
   positioned for the caller to patch the opcode byte(s).
   NOTE(review): lossy extract -- `inst_size` initialization, several
   branch bodies and closing braces are missing from this view. */
45 /* Size contains the flags as well. */
46 static sljit_u8
* emit_x86_instruction(struct sljit_compiler
*compiler
, sljit_uw size
,
47 /* The register or immediate operand. */
48 sljit_s32 a
, sljit_sw imma
,
49 /* The general operand (not immediate). */
50 sljit_s32 b
, sljit_sw immb
)
55 sljit_uw flags
= size
;
/* Flag-consistency checks: the instruction class, operand-size and
   mandatory-prefix flags must be mutually exclusive where required. */
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags
& (EX86_BIN_INS
| EX86_SHIFT_INS
)) != (EX86_BIN_INS
| EX86_SHIFT_INS
));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags
& (EX86_BIN_INS
| EX86_SHIFT_INS
)) || (flags
& (EX86_BYTE_ARG
| EX86_HALF_ARG
)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags
& (EX86_BYTE_ARG
| EX86_HALF_ARG
)) != (EX86_BYTE_ARG
| EX86_HALF_ARG
));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a
& SLJIT_IMM
) || !(flags
& EX86_SSE2
));
66 SLJIT_ASSERT((flags
& (EX86_PREF_F2
| EX86_PREF_F3
)) != (EX86_PREF_F2
| EX86_PREF_F3
)
67 && (flags
& (EX86_PREF_F2
| EX86_PREF_66
)) != (EX86_PREF_F2
| EX86_PREF_66
)
68 && (flags
& (EX86_PREF_F3
| EX86_PREF_66
)) != (EX86_PREF_F3
| EX86_PREF_66
));
/* Account for mandatory prefix bytes (F2/F3/66) in the size. */
73 if (flags
& (EX86_PREF_F2
| EX86_PREF_F3
))
75 if (flags
& EX86_PREF_66
)
78 /* Calculate size of b. */
79 inst_size
+= 1; /* mod r/m byte. */
82 inst_size
+= sizeof(sljit_sw
);
84 if (immb
!= 0 && !(b
& OFFS_REG_MASK
)) {
85 /* Immediate operand. */
86 if (immb
<= 127 && immb
>= -128)
87 inst_size
+= sizeof(sljit_s8
);
89 inst_size
+= sizeof(sljit_sw
);
/* reg_map value 5 is a base that cannot use mod=00 (needs disp8). */
91 else if (reg_map
[b
& REG_MASK
] == 5) {
92 /* Swap registers if possible. */
93 if ((b
& OFFS_REG_MASK
) && (immb
& 0x3) == 0 && reg_map
[OFFS_REG(b
)] != 5)
94 b
= SLJIT_MEM
| OFFS_REG(b
) | TO_OFFS_REG(b
& REG_MASK
);
96 inst_size
+= sizeof(sljit_s8
);
/* reg_map value 4 (ESP) as a base always requires a SIB byte. */
99 if (reg_map
[b
& REG_MASK
] == 4 && !(b
& OFFS_REG_MASK
))
100 b
|= TO_OFFS_REG(SLJIT_SP
);
102 if (b
& OFFS_REG_MASK
)
103 inst_size
+= 1; /* SIB byte. */
107 /* Calculate size of a. */
109 if (flags
& EX86_BIN_INS
) {
110 if (imma
<= 127 && imma
>= -128) {
112 flags
|= EX86_BYTE_ARG
;
116 else if (flags
& EX86_SHIFT_INS
) {
120 flags
|= EX86_BYTE_ARG
;
122 } else if (flags
& EX86_BYTE_ARG
)
124 else if (flags
& EX86_HALF_ARG
)
125 inst_size
+= sizeof(short);
127 inst_size
+= sizeof(sljit_sw
);
130 SLJIT_ASSERT(!(flags
& EX86_SHIFT_INS
) || a
== SLJIT_PREF_SHIFT_REG
);
132 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + inst_size
);
135 /* Encoding the byte. */
137 if (flags
& EX86_PREF_F2
)
139 if (flags
& EX86_PREF_F3
)
141 if (flags
& EX86_PREF_66
)
/* buf_ptr points past the opcode byte(s); caller patches [inst..). */
144 buf_ptr
= inst
+ size
;
146 /* Encode mod/rm byte. */
147 if (!(flags
& EX86_SHIFT_INS
)) {
148 if ((flags
& EX86_BIN_INS
) && (a
& SLJIT_IMM
))
149 *inst
= (flags
& EX86_BYTE_ARG
) ? GROUP_BINARY_83
: GROUP_BINARY_81
;
153 else if (!(flags
& EX86_SSE2_OP1
))
154 *buf_ptr
= U8(reg_map
[a
] << 3);
156 *buf_ptr
= U8(a
<< 3);
/* Shift group opcodes: by 1, by imm8, or by CL. */
161 *inst
= GROUP_SHIFT_1
;
163 *inst
= GROUP_SHIFT_N
;
165 *inst
= GROUP_SHIFT_CL
;
/* Encode operand b: register direct, base(+index), or absolute. */
169 if (!(b
& SLJIT_MEM
)) {
170 *buf_ptr
= U8(*buf_ptr
| MOD_REG
| (!(flags
& EX86_SSE2_OP2
) ? reg_map
[b
] : b
));
172 } else if (b
& REG_MASK
) {
173 reg_map_b
= reg_map
[b
& REG_MASK
];
175 if (!(b
& OFFS_REG_MASK
) || (b
& OFFS_REG_MASK
) == TO_OFFS_REG(SLJIT_SP
)) {
176 if (immb
!= 0 || reg_map_b
== 5) {
177 if (immb
<= 127 && immb
>= -128)
183 if (!(b
& OFFS_REG_MASK
))
184 *buf_ptr
++ |= reg_map_b
;
187 *buf_ptr
++ = U8(reg_map_b
| (reg_map
[OFFS_REG(b
)] << 3));
190 if (immb
!= 0 || reg_map_b
== 5) {
191 if (immb
<= 127 && immb
>= -128)
192 *buf_ptr
++ = U8(immb
); /* 8 bit displacement. */
194 sljit_unaligned_store_sw(buf_ptr
, immb
); /* 32 bit displacement. */
195 buf_ptr
+= sizeof(sljit_sw
);
/* SIB with scale: immb holds the scale shift (encoded into bits 6-7). */
204 *buf_ptr
++ = U8(reg_map_b
| (reg_map
[OFFS_REG(b
)] << 3) | (immb
<< 6));
212 sljit_unaligned_store_sw(buf_ptr
, immb
); /* 32 bit displacement. */
213 buf_ptr
+= sizeof(sljit_sw
);
/* Trailing immediate operand (byte / half / word sized). */
217 if (flags
& EX86_BYTE_ARG
)
219 else if (flags
& EX86_HALF_ARG
)
220 sljit_unaligned_store_s16(buf_ptr
, (sljit_s16
)imma
);
221 else if (!(flags
& EX86_SHIFT_INS
))
222 sljit_unaligned_store_sw(buf_ptr
, imma
);
/* Shift instructions reserve one extra leading byte; skip it. */
225 return !(flags
& EX86_SHIFT_INS
) ? inst
: (inst
+ 1);
228 /* --------------------------------------------------------------------- */
230 /* --------------------------------------------------------------------- */
/* Emits the long (rel32) form of a jump/call: JMP_i32 for plain jumps,
   CALL_i32 for fast calls, or 0F 8x for conditional jumps, then either
   flags the jump for later patching (JUMP_LABEL -> PATCH_MW) or stores
   the rel32 displacement to the absolute target now.
   NOTE(review): lossy extract -- else-branch bodies and the trailing
   `code_ptr += 4; return code_ptr;` portion are missing here. */
232 static sljit_u8
* generate_far_jump_code(struct sljit_jump
*jump
, sljit_u8
*code_ptr
, sljit_sw executable_offset
)
234 sljit_uw type
= jump
->flags
>> TYPE_SHIFT
;
236 if (type
== SLJIT_JUMP
) {
237 *code_ptr
++ = JMP_i32
;
240 else if (type
>= SLJIT_FAST_CALL
) {
241 *code_ptr
++ = CALL_i32
;
/* Conditional jump: two-byte 0F 8x opcode. */
245 *code_ptr
++ = GROUP_0F
;
246 *code_ptr
++ = get_jump_code(type
);
250 if (jump
->flags
& JUMP_LABEL
)
251 jump
->flags
|= PATCH_MW
;
/* Absolute target: rel32 = target - (addr of next insn) - exec offset. */
253 sljit_unaligned_store_sw(code_ptr
, (sljit_sw
)(jump
->u
.target
- (jump
->addr
+ 4) - (sljit_uw
)executable_offset
));
/* Status bits for sljit_emit_enter: the 4th word argument was loaded into
   TMP_REG1 and must finally land in virtual register R4 / a saved slot. */
259 #define ENTER_TMP_TO_R4 0x00001
260 #define ENTER_TMP_TO_S 0x00002
/* Builds the x86-32 function prologue: validates and records the
   context, emits ENDBR32 when shadow-stack/IBT support requires it,
   computes args_size/locals_offset/scratches_offset, pushes the callee
   saved registers, loads incoming stack arguments into their assigned
   registers, allocates (and, for >4096 bytes, probes page-by-page) the
   local area, and spills TMP_REG1 per the ENTER_TMP_TO_* status.
   NOTE(review): lossy extract -- many statements (opening braces,
   `status = 0`-style initializations, loop headers, else branches and
   the stack-probe loop body) are missing from this view; do not edit
   without the full sljitNativeX86_32.c. */
262 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_enter(struct sljit_compiler
*compiler
,
263 sljit_s32 options
, sljit_s32 arg_types
, sljit_s32 scratches
, sljit_s32 saveds
,
264 sljit_s32 fscratches
, sljit_s32 fsaveds
, sljit_s32 local_size
)
266 sljit_s32 word_arg_count
, saved_arg_count
, float_arg_count
;
267 sljit_s32 size
, locals_offset
, args_size
, types
, status
;
268 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(options
);
/* -1 marks "R2 not spilled"; only used by the stack-probe path. */
271 sljit_s32 r2_offset
= -1;
275 CHECK(check_sljit_emit_enter(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
));
276 set_emit_enter(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
);
278 /* Emit ENDBR32 at function entry if needed. */
279 FAIL_IF(emit_endbranch(compiler
));
281 SLJIT_COMPILE_ASSERT(SLJIT_FR0
== 1, float_register_index_start
);
283 arg_types
>>= SLJIT_ARG_SHIFT
;
/* First pass over the argument types: compute args_size and decide
   whether the 4th word argument goes through TMP_REG1. */
287 if (options
& SLJIT_ENTER_REG_ARG
) {
291 if ((arg_types
& SLJIT_ARG_MASK
) < SLJIT_ARG_TYPE_F64
) {
293 if (word_arg_count
>= 4) {
294 status
|= ENTER_TMP_TO_R4
;
295 args_size
= SSIZE_OF(sw
);
299 arg_types
>>= SLJIT_ARG_SHIFT
;
305 args_size
= SSIZE_OF(sw
);
307 switch (types
& SLJIT_ARG_MASK
) {
308 case SLJIT_ARG_TYPE_F64
:
310 FAIL_IF(emit_sse2_load(compiler
, 0, float_arg_count
, SLJIT_MEM1(SLJIT_SP
), args_size
));
311 args_size
+= SSIZE_OF(f64
);
313 case SLJIT_ARG_TYPE_F32
:
315 FAIL_IF(emit_sse2_load(compiler
, 1, float_arg_count
, SLJIT_MEM1(SLJIT_SP
), args_size
));
316 args_size
+= SSIZE_OF(f32
);
321 if (!(types
& SLJIT_ARG_TYPE_SCRATCH_REG
))
324 if (word_arg_count
== 4) {
325 if (types
& SLJIT_ARG_TYPE_SCRATCH_REG
) {
326 status
|= ENTER_TMP_TO_R4
;
/* Drop the 4th argument from arg_types: it is handled via TMP_REG1. */
327 arg_types
&= ~(SLJIT_ARG_FULL_MASK
<< 3 * SLJIT_ARG_SHIFT
);
328 } else if (saved_arg_count
== 4) {
329 status
|= ENTER_TMP_TO_S
;
330 arg_types
&= ~(SLJIT_ARG_FULL_MASK
<< 3 * SLJIT_ARG_SHIFT
);
334 args_size
+= SSIZE_OF(sw
);
337 types
>>= SLJIT_ARG_SHIFT
;
340 args_size
-= SSIZE_OF(sw
);
343 compiler
->args_size
= args_size
;
345 /* [esp+0] for saving temporaries and function calls. */
346 locals_offset
= 2 * SSIZE_OF(sw
);
349 locals_offset
= 4 * SSIZE_OF(sw
);
351 compiler
->scratches_offset
= locals_offset
;
/* Reserve home slots for extra scratches (>3) and saveds (>3). */
354 locals_offset
+= ((scratches
> (3 + 6)) ? 6 : (scratches
- 3)) * SSIZE_OF(sw
);
357 locals_offset
+= (saveds
- 3) * SSIZE_OF(sw
);
359 compiler
->locals_offset
= locals_offset
;
/* Number of registers pushed in the prologue. */
361 size
= (scratches
> 9 ? (scratches
- 9) : 0) + (saveds
<= 3 ? saveds
: 3) - kept_saveds_count
;
362 if (!(options
& SLJIT_ENTER_REG_ARG
))
366 inst
= (sljit_u8
*)ensure_buf(compiler
, (sljit_uw
)(size
+ 1));
369 INC_SIZE((sljit_uw
)size
);
371 if (!(options
& SLJIT_ENTER_REG_ARG
))
372 PUSH_REG(reg_map
[TMP_REG1
]);
374 if ((saveds
> 2 && kept_saveds_count
<= 2) || scratches
> 9)
375 PUSH_REG(reg_map
[SLJIT_S2
]);
376 if ((saveds
> 1 && kept_saveds_count
<= 1) || scratches
> 10)
377 PUSH_REG(reg_map
[SLJIT_S1
]);
378 if ((saveds
> 0 && kept_saveds_count
== 0) || scratches
> 11)
379 PUSH_REG(reg_map
[SLJIT_S0
]);
/* `size` becomes the byte size of the pushed-register area. */
381 size
*= SSIZE_OF(sw
);
384 if (status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
))
385 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
+ size
);
387 size
+= SSIZE_OF(sw
);
/* Round the frame so esp stays 16-byte aligned after the pushes. */
389 local_size
= ((locals_offset
+ local_size
+ size
+ 0xf) & ~0xf) - size
;
390 compiler
->local_size
= local_size
;
/* Second pass: move the incoming stack arguments into registers. */
396 switch (arg_types
& SLJIT_ARG_MASK
) {
397 case SLJIT_ARG_TYPE_F64
:
398 args_size
+= SSIZE_OF(f64
);
400 case SLJIT_ARG_TYPE_F32
:
401 args_size
+= SSIZE_OF(f32
);
405 SLJIT_ASSERT(word_arg_count
<= 3 || (word_arg_count
== 4 && !(status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
))));
407 if (arg_types
& SLJIT_ARG_TYPE_SCRATCH_REG
) {
/* Defer the load: R2 is needed as the stack-probe counter below. */
409 if (word_arg_count
== 3 && local_size
> 4 * 4096)
410 r2_offset
= local_size
+ args_size
;
413 EMIT_MOV(compiler
, word_arg_count
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
);
416 EMIT_MOV(compiler
, SLJIT_S0
- saved_arg_count
, 0, SLJIT_MEM1(SLJIT_SP
), args_size
);
420 args_size
+= SSIZE_OF(sw
);
423 arg_types
>>= SLJIT_ARG_SHIFT
;
426 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET
> 0);
429 SLJIT_ASSERT(r2_offset
== -1 || local_size
> 4 * 4096);
/* Stack probing: touch each page so the OS guard page is extended. */
431 if (local_size
> 4096) {
432 if (local_size
<= 4 * 4096) {
433 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096);
435 if (local_size
> 2 * 4096)
436 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096 * 2);
437 if (local_size
> 3 * 4096)
438 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096 * 3);
/* > 4 pages: loop with R2 as the page counter. */
441 if (options
& SLJIT_ENTER_REG_ARG
) {
442 SLJIT_ASSERT(r2_offset
== -1);
444 inst
= (sljit_u8
*)ensure_buf(compiler
, (sljit_uw
)(1 + 1));
447 PUSH_REG(reg_map
[SLJIT_R2
]);
449 local_size
-= SSIZE_OF(sw
);
450 r2_offset
= local_size
;
453 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_IMM
, local_size
>> 12);
455 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -4096);
456 BINARY_IMM32(SUB
, 4096, SLJIT_SP
, 0);
458 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 2);
/* Short backward branch of the probe loop (rel8 = -16). */
463 inst
[1] = (sljit_u8
)-16;
468 if (local_size
> 0) {
469 BINARY_IMM32(OR
, 0, SLJIT_MEM1(SLJIT_SP
), -local_size
);
470 BINARY_IMM32(SUB
, local_size
, SLJIT_SP
, 0);
/* Reload the deferred 3rd argument / probe counter. */
474 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), r2_offset
);
478 SLJIT_ASSERT(local_size
> 0);
480 BINARY_IMM32(SUB
, local_size
, SLJIT_SP
, 0);
/* Store saved arguments above S3 into their frame slots. */
484 locals_offset
-= SSIZE_OF(sw
);
485 kept_saveds_count
= SLJIT_R3
- kept_saveds_count
;
487 while (saved_arg_count
> 3) {
488 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), locals_offset
, kept_saveds_count
, 0);
490 locals_offset
-= SSIZE_OF(sw
);
/* Finally place TMP_REG1 (4th word arg) into its destination slot. */
494 if (status
& (ENTER_TMP_TO_R4
| ENTER_TMP_TO_S
)) {
495 size
= (status
& ENTER_TMP_TO_R4
) ? compiler
->scratches_offset
: locals_offset
;
496 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), size
, TMP_REG1
, 0);
499 return SLJIT_SUCCESS
;
/* Records the function context (args_size, scratches_offset,
   locals_offset, local_size) without emitting any code.  The layout
   computation must mirror sljit_emit_enter exactly.
   NOTE(review): lossy extract -- opening braces, loop headers and some
   conditional lines are missing from this view. */
502 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_set_context(struct sljit_compiler
*compiler
,
503 sljit_s32 options
, sljit_s32 arg_types
, sljit_s32 scratches
, sljit_s32 saveds
,
504 sljit_s32 fscratches
, sljit_s32 fsaveds
, sljit_s32 local_size
)
506 sljit_s32 args_size
, locals_offset
;
509 CHECK(check_sljit_set_context(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
));
510 set_set_context(compiler
, options
, arg_types
, scratches
, saveds
, fscratches
, fsaveds
, local_size
);
512 arg_types
>>= SLJIT_ARG_SHIFT
;
/* Stack-passed arguments only exist without SLJIT_ENTER_REG_ARG. */
515 if (!(options
& SLJIT_ENTER_REG_ARG
)) {
517 switch (arg_types
& SLJIT_ARG_MASK
) {
518 case SLJIT_ARG_TYPE_F64
:
519 args_size
+= SSIZE_OF(f64
);
521 case SLJIT_ARG_TYPE_F32
:
522 args_size
+= SSIZE_OF(f32
);
525 args_size
+= SSIZE_OF(sw
);
528 arg_types
>>= SLJIT_ARG_SHIFT
;
532 compiler
->args_size
= args_size
;
534 /* [esp+0] for saving temporaries and function calls. */
535 locals_offset
= 2 * SSIZE_OF(sw
);
538 locals_offset
= 4 * SSIZE_OF(sw
);
540 compiler
->scratches_offset
= locals_offset
;
/* Same home-slot accounting as sljit_emit_enter. */
543 locals_offset
+= ((scratches
> (3 + 6)) ? 6 : (scratches
- 3)) * SSIZE_OF(sw
);
546 locals_offset
+= (saveds
- 3) * SSIZE_OF(sw
);
548 compiler
->locals_offset
= locals_offset
;
/* Reuse `saveds` as the byte size of the pushed area (+1 for the
   return address). */
550 saveds
= (1 + (scratches
> 9 ? (scratches
- 9) : 0) + (saveds
<= 3 ? saveds
: 3) - SLJIT_KEPT_SAVEDS_COUNT(options
)) * SSIZE_OF(sw
);
552 if (!(options
& SLJIT_ENTER_REG_ARG
))
553 saveds
+= SSIZE_OF(sw
);
/* 16-byte alignment, identical to the rounding in sljit_emit_enter. */
555 compiler
->local_size
= ((locals_offset
+ local_size
+ saveds
+ 0xf) & ~0xf) - saveds
;
556 return SLJIT_SUCCESS
;
/* Epilogue helper: releases the local area (ADD esp, local_size) and
   pops the callee-saved registers pushed by sljit_emit_enter, in the
   reverse order (S0, S1, S2, then TMP_REG1).
   NOTE(review): lossy extract -- `size`/`saveds`/`inst` declarations,
   the FAIL_IF after ensure_buf and INC_SIZE are missing here. */
559 static sljit_s32
emit_stack_frame_release(struct sljit_compiler
*compiler
)
561 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(compiler
->options
);
566 BINARY_IMM32(ADD
, compiler
->local_size
, SLJIT_SP
, 0);
/* Number of registers to pop; mirrors the prologue push count. */
568 size
= (sljit_uw
)((compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0) +
569 (compiler
->saveds
<= 3 ? compiler
->saveds
: 3) - kept_saveds_count
);
571 if (!(compiler
->options
& SLJIT_ENTER_REG_ARG
))
575 return SLJIT_SUCCESS
;
577 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + size
);
582 saveds
= compiler
->saveds
;
584 if ((saveds
> 0 && kept_saveds_count
== 0) || compiler
->scratches
> 11)
585 POP_REG(reg_map
[SLJIT_S0
]);
586 if ((saveds
> 1 && kept_saveds_count
<= 1) || compiler
->scratches
> 10)
587 POP_REG(reg_map
[SLJIT_S1
]);
588 if ((saveds
> 2 && kept_saveds_count
<= 2) || compiler
->scratches
> 9)
589 POP_REG(reg_map
[SLJIT_S2
]);
591 if (!(compiler
->options
& SLJIT_ENTER_REG_ARG
))
592 POP_REG(reg_map
[TMP_REG1
]);
594 return SLJIT_SUCCESS
;
/* Emits a void return: tears down the stack frame then emits the
   one-byte RET instruction.
   NOTE(review): lossy extract -- the `inst` declaration, CHECK_ERROR,
   FAIL_IF after ensure_buf, INC_SIZE and the RET_near store are
   missing from this view. */
597 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_return_void(struct sljit_compiler
*compiler
)
602 CHECK(check_sljit_emit_return_void(compiler
));
604 SLJIT_ASSERT(compiler
->args_size
>= 0);
605 SLJIT_ASSERT(compiler
->local_size
> 0);
607 FAIL_IF(emit_stack_frame_release(compiler
));
609 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
613 return SLJIT_SUCCESS
;
616 /* --------------------------------------------------------------------- */
617 /* Call / return instructions */
618 /* --------------------------------------------------------------------- */
/* Computes the stack bytes a call must reserve for its outgoing
   arguments (words, f32s, f64s), optionally reporting the word-argument
   count.  Space up to scratches_offset is already available in the
   frame, so only the excess is returned, rounded to 16 bytes.
   NOTE(review): lossy extract -- the argument-type loop header, the
   word_arg_count increment and the `return 0` of the early-out branch
   are missing from this view. */
620 static sljit_s32
call_get_stack_size(struct sljit_compiler
*compiler
, sljit_s32 arg_types
, sljit_s32
*word_arg_count_ptr
)
622 sljit_sw stack_size
= 0;
623 sljit_s32 word_arg_count
= 0;
625 arg_types
>>= SLJIT_ARG_SHIFT
;
628 switch (arg_types
& SLJIT_ARG_MASK
) {
629 case SLJIT_ARG_TYPE_F64
:
630 stack_size
+= SSIZE_OF(f64
);
632 case SLJIT_ARG_TYPE_F32
:
633 stack_size
+= SSIZE_OF(f32
);
637 stack_size
+= SSIZE_OF(sw
);
641 arg_types
>>= SLJIT_ARG_SHIFT
;
644 if (word_arg_count_ptr
)
645 *word_arg_count_ptr
= word_arg_count
;
647 if (stack_size
<= compiler
->scratches_offset
)
/* Extra bytes beyond the pre-reserved area, 16-byte aligned. */
650 return ((stack_size
- compiler
->scratches_offset
+ 0xf) & ~0xf);
/* Stores the outgoing call arguments onto the stack: loads the 4th word
   argument from its home slot into TMP_REG1 first, extends the stack by
   `stack_size` if needed, then writes each argument (SSE2 store for
   floats, MOV for words) at increasing esp offsets.
   NOTE(review): lossy extract -- the argument loop header, the
   float_arg_count increments and closing braces are missing here. */
653 static sljit_s32
call_with_args(struct sljit_compiler
*compiler
,
654 sljit_s32 arg_types
, sljit_sw stack_size
, sljit_s32 word_arg_count
)
656 sljit_s32 float_arg_count
= 0;
/* The 4th word argument lives in its frame home slot, not a register. */
659 if (word_arg_count
>= 4)
660 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
663 BINARY_IMM32(SUB
, stack_size
, SLJIT_SP
, 0);
667 arg_types
>>= SLJIT_ARG_SHIFT
;
670 switch (arg_types
& SLJIT_ARG_MASK
) {
671 case SLJIT_ARG_TYPE_F64
:
673 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
674 stack_size
+= SSIZE_OF(f64
);
676 case SLJIT_ARG_TYPE_F32
:
678 FAIL_IF(emit_sse2_store(compiler
, 1, SLJIT_MEM1(SLJIT_SP
), stack_size
, float_arg_count
));
679 stack_size
+= SSIZE_OF(f32
);
683 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), stack_size
, (word_arg_count
>= 4) ? TMP_REG1
: word_arg_count
, 0);
684 stack_size
+= SSIZE_OF(sw
);
688 arg_types
>>= SLJIT_ARG_SHIFT
;
691 return SLJIT_SUCCESS
;
/* Cleanup after a call: pops the argument area (ADD esp, stack_size)
   and, for float returns, moves the x87 st(0) result into FR0 by
   emitting FSTPS/FSTPD to [esp] and reloading it with an SSE2 load.
   NOTE(review): lossy extract -- `inst`/`single` declarations, FAIL_IF
   after ensure_buf and INC_SIZE are missing from this view. */
694 static sljit_s32
post_call_with_args(struct sljit_compiler
*compiler
,
695 sljit_s32 arg_types
, sljit_s32 stack_size
)
701 BINARY_IMM32(ADD
, stack_size
, SLJIT_SP
, 0);
/* Word return values are already in place; nothing more to do. */
703 if ((arg_types
& SLJIT_ARG_MASK
) < SLJIT_ARG_TYPE_F64
)
704 return SLJIT_SUCCESS
;
706 single
= ((arg_types
& SLJIT_ARG_MASK
) == SLJIT_ARG_TYPE_F32
);
708 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 3);
/* fstps/fstpl [esp]: ModR/M = mod 00, reg /3, r/m 100 (SIB follows);
   SIB = scale 0, no index (100), base = esp. */
711 inst
[0] = single
? FSTPS
: FSTPD
;
712 inst
[1] = (0x03 << 3) | 0x04;
713 inst
[2] = (0x04 << 3) | reg_map
[SLJIT_SP
];
715 return emit_sse2_load(compiler
, single
, SLJIT_FR0
, SLJIT_MEM1(SLJIT_SP
), 0);
/* Prepares a tail call: rewrites the current frame's incoming argument
   area with the outgoing arguments (walking `types` in reverse), saves
   the call target in R0 when it would be clobbered, restores the
   callee-saved registers, and reports via *extra_space any stack bytes
   the jump target must release afterwards.  Two layouts are handled:
   the new arguments fit into the existing args area (fast path, frame
   fully released here), or the frame must be enlarged first.
   NOTE(review): lossy extract -- initializations of args_size/types/
   word_arg_count/float_arg_count, several case labels, closing braces
   and a few branch bodies are missing from this view; treat line order
   as authoritative only for the visible statements. */
718 static sljit_s32
tail_call_with_args(struct sljit_compiler
*compiler
,
719 sljit_s32
*extra_space
, sljit_s32 arg_types
,
720 sljit_s32 src
, sljit_sw srcw
)
722 sljit_sw args_size
, saved_regs_size
;
723 sljit_sw types
, word_arg_count
, float_arg_count
;
724 sljit_sw stack_size
, prev_stack_size
, min_size
, offset
;
725 sljit_sw word_arg4_offset
;
/* 0 means R0 was not spilled to [esp+0]. */
726 sljit_u8 r2_offset
= 0;
727 sljit_s32 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(compiler
->options
);
730 ADJUST_LOCAL_OFFSET(src
, srcw
);
731 CHECK_EXTRA_REGS(src
, srcw
, (void)0);
733 saved_regs_size
= (1 + (compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0)
734 + (compiler
->saveds
<= 3 ? compiler
->saveds
: 3) - kept_saveds_count
) * SSIZE_OF(sw
);
738 arg_types
>>= SLJIT_ARG_SHIFT
;
/* First pass: compute args_size and build `types` as the reversed
   sequence of argument kinds (so the second pass can walk last-first). */
742 while (arg_types
!= 0) {
743 types
= (types
<< SLJIT_ARG_SHIFT
) | (arg_types
& SLJIT_ARG_MASK
);
745 switch (arg_types
& SLJIT_ARG_MASK
) {
746 case SLJIT_ARG_TYPE_F64
:
747 args_size
+= SSIZE_OF(f64
);
750 case SLJIT_ARG_TYPE_F32
:
751 args_size
+= SSIZE_OF(f32
);
756 args_size
+= SSIZE_OF(sw
);
759 arg_types
>>= SLJIT_ARG_SHIFT
;
/* Fast path: outgoing arguments fit in the incoming argument area. */
762 if (args_size
<= compiler
->args_size
) {
764 stack_size
= args_size
+ SSIZE_OF(sw
) + saved_regs_size
;
766 offset
= stack_size
+ compiler
->local_size
;
/* Protect R0 (it receives the call target) before overwriting. */
768 if (!(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
769 if (word_arg_count
>= 1) {
770 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), 0, SLJIT_R0
, 0);
771 r2_offset
= sizeof(sljit_sw
);
773 EMIT_MOV(compiler
, SLJIT_R0
, 0, src
, srcw
);
/* Second pass (reversed): store each argument at decreasing offset. */
777 switch (types
& SLJIT_ARG_MASK
) {
778 case SLJIT_ARG_TYPE_F64
:
779 offset
-= SSIZE_OF(f64
);
780 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
783 case SLJIT_ARG_TYPE_F32
:
784 offset
-= SSIZE_OF(f32
);
785 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
789 switch (word_arg_count
) {
791 offset
-= SSIZE_OF(sw
);
792 if (r2_offset
!= 0) {
/* R0 was spilled to [esp+0]; copy it through R2. */
793 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), 0);
794 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
796 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R0
, 0);
799 offset
-= SSIZE_OF(sw
);
800 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R1
, 0);
803 offset
-= SSIZE_OF(sw
);
/* 4th word argument comes from its frame home slot. */
806 offset
-= SSIZE_OF(sw
);
807 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
808 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
814 types
>>= SLJIT_ARG_SHIFT
;
817 return emit_stack_frame_release(compiler
);
/* Slow path: the frame has to be resized before the arguments fit. */
820 stack_size
= args_size
+ SSIZE_OF(sw
);
822 if (word_arg_count
>= 1 && !(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
823 r2_offset
= SSIZE_OF(sw
);
824 stack_size
+= SSIZE_OF(sw
);
827 if (word_arg_count
>= 3)
828 stack_size
+= SSIZE_OF(sw
);
830 prev_stack_size
= SSIZE_OF(sw
) + saved_regs_size
;
831 min_size
= prev_stack_size
+ compiler
->local_size
;
833 word_arg4_offset
= compiler
->scratches_offset
;
835 if (stack_size
> min_size
) {
836 BINARY_IMM32(SUB
, stack_size
- min_size
, SLJIT_SP
, 0);
/* esp moved; fix up any esp-relative source and home-slot offset. */
837 if (src
== SLJIT_MEM1(SLJIT_SP
))
838 srcw
+= stack_size
- min_size
;
839 word_arg4_offset
+= stack_size
- min_size
;
842 stack_size
= min_size
;
844 if (word_arg_count
>= 3) {
845 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), r2_offset
, SLJIT_R2
, 0);
847 if (word_arg_count
>= 4)
848 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), word_arg4_offset
);
851 if (!(src
& SLJIT_IMM
) && src
!= SLJIT_R0
) {
852 if (word_arg_count
>= 1) {
853 SLJIT_ASSERT(r2_offset
== sizeof(sljit_sw
));
854 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), 0, SLJIT_R0
, 0);
856 EMIT_MOV(compiler
, SLJIT_R0
, 0, src
, srcw
);
859 /* Restore saved registers. */
860 offset
= stack_size
- 2 * SSIZE_OF(sw
);
861 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
863 if (compiler
->saveds
> 2 || compiler
->scratches
> 9) {
864 offset
-= SSIZE_OF(sw
);
865 EMIT_MOV(compiler
, SLJIT_S2
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
867 if ((compiler
->saveds
> 1 && kept_saveds_count
<= 1) || compiler
->scratches
> 10) {
868 offset
-= SSIZE_OF(sw
);
869 EMIT_MOV(compiler
, SLJIT_S1
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
871 if ((compiler
->saveds
> 0 && kept_saveds_count
== 0) || compiler
->scratches
> 11) {
872 offset
-= SSIZE_OF(sw
);
873 EMIT_MOV(compiler
, SLJIT_S0
, 0, SLJIT_MEM1(SLJIT_SP
), offset
);
876 /* Copy fourth argument and return address. */
877 offset
= stack_size
- SSIZE_OF(sw
);
878 *extra_space
= args_size
;
880 if (word_arg_count
>= 4) {
881 offset
-= SSIZE_OF(sw
);
882 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
/* Reversed second pass, as in the fast path. */
886 switch (types
& SLJIT_ARG_MASK
) {
887 case SLJIT_ARG_TYPE_F64
:
888 offset
-= SSIZE_OF(f64
);
889 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
892 case SLJIT_ARG_TYPE_F32
:
893 offset
-= SSIZE_OF(f32
);
894 FAIL_IF(emit_sse2_store(compiler
, 0, SLJIT_MEM1(SLJIT_SP
), offset
, float_arg_count
));
898 switch (word_arg_count
) {
900 offset
-= SSIZE_OF(sw
);
901 if (r2_offset
!= 0) {
902 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), 0);
903 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
905 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R0
, 0);
908 offset
-= SSIZE_OF(sw
);
909 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R1
, 0);
912 offset
-= SSIZE_OF(sw
);
913 EMIT_MOV(compiler
, SLJIT_R2
, 0, SLJIT_MEM1(SLJIT_SP
), r2_offset
);
914 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, SLJIT_R2
, 0);
920 types
>>= SLJIT_ARG_SHIFT
;
923 SLJIT_ASSERT(offset
>= 0);
926 return SLJIT_SUCCESS
;
/* Release the slack below the rewritten arguments. */
928 BINARY_IMM32(ADD
, offset
, SLJIT_SP
, 0);
929 return SLJIT_SUCCESS
;
/* After a tail call whose argument area could not be fully reclaimed:
   pops `extra_space` bytes and emits the final one-byte RET.
   NOTE(review): lossy extract -- the `inst` declaration, FAIL_IF,
   INC_SIZE and the RET opcode store are missing from this view. */
932 static sljit_s32
emit_tail_call_end(struct sljit_compiler
*compiler
, sljit_s32 extra_space
)
934 /* Called when stack consumption cannot be reduced to 0. */
937 BINARY_IMM32(ADD
, extra_space
, SLJIT_SP
, 0);
939 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
944 return SLJIT_SUCCESS
;
/* Argument setup for SLJIT_CALL_REG_ARG calls: only the 4th word
   argument needs moving (the first three are already in registers).
   Loads it from its home slot into TMP_REG1 and stores it either at
   [esp+0] (normal call) or above the saved registers at the frame top
   (tail call, `is_tail` path).
   NOTE(review): lossy extract -- the argument-scan loop header, the
   word_arg_count increment and the `if (!is_tail)` line are missing
   from this view. */
947 static sljit_s32
call_reg_arg_with_args(struct sljit_compiler
*compiler
, sljit_s32 arg_types
, sljit_s32 is_tail
)
949 sljit_s32 word_arg_count
= 0;
950 sljit_s32 kept_saveds_count
, offset
;
952 arg_types
>>= SLJIT_ARG_SHIFT
;
955 if ((arg_types
& SLJIT_ARG_MASK
) < SLJIT_ARG_TYPE_F64
)
958 arg_types
>>= SLJIT_ARG_SHIFT
;
961 if (word_arg_count
< 4)
962 return SLJIT_SUCCESS
;
964 EMIT_MOV(compiler
, TMP_REG1
, 0, SLJIT_MEM1(SLJIT_SP
), compiler
->scratches_offset
);
967 return emit_mov(compiler
, SLJIT_MEM1(SLJIT_SP
), 0, TMP_REG1
, 0);
/* Tail call: place it past local_size, return address and the pushed
   callee-saved registers. */
969 kept_saveds_count
= SLJIT_KEPT_SAVEDS_COUNT(compiler
->options
);
970 offset
= compiler
->local_size
+ SSIZE_OF(sw
);
972 if ((compiler
->saveds
> 0 && kept_saveds_count
== 0) || compiler
->scratches
> 11)
973 offset
+= SSIZE_OF(sw
);
974 if ((compiler
->saveds
> 1 && kept_saveds_count
<= 1) || compiler
->scratches
> 10)
975 offset
+= SSIZE_OF(sw
);
976 if ((compiler
->saveds
> 2 && kept_saveds_count
<= 2) || compiler
->scratches
> 9)
977 offset
+= SSIZE_OF(sw
);
979 return emit_mov(compiler
, SLJIT_MEM1(SLJIT_SP
), offset
, TMP_REG1
, 0);
/* Emits a call to a label: dispatches on SLJIT_CALL_RETURN (tail call)
   and SLJIT_CALL_REG_ARG (register-argument convention), sets up the
   outgoing arguments, emits the jump/call, and performs post-call
   cleanup for the normal path.
   NOTE(review): lossy extract -- the `arg_types` parameter line of the
   signature, CHECK_ERROR_PTR and the final `return jump;` statements
   are missing from this view. */
982 SLJIT_API_FUNC_ATTRIBUTE
struct sljit_jump
* sljit_emit_call(struct sljit_compiler
*compiler
, sljit_s32 type
,
985 struct sljit_jump
*jump
;
986 sljit_sw stack_size
= 0;
987 sljit_s32 word_arg_count
;
990 CHECK_PTR(check_sljit_emit_call(compiler
, type
, arg_types
));
/* Tail call: release the frame and turn the call into a jump. */
992 if (type
& SLJIT_CALL_RETURN
) {
993 if ((type
& 0xff) == SLJIT_CALL_REG_ARG
) {
994 PTR_FAIL_IF(call_reg_arg_with_args(compiler
, arg_types
, 1));
995 PTR_FAIL_IF(emit_stack_frame_release(compiler
));
997 SLJIT_SKIP_CHECKS(compiler
);
998 return sljit_emit_jump(compiler
, SLJIT_JUMP
| (type
& SLJIT_REWRITABLE_JUMP
));
1002 PTR_FAIL_IF(tail_call_with_args(compiler
, &stack_size
, arg_types
, SLJIT_IMM
, 0));
1004 SLJIT_SKIP_CHECKS(compiler
);
1006 if (stack_size
== 0)
1007 return sljit_emit_jump(compiler
, SLJIT_JUMP
| (type
& SLJIT_REWRITABLE_JUMP
));
/* Leftover stack: a real call followed by the tail-call epilogue. */
1009 jump
= sljit_emit_jump(compiler
, type
);
1010 PTR_FAIL_IF(jump
== NULL
);
1012 PTR_FAIL_IF(emit_tail_call_end(compiler
, stack_size
));
1016 if ((type
& 0xff) == SLJIT_CALL_REG_ARG
) {
1017 PTR_FAIL_IF(call_reg_arg_with_args(compiler
, arg_types
, 0));
1019 SLJIT_SKIP_CHECKS(compiler
);
1020 return sljit_emit_jump(compiler
, type
);
/* Normal stack-argument call. */
1023 stack_size
= call_get_stack_size(compiler
, arg_types
, &word_arg_count
);
1024 PTR_FAIL_IF(call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
));
1026 SLJIT_SKIP_CHECKS(compiler
);
1027 jump
= sljit_emit_jump(compiler
, type
);
1028 PTR_FAIL_IF(jump
== NULL
);
1030 PTR_FAIL_IF(post_call_with_args(compiler
, arg_types
, stack_size
));
/* Emits an indirect call through src/srcw: same dispatch structure as
   sljit_emit_call (tail vs normal, reg-arg vs stack-arg).  For tail
   calls the target is moved to a safe register before the frame is
   released; for normal calls the scratches/locals offsets are
   temporarily shifted by the outgoing argument size so the ijump's
   own operand resolution sees the moved esp.
   NOTE(review): lossy extract -- CHECK_ERROR, the target-rewrite
   after tail_call_with_args (`src = SLJIT_R0`-style) and some closing
   braces are missing from this view. */
1034 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_icall(struct sljit_compiler
*compiler
, sljit_s32 type
,
1035 sljit_s32 arg_types
,
1036 sljit_s32 src
, sljit_sw srcw
)
1038 sljit_sw stack_size
= 0;
1039 sljit_s32 word_arg_count
;
1042 CHECK(check_sljit_emit_icall(compiler
, type
, arg_types
, src
, srcw
));
1044 if (type
& SLJIT_CALL_RETURN
) {
1045 if ((type
& 0xff) == SLJIT_CALL_REG_ARG
) {
1046 FAIL_IF(call_reg_arg_with_args(compiler
, arg_types
, 1));
/* Memory or saved-register targets die with the frame: copy the
   target into TMP_REG1 before releasing it. */
1048 if ((src
& SLJIT_MEM
) || (src
>= SLJIT_FIRST_SAVED_REG
&& src
<= SLJIT_S0
)) {
1049 ADJUST_LOCAL_OFFSET(src
, srcw
);
1050 CHECK_EXTRA_REGS(src
, srcw
, (void)0);
1052 EMIT_MOV(compiler
, TMP_REG1
, 0, src
, srcw
);
1057 FAIL_IF(emit_stack_frame_release(compiler
));
1059 SLJIT_SKIP_CHECKS(compiler
);
1060 return sljit_emit_ijump(compiler
, SLJIT_JUMP
, src
, srcw
);
1064 FAIL_IF(tail_call_with_args(compiler
, &stack_size
, arg_types
, src
, srcw
));
1066 if (!(src
& SLJIT_IMM
)) {
1071 SLJIT_SKIP_CHECKS(compiler
);
1073 if (stack_size
== 0)
1074 return sljit_emit_ijump(compiler
, SLJIT_JUMP
, src
, srcw
);
1076 FAIL_IF(sljit_emit_ijump(compiler
, type
, src
, srcw
));
1077 return emit_tail_call_end(compiler
, stack_size
);
1080 if ((type
& 0xff) == SLJIT_CALL_REG_ARG
) {
1081 FAIL_IF(call_reg_arg_with_args(compiler
, arg_types
, 0));
1083 SLJIT_SKIP_CHECKS(compiler
);
1084 return sljit_emit_ijump(compiler
, type
, src
, srcw
);
1087 stack_size
= call_get_stack_size(compiler
, arg_types
, &word_arg_count
);
1088 FAIL_IF(call_with_args(compiler
, arg_types
, stack_size
, word_arg_count
));
/* esp was lowered by stack_size; shift the cached offsets so an
   esp-relative `src` operand still resolves correctly, then undo. */
1090 compiler
->scratches_offset
+= stack_size
;
1091 compiler
->locals_offset
+= stack_size
;
1093 SLJIT_SKIP_CHECKS(compiler
);
1094 FAIL_IF(sljit_emit_ijump(compiler
, type
, src
, srcw
));
1096 compiler
->scratches_offset
-= stack_size
;
1097 compiler
->locals_offset
-= stack_size
;
1099 return post_call_with_args(compiler
, arg_types
, stack_size
);
/* Entry stub for fast (sljit internal) calls: pops the return address
   pushed by the caller into `dst` — a single POP for a register
   destination, otherwise a pop-to-memory via emit_x86_instruction.
   NOTE(review): lossy extract -- the `inst` declaration, CHECK_ERROR,
   FAIL_IF/INC_SIZE lines and the POP_rm opcode store for the memory
   path are missing from this view. */
1102 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_emit_fast_enter(struct sljit_compiler
*compiler
, sljit_s32 dst
, sljit_sw dstw
)
1107 CHECK(check_sljit_emit_fast_enter(compiler
, dst
, dstw
));
1108 ADJUST_LOCAL_OFFSET(dst
, dstw
);
1110 CHECK_EXTRA_REGS(dst
, dstw
, (void)0);
1112 if (FAST_IS_REG(dst
)) {
1113 /* Unused dest is possible here. */
1114 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
1118 POP_REG(reg_map
[dst
]);
1119 return SLJIT_SUCCESS
;
/* Memory destination: encode pop [mem]. */
1123 inst
= emit_x86_instruction(compiler
, 1, 0, 0, dst
, dstw
);
1126 return SLJIT_SUCCESS
;
/* Return from a fast call: pushes the saved return address from `src`
   (PUSH reg, or push [mem] via emit_x86_instruction) and then emits
   the near RET that consumes it.
   NOTE(review): lossy extract -- the `inst` declaration, FAIL_IF/
   INC_SIZE lines and the PUSH_rm/RET opcode stores are missing from
   this view. */
1129 static sljit_s32
emit_fast_return(struct sljit_compiler
*compiler
, sljit_s32 src
, sljit_sw srcw
)
1133 CHECK_EXTRA_REGS(src
, srcw
, (void)0);
1135 if (FAST_IS_REG(src
)) {
1136 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1 + 1);
1140 PUSH_REG(reg_map
[src
]);
/* Memory source: encode push [mem]. */
1143 inst
= emit_x86_instruction(compiler
, 1, 0, 0, src
, srcw
);
1148 inst
= (sljit_u8
*)ensure_buf(compiler
, 1 + 1);
1154 return SLJIT_SUCCESS
;
1157 static sljit_s32
skip_frames_before_return(struct sljit_compiler
*compiler
)
1161 /* Don't adjust shadow stack if it isn't enabled. */
1162 if (!cpu_has_shadow_stack())
1163 return SLJIT_SUCCESS
;
1165 SLJIT_ASSERT(compiler
->args_size
>= 0);
1166 SLJIT_ASSERT(compiler
->local_size
> 0);
1168 size
= compiler
->local_size
;
1169 size
+= (1 + (compiler
->scratches
> 9 ? (compiler
->scratches
- 9) : 0)
1170 + (compiler
->saveds
<= 3 ? compiler
->saveds
: 3)) * SSIZE_OF(sw
);
1172 return adjust_shadow_stack(compiler
, SLJIT_MEM1(SLJIT_SP
), size
);