Commit subject: Improve sljit ABI calling convention.
Repository: sljit.git — file: sljit_src/sljitNativeX86_32.c
Blob: 6773c71de153843eade564c248f44ee856503def
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 /* x86 32-bit arch dependent functions. */
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
/* Emits a single opcode byte followed by a machine-word-sized immediate.
   Grows the instruction buffer; fails with the compiler error if it cannot. */
33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
35 sljit_u8 *inst;
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(1 + sizeof(sljit_sw));
40 *inst++ = opcode;
41 sljit_unaligned_store_sw(inst, imm);
42 return SLJIT_SUCCESS;
45 /* Size contains the flags as well. */
/* Generic x86-32 instruction encoder.  Reserves and partially fills a
   buffer holding: optional prefix bytes (0xf2/0xf3/0x66), "size" opcode
   bytes (written by the caller through the returned pointer), a ModRM
   byte, an optional SIB byte, an optional displacement and an optional
   immediate.  Operand "a" is the register or immediate operand, "b" is
   the register/memory operand.  Returns NULL on allocation failure. */
46 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
47 /* The register or immediate operand. */
48 sljit_s32 a, sljit_sw imma,
49 /* The general operand (not immediate). */
50 sljit_s32 b, sljit_sw immb)
52 sljit_u8 *inst;
53 sljit_u8 *buf_ptr;
54 sljit_u8 reg_map_b;
55 sljit_uw flags = size;
56 sljit_uw inst_size;
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
66 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
67 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
68 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
/* Low nibble of "size" is the opcode byte count; the upper bits carry
   the EX86_* flags (preserved in "flags" above). */
70 size &= 0xf;
71 inst_size = size;
73 if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
74 inst_size++;
75 if (flags & EX86_PREF_66)
76 inst_size++;
78 /* Calculate size of b. */
79 inst_size += 1; /* mod r/m byte. */
80 if (b & SLJIT_MEM) {
81 if (!(b & REG_MASK))
82 inst_size += sizeof(sljit_sw);
83 else {
84 if (immb != 0 && !(b & OFFS_REG_MASK)) {
85 /* Immediate operand. */
86 if (immb <= 127 && immb >= -128)
87 inst_size += sizeof(sljit_s8);
88 else
89 inst_size += sizeof(sljit_sw);
/* Base register encoding 5 (ebp) cannot be used without a displacement
   in ModRM, so an explicit 8-bit zero displacement is required. */
91 else if (reg_map[b & REG_MASK] == 5) {
92 /* Swap registers if possible. */
93 if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
94 b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
95 else
96 inst_size += sizeof(sljit_s8);
/* Base register encoding 4 (esp) always needs a SIB byte. */
99 if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
100 b |= TO_OFFS_REG(SLJIT_SP);
102 if (b & OFFS_REG_MASK)
103 inst_size += 1; /* SIB byte. */
107 /* Calculate size of a. */
108 if (a & SLJIT_IMM) {
109 if (flags & EX86_BIN_INS) {
110 if (imma <= 127 && imma >= -128) {
111 inst_size += 1;
112 flags |= EX86_BYTE_ARG;
113 } else
114 inst_size += 4;
116 else if (flags & EX86_SHIFT_INS) {
/* Shift counts are taken modulo 32 on x86-32. */
117 imma &= 0x1f;
118 if (imma != 1) {
119 inst_size++;
120 flags |= EX86_BYTE_ARG;
122 } else if (flags & EX86_BYTE_ARG)
123 inst_size++;
124 else if (flags & EX86_HALF_ARG)
125 inst_size += sizeof(short);
126 else
127 inst_size += sizeof(sljit_sw);
129 else
130 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
132 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
133 PTR_FAIL_IF(!inst);
135 /* Encoding the byte. */
136 INC_SIZE(inst_size);
137 if (flags & EX86_PREF_F2)
138 *inst++ = 0xf2;
139 if (flags & EX86_PREF_F3)
140 *inst++ = 0xf3;
141 if (flags & EX86_PREF_66)
142 *inst++ = 0x66;
/* "inst" now points at the opcode area; "buf_ptr" at the ModRM byte. */
144 buf_ptr = inst + size;
146 /* Encode mod/rm byte. */
147 if (!(flags & EX86_SHIFT_INS)) {
148 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
149 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
151 if (a & SLJIT_IMM)
152 *buf_ptr = 0;
153 else if (!(flags & EX86_SSE2_OP1))
154 *buf_ptr = U8(reg_map[a] << 3);
155 else
156 *buf_ptr = U8(a << 3);
158 else {
159 if (a & SLJIT_IMM) {
160 if (imma == 1)
161 *inst = GROUP_SHIFT_1;
162 else
163 *inst = GROUP_SHIFT_N;
164 } else
165 *inst = GROUP_SHIFT_CL;
166 *buf_ptr = 0;
169 if (!(b & SLJIT_MEM)) {
170 *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
171 buf_ptr++;
172 } else if (b & REG_MASK) {
173 reg_map_b = reg_map[b & REG_MASK];
175 if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
176 if (immb != 0 || reg_map_b == 5) {
177 if (immb <= 127 && immb >= -128)
178 *buf_ptr |= 0x40;
179 else
180 *buf_ptr |= 0x80;
183 if (!(b & OFFS_REG_MASK))
184 *buf_ptr++ |= reg_map_b;
185 else {
186 *buf_ptr++ |= 0x04;
187 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
190 if (immb != 0 || reg_map_b == 5) {
191 if (immb <= 127 && immb >= -128)
192 *buf_ptr++ = U8(immb); /* 8 bit displacement. */
193 else {
194 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
195 buf_ptr += sizeof(sljit_sw);
199 else {
/* Scaled-index form: SIB byte with the scale taken from immb's low bits. */
200 if (reg_map_b == 5)
201 *buf_ptr |= 0x40;
203 *buf_ptr++ |= 0x04;
204 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
206 if (reg_map_b == 5)
207 *buf_ptr++ = 0;
210 else {
/* Absolute address: mod=00, rm=101 with a 32-bit displacement. */
211 *buf_ptr++ |= 0x05;
212 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
213 buf_ptr += sizeof(sljit_sw);
216 if (a & SLJIT_IMM) {
217 if (flags & EX86_BYTE_ARG)
218 *buf_ptr = U8(imma);
219 else if (flags & EX86_HALF_ARG)
220 sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
221 else if (!(flags & EX86_SHIFT_INS))
222 sljit_unaligned_store_sw(buf_ptr, imma);
/* For shift instructions the group opcode was already stored at *inst;
   return the byte after it so the caller does not overwrite it. */
225 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
228 /* --------------------------------------------------------------------- */
229 /* Enter / return */
230 /* --------------------------------------------------------------------- */
/* Emits the long (32-bit relative) form of a jump or call and advances
   jump->addr to the offset of the displacement field.  If the target is
   a label (JUMP_LABEL) the displacement is patched later (PATCH_MW);
   otherwise the absolute target is converted to a relative offset now. */
232 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
234 sljit_uw type = jump->flags >> TYPE_SHIFT;
236 if (type == SLJIT_JUMP) {
237 *code_ptr++ = JMP_i32;
238 jump->addr++;
240 else if (type >= SLJIT_FAST_CALL) {
241 *code_ptr++ = CALL_i32;
242 jump->addr++;
244 else {
/* Conditional jumps use the two-byte 0x0f escape encoding. */
245 *code_ptr++ = GROUP_0F;
246 *code_ptr++ = get_jump_code(type);
247 jump->addr += 2;
250 if (jump->flags & JUMP_LABEL)
251 jump->flags |= PATCH_MW;
252 else
/* Relative displacement is computed from the end of the 4-byte field. */
253 sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
254 code_ptr += 4;
256 return code_ptr;
/* Status bits for sljit_emit_enter: the fourth word argument is first
   loaded into TMP_REG1 and later stored either into the R4 stack slot
   (ENTER_TMP_TO_R4) or into a saved-register slot (ENTER_TMP_TO_S). */
259 #define ENTER_TMP_TO_R4 0x00001
260 #define ENTER_TMP_TO_S 0x00002
/* Function prologue: computes the incoming argument area size, pushes
   the used callee-saved registers, allocates the 16-byte aligned local
   area, and moves incoming arguments to their assigned registers or
   stack slots.  On Windows, large local areas are committed page by
   page (stack probing). */
262 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
263 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
264 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
266 sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
267 sljit_s32 size, locals_offset, args_size, types, status;
268 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
269 sljit_u8 *inst;
270 #ifdef _WIN32
271 sljit_s32 r2_offset = -1;
272 #endif
274 CHECK_ERROR();
275 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
276 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
278 /* Emit ENDBR32 at function entry if needed. */
279 FAIL_IF(emit_endbranch(compiler));
281 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
283 arg_types >>= SLJIT_ARG_SHIFT;
284 word_arg_count = 0;
285 status = 0;
287 if (options & SLJIT_ENTER_REG_ARG) {
/* Register-argument convention: only a fourth (or later) word argument
   occupies the stack. */
288 args_size = 0;
290 while (arg_types) {
291 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
292 word_arg_count++;
293 if (word_arg_count >= 4) {
294 status |= ENTER_TMP_TO_R4;
295 args_size = SSIZE_OF(sw);
299 arg_types >>= SLJIT_ARG_SHIFT;
301 } else {
/* Stack-argument (cdecl-like) convention: arguments start just above
   the return address, hence the initial SSIZE_OF(sw) offset. */
302 types = arg_types;
303 saved_arg_count = 0;
304 float_arg_count = 0;
305 args_size = SSIZE_OF(sw);
306 while (types) {
307 switch (types & SLJIT_ARG_MASK) {
308 case SLJIT_ARG_TYPE_F64:
309 float_arg_count++;
310 FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
311 args_size += SSIZE_OF(f64);
312 break;
313 case SLJIT_ARG_TYPE_F32:
314 float_arg_count++;
315 FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
316 args_size += SSIZE_OF(f32);
317 break;
318 default:
319 word_arg_count++;
321 if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
322 saved_arg_count++;
/* The fourth word argument has no register of its own: keep it in
   TMP_REG1 and clear its type so the second pass below skips it. */
324 if (word_arg_count == 4) {
325 if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
326 status |= ENTER_TMP_TO_R4;
327 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
328 } else if (saved_arg_count == 4) {
329 status |= ENTER_TMP_TO_S;
330 arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
334 args_size += SSIZE_OF(sw);
335 break;
337 types >>= SLJIT_ARG_SHIFT;
/* Remove the return-address slot counted in the initial offset. */
340 args_size -= SSIZE_OF(sw);
343 compiler->args_size = args_size;
345 /* [esp+0] for saving temporaries and function calls. */
346 locals_offset = 2 * SSIZE_OF(sw);
348 if (scratches >= 3)
349 locals_offset = 4 * SSIZE_OF(sw);
351 compiler->scratches_offset = locals_offset;
353 if (scratches > 3)
354 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
356 if (saveds > 3)
357 locals_offset += (saveds - 3) * SSIZE_OF(sw);
359 compiler->locals_offset = locals_offset;
/* Number of registers pushed by the prologue; TMP_REG1 is also saved
   unless the register-argument convention is in effect. */
361 size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
362 if (!(options & SLJIT_ENTER_REG_ARG))
363 size++;
365 if (size != 0) {
366 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
367 FAIL_IF(!inst);
369 INC_SIZE((sljit_uw)size);
371 if (!(options & SLJIT_ENTER_REG_ARG))
372 PUSH_REG(reg_map[TMP_REG1]);
374 if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)
375 PUSH_REG(reg_map[SLJIT_S2]);
376 if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
377 PUSH_REG(reg_map[SLJIT_S1]);
378 if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
379 PUSH_REG(reg_map[SLJIT_S0]);
/* "size" becomes the pushed-register area size in bytes. */
381 size *= SSIZE_OF(sw);
384 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
385 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);
/* Include the return address when aligning the frame to 16 bytes. */
387 size += SSIZE_OF(sw);
389 local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
390 compiler->local_size = local_size;
/* Second pass: load the remaining word arguments into their registers. */
392 word_arg_count = 0;
393 saved_arg_count = 0;
394 args_size = size;
395 while (arg_types) {
396 switch (arg_types & SLJIT_ARG_MASK) {
397 case SLJIT_ARG_TYPE_F64:
398 args_size += SSIZE_OF(f64);
399 break;
400 case SLJIT_ARG_TYPE_F32:
401 args_size += SSIZE_OF(f32);
402 break;
403 default:
404 word_arg_count++;
405 SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));
407 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
408 #ifdef _WIN32
/* Defer this load: R2 is clobbered by the stack-probing loop below. */
409 if (word_arg_count == 3 && local_size > 4 * 4096)
410 r2_offset = local_size + args_size;
411 else
412 #endif
413 EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
415 } else {
416 EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
417 saved_arg_count++;
420 args_size += SSIZE_OF(sw);
421 break;
423 arg_types >>= SLJIT_ARG_SHIFT;
426 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
428 #ifdef _WIN32
429 SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);
/* Touch each 4K page in order so Windows commits the guard pages. */
431 if (local_size > 4096) {
432 if (local_size <= 4 * 4096) {
433 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
435 if (local_size > 2 * 4096)
436 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
437 if (local_size > 3 * 4096)
438 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
440 else {
441 if (options & SLJIT_ENTER_REG_ARG) {
442 SLJIT_ASSERT(r2_offset == -1);
444 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));
445 FAIL_IF(!inst);
446 INC_SIZE(1);
447 PUSH_REG(reg_map[SLJIT_R2]);
449 local_size -= SSIZE_OF(sw);
450 r2_offset = local_size;
/* Probe local_size >> 12 pages with a LOOP, one page per iteration. */
453 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);
455 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
456 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
458 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
459 FAIL_IF(!inst);
461 INC_SIZE(2);
462 inst[0] = LOOP_i8;
463 inst[1] = (sljit_u8)-16;
464 local_size &= 0xfff;
468 if (local_size > 0) {
469 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
470 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
/* Reload R2 if its argument load was deferred above. */
473 if (r2_offset != -1)
474 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
476 #else /* !_WIN32 */
478 SLJIT_ASSERT(local_size > 0);
480 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
482 #endif /* _WIN32 */
/* Spill saved word arguments beyond the third into their stack slots. */
484 locals_offset -= SSIZE_OF(sw);
485 kept_saveds_count = SLJIT_R3 - kept_saveds_count;
487 while (saved_arg_count > 3) {
488 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), locals_offset, kept_saveds_count, 0);
489 kept_saveds_count++;
490 locals_offset -= SSIZE_OF(sw);
491 saved_arg_count--;
/* Finally store the fourth word argument kept in TMP_REG1. */
494 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
495 size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : locals_offset;
496 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
499 return SLJIT_SUCCESS;
/* Records the frame layout (args_size, scratches_offset, locals_offset,
   local_size) without emitting any code; mirrors the computations in
   sljit_emit_enter and must stay in sync with it. */
502 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
503 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
504 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
506 sljit_s32 args_size, locals_offset;
508 CHECK_ERROR();
509 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
510 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
512 arg_types >>= SLJIT_ARG_SHIFT;
513 args_size = 0;
515 if (!(options & SLJIT_ENTER_REG_ARG)) {
516 while (arg_types) {
517 switch (arg_types & SLJIT_ARG_MASK) {
518 case SLJIT_ARG_TYPE_F64:
519 args_size += SSIZE_OF(f64);
520 break;
521 case SLJIT_ARG_TYPE_F32:
522 args_size += SSIZE_OF(f32);
523 break;
524 default:
525 args_size += SSIZE_OF(sw);
526 break;
528 arg_types >>= SLJIT_ARG_SHIFT;
532 compiler->args_size = args_size;
534 /* [esp+0] for saving temporaries and function calls. */
535 locals_offset = 2 * SSIZE_OF(sw);
537 if (scratches >= 3)
538 locals_offset = 4 * SSIZE_OF(sw);
540 compiler->scratches_offset = locals_offset;
542 if (scratches > 3)
543 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
545 if (saveds > 3)
546 locals_offset += (saveds - 3) * SSIZE_OF(sw);
548 compiler->locals_offset = locals_offset;
/* "saveds" is reused below as the pushed-register area size in bytes
   (plus the return address) for the 16-byte alignment computation. */
550 saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
552 if (!(options & SLJIT_ENTER_REG_ARG))
553 saveds += SSIZE_OF(sw);
555 compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
556 return SLJIT_SUCCESS;
/* Epilogue helper: releases the local area and pops the registers that
   were pushed by sljit_emit_enter, in reverse order. */
559 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
561 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
562 sljit_s32 saveds;
563 sljit_uw size;
564 sljit_u8 *inst;
566 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
568 size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
569 (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);
571 if (!(compiler->options & SLJIT_ENTER_REG_ARG))
572 size++;
574 if (size == 0)
575 return SLJIT_SUCCESS;
577 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
578 FAIL_IF(!inst);
580 INC_SIZE(size);
582 saveds = compiler->saveds;
/* Pop order is the exact reverse of the pushes in sljit_emit_enter. */
584 if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
585 POP_REG(reg_map[SLJIT_S0]);
586 if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
587 POP_REG(reg_map[SLJIT_S1]);
588 if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
589 POP_REG(reg_map[SLJIT_S2]);
591 if (!(compiler->options & SLJIT_ENTER_REG_ARG))
592 POP_REG(reg_map[TMP_REG1]);
594 return SLJIT_SUCCESS;
/* Emits the full epilogue followed by a RET instruction. */
597 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
599 sljit_u8 *inst;
601 CHECK_ERROR();
602 CHECK(check_sljit_emit_return_void(compiler));
604 SLJIT_ASSERT(compiler->args_size >= 0);
605 SLJIT_ASSERT(compiler->local_size > 0);
607 FAIL_IF(emit_stack_frame_release(compiler));
609 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
610 FAIL_IF(!inst);
611 INC_SIZE(1);
612 RET();
613 return SLJIT_SUCCESS;
616 /* --------------------------------------------------------------------- */
617 /* Call / return instructions */
618 /* --------------------------------------------------------------------- */
/* Computes the extra stack space (16-byte aligned) a call needs for its
   outgoing arguments beyond the scratches area already reserved by the
   prologue.  Optionally reports the number of word arguments. */
620 static sljit_s32 call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
622 sljit_sw stack_size = 0;
623 sljit_s32 word_arg_count = 0;
625 arg_types >>= SLJIT_ARG_SHIFT;
627 while (arg_types) {
628 switch (arg_types & SLJIT_ARG_MASK) {
629 case SLJIT_ARG_TYPE_F64:
630 stack_size += SSIZE_OF(f64);
631 break;
632 case SLJIT_ARG_TYPE_F32:
633 stack_size += SSIZE_OF(f32);
634 break;
635 default:
636 word_arg_count++;
637 stack_size += SSIZE_OF(sw);
638 break;
641 arg_types >>= SLJIT_ARG_SHIFT;
644 if (word_arg_count_ptr)
645 *word_arg_count_ptr = word_arg_count;
/* Arguments that fit into the preallocated scratches area need no
   extra esp adjustment. */
647 if (stack_size <= compiler->scratches_offset)
648 return 0;
650 return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf);
/* Stores the outgoing call arguments (currently held in word / SSE2
   registers) onto the stack before a call.  A fourth word argument has
   no dedicated register, so it is fetched from its scratches-area slot
   through TMP_REG1 first. */
653 static sljit_s32 call_with_args(struct sljit_compiler *compiler,
654 sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
656 sljit_s32 float_arg_count = 0;
/* NOTE(review): "inst" appears unused in the visible code of this
   function - verify against the full source. */
657 sljit_u8 *inst;
659 if (word_arg_count >= 4)
660 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
662 if (stack_size > 0)
663 BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
665 stack_size = 0;
666 word_arg_count = 0;
667 arg_types >>= SLJIT_ARG_SHIFT;
669 while (arg_types) {
670 switch (arg_types & SLJIT_ARG_MASK) {
671 case SLJIT_ARG_TYPE_F64:
672 float_arg_count++;
673 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
674 stack_size += SSIZE_OF(f64);
675 break;
676 case SLJIT_ARG_TYPE_F32:
677 float_arg_count++;
678 FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
679 stack_size += SSIZE_OF(f32);
680 break;
681 default:
682 word_arg_count++;
683 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
684 stack_size += SSIZE_OF(sw);
685 break;
688 arg_types >>= SLJIT_ARG_SHIFT;
691 return SLJIT_SUCCESS;
/* Cleans up after a call: releases the argument area and, for calls
   returning a float/double, moves the x87 return value (st0) through
   [esp] into the SSE2 register SLJIT_FR0. */
694 static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
695 sljit_s32 arg_types, sljit_s32 stack_size)
697 sljit_u8 *inst;
698 sljit_s32 single;
700 if (stack_size > 0)
701 BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);
703 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
704 return SLJIT_SUCCESS;
706 single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);
708 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
709 FAIL_IF(!inst);
710 INC_SIZE(3);
/* fstps/fstpl [esp]: ModRM with reg=3 (fstp) and rm=4 (SIB follows);
   the SIB byte selects esp as base with no index register. */
711 inst[0] = single ? FSTPS : FSTPD;
712 inst[1] = (0x03 << 3) | 0x04;
713 inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];
715 return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
/* Prepares a tail call: moves the outgoing arguments over the current
   frame's incoming argument area, restores the saved registers, and
   reports through *extra_space any stack bytes the jump target must
   release itself (0 when the frame could be fully reused). */
718 static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
719 sljit_s32 *extra_space, sljit_s32 arg_types,
720 sljit_s32 src, sljit_sw srcw)
722 sljit_sw args_size, saved_regs_size;
723 sljit_sw types, word_arg_count, float_arg_count;
724 sljit_sw stack_size, prev_stack_size, min_size, offset;
725 sljit_sw word_arg4_offset;
726 sljit_u8 r2_offset = 0;
727 sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
728 sljit_u8* inst;
730 ADJUST_LOCAL_OFFSET(src, srcw);
731 CHECK_EXTRA_REGS(src, srcw, (void)0);
733 saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
734 + (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);
/* First pass: record the argument types in reverse order in "types"
   and measure the outgoing argument area. */
736 word_arg_count = 0;
737 float_arg_count = 0;
738 arg_types >>= SLJIT_ARG_SHIFT;
739 types = 0;
740 args_size = 0;
742 while (arg_types != 0) {
743 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
745 switch (arg_types & SLJIT_ARG_MASK) {
746 case SLJIT_ARG_TYPE_F64:
747 args_size += SSIZE_OF(f64);
748 float_arg_count++;
749 break;
750 case SLJIT_ARG_TYPE_F32:
751 args_size += SSIZE_OF(f32);
752 float_arg_count++;
753 break;
754 default:
755 word_arg_count++;
756 args_size += SSIZE_OF(sw);
757 break;
759 arg_types >>= SLJIT_ARG_SHIFT;
/* Fast path: the new arguments fit into the current incoming argument
   area, so the caller's frame can be reused completely. */
762 if (args_size <= compiler->args_size) {
763 *extra_space = 0;
764 stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
766 offset = stack_size + compiler->local_size;
768 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
769 if (word_arg_count >= 1) {
/* R0 is spilled to [esp] so it can hold the jump target. */
770 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
771 r2_offset = sizeof(sljit_sw);
773 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
/* Copy arguments top-down ("types" holds them in reverse order). */
776 while (types != 0) {
777 switch (types & SLJIT_ARG_MASK) {
778 case SLJIT_ARG_TYPE_F64:
779 offset -= SSIZE_OF(f64);
780 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
781 float_arg_count--;
782 break;
783 case SLJIT_ARG_TYPE_F32:
784 offset -= SSIZE_OF(f32);
/* NOTE(review): the "single" argument is 0 (double) here although the
   slot is only SSIZE_OF(f32) bytes; call_with_args passes 1 for F32.
   Suspected copy-paste issue - verify against upstream sljit. */
785 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
786 float_arg_count--;
787 break;
788 default:
789 switch (word_arg_count) {
790 case 1:
791 offset -= SSIZE_OF(sw);
792 if (r2_offset != 0) {
/* The first argument was saved to [esp] above; reload via R2. */
793 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
794 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
795 } else
796 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
797 break;
798 case 2:
799 offset -= SSIZE_OF(sw);
800 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
801 break;
802 case 3:
803 offset -= SSIZE_OF(sw);
804 break;
805 case 4:
806 offset -= SSIZE_OF(sw);
807 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
808 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
809 break;
811 word_arg_count--;
812 break;
814 types >>= SLJIT_ARG_SHIFT;
817 return emit_stack_frame_release(compiler);
/* Slow path: the outgoing arguments are larger than the incoming area;
   grow the frame if needed and leave *extra_space for cleanup after
   the jump. */
820 stack_size = args_size + SSIZE_OF(sw);
822 if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
823 r2_offset = SSIZE_OF(sw);
824 stack_size += SSIZE_OF(sw);
827 if (word_arg_count >= 3)
828 stack_size += SSIZE_OF(sw);
830 prev_stack_size = SSIZE_OF(sw) + saved_regs_size;
831 min_size = prev_stack_size + compiler->local_size;
833 word_arg4_offset = compiler->scratches_offset;
835 if (stack_size > min_size) {
836 BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
/* esp moved: rebase a stack-relative target and the R4 slot offset. */
837 if (src == SLJIT_MEM1(SLJIT_SP))
838 srcw += stack_size - min_size;
839 word_arg4_offset += stack_size - min_size;
841 else
842 stack_size = min_size;
844 if (word_arg_count >= 3) {
845 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
847 if (word_arg_count >= 4)
848 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
851 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
852 if (word_arg_count >= 1) {
853 SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
854 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
856 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
859 /* Restore saved registers. */
860 offset = stack_size - 2 * SSIZE_OF(sw);
861 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
/* NOTE(review): unlike emit_stack_frame_release, the S2 condition below
   does not consider kept_saveds_count - confirm this is intentional. */
863 if (compiler->saveds > 2 || compiler->scratches > 9) {
864 offset -= SSIZE_OF(sw);
865 EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
867 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
868 offset -= SSIZE_OF(sw);
869 EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
871 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
872 offset -= SSIZE_OF(sw);
873 EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
876 /* Copy fourth argument and return address. */
877 offset = stack_size - SSIZE_OF(sw);
878 *extra_space = args_size;
880 if (word_arg_count >= 4) {
881 offset -= SSIZE_OF(sw);
882 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
885 while (types != 0) {
886 switch (types & SLJIT_ARG_MASK) {
887 case SLJIT_ARG_TYPE_F64:
888 offset -= SSIZE_OF(f64);
889 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
890 float_arg_count--;
891 break;
892 case SLJIT_ARG_TYPE_F32:
893 offset -= SSIZE_OF(f32);
/* NOTE(review): same suspected F32/F64 mismatch as in the fast path
   above (the "single" argument is 0). */
894 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
895 float_arg_count--;
896 break;
897 default:
898 switch (word_arg_count) {
899 case 1:
900 offset -= SSIZE_OF(sw);
901 if (r2_offset != 0) {
902 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
903 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
904 } else
905 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
906 break;
907 case 2:
908 offset -= SSIZE_OF(sw);
909 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
910 break;
911 case 3:
912 offset -= SSIZE_OF(sw);
913 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
914 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
915 break;
917 word_arg_count--;
918 break;
920 types >>= SLJIT_ARG_SHIFT;
923 SLJIT_ASSERT(offset >= 0);
925 if (offset == 0)
926 return SLJIT_SUCCESS;
928 BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
929 return SLJIT_SUCCESS;
/* Tail-call fallback path: releases the remaining "extra_space" bytes
   after the callee returns, then returns to the original caller. */
932 static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
934 /* Called when stack consumption cannot be reduced to 0. */
935 sljit_u8 *inst;
937 BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
939 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
940 FAIL_IF(!inst);
941 INC_SIZE(1);
942 RET();
944 return SLJIT_SUCCESS;
/* Helper for SLJIT_CALL_REG_ARG calls: when a fourth word argument
   exists, copies it from the scratches area into the slot the callee
   expects ([esp] for a normal call; above the return address and saved
   registers of the soon-to-be-released frame for a tail call). */
947 static sljit_s32 call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 is_tail)
949 sljit_s32 word_arg_count = 0;
950 sljit_s32 kept_saveds_count, offset;
952 arg_types >>= SLJIT_ARG_SHIFT;
954 while (arg_types) {
955 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
956 word_arg_count++;
958 arg_types >>= SLJIT_ARG_SHIFT;
961 if (word_arg_count < 4)
962 return SLJIT_SUCCESS;
964 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
966 if (!is_tail)
967 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
/* Tail call: skip the locals, the return address and every register
   that emit_stack_frame_release will pop. */
969 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
970 offset = compiler->local_size + SSIZE_OF(sw);
972 if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
973 offset += SSIZE_OF(sw);
974 if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
975 offset += SSIZE_OF(sw);
976 if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
977 offset += SSIZE_OF(sw);
979 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
/* Emits a direct call; handles the register-argument convention, plain
   calls, and tail calls (SLJIT_CALL_RETURN).  Returns the jump record,
   or NULL on failure. */
982 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
983 sljit_s32 arg_types)
985 struct sljit_jump *jump;
986 sljit_sw stack_size = 0;
987 sljit_s32 word_arg_count;
989 CHECK_ERROR_PTR();
990 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
992 if (type & SLJIT_CALL_RETURN) {
993 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
994 PTR_FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 1));
995 PTR_FAIL_IF(emit_stack_frame_release(compiler));
997 SLJIT_SKIP_CHECKS(compiler);
998 return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
1001 stack_size = type;
1002 PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));
1004 SLJIT_SKIP_CHECKS(compiler);
/* stack_size == 0: the frame was fully released, a plain jump works. */
1006 if (stack_size == 0)
1007 return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
/* Otherwise emit a call and release the leftover bytes afterwards. */
1009 jump = sljit_emit_jump(compiler, type);
1010 PTR_FAIL_IF(jump == NULL);
1012 PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
1013 return jump;
1016 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
1017 PTR_FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 0));
1019 SLJIT_SKIP_CHECKS(compiler);
1020 return sljit_emit_jump(compiler, type);
1023 stack_size = call_get_stack_size(compiler, arg_types, &word_arg_count);
1024 PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count));
1026 SLJIT_SKIP_CHECKS(compiler);
1027 jump = sljit_emit_jump(compiler, type);
1028 PTR_FAIL_IF(jump == NULL);
1030 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
1031 return jump;
/* Emits an indirect call (through a register/memory/immediate operand);
   the same three cases as sljit_emit_call apply. */
1034 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
1035 sljit_s32 arg_types,
1036 sljit_s32 src, sljit_sw srcw)
1038 sljit_sw stack_size = 0;
1039 sljit_s32 word_arg_count;
1041 CHECK_ERROR();
1042 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
1044 if (type & SLJIT_CALL_RETURN) {
1045 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
1046 FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 1));
/* Frame-relative or saved-register sources must be copied to TMP_REG1
   before the frame is released below. */
1048 if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0)) {
1049 ADJUST_LOCAL_OFFSET(src, srcw);
1050 CHECK_EXTRA_REGS(src, srcw, (void)0);
1052 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1053 src = TMP_REG1;
1054 srcw = 0;
1057 FAIL_IF(emit_stack_frame_release(compiler));
1059 SLJIT_SKIP_CHECKS(compiler);
1060 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1063 stack_size = type;
1064 FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));
/* tail_call_with_args moved any non-immediate target into R0. */
1066 if (!(src & SLJIT_IMM)) {
1067 src = SLJIT_R0;
1068 srcw = 0;
1071 SLJIT_SKIP_CHECKS(compiler);
1073 if (stack_size == 0)
1074 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1076 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1077 return emit_tail_call_end(compiler, stack_size);
1080 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
1081 FAIL_IF(call_reg_arg_with_args(compiler, arg_types, 0));
1083 SLJIT_SKIP_CHECKS(compiler);
1084 return sljit_emit_ijump(compiler, type, src, srcw);
1087 stack_size = call_get_stack_size(compiler, arg_types, &word_arg_count);
1088 FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count));
/* esp moved by stack_size: temporarily rebase the cached offsets so a
   stack-based "src" operand still resolves correctly inside ijump. */
1090 compiler->scratches_offset += stack_size;
1091 compiler->locals_offset += stack_size;
1093 SLJIT_SKIP_CHECKS(compiler);
1094 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1096 compiler->scratches_offset -= stack_size;
1097 compiler->locals_offset -= stack_size;
1099 return post_call_with_args(compiler, arg_types, stack_size);
/* Pops the return address (pushed by the matching fast call) into dst,
   which may be a register or a memory operand. */
1102 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1104 sljit_u8 *inst;
1106 CHECK_ERROR();
1107 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1108 ADJUST_LOCAL_OFFSET(dst, dstw);
1110 CHECK_EXTRA_REGS(dst, dstw, (void)0);
1112 if (FAST_IS_REG(dst)) {
1113 /* Unused dest is possible here. */
1114 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1115 FAIL_IF(!inst);
1117 INC_SIZE(1);
1118 POP_REG(reg_map[dst]);
1119 return SLJIT_SUCCESS;
1122 /* Memory. */
1123 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1124 FAIL_IF(!inst);
1125 *inst++ = POP_rm;
1126 return SLJIT_SUCCESS;
/* Pushes the return address held in "src" back onto the stack and then
   executes RET, returning through it. */
1129 static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1131 sljit_u8 *inst;
1133 CHECK_EXTRA_REGS(src, srcw, (void)0);
1135 if (FAST_IS_REG(src)) {
/* Register source: push reg and ret fit in one reserved chunk. */
1136 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
1137 FAIL_IF(!inst);
1139 INC_SIZE(1 + 1);
1140 PUSH_REG(reg_map[src]);
1142 else {
/* Memory source: push r/m via the GROUP_FF opcode. */
1143 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
1144 FAIL_IF(!inst);
1145 *inst++ = GROUP_FF;
1146 *inst |= PUSH_rm;
1148 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1149 FAIL_IF(!inst);
1150 INC_SIZE(1);
1153 RET();
1154 return SLJIT_SUCCESS;
/* Shadow-stack (CET) support: before returning, unwinds the shadow
   stack past the current frame (local area plus pushed registers). */
1157 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1159 sljit_sw size;
1161 /* Don't adjust shadow stack if it isn't enabled. */
1162 if (!cpu_has_shadow_stack())
1163 return SLJIT_SUCCESS;
1165 SLJIT_ASSERT(compiler->args_size >= 0);
1166 SLJIT_ASSERT(compiler->local_size > 0);
1168 size = compiler->local_size;
/* NOTE(review): unlike emit_stack_frame_release, this size does not
   subtract SLJIT_KEPT_SAVEDS_COUNT - confirm against upstream sljit. */
1169 size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1170 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1172 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);