Implement four argument support for emit enter.
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 32-bit arch dependent functions. */

static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
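
/* Note: generate_far_jump_code below emits the long (32 bit displacement) form
   of a jump or call when the short form cannot be used, and leaves a 4 byte
   slot that is either filled immediately from jump->u.target or marked with
   PATCH_MW so it can be patched once the label address is known. */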

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_s32 type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset);
	code_ptr += 4;

	return code_ptr;
}
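
/* Illustrative sketch only (not compiled as part of this file): with four
   argument support a caller could request an entry sequence for a function
   taking four machine word arguments roughly like this, assuming the
   SLJIT_ARG_TYPE_* constants from sljitLir.h and the SLJIT_DEF_SHIFT packing
   used below (argument n occupies bits n * SLJIT_DEF_SHIFT of arg_types and
   the low bits hold the return type):

	sljit_s32 arg_types = SLJIT_ARG_TYPE_SW
		| (SLJIT_ARG_TYPE_SW << (1 * SLJIT_DEF_SHIFT))
		| (SLJIT_ARG_TYPE_SW << (2 * SLJIT_DEF_SHIFT))
		| (SLJIT_ARG_TYPE_SW << (3 * SLJIT_DEF_SHIFT))
		| (SLJIT_ARG_TYPE_SW << (4 * SLJIT_DEF_SHIFT));
	sljit_emit_enter(compiler, 0, arg_types, 3, 3, 0, 0, 0);

   The first three word arguments end up in SLJIT_S0..SLJIT_S2; the fourth one
   stays on the stack and is copied into the local area (see the
   word_arg_count == 4 handling below). */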

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, float_arg_count, args_size, size, types;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_DEF_SHIFT;
	types = arg_types;
	word_arg_count = 0;
	float_arg_count = 0;
	args_size = sizeof(sljit_sw);
	while (types) {
		switch (types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count > 2)
				args_size += sizeof(sljit_sw);
#else
			args_size += sizeof(sljit_sw);
#endif
			break;
		}
		types >>= SLJIT_DEF_SHIFT;
	}

	args_size -= sizeof(sljit_sw);
	compiler->args_size = args_size;
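
	/* At this point args_size holds the number of bytes the caller pushed for
	   the arguments (the return address is not counted, and with fastcall the
	   first two word arguments arrive in registers). The float arguments were
	   loaded above while ESP still pointed just below the return address,
	   before any register is pushed. */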

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);
	PUSH_REG(reg_map[TMP_REG1]);
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size * sizeof(sljit_sw));

	word_arg_count = 0;
	args_size = (size + 1) * sizeof(sljit_sw);
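
	/* Second pass: (size + 1) words skip the registers pushed above plus the
	   return address, so args_size is now the ESP relative offset of the first
	   stack argument. Word arguments are moved into saved registers here. */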
	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			args_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count <= 3) {
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (word_arg_count <= 2)
					break;
#endif
				EMIT_MOV(compiler, SLJIT_S0 + 1 - word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
			}
			args_size += sizeof(sljit_sw);
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}
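
	/* With fastcall the first two word arguments arrive in ECX and EDX, which
	   this port maps to SLJIT_R2 and SLJIT_R1, so they are copied into
	   SLJIT_S0/SLJIT_S1 directly instead of being loaded from the stack. */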
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (word_arg_count > 0)
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
	if (word_arg_count > 1)
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_R1, 0);
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#if defined(__APPLE__)
	/* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif

	compiler->local_size = local_size;
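
	/* On Windows the stack must be grown one page at a time: the code below
	   touches every 4096 byte page of the new frame in order, so the guard
	   page mechanism can commit the stack before ESP is finally lowered. */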

#ifdef _WIN32
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -16;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif

	SLJIT_ASSERT(local_size > 0);
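
	/* When SLJIT_F64_ALIGNMENT is requested, the previous ESP is saved in
	   TMP_REG1, ESP is lowered and rounded down to an 8 byte boundary, and the
	   saved value is stored at [esp + local_size] so sljit_emit_return_void
	   can restore it with a single load. */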

#if !defined(__APPLE__)
	if (options & SLJIT_F64_ALIGNMENT) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0);

		/* Some space might be allocated during sljit_grow_stack() above on WIN32. */
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw)));

#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (compiler->local_size > 1024)
			FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)));
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 6);
		FAIL_IF(!inst);

		INC_SIZE(6);
		inst[0] = GROUP_BINARY_81;
		inst[1] = MOD_REG | AND | reg_map[SLJIT_SP];
		sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1));

		/* The real local size must be used. */
		return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0);
	}
#endif
	FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	if (word_arg_count == 4)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), compiler->locals_offset - sizeof(sljit_sw), TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
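
/* sljit_set_context performs the same frame layout bookkeeping as
   sljit_emit_enter (args_size, stack_tmp_size, saveds/locals offsets and
   local_size) without emitting any code, so later emitters see a consistent
   compiler state. */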

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args_size;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 word_arg_count = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	arg_types >>= SLJIT_DEF_SHIFT;
	args_size = 0;
	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			args_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			args_size += sizeof(sljit_f64);
			break;
		default:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (word_arg_count >= 2)
				args_size += sizeof(sljit_sw);
			word_arg_count++;
#else
			args_size += sizeof(sljit_sw);
#endif
			break;
		}
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	compiler->stack_tmp_size = 2 * sizeof(sljit_sw);

#if !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (scratches > 3)
		compiler->stack_tmp_size = 3 * sizeof(sljit_sw);
#endif

	compiler->saveds_offset = compiler->stack_tmp_size;
	if (scratches > 3)
		compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw);

	compiler->locals_offset = compiler->saveds_offset;

	if (saveds > 3)
		compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw);

	if (options & SLJIT_F64_ALIGNMENT)
		compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1);

#if defined(__APPLE__)
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw);
	compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds;
#else
	if (options & SLJIT_F64_ALIGNMENT)
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1));
	else
		compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1));
#endif
	return SLJIT_SUCCESS;
}
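
/* The epilogue mirrors the prologue: ESP is restored (reloaded from
   [esp + local_size] for SLJIT_F64_ALIGNMENT frames, otherwise adjusted by
   local_size), the saved registers are popped in reverse order, and with
   fastcall a "ret n" pops the stack arguments on behalf of the caller. */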

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_s32 size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	if (compiler->options & SLJIT_F64_ALIGNMENT)
		EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size)
	else
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#else
	FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
		SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
#endif

	size = 2 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		size += 2;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0)
		RET_I16((sljit_u8)compiler->args_size);
	else
		RET();
#else
	RET();
#endif

	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Size contains the flags as well. */
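/* emit_x86_instruction reserves room for one instruction in the form
   [prefixes] [opcode bytes] [ModRM] [SIB] [displacement] [immediate]. The low
   4 bits of 'size' give the opcode length, the remaining bits carry the
   EX86_* flags, and the returned pointer addresses the byte(s) the caller
   still has to fill in (the opcode itself, or the ModRM reg field for
   shift-group instructions). */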
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
	/* We don't support (%ebp). */
	SLJIT_ASSERT(!(b & SLJIT_MEM) || immb || reg_map[b & REG_MASK] != 5);

	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw);
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			b |= TO_OFFS_REG(SLJIT_SP);

		if (b & OFFS_REG_MASK)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= 0x1f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_map[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}
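
	/* Encode the r/m part of the ModRM byte for operand b: mod bits 0x40/0x80
	   select an 8/32 bit displacement, 0x04 in the r/m field requests a SIB
	   byte, and 0x05 with mod 00 means a bare 32 bit address. */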
	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_map[b] : b);
	else if (b & REG_MASK) {
		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3);
			}

			if (immb != 0) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_map[b & REG_MASK] | (reg_map[OFFS_REG(b)] << 3) | (immb << 6);
		}
	}
	else {
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
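
/* Two 32 bit calling conventions are handled here: fastcall (the first two
   word arguments travel in ECX/EDX and the callee pops the remaining stack
   arguments) and cdecl (everything is passed on the stack and the caller
   cleans it up). The helpers below compute the extra stack space needed and
   move the sljit argument registers into place before the call. */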

static sljit_s32 c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			if (word_arg_count > 2)
				stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	return stack_size;
}

static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == sizeof(sljit_sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

		stack_size = 0;
		arg_types >>= SLJIT_DEF_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_DEF_MASK) {
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f32);
				break;
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += sizeof(sljit_f64);
				break;
			default:
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += sizeof(sljit_sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += sizeof(sljit_sw);
				}
				break;
			}

			arg_types >>= SLJIT_DEF_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = XCHG_EAX_r | reg_map[SLJIT_R2];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0];
		}
	}

	return SLJIT_SUCCESS;
}

#endif
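
/* For cdecl calls the scratch area reserved at [esp+0] (stack_tmp_size bytes)
   is reused for outgoing arguments, so short argument lists need no extra ESP
   adjustment; only the amount above stack_tmp_size is allocated (rounded up
   to 16 bytes on macOS to keep the ABI stack alignment). */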

static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count = 0;

	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= compiler->stack_tmp_size)
		return 0;

#if defined(__APPLE__)
	return ((stack_size - compiler->stack_tmp_size + 15) & ~15);
#else
	return stack_size - compiler->stack_tmp_size;
#endif
}

static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;

	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->saveds_offset - sizeof(sljit_sw));

	if (stack_size > 0)
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	return SLJIT_SUCCESS;
}
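
/* post_call_with_args releases any extra argument space and, when the callee
   returns a float/double, moves the value from the x87 top of stack through
   [esp] into SLJIT_FR0 with an SSE2 load, since these conventions return
   floating point values in st(0). */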

static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		FAIL_IF(emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, stack_size));

	if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_DEF_MASK) == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		compiler->saveds_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->saveds_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	compiler->saveds_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->saveds_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}
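
/* Illustrative sketch only (not compiled as part of this file): emitting a
   call to a C helper taking two machine words could look roughly like the
   following, assuming SLJIT_FUNC_OFFSET and the SLJIT_ARG_TYPE_* constants
   from sljitLir.h; helper_function is just a placeholder name:

	sljit_s32 call_types = SLJIT_ARG_TYPE_SW
		| (SLJIT_ARG_TYPE_SW << (1 * SLJIT_DEF_SHIFT))
		| (SLJIT_ARG_TYPE_SW << (2 * SLJIT_DEF_SHIFT));
	sljit_emit_icall(compiler, SLJIT_CALL, call_types,
		SLJIT_IMM, SLJIT_FUNC_OFFSET(helper_function));

   The argument values themselves must already be in SLJIT_R0 and SLJIT_R1;
   the helpers above move them into the stack slots or registers required by
   the selected calling convention. */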

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
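
/* Shadow stack (CET) housekeeping: before the frame is torn down,
   adjust_shadow_stack is given the location of the return address
   ([esp + size] after the computation below) so the shadow stack can be
   brought back in sync; when shadow stacks are disabled this is a no-op. */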

static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 size, saved_size;
	sljit_s32 has_f64_aligment;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack ())
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

#if !defined(__APPLE__)
	has_f64_aligment = compiler->options & SLJIT_F64_ALIGNMENT;
#else
	has_f64_aligment = 0;
#endif

	size = compiler->local_size;
	saved_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) + (compiler->saveds <= 3 ? compiler->saveds : 3)) * sizeof(sljit_uw);
	if (has_f64_aligment) {
		/* mov TMP_REG1, [esp + local_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), size);
		/* mov TMP_REG1, [TMP_REG1 + saved_size]. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), saved_size);
		/* Move return address to [esp]. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, TMP_REG1, 0);
		size = 0;
	} else
		size += saved_size;

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}