Improve sljit ABI calling convention
[sljit.git] / sljit_src / sljitNativeX86_64.c

/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */
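
/* Notes on the encodings used below: an x86-64 instruction is built as
   [legacy prefixes] [REX] opcode [ModRM] [SIB] [displacement] [immediate].
   The REX prefix extends the encoding: REX_W selects 64 bit operand size,
   while REX_R, REX_X and REX_B supply the fourth (high) bit of the ModRM
   reg field, the SIB index and the ModRM rm / SIB base fields, which is
   how r8-r15 and xmm8-xmm15 are reached. */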

static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
	return SLJIT_SUCCESS;
}
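
/* A minimal sketch of the usual caller pattern (the opcode and operand
   names vary per emitter; dst_reg here stands for any allocated register):
   emit_x86_instruction() reserves the whole encoded instruction and
   returns a pointer to its opcode byte(s), which the caller fills in:

     inst = emit_x86_instruction(compiler, 1, dst_reg, 0, src, srcw);
     FAIL_IF(!inst);
     *inst = MOV_r_rm;
*/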
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
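
	/* The size argument is overloaded: its low nibble is the opcode length
	   in bytes, the upper bits are the EX86_* flags saved into `flags`
	   above. */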
	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
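
	/* A memory operand may additionally need a SIB byte and an 8 or 32 bit
	   displacement; the branches below add exactly the bytes the selected
	   addressing form requires, and pick up REX_B/REX_X for extended
	   base/index registers. */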
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5) {
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	}
	else if (freg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
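
	/* buf_ptr is positioned after the still unwritten opcode byte(s): the
	   operand bytes are encoded here, while the caller stores the opcode
	   through the returned pointer. */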
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}
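
	/* ModRM mod bits: 0x00 means no displacement, 0x40 an 8 bit and 0x80 a
	   32 bit displacement; rm value 0x04 escapes to a SIB byte, and a base
	   of rbp/r13 (lmap value 5) always requires an explicit displacement. */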
	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}

/* --------------------------------------------------------------------- */
/*  Enter / return                                                       */
/* --------------------------------------------------------------------- */

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

	/* The relative jump below is specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
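
	/* The sequence emitted below is: [inverted Jcc to skip it,]
	   MOV TMP_REG2, addr; JMP/CALL TMP_REG2. The skip offsets are the
	   length of the mov (REX plus opcode byte plus a 4 or 8 byte
	   immediate, i.e. 6 or 10 bytes) plus the 3 byte indirect jump. */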
	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
		*code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
	}

	*code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr;
}
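
/* put_label loads are emitted pessimistically as 64 bit immediate moves;
   once the highest label address is known to fit in 32 bits, the code
   below rewrites the sequence in place into the shorter 32 bit forms. */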
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	if (max_label > HALFWORD_MAX) {
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags == 0) {
		/* Destination is register. */
		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);

		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

		if ((code_ptr[0] & 0x07) != 0) {
			code_ptr[0] = U8(code_ptr[0] & ~0x08);
			code_ptr += 2 + sizeof(sljit_s32);
		}
		else {
			code_ptr[0] = code_ptr[1];
			code_ptr += 1 + sizeof(sljit_s32);
		}

		put_label->addr = (sljit_uw)code_ptr;
		return code_ptr;
	}

	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
		code_ptr += 2 + sizeof(sljit_uw);
		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	}

	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);

	code_ptr[0] = U8(code_ptr[0] & ~0x4);
	code_ptr[1] = MOV_rm_i32;
	code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3));

	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
	put_label->addr = (sljit_uw)code_ptr;
	put_label->flags = 0;
	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw size;
	sljit_s32 word_arg_count = 0;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
	sljit_s32 saved_float_regs_offset = 0;
	sljit_s32 float_arg_count = 0;
#endif /* _WIN64 */
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	/* Emit ENDBR64 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	compiler->mode32 = 0;

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);

	if (saved_float_regs_size > 0) {
		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
		local_size = saved_float_regs_offset + saved_float_regs_size;
	}
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	arg_types >>= SLJIT_ARG_SHIFT;
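
	/* Move the incoming arguments into their sljit locations. The switches
	   below map the argument position to the sljit register that aliases
	   the corresponding ABI argument register: rdi, rsi, rdx, rcx on
	   System V, rcx, rdx, r8, r9 on Win64 (where integer and float
	   arguments consume the same position slots). */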
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			tmp = 0;
#ifndef _WIN64
			switch (word_arg_count) {
			case 0:
				tmp = SLJIT_R2;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = TMP_REG1;
				break;
			default:
				tmp = SLJIT_R3;
				break;
			}
#else /* _WIN64 */
			switch (word_arg_count + float_arg_count) {
			case 0:
				tmp = SLJIT_R3;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = SLJIT_R2;
				break;
			default:
				tmp = TMP_REG1;
				break;
			}
#endif /* _WIN64 */

			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
				if (tmp != SLJIT_R0 + word_arg_count)
					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
				saved_arg_count++;
			}
			word_arg_count++;
		} else {
#ifdef _WIN64
			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
			float_arg_count++;
			if (float_arg_count != float_arg_count + word_arg_count)
				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
					float_arg_count, float_arg_count + word_arg_count, 0));
#endif /* _WIN64 */
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	compiler->local_size = local_size;
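
	/* On Windows larger frames must be committed page by page: the loads
	   below touch every 4 KiB page of the new area so the guard page
	   mechanism can extend the stack before rsp is moved past it. */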
#ifdef _WIN64
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);

			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
			BINARY_IMM32(SUB, 1, TMP_REG1, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_u8)-21;

			local_size &= 0xfff;
		}

		if (local_size > 0)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif /* _WIN64 */

	if (local_size > 0)
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#ifdef _WIN64
	if (saved_float_regs_size > 0) {
		compiler->mode32 = 1;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_xm_x;
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_xm_x;
			saved_float_regs_offset += 16;
		}
	}
#endif /* _WIN64 */

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
#endif /* _WIN64 */

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);

	if (saved_float_regs_size > 0)
		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_s32 i, tmp;
	sljit_u8 *inst;
#ifdef _WIN64
	sljit_s32 saved_float_regs_offset;
	sljit_s32 fscratches = compiler->fscratches;
	sljit_s32 fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

#ifdef _WIN64
	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);

	if (saved_float_regs_offset > 0) {
		compiler->mode32 = 1;
		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_x_xm;
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_x_xm;
			saved_float_regs_offset += 16;
		}
	}
#endif /* _WIN64 */
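
	/* Release the locals with ADD rsp, imm: GROUP_BINARY_83 takes a sign
	   extended 8 bit immediate and GROUP_BINARY_81 a 32 bit one; rm value
	   4 in the mod/reg/rm byte selects rsp. */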
	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = U8(compiler->local_size);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	FAIL_IF(emit_stack_frame_release(compiler));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#ifndef _WIN64
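
/* On System V the first three word arguments belong in rdi, rsi and rdx.
   With the register mapping asserted below SLJIT_R1 is already rsi, so
   only the first argument (rdi, aliased by SLJIT_R2) and the third
   (rdx, aliased by TMP_REG1) may need to be moved. */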
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (word_arg_count >= 3) {
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}

#else /* _WIN64 */
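
/* On Win64 the first four arguments travel in rcx, rdx, r8 and r9 (or
   xmm0-xmm3 for floats), and every argument consumes a position
   regardless of its type; word_arg_regs maps an argument position to the
   sljit register aliasing the matching integer argument register. */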
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			arg_count++;
			float_arg_count++;

			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#endif /* _WIN64 */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	compiler->mode32 = 0;

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	compiler->mode32 = 0;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler));
	}

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(call_with_args(compiler, arg_types, &src));

	if (type & SLJIT_CALL_RETURN)
		type = SLJIT_JUMP;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */
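
/* A 32 bit operation implicitly zero extends its result to 64 bits on
   x86-64, so the unsigned case is a plain 32 bit mov; only the signed
   case needs the explicit MOVSXD instruction. */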
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}
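
/* Shadow stack (Intel CET) support: when a return skips over released
   stack frames, the shadow stack must be unwound by the same amount.
   The size computed below mirrors the frame layout built by
   sljit_emit_enter. */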
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 tmp, size;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack())
		return SLJIT_SUCCESS;

	size = compiler->local_size;
	tmp = compiler->scratches;
	if (tmp >= SLJIT_FIRST_SAVED_REG)
		size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	if (SLJIT_S0 >= tmp)
		size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}