Fix x86 mem address encoding
[sljit.git] / sljit_src / sljitNativeX86_64.c
blob86e45126bdb6531c0576a53215a5845bd224a48e
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 /* x86 64-bit arch dependent functions. */
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
/* Load a full 64-bit immediate into general register 'reg'.
   Encoding: REX.W (| REX.B for r8-r15) + MOV_r_i32 (B8+rd) + 8-byte imm.
   Returns SLJIT_SUCCESS, or fails if the code buffer cannot grow. */
33 static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
35 sljit_u8 *inst;
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(2 + sizeof(sljit_sw));
/* REX.B extends the 3-bit register field to reach r8-r15. */
40 *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
41 *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
42 sljit_unaligned_store_sw(inst, imm);
43 return SLJIT_SUCCESS;
/* Emit a single-byte opcode with an optional REX prefix followed by a
   32-bit immediate. 'rex' of 0 means "no prefix"; 'imm' is truncated to
   sljit_s32 when stored. */
46 static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
48 sljit_u8 *inst;
49 sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);
51 inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
52 FAIL_IF(!inst);
53 INC_SIZE(length);
54 if (rex)
55 *inst++ = rex;
56 *inst++ = opcode;
57 sljit_unaligned_store_s32(inst, (sljit_s32)imm);
58 return SLJIT_SUCCESS;
/* Core x86-64 instruction emitter.
   Computes the total instruction length (prefixes, REX, opcode bytes,
   mod r/m, optional SIB, displacement, immediate), reserves buffer space,
   then encodes everything EXCEPT the opcode byte(s), which the caller
   fills in through the returned pointer.
   'a'/'imma' is the register-or-immediate operand; 'b'/'immb' is the
   register-or-memory operand. 'size' packs the opcode byte count in its
   low nibble and EX86_* flags above it.
   Returns a pointer to the reserved opcode position (for shift
   instructions: the byte after the group opcode), or NULL on failure. */
61 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
62 /* The register or immediate operand. */
63 sljit_s32 a, sljit_sw imma,
64 /* The general operand (not immediate). */
65 sljit_s32 b, sljit_sw immb)
67 sljit_u8 *inst;
68 sljit_u8 *buf_ptr;
69 sljit_u8 rex = 0;
70 sljit_u8 reg_lmap_b;
71 sljit_uw flags = size;
72 sljit_uw inst_size;
74 /* The immediate operand must be 32 bit. */
75 SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
76 /* Both cannot be switched on. */
77 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
78 /* Size flags not allowed for typed instructions. */
79 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
80 /* Both size flags cannot be switched on. */
81 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
82 /* SSE2 and immediate is not possible. */
83 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
84 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
85 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
86 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
/* Low nibble of 'size' is the number of opcode bytes the caller writes. */
88 size &= 0xf;
89 inst_size = size;
91 if (!compiler->mode32 && !(flags & EX86_NO_REXW))
92 rex |= REX_W;
93 else if (flags & EX86_REX)
94 rex |= REX;
96 if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
97 inst_size++;
98 if (flags & EX86_PREF_66)
99 inst_size++;
101 /* Calculate size of b. */
102 inst_size += 1; /* mod r/m byte. */
103 if (b & SLJIT_MEM) {
/* A displacement that does not fit in 32 bits must be materialized in
   TMP_REG2 and folded into the address as a base/index register. */
104 if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
105 PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
106 immb = 0;
107 if (b & REG_MASK)
108 b |= TO_OFFS_REG(TMP_REG2);
109 else
110 b |= TMP_REG2;
113 if (!(b & REG_MASK))
114 inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
115 else {
116 if (immb != 0 && !(b & OFFS_REG_MASK)) {
117 /* Immediate operand. */
118 if (immb <= 127 && immb >= -128)
119 inst_size += sizeof(sljit_s8);
120 else
121 inst_size += sizeof(sljit_s32);
/* Base with low code 5 (rbp/r13) cannot be encoded with mod==00;
   either swap base and index, or force an 8-bit zero displacement. */
123 else if (reg_lmap[b & REG_MASK] == 5) {
124 /* Swap registers if possible. */
125 if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
126 b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
127 else
128 inst_size += sizeof(sljit_s8);
131 if (reg_map[b & REG_MASK] >= 8)
132 rex |= REX_B;
/* Base with low code 4 (rsp/r12) always needs a SIB byte; encode it as
   base + "no index" (SLJIT_SP as offset reg means index==100). */
134 if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
135 b |= TO_OFFS_REG(SLJIT_SP);
137 if (b & OFFS_REG_MASK) {
138 inst_size += 1; /* SIB byte. */
139 if (reg_map[OFFS_REG(b)] >= 8)
140 rex |= REX_X;
144 else if (!(flags & EX86_SSE2_OP2)) {
145 if (reg_map[b] >= 8)
146 rex |= REX_B;
148 else if (freg_map[b] >= 8)
149 rex |= REX_B;
/* Calculate size of the immediate operand 'a' (if any). */
151 if (a & SLJIT_IMM) {
152 if (flags & EX86_BIN_INS) {
153 if (imma <= 127 && imma >= -128) {
154 inst_size += 1;
155 flags |= EX86_BYTE_ARG;
156 } else
157 inst_size += 4;
159 else if (flags & EX86_SHIFT_INS) {
/* Shift counts are masked by hardware to 5 (32-bit) or 6 (64-bit) bits. */
160 imma &= compiler->mode32 ? 0x1f : 0x3f;
161 if (imma != 1) {
162 inst_size++;
163 flags |= EX86_BYTE_ARG;
165 } else if (flags & EX86_BYTE_ARG)
166 inst_size++;
167 else if (flags & EX86_HALF_ARG)
168 inst_size += sizeof(short);
169 else
170 inst_size += sizeof(sljit_s32);
172 else {
173 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
174 /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
175 if (!(flags & EX86_SSE2_OP1)) {
176 if (reg_map[a] >= 8)
177 rex |= REX_R;
179 else if (freg_map[a] >= 8)
180 rex |= REX_R;
183 if (rex)
184 inst_size++;
186 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
187 PTR_FAIL_IF(!inst);
189 /* Encoding the byte. */
190 INC_SIZE(inst_size);
191 if (flags & EX86_PREF_F2)
192 *inst++ = 0xf2;
193 if (flags & EX86_PREF_F3)
194 *inst++ = 0xf3;
195 if (flags & EX86_PREF_66)
196 *inst++ = 0x66;
197 if (rex)
198 *inst++ = rex;
/* 'inst' now points at the opcode slot; 'buf_ptr' skips past the
   caller-filled opcode byte(s) to the mod r/m position. */
199 buf_ptr = inst + size;
201 /* Encode mod/rm byte. */
202 if (!(flags & EX86_SHIFT_INS)) {
203 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
204 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
206 if (a & SLJIT_IMM)
207 *buf_ptr = 0;
208 else if (!(flags & EX86_SSE2_OP1))
209 *buf_ptr = U8(reg_lmap[a] << 3);
210 else
211 *buf_ptr = U8(freg_lmap[a] << 3);
213 else {
214 if (a & SLJIT_IMM) {
215 if (imma == 1)
216 *inst = GROUP_SHIFT_1;
217 else
218 *inst = GROUP_SHIFT_N;
219 } else
220 *inst = GROUP_SHIFT_CL;
221 *buf_ptr = 0;
224 if (!(b & SLJIT_MEM)) {
/* Register-direct: mod==11. */
225 *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
226 buf_ptr++;
227 } else if (b & REG_MASK) {
228 reg_lmap_b = reg_lmap[b & REG_MASK];
230 if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
231 if (immb != 0 || reg_lmap_b == 5) {
/* mod==01 selects disp8, mod==10 selects disp32. */
232 if (immb <= 127 && immb >= -128)
233 *buf_ptr |= 0x40;
234 else
235 *buf_ptr |= 0x80;
238 if (!(b & OFFS_REG_MASK))
239 *buf_ptr++ |= reg_lmap_b;
240 else {
241 *buf_ptr++ |= 0x04;
242 *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
245 if (immb != 0 || reg_lmap_b == 5) {
246 if (immb <= 127 && immb >= -128)
247 *buf_ptr++ = U8(immb); /* 8 bit displacement. */
248 else {
249 sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
250 buf_ptr += sizeof(sljit_s32);
/* Base + scaled index: 'immb' here holds the scale shift (0-3). */
254 else {
255 if (reg_lmap_b == 5)
256 *buf_ptr |= 0x40;
258 *buf_ptr++ |= 0x04;
259 *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
261 if (reg_lmap_b == 5)
262 *buf_ptr++ = 0;
265 else {
/* No base register: SIB 0x25 (base==101, no index) encodes an absolute
   32-bit address instead of RIP-relative addressing. */
266 *buf_ptr++ |= 0x04;
267 *buf_ptr++ = 0x25;
268 sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
269 buf_ptr += sizeof(sljit_s32);
272 if (a & SLJIT_IMM) {
273 if (flags & EX86_BYTE_ARG)
274 *buf_ptr = U8(imma);
275 else if (flags & EX86_HALF_ARG)
276 sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
277 else if (!(flags & EX86_SHIFT_INS))
278 sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
281 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
284 /* --------------------------------------------------------------------- */
285 /* Enter / return */
286 /* --------------------------------------------------------------------- */
/* Generate a far (absolute) jump or call: load the target address into
   TMP_REG2 (32-bit form when it fits and is not rewritable), then do an
   indirect jmp/call through TMP_REG2. Conditional jumps are handled by
   prefixing an inverted short Jcc that skips over the whole sequence. */
288 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
290 sljit_uw type = jump->flags >> TYPE_SHIFT;
292 int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
294 /* The relative jump below specialized for this case. */
295 SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
297 if (type < SLJIT_JUMP) {
298 /* Invert type. */
299 *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
/* Skip distance: mov (6 or 10 bytes) + 3-byte indirect jmp/call. */
300 *code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
303 *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
304 *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
305 jump->addr = (sljit_uw)code_ptr;
307 if (jump->flags & JUMP_LABEL)
308 jump->flags |= PATCH_MD;
309 else if (short_addr)
310 sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
311 else
312 sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);
314 code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);
316 *code_ptr++ = REX_B;
317 *code_ptr++ = GROUP_FF;
318 *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);
320 return code_ptr;
/* Shrink a put_label's pre-generated 64-bit address load when the final
   label address fits in 32 bits, patching the already-emitted machine
   code in place. When max_label exceeds HALFWORD_MAX the full 64-bit
   form is kept and only marked for later patching (PATCH_MD). */
323 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
325 if (max_label > HALFWORD_MAX) {
326 put_label->addr -= put_label->flags;
327 put_label->flags = PATCH_MD;
328 return code_ptr;
331 if (put_label->flags == 0) {
332 /* Destination is register. */
333 code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
335 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
336 SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
/* Drop REX.W (keep REX.B if needed) so the mov becomes a 32-bit
   MOV r32, imm32, which zero-extends into the full register. */
338 if ((code_ptr[0] & 0x07) != 0) {
339 code_ptr[0] = U8(code_ptr[0] & ~0x08);
340 code_ptr += 2 + sizeof(sljit_s32);
342 else {
/* No REX bits remain needed: drop the prefix entirely by shifting
   the opcode byte down one position. */
343 code_ptr[0] = code_ptr[1];
344 code_ptr += 1 + sizeof(sljit_s32);
347 put_label->addr = (sljit_uw)code_ptr;
348 return code_ptr;
/* Destination is memory: move the trailing store instruction back over
   the now-too-long mov, and rewrite it as MOV rm32, imm32. */
351 code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
352 SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
354 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
356 if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
357 code_ptr += 2 + sizeof(sljit_uw);
358 SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
361 SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
/* Clear REX.R, switch opcode to MOV_rm_i32, and zero the reg field of
   the mod r/m byte (it is the /0 extension for MOV_rm_i32). */
363 code_ptr[0] = U8(code_ptr[0] & ~0x4);
364 code_ptr[1] = MOV_rm_i32;
365 code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3));
367 code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
368 put_label->addr = (sljit_uw)code_ptr;
369 put_label->flags = 0;
370 return code_ptr;
/* Emit the function prologue: push saved registers, move incoming
   arguments from their ABI registers into sljit registers, allocate the
   stack frame, and (Win64 only) probe large stacks page by page and
   spill saved xmm registers. */
373 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
374 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
375 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
377 sljit_uw size;
378 sljit_s32 word_arg_count = 0;
379 sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
380 sljit_s32 saved_regs_size, tmp, i;
381 #ifdef _WIN64
382 sljit_s32 saved_float_regs_size;
383 sljit_s32 saved_float_regs_offset = 0;
384 sljit_s32 float_arg_count = 0;
385 #endif /* _WIN64 */
386 sljit_u8 *inst;
388 CHECK_ERROR();
389 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
390 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
392 /* Emit ENDBR64 at function entry if needed. */
393 FAIL_IF(emit_endbranch(compiler));
395 compiler->mode32 = 0;
397 /* Including the return address saved by the call instruction. */
398 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
/* Push the callee-saved registers (with REX.B when the register is in
   the r8-r15 bank), then the saved scratch registers. */
400 tmp = SLJIT_S0 - saveds;
401 for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
402 size = reg_map[i] >= 8 ? 2 : 1;
403 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
404 FAIL_IF(!inst);
405 INC_SIZE(size);
406 if (reg_map[i] >= 8)
407 *inst++ = REX_B;
408 PUSH_REG(reg_lmap[i]);
411 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
412 size = reg_map[i] >= 8 ? 2 : 1;
413 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
414 FAIL_IF(!inst);
415 INC_SIZE(size);
416 if (reg_map[i] >= 8)
417 *inst++ = REX_B;
418 PUSH_REG(reg_lmap[i]);
421 #ifdef _WIN64
422 local_size += SLJIT_LOCALS_OFFSET;
423 saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
/* Reserve a 16-byte aligned area above the locals for xmm saves. */
425 if (saved_float_regs_size > 0) {
426 saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
427 local_size = saved_float_regs_offset + saved_float_regs_size;
429 #else /* !_WIN64 */
430 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
431 #endif /* _WIN64 */
/* Move incoming arguments from ABI registers to sljit registers.
   The switch maps argument index to the ABI register, expressed via the
   sljit register that aliases it on this platform. */
433 arg_types >>= SLJIT_ARG_SHIFT;
435 while (arg_types > 0) {
436 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
437 tmp = 0;
438 #ifndef _WIN64
439 switch (word_arg_count) {
440 case 0:
441 tmp = SLJIT_R2;
442 break;
443 case 1:
444 tmp = SLJIT_R1;
445 break;
446 case 2:
447 tmp = TMP_REG1;
448 break;
449 default:
450 tmp = SLJIT_R3;
451 break;
453 #else /* !_WIN64 */
454 switch (word_arg_count + float_arg_count) {
455 case 0:
456 tmp = SLJIT_R3;
457 break;
458 case 1:
459 tmp = SLJIT_R1;
460 break;
461 case 2:
462 tmp = SLJIT_R2;
463 break;
464 default:
465 tmp = TMP_REG1;
466 break;
468 #endif /* _WIN64 */
469 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
470 if (tmp != SLJIT_R0 + word_arg_count)
471 EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
472 } else {
473 EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
474 saved_arg_count++;
476 word_arg_count++;
477 } else {
478 #ifdef _WIN64
479 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
480 float_arg_count++;
/* Win64 numbers xmm argument slots by overall position, so the value
   may need to move down to the dense sljit float register index. */
481 if (float_arg_count != float_arg_count + word_arg_count)
482 FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
483 float_arg_count, float_arg_count + word_arg_count, 0));
484 #endif /* _WIN64 */
486 arg_types >>= SLJIT_ARG_SHIFT;
/* Keep the frame 16-byte aligned including the pushed registers. */
489 local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
490 compiler->local_size = local_size;
492 #ifdef _WIN64
/* Windows requires touching each 4K page of a large frame in order
   (stack probing) before rsp moves past it. */
493 if (local_size > 0) {
494 if (local_size <= 4 * 4096) {
495 if (local_size > 4096)
496 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
497 if (local_size > 2 * 4096)
498 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
499 if (local_size > 3 * 4096)
500 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
502 else {
/* Probe loop: TMP_REG1 counts pages; the JNE_i8 of -21 bytes loops
   back over the load/sub/sub sequence until all pages are touched. */
503 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);
505 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
506 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
507 BINARY_IMM32(SUB, 1, TMP_REG1, 0);
509 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
510 FAIL_IF(!inst);
512 INC_SIZE(2);
513 inst[0] = JNE_i8;
514 inst[1] = (sljit_u8)-21;
515 local_size &= 0xfff;
518 if (local_size > 0)
519 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
521 #endif /* _WIN64 */
523 if (local_size > 0)
524 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
526 #ifdef _WIN64
/* Spill callee-saved xmm registers with MOVAPS (area is 16-aligned). */
527 if (saved_float_regs_size > 0) {
528 compiler->mode32 = 1;
530 tmp = SLJIT_FS0 - fsaveds;
531 for (i = SLJIT_FS0; i > tmp; i--) {
532 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
533 *inst++ = GROUP_0F;
534 *inst = MOVAPS_xm_x;
535 saved_float_regs_offset += 16;
538 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
539 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
540 *inst++ = GROUP_0F;
541 *inst = MOVAPS_xm_x;
542 saved_float_regs_offset += 16;
545 #endif /* _WIN64 */
547 return SLJIT_SUCCESS;
/* Record the function context (register counts, local size) without
   emitting any code; mirrors sljit_emit_enter's local_size computation
   so later frame-relative offsets match. */
550 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
551 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
552 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
554 sljit_s32 saved_regs_size;
555 #ifdef _WIN64
556 sljit_s32 saved_float_regs_size;
557 #endif /* _WIN64 */
559 CHECK_ERROR();
560 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
561 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
563 #ifdef _WIN64
564 local_size += SLJIT_LOCALS_OFFSET;
565 saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
567 if (saved_float_regs_size > 0)
568 local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
569 #else /* !_WIN64 */
570 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
571 #endif /* _WIN64 */
573 /* Including the return address saved by the call instruction. */
574 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
/* Keep the frame 16-byte aligned including the pushed registers. */
575 compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
576 return SLJIT_SUCCESS;
/* Emit the epilogue minus the RET: restore saved xmm registers (Win64),
   release the local stack area (ADD rsp, local_size), and pop the saved
   general registers in reverse push order. */
579 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
581 sljit_uw size;
582 sljit_s32 i, tmp;
583 sljit_u8 *inst;
584 #ifdef _WIN64
585 sljit_s32 saved_float_regs_offset;
586 sljit_s32 fscratches = compiler->fscratches;
587 sljit_s32 fsaveds = compiler->fsaveds;
588 #endif /* _WIN64 */
590 #ifdef _WIN64
591 saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
593 if (saved_float_regs_offset > 0) {
594 compiler->mode32 = 1;
/* Recompute the 16-aligned offset of the xmm save area inside the frame. */
595 saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;
597 tmp = SLJIT_FS0 - fsaveds;
598 for (i = SLJIT_FS0; i > tmp; i--) {
599 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
600 *inst++ = GROUP_0F;
601 *inst = MOVAPS_x_xm;
602 saved_float_regs_offset += 16;
605 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
606 inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
607 *inst++ = GROUP_0F;
608 *inst = MOVAPS_x_xm;
609 saved_float_regs_offset += 16;
612 #endif /* _WIN64 */
/* ADD rsp, imm — 83 /0 with imm8 when it fits, otherwise 81 /0 imm32.
   MOD_REG | ADD | 4 encodes mod r/m for rsp (low code 4). */
614 if (compiler->local_size > 0) {
615 if (compiler->local_size <= 127) {
616 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
617 FAIL_IF(!inst);
618 INC_SIZE(4);
619 *inst++ = REX_W;
620 *inst++ = GROUP_BINARY_83;
621 *inst++ = MOD_REG | ADD | 4;
622 *inst = U8(compiler->local_size);
624 else {
625 inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
626 FAIL_IF(!inst);
627 INC_SIZE(7);
628 *inst++ = REX_W;
629 *inst++ = GROUP_BINARY_81;
630 *inst++ = MOD_REG | ADD | 4;
631 sljit_unaligned_store_s32(inst, compiler->local_size);
/* Pop order is the exact reverse of the pushes in sljit_emit_enter. */
635 tmp = compiler->scratches;
636 for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
637 size = reg_map[i] >= 8 ? 2 : 1;
638 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
639 FAIL_IF(!inst);
640 INC_SIZE(size);
641 if (reg_map[i] >= 8)
642 *inst++ = REX_B;
643 POP_REG(reg_lmap[i]);
646 tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
647 for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
648 size = reg_map[i] >= 8 ? 2 : 1;
649 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
650 FAIL_IF(!inst);
651 INC_SIZE(size);
652 if (reg_map[i] >= 8)
653 *inst++ = REX_B;
654 POP_REG(reg_lmap[i]);
657 return SLJIT_SUCCESS;
/* Emit a full function return: release the stack frame, then RET. */
660 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
662 sljit_u8 *inst;
664 CHECK_ERROR();
665 CHECK(check_sljit_emit_return_void(compiler));
667 FAIL_IF(emit_stack_frame_release(compiler));
669 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
670 FAIL_IF(!inst);
671 INC_SIZE(1);
672 RET();
673 return SLJIT_SUCCESS;
676 /* --------------------------------------------------------------------- */
677 /* Call / return instructions */
678 /* --------------------------------------------------------------------- */
680 #ifndef _WIN64
/* System V AMD64: shuffle sljit argument registers into the ABI argument
   registers before a call. Only R0->rdi needs help here (via R2), since
   the other mappings already coincide per the assert below. '*src_ptr'
   (the indirect call target, if any) is relocated when it would be
   clobbered. */
682 static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
684 sljit_s32 src = src_ptr ? (*src_ptr) : 0;
685 sljit_s32 word_arg_count = 0;
687 SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
688 SLJIT_ASSERT(!(src & SLJIT_MEM));
690 /* Remove return value. */
691 arg_types >>= SLJIT_ARG_SHIFT;
693 while (arg_types) {
694 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
695 word_arg_count++;
696 arg_types >>= SLJIT_ARG_SHIFT;
699 if (word_arg_count == 0)
700 return SLJIT_SUCCESS;
/* The third word argument lives in R2, which R0's move below would
   clobber; park it (and redirect the call target) in TMP_REG1 first. */
702 if (word_arg_count >= 3) {
703 if (src == SLJIT_R2)
704 *src_ptr = TMP_REG1;
705 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
708 return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
711 #else
/* Win64: shuffle sljit argument registers into the ABI argument slots
   (rcx, rdx, r8, r9 — shared between integer and float positions).
   First pass records the argument types and whether any move is needed;
   second pass performs the moves in reverse argument order so earlier
   moves never clobber later sources. */
/* NOTE(review): "data_trandfer" is a misspelling of "data_transfer";
   it is a local variable, so renaming is a code change deliberately not
   made in this documentation-only pass. */
713 static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
715 sljit_s32 src = src_ptr ? (*src_ptr) : 0;
716 sljit_s32 arg_count = 0;
717 sljit_s32 word_arg_count = 0;
718 sljit_s32 float_arg_count = 0;
719 sljit_s32 types = 0;
720 sljit_s32 data_trandfer = 0;
/* word_arg_regs[n] is the sljit register aliasing the n-th Win64
   integer argument register (1-based; index 0 unused). */
721 static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };
723 SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
724 SLJIT_ASSERT(!(src & SLJIT_MEM));
726 arg_types >>= SLJIT_ARG_SHIFT;
728 while (arg_types) {
/* Accumulate types in reverse so the second loop walks them backwards. */
729 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
731 switch (arg_types & SLJIT_ARG_MASK) {
732 case SLJIT_ARG_TYPE_F64:
733 case SLJIT_ARG_TYPE_F32:
734 arg_count++;
735 float_arg_count++;
737 if (arg_count != float_arg_count)
738 data_trandfer = 1;
739 break;
740 default:
741 arg_count++;
742 word_arg_count++;
744 if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
745 data_trandfer = 1;
/* Save the indirect call target before the move pass clobbers it. */
747 if (src == word_arg_regs[arg_count]) {
748 EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
749 *src_ptr = TMP_REG2;
752 break;
755 arg_types >>= SLJIT_ARG_SHIFT;
758 if (!data_trandfer)
759 return SLJIT_SUCCESS;
/* Second pass: perform the moves, last argument first. */
761 while (types) {
762 switch (types & SLJIT_ARG_MASK) {
763 case SLJIT_ARG_TYPE_F64:
764 if (arg_count != float_arg_count)
765 FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
766 arg_count--;
767 float_arg_count--;
768 break;
769 case SLJIT_ARG_TYPE_F32:
770 if (arg_count != float_arg_count)
771 FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
772 arg_count--;
773 float_arg_count--;
774 break;
775 default:
776 if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
777 EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
778 arg_count--;
779 word_arg_count--;
780 break;
783 types >>= SLJIT_ARG_SHIFT;
786 return SLJIT_SUCCESS;
789 #endif
/* Emit a direct call: marshal arguments into ABI registers, and for
   tail calls (SLJIT_CALL_RETURN) release the frame and degrade the call
   to a plain jump. */
791 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
792 sljit_s32 arg_types)
794 CHECK_ERROR_PTR();
795 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
797 compiler->mode32 = 0;
799 PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
801 if (type & SLJIT_CALL_RETURN) {
802 PTR_FAIL_IF(emit_stack_frame_release(compiler));
803 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
/* The jump below was already validated as part of this call. */
806 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
807 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
808 compiler->skip_checks = 1;
809 #endif
810 return sljit_emit_jump(compiler, type);
/* Emit an indirect call through 'src': load memory/saved-register
   targets into TMP_REG2 where needed (saved registers are restored by
   the frame release on tail calls), marshal arguments, then jump/call. */
813 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
814 sljit_s32 arg_types,
815 sljit_s32 src, sljit_sw srcw)
817 CHECK_ERROR();
818 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
820 compiler->mode32 = 0;
822 if (src & SLJIT_MEM) {
823 ADJUST_LOCAL_OFFSET(src, srcw);
824 EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
825 src = TMP_REG2;
828 if (type & SLJIT_CALL_RETURN) {
/* A saved register target would be overwritten by the register pops in
   emit_stack_frame_release; keep a copy in TMP_REG2. */
829 if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
830 EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
831 src = TMP_REG2;
834 FAIL_IF(emit_stack_frame_release(compiler));
835 type = SLJIT_JUMP;
838 FAIL_IF(call_with_args(compiler, arg_types, &src));
/* The jump below was already validated as part of this call. */
840 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
841 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
842 compiler->skip_checks = 1;
843 #endif
845 return sljit_emit_ijump(compiler, type, src, srcw);
/* Fast-call entry: pop the return address (pushed by the caller's CALL)
   into 'dst' — a 1- or 2-byte POP for registers, POP r/m for memory. */
848 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
850 sljit_u8 *inst;
852 CHECK_ERROR();
853 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
854 ADJUST_LOCAL_OFFSET(dst, dstw);
856 if (FAST_IS_REG(dst)) {
857 if (reg_map[dst] < 8) {
858 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
859 FAIL_IF(!inst);
860 INC_SIZE(1);
861 POP_REG(reg_lmap[dst]);
862 return SLJIT_SUCCESS;
/* r8-r15 need a REX.B prefix before the POP opcode. */
865 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
866 FAIL_IF(!inst);
867 INC_SIZE(2);
868 *inst++ = REX_B;
869 POP_REG(reg_lmap[dst]);
870 return SLJIT_SUCCESS;
873 /* REX_W is not necessary (src is not immediate). */
874 compiler->mode32 = 1;
875 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
876 FAIL_IF(!inst);
877 *inst++ = POP_rm;
878 return SLJIT_SUCCESS;
/* Fast-call return: push the saved return address from 'src' back onto
   the stack (PUSH reg or PUSH r/m), then RET through it. */
881 static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
883 sljit_u8 *inst;
885 if (FAST_IS_REG(src)) {
886 if (reg_map[src] < 8) {
/* The extra +1 in each reservation accounts for the trailing RET. */
887 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
888 FAIL_IF(!inst);
890 INC_SIZE(1 + 1);
891 PUSH_REG(reg_lmap[src]);
893 else {
894 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
895 FAIL_IF(!inst);
897 INC_SIZE(2 + 1);
898 *inst++ = REX_B;
899 PUSH_REG(reg_lmap[src]);
902 else {
903 /* REX_W is not necessary (src is not immediate). */
904 compiler->mode32 = 1;
905 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
906 FAIL_IF(!inst);
907 *inst++ = GROUP_FF;
908 *inst |= PUSH_rm;
910 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
911 FAIL_IF(!inst);
912 INC_SIZE(1);
915 RET();
916 return SLJIT_SUCCESS;
919 /* --------------------------------------------------------------------- */
920 /* Extend input */
921 /* --------------------------------------------------------------------- */
/* Move a 32-bit value into a 64-bit destination with sign or zero
   extension. 'sign' selects MOVSXD; zero extension uses a plain 32-bit
   mov (which the hardware zero-extends). Immediates are stored directly
   when representable, otherwise loaded as a 64-bit constant. */
923 static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
924 sljit_s32 dst, sljit_sw dstw,
925 sljit_s32 src, sljit_sw srcw)
927 sljit_u8* inst;
928 sljit_s32 dst_r;
930 compiler->mode32 = 0;
932 if (src & SLJIT_IMM) {
933 if (FAST_IS_REG(dst)) {
/* Sign-extended 32-bit store covers the value; an unsigned value above
   0x7fffffff would be mangled by sign extension, so load full 64 bits. */
934 if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
935 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
936 FAIL_IF(!inst);
937 *inst = MOV_rm_i32;
938 return SLJIT_SUCCESS;
940 return emit_load_imm64(compiler, dst, srcw);
/* Memory destination: store only the low 32 bits (mode32). */
942 compiler->mode32 = 1;
943 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
944 FAIL_IF(!inst);
945 *inst = MOV_rm_i32;
946 compiler->mode32 = 0;
947 return SLJIT_SUCCESS;
950 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
/* Register source going to memory needs no extension step here; the
   32-bit store below truncates as required. */
952 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
953 dst_r = src;
954 else {
955 if (sign) {
956 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
957 FAIL_IF(!inst);
958 *inst++ = MOVSXD_r_rm;
959 } else {
960 compiler->mode32 = 1;
961 FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
962 compiler->mode32 = 0;
966 if (dst & SLJIT_MEM) {
967 compiler->mode32 = 1;
968 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
969 FAIL_IF(!inst);
970 *inst = MOV_rm_r;
971 compiler->mode32 = 0;
974 return SLJIT_SUCCESS;
/* Compute how many bytes of return addresses/frame this function will
   unwind (locals + popped scratches + popped saveds) and adjust the CET
   shadow stack accordingly; a no-op when shadow stack is unsupported. */
977 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
979 sljit_s32 tmp, size;
981 /* Don't adjust shadow stack if it isn't enabled. */
982 if (!cpu_has_shadow_stack())
983 return SLJIT_SUCCESS;
985 size = compiler->local_size;
986 tmp = compiler->scratches;
987 if (tmp >= SLJIT_FIRST_SAVED_REG)
988 size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
989 tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
990 if (SLJIT_S0 >= tmp)
991 size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);
993 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);