Rework x86-32 stack layout
[sljit.git] / sljit_src / sljitNativeX86_common.c
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
29 return "x86" SLJIT_CPUINFO;
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - ESP
39 5 - EBP
40 6 - ESI
41 7 - EDI
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - RSP
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
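   Note (added for clarity): indexes 8-15 only fit into the ModRM/SIB fields with
   the help of a REX prefix bit (REX.R for the reg field, REX.X for the SIB index,
   REX.B for the rm/base field). For example, "add rax, r8" encodes as
   4c 01 c0 (REX.W|REX.R, ADD_rm_r, ModRM 0xc0).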
64 #define TMP_FREG (0)
66 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
68 /* Last register + 1. */
69 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
71 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
72 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
75 #define CHECK_EXTRA_REGS(p, w, do) \
76 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
77 if (p <= compiler->scratches) \
78 w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
79 else \
80 w = SLJIT_LOCALS_OFFSET_BASE + ((p) - SLJIT_S2) * SSIZE_OF(sw); \
81 p = SLJIT_MEM1(SLJIT_SP); \
82 do; \
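/* Illustrative sketch (added, not part of the original source): on x86-32 the
   virtual registers SLJIT_R3 .. SLJIT_S3 have no hardware register, so
   CHECK_EXTRA_REGS rewrites them into stack slots. Assuming SLJIT_R3 is within
   compiler->scratches:
       p = SLJIT_R3, w = 0
   becomes
       p = SLJIT_MEM1(SLJIT_SP), w = 2 * SSIZE_OF(sw)
   and the later emitters simply address that stack slot instead of a register. */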
85 #else /* SLJIT_CONFIG_X86_32 */
87 /* Last register + 1. */
88 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
89 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
91 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
92 Note: avoid using r12 and r13 for memory addressing;
93 therefore r12 is better used as one of the higher saved registers. */
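/* Background (x86 encoding, added for clarity): a ModRM rm field of 0b100 means
   "a SIB byte follows", and mod == 00 with rm/base == 0b101 means "disp32, no base".
   Because r12 and r13 share these low bits with rsp and rbp, [r12] always needs an
   extra SIB byte and [r13] has to be emitted as [r13 + 0] with a displacement. */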
94 #ifndef _WIN64
95 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
96 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
97 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
99 /* low-map. reg_map & 0x7. */
100 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
101 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
103 #else
104 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
105 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
106 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
108 /* low-map. reg_map & 0x7. */
109 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
110 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
112 #endif
114 /* Args: xmm0-xmm3 */
115 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
116 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
118 /* low-map. freg_map & 0x7. */
119 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
120 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
123 #define REX_W 0x48
124 #define REX_R 0x44
125 #define REX_X 0x42
126 #define REX_B 0x41
127 #define REX 0x40
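/* Illustration (added): a complete prefix is formed by OR-ing these values, e.g.
   REX_W | REX_B == 0x49, so "mov r8, imm64" is emitted as 49 b8 <imm64>
   (opcode MOV_r_i32 + (8 & 0x7)). */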
129 #ifndef _WIN64
130 #define HALFWORD_MAX 0x7fffffffl
131 #define HALFWORD_MIN -0x80000000l
132 #else
133 #define HALFWORD_MAX 0x7fffffffll
134 #define HALFWORD_MIN -0x80000000ll
135 #endif
137 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
138 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
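/* Example (added): IS_HALFWORD(0x7fffffff) holds, so that value can be used as a
   sign-extended 32 bit immediate, while 0x80000000 is NOT_HALFWORD and has to be
   materialized in a register first (see emit_load_imm64 / BINARY_IMM below). */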
140 #define CHECK_EXTRA_REGS(p, w, do)
142 #endif /* SLJIT_CONFIG_X86_32 */
144 #define U8(v) ((sljit_u8)(v))
147 /* Size flags for emit_x86_instruction: */
148 #define EX86_BIN_INS 0x0010
149 #define EX86_SHIFT_INS 0x0020
150 #define EX86_REX 0x0040
151 #define EX86_NO_REXW 0x0080
152 #define EX86_BYTE_ARG 0x0100
153 #define EX86_HALF_ARG 0x0200
154 #define EX86_PREF_66 0x0400
155 #define EX86_PREF_F2 0x0800
156 #define EX86_PREF_F3 0x1000
157 #define EX86_SSE2_OP1 0x2000
158 #define EX86_SSE2_OP2 0x4000
159 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
161 /* --------------------------------------------------------------------- */
162 /* Instruction forms */
163 /* --------------------------------------------------------------------- */
165 #define ADD (/* BINARY */ 0 << 3)
166 #define ADD_EAX_i32 0x05
167 #define ADD_r_rm 0x03
168 #define ADD_rm_r 0x01
169 #define ADDSD_x_xm 0x58
170 #define ADC (/* BINARY */ 2 << 3)
171 #define ADC_EAX_i32 0x15
172 #define ADC_r_rm 0x13
173 #define ADC_rm_r 0x11
174 #define AND (/* BINARY */ 4 << 3)
175 #define AND_EAX_i32 0x25
176 #define AND_r_rm 0x23
177 #define AND_rm_r 0x21
178 #define ANDPD_x_xm 0x54
179 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
180 #define CALL_i32 0xe8
181 #define CALL_rm (/* GROUP_FF */ 2 << 3)
182 #define CDQ 0x99
183 #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
184 #define CMP (/* BINARY */ 7 << 3)
185 #define CMP_EAX_i32 0x3d
186 #define CMP_r_rm 0x3b
187 #define CMP_rm_r 0x39
188 #define CVTPD2PS_x_xm 0x5a
189 #define CVTSI2SD_x_rm 0x2a
190 #define CVTTSD2SI_r_xm 0x2c
191 #define DIV (/* GROUP_F7 */ 6 << 3)
192 #define DIVSD_x_xm 0x5e
193 #define FSTPS 0xd9
194 #define FSTPD 0xdd
195 #define INT3 0xcc
196 #define IDIV (/* GROUP_F7 */ 7 << 3)
197 #define IMUL (/* GROUP_F7 */ 5 << 3)
198 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
199 #define IMUL_r_rm_i8 0x6b
200 #define IMUL_r_rm_i32 0x69
201 #define JE_i8 0x74
202 #define JNE_i8 0x75
203 #define JMP_i8 0xeb
204 #define JMP_i32 0xe9
205 #define JMP_rm (/* GROUP_FF */ 4 << 3)
206 #define LEA_r_m 0x8d
207 #define LOOP_i8 0xe2
208 #define MOV_r_rm 0x8b
209 #define MOV_r_i32 0xb8
210 #define MOV_rm_r 0x89
211 #define MOV_rm_i32 0xc7
212 #define MOV_rm8_i8 0xc6
213 #define MOV_rm8_r8 0x88
214 #define MOVAPS_x_xm 0x28
215 #define MOVAPS_xm_x 0x29
216 #define MOVSD_x_xm 0x10
217 #define MOVSD_xm_x 0x11
218 #define MOVSXD_r_rm 0x63
219 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
220 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
221 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
222 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
223 #define MUL (/* GROUP_F7 */ 4 << 3)
224 #define MULSD_x_xm 0x59
225 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
226 #define NOP 0x90
227 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
228 #define OR (/* BINARY */ 1 << 3)
229 #define OR_r_rm 0x0b
230 #define OR_EAX_i32 0x0d
231 #define OR_rm_r 0x09
232 #define OR_rm8_r8 0x08
233 #define POP_r 0x58
234 #define POP_rm 0x8f
235 #define POPF 0x9d
236 #define PREFETCH 0x18
237 #define PUSH_i32 0x68
238 #define PUSH_r 0x50
239 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
240 #define PUSHF 0x9c
241 #define RET_near 0xc3
242 #define RET_i16 0xc2
243 #define SBB (/* BINARY */ 3 << 3)
244 #define SBB_EAX_i32 0x1d
245 #define SBB_r_rm 0x1b
246 #define SBB_rm_r 0x19
247 #define SAR (/* SHIFT */ 7 << 3)
248 #define SHL (/* SHIFT */ 4 << 3)
249 #define SHR (/* SHIFT */ 5 << 3)
250 #define SUB (/* BINARY */ 5 << 3)
251 #define SUB_EAX_i32 0x2d
252 #define SUB_r_rm 0x2b
253 #define SUB_rm_r 0x29
254 #define SUBSD_x_xm 0x5c
255 #define TEST_EAX_i32 0xa9
256 #define TEST_rm_r 0x85
257 #define UCOMISD_x_xm 0x2e
258 #define UNPCKLPD_x_xm 0x14
259 #define XCHG_EAX_r 0x90
260 #define XCHG_r_rm 0x87
261 #define XOR (/* BINARY */ 6 << 3)
262 #define XOR_EAX_i32 0x35
263 #define XOR_r_rm 0x33
264 #define XOR_rm_r 0x31
265 #define XORPD_x_xm 0x57
267 #define GROUP_0F 0x0f
268 #define GROUP_F7 0xf7
269 #define GROUP_FF 0xff
270 #define GROUP_BINARY_81 0x81
271 #define GROUP_BINARY_83 0x83
272 #define GROUP_SHIFT_1 0xd1
273 #define GROUP_SHIFT_N 0xc1
274 #define GROUP_SHIFT_CL 0xd3
276 #define MOD_REG 0xc0
277 #define MOD_DISP8 0x40
279 #define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
281 #define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
282 #define POP_REG(r) (*inst++ = U8(POP_r + (r)))
283 #define RET() (*inst++ = RET_near)
284 #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
286 /* Multithreading does not affect these static variables, since they store
287 built-in CPU features. Therefore they can safely be overwritten by different
288 threads if they detect the CPU features at the same time. */
289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
290 static sljit_s32 cpu_has_sse2 = -1;
291 #endif
292 static sljit_s32 cpu_has_cmov = -1;
294 #ifdef _WIN32_WCE
295 #include <cmnintrin.h>
296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
297 #include <intrin.h>
298 #endif
300 /******************************************************/
301 /* Unaligned-store functions */
302 /******************************************************/
304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
306 SLJIT_MEMCPY(addr, &value, sizeof(value));
309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
311 SLJIT_MEMCPY(addr, &value, sizeof(value));
314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
316 SLJIT_MEMCPY(addr, &value, sizeof(value));
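/* Note (added): SLJIT_MEMCPY is used instead of a plain pointer store because the
   destination inside the code buffer may be misaligned; compilers lower these
   fixed-size copies to single unaligned stores on x86. */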
319 /******************************************************/
320 /* Utility functions */
321 /******************************************************/
323 static void get_cpu_features(void)
325 sljit_u32 features;
327 #if defined(_MSC_VER) && _MSC_VER >= 1400
329 int CPUInfo[4];
330 __cpuid(CPUInfo, 1);
331 features = (sljit_u32)CPUInfo[3];
333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
335 /* AT&T syntax. */
336 __asm__ (
337 "movl $0x1, %%eax\n"
338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
339 /* On x86-32, there is no red zone, so this
340 should work (no need for a local variable). */
341 "push %%ebx\n"
342 #endif
343 "cpuid\n"
344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
345 "pop %%ebx\n"
346 #endif
347 "movl %%edx, %0\n"
348 : "=g" (features)
350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
351 : "%eax", "%ecx", "%edx"
352 #else
353 : "%rax", "%rbx", "%rcx", "%rdx"
354 #endif
357 #else /* _MSC_VER && _MSC_VER >= 1400 */
359 /* Intel syntax. */
360 __asm {
361 mov eax, 1
362 cpuid
363 mov features, edx
366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
369 cpu_has_sse2 = (features >> 26) & 0x1;
370 #endif
371 cpu_has_cmov = (features >> 15) & 0x1;
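/* CPUID leaf 1 reports the feature flags in EDX: bit 26 is SSE2 and bit 15 is CMOV,
   which is what the two extractions above test (comment added for clarity). */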
374 static sljit_u8 get_jump_code(sljit_uw type)
376 switch (type) {
377 case SLJIT_EQUAL:
378 case SLJIT_F_EQUAL:
379 case SLJIT_UNORDERED_OR_EQUAL:
380 case SLJIT_ORDERED_EQUAL: /* Not supported. */
381 return 0x84 /* je */;
383 case SLJIT_NOT_EQUAL:
384 case SLJIT_F_NOT_EQUAL:
385 case SLJIT_ORDERED_NOT_EQUAL:
386 case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
387 return 0x85 /* jne */;
389 case SLJIT_LESS:
390 case SLJIT_CARRY:
391 case SLJIT_F_LESS:
392 case SLJIT_UNORDERED_OR_LESS:
393 case SLJIT_UNORDERED_OR_GREATER:
394 return 0x82 /* jc */;
396 case SLJIT_GREATER_EQUAL:
397 case SLJIT_NOT_CARRY:
398 case SLJIT_F_GREATER_EQUAL:
399 case SLJIT_ORDERED_GREATER_EQUAL:
400 case SLJIT_ORDERED_LESS_EQUAL:
401 return 0x83 /* jae */;
403 case SLJIT_GREATER:
404 case SLJIT_F_GREATER:
405 case SLJIT_ORDERED_LESS:
406 case SLJIT_ORDERED_GREATER:
407 return 0x87 /* jnbe */;
409 case SLJIT_LESS_EQUAL:
410 case SLJIT_F_LESS_EQUAL:
411 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
412 case SLJIT_UNORDERED_OR_LESS_EQUAL:
413 return 0x86 /* jbe */;
415 case SLJIT_SIG_LESS:
416 return 0x8c /* jl */;
418 case SLJIT_SIG_GREATER_EQUAL:
419 return 0x8d /* jnl */;
421 case SLJIT_SIG_GREATER:
422 return 0x8f /* jnle */;
424 case SLJIT_SIG_LESS_EQUAL:
425 return 0x8e /* jle */;
427 case SLJIT_OVERFLOW:
428 return 0x80 /* jo */;
430 case SLJIT_NOT_OVERFLOW:
431 return 0x81 /* jno */;
433 case SLJIT_UNORDERED:
434 return 0x8a /* jp */;
436 case SLJIT_ORDERED:
437 return 0x8b /* jpo */;
439 return 0;
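/* Note (added): the values above are the near form condition codes (0F 8x, rel32);
   generate_near_jump_code derives the short form by subtracting 0x10, e.g.
   0x84 (je rel32 after GROUP_0F) becomes 0x74 (je rel8). */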
442 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
443 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
444 #else
445 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
446 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
447 #endif
449 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
451 sljit_uw type = jump->flags >> TYPE_SHIFT;
452 sljit_s32 short_jump;
453 sljit_uw label_addr;
455 if (jump->flags & JUMP_LABEL)
456 label_addr = (sljit_uw)(code + jump->u.label->size);
457 else
458 label_addr = jump->u.target - (sljit_uw)executable_offset;
460 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
462 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
463 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
464 return generate_far_jump_code(jump, code_ptr);
465 #endif
467 if (type == SLJIT_JUMP) {
468 if (short_jump)
469 *code_ptr++ = JMP_i8;
470 else
471 *code_ptr++ = JMP_i32;
472 jump->addr++;
474 else if (type >= SLJIT_FAST_CALL) {
475 short_jump = 0;
476 *code_ptr++ = CALL_i32;
477 jump->addr++;
479 else if (short_jump) {
480 *code_ptr++ = U8(get_jump_code(type) - 0x10);
481 jump->addr++;
483 else {
484 *code_ptr++ = GROUP_0F;
485 *code_ptr++ = get_jump_code(type);
486 jump->addr += 2;
489 if (short_jump) {
490 jump->flags |= PATCH_MB;
491 code_ptr += sizeof(sljit_s8);
492 } else {
493 jump->flags |= PATCH_MW;
494 code_ptr += sizeof(sljit_s32);
497 return code_ptr;
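/* Resulting encodings (sketch, added for clarity):
     short jump/jcc:  1 opcode byte  + rel8   (patched via PATCH_MB)
     near jmp/call:   E9 / E8        + rel32  (patched via PATCH_MW)
     near jcc:        0F 8x          + rel32  (patched via PATCH_MW)
   jump->addr is advanced past the opcode bytes above, so it ends up pointing at
   the displacement that sljit_generate_code patches later. */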
500 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
502 struct sljit_memory_fragment *buf;
503 sljit_u8 *code;
504 sljit_u8 *code_ptr;
505 sljit_u8 *buf_ptr;
506 sljit_u8 *buf_end;
507 sljit_u8 len;
508 sljit_sw executable_offset;
509 sljit_uw jump_addr;
511 struct sljit_label *label;
512 struct sljit_jump *jump;
513 struct sljit_const *const_;
514 struct sljit_put_label *put_label;
516 CHECK_ERROR_PTR();
517 CHECK_PTR(check_sljit_generate_code(compiler));
518 reverse_buf(compiler);
520 /* Second code generation pass. */
521 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data);
522 PTR_FAIL_WITH_EXEC_IF(code);
523 buf = compiler->buf;
525 code_ptr = code;
526 label = compiler->labels;
527 jump = compiler->jumps;
528 const_ = compiler->consts;
529 put_label = compiler->put_labels;
530 executable_offset = SLJIT_EXEC_OFFSET(code);
532 do {
533 buf_ptr = buf->memory;
534 buf_end = buf_ptr + buf->used_size;
535 do {
536 len = *buf_ptr++;
537 if (len > 0) {
538 /* The code is already generated. */
539 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
540 code_ptr += len;
541 buf_ptr += len;
543 else {
544 switch (*buf_ptr) {
545 case 0:
546 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
547 label->size = (sljit_uw)(code_ptr - code);
548 label = label->next;
549 break;
550 case 1:
551 jump->addr = (sljit_uw)code_ptr;
552 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
553 code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
554 else {
555 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
556 code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
557 #else
558 code_ptr = generate_far_jump_code(jump, code_ptr);
559 #endif
561 jump = jump->next;
562 break;
563 case 2:
564 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
565 const_ = const_->next;
566 break;
567 default:
568 SLJIT_ASSERT(*buf_ptr == 3);
569 SLJIT_ASSERT(put_label->label);
570 put_label->addr = (sljit_uw)code_ptr;
571 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
572 code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size);
573 #endif
574 put_label = put_label->next;
575 break;
577 buf_ptr++;
579 } while (buf_ptr < buf_end);
580 SLJIT_ASSERT(buf_ptr == buf_end);
581 buf = buf->next;
582 } while (buf);
584 SLJIT_ASSERT(!label);
585 SLJIT_ASSERT(!jump);
586 SLJIT_ASSERT(!const_);
587 SLJIT_ASSERT(!put_label);
588 SLJIT_ASSERT(code_ptr <= code + compiler->size);
590 jump = compiler->jumps;
591 while (jump) {
592 jump_addr = jump->addr + (sljit_uw)executable_offset;
594 if (jump->flags & PATCH_MB) {
595 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
596 *(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
597 } else if (jump->flags & PATCH_MW) {
598 if (jump->flags & JUMP_LABEL) {
599 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
600 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
601 #else
602 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
603 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
604 #endif
606 else {
607 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
608 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
609 #else
610 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
611 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
612 #endif
615 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
616 else if (jump->flags & PATCH_MD)
617 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
618 #endif
620 jump = jump->next;
623 put_label = compiler->put_labels;
624 while (put_label) {
625 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
626 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
627 #else
628 if (put_label->flags & PATCH_MD) {
629 SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
630 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
632 else {
633 SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
634 sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
636 #endif
638 put_label = put_label->next;
641 compiler->error = SLJIT_ERR_COMPILED;
642 compiler->executable_offset = executable_offset;
643 compiler->executable_size = (sljit_uw)(code_ptr - code);
645 code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
647 SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
648 return (void*)code;
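/* Typical use of the generator above (simplified sketch; the emit calls between
   compiler creation and code generation are omitted here):
     struct sljit_compiler *compiler = sljit_create_compiler(NULL, NULL);
     ... emit entry, operations and return ...
     void *code = sljit_generate_code(compiler);
     sljit_free_compiler(compiler);
     ... cast `code` to a function pointer and call it ...
     sljit_free_code(code, NULL);
*/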
651 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
653 switch (feature_type) {
654 case SLJIT_HAS_FPU:
655 #ifdef SLJIT_IS_FPU_AVAILABLE
656 return SLJIT_IS_FPU_AVAILABLE;
657 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
658 if (cpu_has_sse2 == -1)
659 get_cpu_features();
660 return cpu_has_sse2;
661 #else /* SLJIT_DETECT_SSE2 */
662 return 1;
663 #endif /* SLJIT_DETECT_SSE2 */
665 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
666 case SLJIT_HAS_VIRTUAL_REGISTERS:
667 return 1;
668 #endif
670 case SLJIT_HAS_CLZ:
671 case SLJIT_HAS_CMOV:
672 if (cpu_has_cmov == -1)
673 get_cpu_features();
674 return cpu_has_cmov;
676 case SLJIT_HAS_PREFETCH:
677 return 1;
679 case SLJIT_HAS_SSE2:
680 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
681 if (cpu_has_sse2 == -1)
682 get_cpu_features();
683 return cpu_has_sse2;
684 #else
685 return 1;
686 #endif
688 default:
689 return 0;
693 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
695 if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
696 return 0;
698 switch (type) {
699 case SLJIT_ORDERED_EQUAL:
700 case SLJIT_UNORDERED_OR_NOT_EQUAL:
701 return 0;
704 return 1;
707 /* --------------------------------------------------------------------- */
708 /* Operators */
709 /* --------------------------------------------------------------------- */
711 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
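/* Example (added): BINARY_OPCODE(ADD) == 0x05030100, i.e. ADD_EAX_i32 (0x05),
   ADD_r_rm (0x03), ADD_rm_r (0x01) and ADD (0x00) packed into one word, which
   emit_cum_binary / emit_non_cum_binary later unpack byte by byte. */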
713 #define BINARY_IMM32(op_imm, immw, arg, argw) \
714 do { \
715 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
716 FAIL_IF(!inst); \
717 *(inst + 1) |= (op_imm); \
718 } while (0)
720 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
722 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
723 do { \
724 if (IS_HALFWORD(immw) || compiler->mode32) { \
725 BINARY_IMM32(op_imm, immw, arg, argw); \
727 else { \
728 FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
729 inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
730 FAIL_IF(!inst); \
731 *inst = (op_mr); \
733 } while (0)
735 #define BINARY_EAX_IMM(op_eax_imm, immw) \
736 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
738 #else /* !SLJIT_CONFIG_X86_64 */
740 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
741 BINARY_IMM32(op_imm, immw, arg, argw)
743 #define BINARY_EAX_IMM(op_eax_imm, immw) \
744 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
746 #endif /* SLJIT_CONFIG_X86_64 */
748 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
749 sljit_s32 dst, sljit_sw dstw,
750 sljit_s32 src, sljit_sw srcw);
752 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
753 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
755 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
756 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
758 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
759 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
761 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
762 sljit_s32 src1, sljit_sw src1w,
763 sljit_s32 src2, sljit_sw src2w);
765 static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
767 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
768 /* Emit endbr32/endbr64 when CET is enabled. */
769 sljit_u8 *inst;
770 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
771 FAIL_IF(!inst);
772 INC_SIZE(4);
773 *inst++ = 0xf3;
774 *inst++ = 0x0f;
775 *inst++ = 0x1e;
776 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
777 *inst = 0xfb;
778 #else
779 *inst = 0xfa;
780 #endif
781 #else /* !SLJIT_CONFIG_X86_CET */
782 SLJIT_UNUSED_ARG(compiler);
783 #endif /* SLJIT_CONFIG_X86_CET */
784 return SLJIT_SUCCESS;
787 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
789 static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
791 sljit_u8 *inst;
792 sljit_s32 size;
794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
795 size = 5;
796 #else
797 size = 4;
798 #endif
800 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
801 FAIL_IF(!inst);
802 INC_SIZE(size);
803 *inst++ = 0xf3;
804 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
805 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
806 #endif
807 *inst++ = 0x0f;
808 *inst++ = 0x1e;
809 *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
810 return SLJIT_SUCCESS;
813 static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
815 sljit_u8 *inst;
816 sljit_s32 size;
818 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
819 size = 5;
820 #else
821 size = 4;
822 #endif
824 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
825 FAIL_IF(!inst);
826 INC_SIZE(size);
827 *inst++ = 0xf3;
828 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
829 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
830 #endif
831 *inst++ = 0x0f;
832 *inst++ = 0xae;
833 *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
834 return SLJIT_SUCCESS;
837 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
839 static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
841 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
842 return _get_ssp() != 0;
843 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
844 return 0;
845 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
848 static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
849 sljit_s32 src, sljit_sw srcw)
851 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
852 sljit_u8 *inst, *jz_after_cmp_inst;
853 sljit_uw size_jz_after_cmp_inst;
855 sljit_uw size_before_rdssp_inst = compiler->size;
857 /* Generate "RDSSP TMP_REG1". */
858 FAIL_IF(emit_rdssp(compiler, TMP_REG1));
860 /* Load return address on shadow stack into TMP_REG1. */
861 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
862 SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
864 /* Hand-code "mov 0x0(%ebp),%ebp", which the generic emitter does not support. */
865 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
866 FAIL_IF(!inst);
867 INC_SIZE(3);
868 *inst++ = 0x8b;
869 *inst++ = 0x6d;
870 *inst = 0;
871 #else /* !SLJIT_CONFIG_X86_32 */
872 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
873 #endif /* SLJIT_CONFIG_X86_32 */
875 /* Compare return address against TMP_REG1. */
876 FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
878 /* Generate JZ to skip the shadow stack adjustment when the shadow
879 stack matches the normal stack. */
880 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
881 FAIL_IF(!inst);
882 INC_SIZE(2);
883 *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
884 size_jz_after_cmp_inst = compiler->size;
885 jz_after_cmp_inst = inst;
887 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
888 /* REX_W is not necessary. */
889 compiler->mode32 = 1;
890 #endif
891 /* Load 1 into TMP_REG1. */
892 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
894 /* Generate "INCSSP TMP_REG1". */
895 FAIL_IF(emit_incssp(compiler, TMP_REG1));
897 /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
898 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
899 FAIL_IF(!inst);
900 INC_SIZE(2);
901 *inst++ = JMP_i8;
902 *inst = size_before_rdssp_inst - compiler->size;
904 *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
905 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
906 SLJIT_UNUSED_ARG(compiler);
907 SLJIT_UNUSED_ARG(src);
908 SLJIT_UNUSED_ARG(srcw);
909 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
910 return SLJIT_SUCCESS;
913 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
914 #include "sljitNativeX86_32.c"
915 #else
916 #include "sljitNativeX86_64.c"
917 #endif
919 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
920 sljit_s32 dst, sljit_sw dstw,
921 sljit_s32 src, sljit_sw srcw)
923 sljit_u8* inst;
925 if (FAST_IS_REG(src)) {
926 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
927 FAIL_IF(!inst);
928 *inst = MOV_rm_r;
929 return SLJIT_SUCCESS;
931 if (src & SLJIT_IMM) {
932 if (FAST_IS_REG(dst)) {
933 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
934 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
935 #else
936 if (!compiler->mode32) {
937 if (NOT_HALFWORD(srcw))
938 return emit_load_imm64(compiler, dst, srcw);
940 else
941 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
942 #endif
944 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
945 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
946 /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
947 an immediate directly into memory, so TMP_REG1 can be used. */
948 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
949 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
950 FAIL_IF(!inst);
951 *inst = MOV_rm_r;
952 return SLJIT_SUCCESS;
954 #endif
955 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
956 FAIL_IF(!inst);
957 *inst = MOV_rm_i32;
958 return SLJIT_SUCCESS;
960 if (FAST_IS_REG(dst)) {
961 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
962 FAIL_IF(!inst);
963 *inst = MOV_r_rm;
964 return SLJIT_SUCCESS;
967 /* Memory-to-memory move. Only the SLJIT_MOV operation copies
968 data from memory to memory, so TMP_REG1 can be used. */
969 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
970 FAIL_IF(!inst);
971 *inst = MOV_r_rm;
972 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
973 FAIL_IF(!inst);
974 *inst = MOV_rm_r;
975 return SLJIT_SUCCESS;
978 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
980 sljit_u8 *inst;
981 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
982 sljit_uw size;
983 #endif
985 CHECK_ERROR();
986 CHECK(check_sljit_emit_op0(compiler, op));
988 switch (GET_OPCODE(op)) {
989 case SLJIT_BREAKPOINT:
990 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
991 FAIL_IF(!inst);
992 INC_SIZE(1);
993 *inst = INT3;
994 break;
995 case SLJIT_NOP:
996 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
997 FAIL_IF(!inst);
998 INC_SIZE(1);
999 *inst = NOP;
1000 break;
1001 case SLJIT_LMUL_UW:
1002 case SLJIT_LMUL_SW:
1003 case SLJIT_DIVMOD_UW:
1004 case SLJIT_DIVMOD_SW:
1005 case SLJIT_DIV_UW:
1006 case SLJIT_DIV_SW:
1007 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1008 #ifdef _WIN64
1009 SLJIT_ASSERT(
1010 reg_map[SLJIT_R0] == 0
1011 && reg_map[SLJIT_R1] == 2
1012 && reg_map[TMP_REG1] > 7);
1013 #else
1014 SLJIT_ASSERT(
1015 reg_map[SLJIT_R0] == 0
1016 && reg_map[SLJIT_R1] < 7
1017 && reg_map[TMP_REG1] == 2);
1018 #endif
1019 compiler->mode32 = op & SLJIT_32;
1020 #endif
1021 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1023 op = GET_OPCODE(op);
1024 if ((op | 0x2) == SLJIT_DIV_UW) {
1025 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1026 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1027 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1028 #else
1029 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1030 #endif
1031 FAIL_IF(!inst);
1032 *inst = XOR_r_rm;
1035 if ((op | 0x2) == SLJIT_DIV_SW) {
1036 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1037 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1038 #endif
1040 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1041 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1042 FAIL_IF(!inst);
1043 INC_SIZE(1);
1044 *inst = CDQ;
1045 #else
1046 if (compiler->mode32) {
1047 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1048 FAIL_IF(!inst);
1049 INC_SIZE(1);
1050 *inst = CDQ;
1051 } else {
1052 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1053 FAIL_IF(!inst);
1054 INC_SIZE(2);
1055 *inst++ = REX_W;
1056 *inst = CDQ;
1058 #endif
1061 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1062 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1063 FAIL_IF(!inst);
1064 INC_SIZE(2);
1065 *inst++ = GROUP_F7;
1066 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1067 #else
1068 #ifdef _WIN64
1069 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1070 #else
1071 size = (!compiler->mode32) ? 3 : 2;
1072 #endif
1073 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1074 FAIL_IF(!inst);
1075 INC_SIZE(size);
1076 #ifdef _WIN64
1077 if (!compiler->mode32)
1078 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1079 else if (op >= SLJIT_DIVMOD_UW)
1080 *inst++ = REX_B;
1081 *inst++ = GROUP_F7;
1082 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1083 #else
1084 if (!compiler->mode32)
1085 *inst++ = REX_W;
1086 *inst++ = GROUP_F7;
1087 *inst = MOD_REG | reg_map[SLJIT_R1];
1088 #endif
1089 #endif
1090 switch (op) {
1091 case SLJIT_LMUL_UW:
1092 *inst |= MUL;
1093 break;
1094 case SLJIT_LMUL_SW:
1095 *inst |= IMUL;
1096 break;
1097 case SLJIT_DIVMOD_UW:
1098 case SLJIT_DIV_UW:
1099 *inst |= DIV;
1100 break;
1101 case SLJIT_DIVMOD_SW:
1102 case SLJIT_DIV_SW:
1103 *inst |= IDIV;
1104 break;
1106 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1107 if (op <= SLJIT_DIVMOD_SW)
1108 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1109 #else
1110 if (op >= SLJIT_DIV_UW)
1111 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1112 #endif
1113 break;
1114 case SLJIT_ENDBR:
1115 return emit_endbranch(compiler);
1116 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1117 return skip_frames_before_return(compiler);
1120 return SLJIT_SUCCESS;
1123 #define ENCODE_PREFIX(prefix) \
1124 do { \
1125 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
1126 FAIL_IF(!inst); \
1127 INC_SIZE(1); \
1128 *inst = U8(prefix); \
1129 } while (0)
1131 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1132 sljit_s32 dst, sljit_sw dstw,
1133 sljit_s32 src, sljit_sw srcw)
1135 sljit_u8* inst;
1136 sljit_s32 dst_r;
1137 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1138 sljit_s32 work_r;
1139 #endif
1141 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1142 compiler->mode32 = 0;
1143 #endif
1145 if (src & SLJIT_IMM) {
1146 if (FAST_IS_REG(dst)) {
1147 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1148 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1149 #else
1150 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1151 FAIL_IF(!inst);
1152 *inst = MOV_rm_i32;
1153 return SLJIT_SUCCESS;
1154 #endif
1156 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1157 FAIL_IF(!inst);
1158 *inst = MOV_rm8_i8;
1159 return SLJIT_SUCCESS;
1162 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1164 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1165 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1166 if (reg_map[src] >= 4) {
1167 SLJIT_ASSERT(dst_r == TMP_REG1);
1168 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1169 } else
1170 dst_r = src;
1171 #else
1172 dst_r = src;
1173 #endif
1175 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1176 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1177 /* src, dst are registers. */
1178 SLJIT_ASSERT(FAST_IS_REG(dst));
1179 if (reg_map[dst] < 4) {
1180 if (dst != src)
1181 EMIT_MOV(compiler, dst, 0, src, 0);
1182 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
1183 FAIL_IF(!inst);
1184 *inst++ = GROUP_0F;
1185 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1187 else {
1188 if (dst != src)
1189 EMIT_MOV(compiler, dst, 0, src, 0);
1190 if (sign) {
1191 /* shl reg, 24 */
1192 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1193 FAIL_IF(!inst);
1194 *inst |= SHL;
1195 /* sar reg, 24 */
1196 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1197 FAIL_IF(!inst);
1198 *inst |= SAR;
1200 else {
1201 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1202 FAIL_IF(!inst);
1203 *(inst + 1) |= AND;
1206 return SLJIT_SUCCESS;
1208 #endif
1209 else {
1210 /* src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
1211 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1212 FAIL_IF(!inst);
1213 *inst++ = GROUP_0F;
1214 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1217 if (dst & SLJIT_MEM) {
1218 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1219 if (dst_r == TMP_REG1) {
1220 /* Find an unused register whose reg_map value is < 4. */
1221 if ((dst & REG_MASK) == SLJIT_R0) {
1222 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
1223 work_r = SLJIT_R2;
1224 else
1225 work_r = SLJIT_R1;
1227 else {
1228 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1229 work_r = SLJIT_R0;
1230 else if ((dst & REG_MASK) == SLJIT_R1)
1231 work_r = SLJIT_R2;
1232 else
1233 work_r = SLJIT_R1;
1236 if (work_r == SLJIT_R0) {
1237 ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
1239 else {
1240 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1241 FAIL_IF(!inst);
1242 *inst = XCHG_r_rm;
1245 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1246 FAIL_IF(!inst);
1247 *inst = MOV_rm8_r8;
1249 if (work_r == SLJIT_R0) {
1250 ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
1252 else {
1253 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1254 FAIL_IF(!inst);
1255 *inst = XCHG_r_rm;
1258 else {
1259 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1260 FAIL_IF(!inst);
1261 *inst = MOV_rm8_r8;
1263 #else
1264 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1265 FAIL_IF(!inst);
1266 *inst = MOV_rm8_r8;
1267 #endif
1270 return SLJIT_SUCCESS;
1273 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1274 sljit_s32 src, sljit_sw srcw)
1276 sljit_u8* inst;
1278 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1279 compiler->mode32 = 1;
1280 #endif
1282 inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1283 FAIL_IF(!inst);
1284 *inst++ = GROUP_0F;
1285 *inst++ = PREFETCH;
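/* The reg field of the ModRM byte below selects the hint: /0 is prefetchnta
   (the default when none of the branches applies), /1 prefetcht0,
   /2 prefetcht1 and /3 prefetcht2 (comment added for clarity). */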
1287 if (op == SLJIT_PREFETCH_L1)
1288 *inst |= (1 << 3);
1289 else if (op == SLJIT_PREFETCH_L2)
1290 *inst |= (2 << 3);
1291 else if (op == SLJIT_PREFETCH_L3)
1292 *inst |= (3 << 3);
1294 return SLJIT_SUCCESS;
1297 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1298 sljit_s32 dst, sljit_sw dstw,
1299 sljit_s32 src, sljit_sw srcw)
1301 sljit_u8* inst;
1302 sljit_s32 dst_r;
1304 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1305 compiler->mode32 = 0;
1306 #endif
1308 if (src & SLJIT_IMM) {
1309 if (FAST_IS_REG(dst)) {
1310 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1311 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1312 #else
1313 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1314 FAIL_IF(!inst);
1315 *inst = MOV_rm_i32;
1316 return SLJIT_SUCCESS;
1317 #endif
1319 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1320 FAIL_IF(!inst);
1321 *inst = MOV_rm_i32;
1322 return SLJIT_SUCCESS;
1325 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1327 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1328 dst_r = src;
1329 else {
1330 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1331 FAIL_IF(!inst);
1332 *inst++ = GROUP_0F;
1333 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1336 if (dst & SLJIT_MEM) {
1337 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1338 FAIL_IF(!inst);
1339 *inst = MOV_rm_r;
1342 return SLJIT_SUCCESS;
1345 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1346 sljit_s32 dst, sljit_sw dstw,
1347 sljit_s32 src, sljit_sw srcw)
1349 sljit_u8* inst;
1351 if (dst == src && dstw == srcw) {
1352 /* Same input and output */
1353 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1354 FAIL_IF(!inst);
1355 *inst++ = GROUP_F7;
1356 *inst |= opcode;
1357 return SLJIT_SUCCESS;
1360 if (FAST_IS_REG(dst)) {
1361 EMIT_MOV(compiler, dst, 0, src, srcw);
1362 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1363 FAIL_IF(!inst);
1364 *inst++ = GROUP_F7;
1365 *inst |= opcode;
1366 return SLJIT_SUCCESS;
1369 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1370 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1371 FAIL_IF(!inst);
1372 *inst++ = GROUP_F7;
1373 *inst |= opcode;
1374 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1375 return SLJIT_SUCCESS;
1378 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1379 sljit_s32 dst, sljit_sw dstw,
1380 sljit_s32 src, sljit_sw srcw)
1382 sljit_u8* inst;
1384 if (FAST_IS_REG(dst)) {
1385 EMIT_MOV(compiler, dst, 0, src, srcw);
1386 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1387 FAIL_IF(!inst);
1388 *inst++ = GROUP_F7;
1389 *inst |= NOT_rm;
1390 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1391 FAIL_IF(!inst);
1392 *inst = OR_r_rm;
1393 return SLJIT_SUCCESS;
1396 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1397 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1398 FAIL_IF(!inst);
1399 *inst++ = GROUP_F7;
1400 *inst |= NOT_rm;
1401 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1402 FAIL_IF(!inst);
1403 *inst = OR_r_rm;
1404 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1405 return SLJIT_SUCCESS;
1408 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1409 static const sljit_sw emit_clz_arg = 32 + 31;
1410 #endif
1412 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1413 sljit_s32 dst, sljit_sw dstw,
1414 sljit_s32 src, sljit_sw srcw)
1416 sljit_u8* inst;
1417 sljit_s32 dst_r;
1419 SLJIT_UNUSED_ARG(op_flags);
1421 if (cpu_has_cmov == -1)
1422 get_cpu_features();
1424 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1426 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1427 FAIL_IF(!inst);
1428 *inst++ = GROUP_0F;
1429 *inst = BSR_r_rm;
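/* Added explanation: BSR puts the index of the highest set bit into dst_r and sets
   ZF for a zero input. The zero case is patched below to 32 + 31 (64 + 63 in 64 bit
   mode) via CMOV or a generic branch, and the final XOR with 31 (or 63) converts the
   bit index into the leading-zero count, since x ^ 31 == 31 - x for 0 <= x <= 31. */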
1431 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1432 if (cpu_has_cmov) {
1433 if (dst_r != TMP_REG1) {
1434 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1435 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1437 else
1438 inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1440 FAIL_IF(!inst);
1441 *inst++ = GROUP_0F;
1442 *inst = CMOVE_r_rm;
1444 else
1445 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1447 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1448 #else
1449 if (cpu_has_cmov) {
1450 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31));
1452 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1453 FAIL_IF(!inst);
1454 *inst++ = GROUP_0F;
1455 *inst = CMOVE_r_rm;
1457 else
1458 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)));
1460 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0);
1461 #endif
1463 FAIL_IF(!inst);
1464 *(inst + 1) |= XOR;
1466 if (dst & SLJIT_MEM)
1467 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1468 return SLJIT_SUCCESS;
1471 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1472 sljit_s32 dst, sljit_sw dstw,
1473 sljit_s32 src, sljit_sw srcw)
1475 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1476 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1477 sljit_s32 dst_is_ereg = 0;
1478 #endif
1480 CHECK_ERROR();
1481 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1482 ADJUST_LOCAL_OFFSET(dst, dstw);
1483 ADJUST_LOCAL_OFFSET(src, srcw);
1485 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1486 CHECK_EXTRA_REGS(src, srcw, (void)0);
1487 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1488 compiler->mode32 = op_flags & SLJIT_32;
1489 #endif
1491 op = GET_OPCODE(op);
1493 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1494 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1495 compiler->mode32 = 0;
1496 #endif
1498 if (FAST_IS_REG(src) && src == dst) {
1499 if (!TYPE_CAST_NEEDED(op))
1500 return SLJIT_SUCCESS;
1503 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1504 if (op_flags & SLJIT_32) {
1505 if (src & SLJIT_MEM) {
1506 if (op == SLJIT_MOV_S32)
1507 op = SLJIT_MOV_U32;
1509 else if (src & SLJIT_IMM) {
1510 if (op == SLJIT_MOV_U32)
1511 op = SLJIT_MOV_S32;
1514 #endif
1516 if (src & SLJIT_IMM) {
1517 switch (op) {
1518 case SLJIT_MOV_U8:
1519 srcw = (sljit_u8)srcw;
1520 break;
1521 case SLJIT_MOV_S8:
1522 srcw = (sljit_s8)srcw;
1523 break;
1524 case SLJIT_MOV_U16:
1525 srcw = (sljit_u16)srcw;
1526 break;
1527 case SLJIT_MOV_S16:
1528 srcw = (sljit_s16)srcw;
1529 break;
1530 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1531 case SLJIT_MOV_U32:
1532 srcw = (sljit_u32)srcw;
1533 break;
1534 case SLJIT_MOV_S32:
1535 srcw = (sljit_s32)srcw;
1536 break;
1537 #endif
1539 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1540 if (SLJIT_UNLIKELY(dst_is_ereg))
1541 return emit_mov(compiler, dst, dstw, src, srcw);
1542 #endif
1545 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1546 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1547 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1548 dst = TMP_REG1;
1550 #endif
1552 switch (op) {
1553 case SLJIT_MOV:
1554 case SLJIT_MOV_P:
1555 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1556 case SLJIT_MOV_U32:
1557 case SLJIT_MOV_S32:
1558 case SLJIT_MOV32:
1559 #endif
1560 EMIT_MOV(compiler, dst, dstw, src, srcw);
1561 break;
1562 case SLJIT_MOV_U8:
1563 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1564 break;
1565 case SLJIT_MOV_S8:
1566 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1567 break;
1568 case SLJIT_MOV_U16:
1569 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1570 break;
1571 case SLJIT_MOV_S16:
1572 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1573 break;
1574 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1575 case SLJIT_MOV_U32:
1576 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1577 break;
1578 case SLJIT_MOV_S32:
1579 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1580 break;
1581 case SLJIT_MOV32:
1582 compiler->mode32 = 1;
1583 EMIT_MOV(compiler, dst, dstw, src, srcw);
1584 compiler->mode32 = 0;
1585 break;
1586 #endif
1589 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1590 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1591 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1592 #endif
1593 return SLJIT_SUCCESS;
1596 switch (op) {
1597 case SLJIT_NOT:
1598 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1599 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1600 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1602 case SLJIT_CLZ:
1603 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1606 return SLJIT_SUCCESS;
1609 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1610 sljit_u32 op_types,
1611 sljit_s32 dst, sljit_sw dstw,
1612 sljit_s32 src1, sljit_sw src1w,
1613 sljit_s32 src2, sljit_sw src2w)
1615 sljit_u8* inst;
1616 sljit_u8 op_eax_imm = U8(op_types >> 24);
1617 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1618 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1619 sljit_u8 op_imm = U8(op_types & 0xff);
1621 if (dst == src1 && dstw == src1w) {
1622 if (src2 & SLJIT_IMM) {
1623 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1624 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1625 #else
1626 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1627 #endif
1628 BINARY_EAX_IMM(op_eax_imm, src2w);
1630 else {
1631 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1634 else if (FAST_IS_REG(dst)) {
1635 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1636 FAIL_IF(!inst);
1637 *inst = op_rm;
1639 else if (FAST_IS_REG(src2)) {
1640 /* Special exception for sljit_emit_op_flags. */
1641 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1642 FAIL_IF(!inst);
1643 *inst = op_mr;
1645 else {
1646 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1647 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1648 FAIL_IF(!inst);
1649 *inst = op_mr;
1651 return SLJIT_SUCCESS;
1654 /* Only for cumulative operations. */
1655 if (dst == src2 && dstw == src2w) {
1656 if (src1 & SLJIT_IMM) {
1657 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1658 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1659 #else
1660 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1661 #endif
1662 BINARY_EAX_IMM(op_eax_imm, src1w);
1664 else {
1665 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1668 else if (FAST_IS_REG(dst)) {
1669 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1670 FAIL_IF(!inst);
1671 *inst = op_rm;
1673 else if (FAST_IS_REG(src1)) {
1674 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1675 FAIL_IF(!inst);
1676 *inst = op_mr;
1678 else {
1679 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1680 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1681 FAIL_IF(!inst);
1682 *inst = op_mr;
1684 return SLJIT_SUCCESS;
1687 /* General version. */
1688 if (FAST_IS_REG(dst)) {
1689 EMIT_MOV(compiler, dst, 0, src1, src1w);
1690 if (src2 & SLJIT_IMM) {
1691 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1693 else {
1694 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1695 FAIL_IF(!inst);
1696 *inst = op_rm;
1699 else {
1700 /* This version requires fewer memory writes. */
1701 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1702 if (src2 & SLJIT_IMM) {
1703 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1705 else {
1706 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1707 FAIL_IF(!inst);
1708 *inst = op_rm;
1710 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1713 return SLJIT_SUCCESS;
1716 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1717 sljit_u32 op_types,
1718 sljit_s32 dst, sljit_sw dstw,
1719 sljit_s32 src1, sljit_sw src1w,
1720 sljit_s32 src2, sljit_sw src2w)
1722 sljit_u8* inst;
1723 sljit_u8 op_eax_imm = U8(op_types >> 24);
1724 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1725 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1726 sljit_u8 op_imm = U8(op_types & 0xff);
1728 if (dst == src1 && dstw == src1w) {
1729 if (src2 & SLJIT_IMM) {
1730 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1731 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1732 #else
1733 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1734 #endif
1735 BINARY_EAX_IMM(op_eax_imm, src2w);
1737 else {
1738 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1741 else if (FAST_IS_REG(dst)) {
1742 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1743 FAIL_IF(!inst);
1744 *inst = op_rm;
1746 else if (FAST_IS_REG(src2)) {
1747 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1748 FAIL_IF(!inst);
1749 *inst = op_mr;
1751 else {
1752 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1753 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1754 FAIL_IF(!inst);
1755 *inst = op_mr;
1757 return SLJIT_SUCCESS;
1760 /* General version. */
1761 if (FAST_IS_REG(dst) && dst != src2) {
1762 EMIT_MOV(compiler, dst, 0, src1, src1w);
1763 if (src2 & SLJIT_IMM) {
1764 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1766 else {
1767 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1768 FAIL_IF(!inst);
1769 *inst = op_rm;
1772 else {
1773 /* This version requires fewer memory writes. */
1774 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1775 if (src2 & SLJIT_IMM) {
1776 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1778 else {
1779 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1780 FAIL_IF(!inst);
1781 *inst = op_rm;
1783 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1786 return SLJIT_SUCCESS;
1789 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1790 sljit_s32 dst, sljit_sw dstw,
1791 sljit_s32 src1, sljit_sw src1w,
1792 sljit_s32 src2, sljit_sw src2w)
1794 sljit_u8* inst;
1795 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1797 /* Register destination. */
1798 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1799 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1800 FAIL_IF(!inst);
1801 *inst++ = GROUP_0F;
1802 *inst = IMUL_r_rm;
1804 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1805 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1806 FAIL_IF(!inst);
1807 *inst++ = GROUP_0F;
1808 *inst = IMUL_r_rm;
1810 else if (src1 & SLJIT_IMM) {
1811 if (src2 & SLJIT_IMM) {
1812 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1813 src2 = dst_r;
1814 src2w = 0;
1817 if (src1w <= 127 && src1w >= -128) {
1818 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1819 FAIL_IF(!inst);
1820 *inst = IMUL_r_rm_i8;
1821 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1822 FAIL_IF(!inst);
1823 INC_SIZE(1);
1824 *inst = U8(src1w);
1826 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1827 else {
1828 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1829 FAIL_IF(!inst);
1830 *inst = IMUL_r_rm_i32;
1831 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1832 FAIL_IF(!inst);
1833 INC_SIZE(4);
1834 sljit_unaligned_store_sw(inst, src1w);
1836 #else
1837 else if (IS_HALFWORD(src1w)) {
1838 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1839 FAIL_IF(!inst);
1840 *inst = IMUL_r_rm_i32;
1841 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1842 FAIL_IF(!inst);
1843 INC_SIZE(4);
1844 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1846 else {
1847 if (dst_r != src2)
1848 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1849 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1850 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1851 FAIL_IF(!inst);
1852 *inst++ = GROUP_0F;
1853 *inst = IMUL_r_rm;
1855 #endif
1857 else if (src2 & SLJIT_IMM) {
1858 /* Note: src1 is NOT immediate. */
1860 if (src2w <= 127 && src2w >= -128) {
1861 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1862 FAIL_IF(!inst);
1863 *inst = IMUL_r_rm_i8;
1864 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1865 FAIL_IF(!inst);
1866 INC_SIZE(1);
1867 *inst = U8(src2w);
1869 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1870 else {
1871 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1872 FAIL_IF(!inst);
1873 *inst = IMUL_r_rm_i32;
1874 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1875 FAIL_IF(!inst);
1876 INC_SIZE(4);
1877 sljit_unaligned_store_sw(inst, src2w);
1879 #else
1880 else if (IS_HALFWORD(src2w)) {
1881 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1882 FAIL_IF(!inst);
1883 *inst = IMUL_r_rm_i32;
1884 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1885 FAIL_IF(!inst);
1886 INC_SIZE(4);
1887 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1889 else {
1890 if (dst_r != src1)
1891 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1892 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1893 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1894 FAIL_IF(!inst);
1895 *inst++ = GROUP_0F;
1896 *inst = IMUL_r_rm;
1898 #endif
1900 else {
1901 /* Neither argument is immediate. */
1902 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1903 dst_r = TMP_REG1;
1904 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1905 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1906 FAIL_IF(!inst);
1907 *inst++ = GROUP_0F;
1908 *inst = IMUL_r_rm;
1911 if (dst & SLJIT_MEM)
1912 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1914 return SLJIT_SUCCESS;
1917 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1918 sljit_s32 dst, sljit_sw dstw,
1919 sljit_s32 src1, sljit_sw src1w,
1920 sljit_s32 src2, sljit_sw src2w)
1922 sljit_u8* inst;
1923 sljit_s32 dst_r, done = 0;
1925 /* These cases are better handled by the normal code path. */
1926 if (dst == src1 && dstw == src1w)
1927 return SLJIT_ERR_UNSUPPORTED;
1928 if (dst == src2 && dstw == src2w)
1929 return SLJIT_ERR_UNSUPPORTED;
1931 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1933 if (FAST_IS_REG(src1)) {
1934 if (FAST_IS_REG(src2)) {
1935 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1936 FAIL_IF(!inst);
1937 *inst = LEA_r_m;
1938 done = 1;
1940 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1941 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1942 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1943 #else
1944 if (src2 & SLJIT_IMM) {
1945 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1946 #endif
1947 FAIL_IF(!inst);
1948 *inst = LEA_r_m;
1949 done = 1;
1952 else if (FAST_IS_REG(src2)) {
1953 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1954 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1955 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1956 #else
1957 if (src1 & SLJIT_IMM) {
1958 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1959 #endif
1960 FAIL_IF(!inst);
1961 *inst = LEA_r_m;
1962 done = 1;
1966 if (done) {
1967 if (dst_r == TMP_REG1)
1968 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1969 return SLJIT_SUCCESS;
1971 return SLJIT_ERR_UNSUPPORTED;
1974 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1975 sljit_s32 src1, sljit_sw src1w,
1976 sljit_s32 src2, sljit_sw src2w)
1978 sljit_u8* inst;
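/* The accumulator form (CMP EAX, imm32) needs no ModRM byte, so it is preferred
   when src1 is R0 and the immediate does not fit in a sign-extended byte. */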
1980 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1981 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1982 #else
1983 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1984 #endif
1985 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1986 return SLJIT_SUCCESS;
1989 if (FAST_IS_REG(src1)) {
1990 if (src2 & SLJIT_IMM) {
1991 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1993 else {
1994 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1995 FAIL_IF(!inst);
1996 *inst = CMP_r_rm;
1998 return SLJIT_SUCCESS;
2001 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
2002 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2003 FAIL_IF(!inst);
2004 *inst = CMP_rm_r;
2005 return SLJIT_SUCCESS;
2008 if (src2 & SLJIT_IMM) {
2009 if (src1 & SLJIT_IMM) {
2010 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2011 src1 = TMP_REG1;
2012 src1w = 0;
2014 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2016 else {
2017 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2018 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2019 FAIL_IF(!inst);
2020 *inst = CMP_r_rm;
2022 return SLJIT_SUCCESS;
2025 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2026 sljit_s32 src1, sljit_sw src1w,
2027 sljit_s32 src2, sljit_sw src2w)
2029 sljit_u8* inst;
2031 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2032 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2033 #else
2034 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2035 #endif
2036 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2037 return SLJIT_SUCCESS;
2040 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2041 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2042 #else
2043 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
2044 #endif
2045 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2046 return SLJIT_SUCCESS;
2049 if (!(src1 & SLJIT_IMM)) {
2050 if (src2 & SLJIT_IMM) {
2051 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2052 if (IS_HALFWORD(src2w) || compiler->mode32) {
2053 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2054 FAIL_IF(!inst);
2055 *inst = GROUP_F7;
2057 else {
2058 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
2059 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
2060 FAIL_IF(!inst);
2061 *inst = TEST_rm_r;
2063 #else
2064 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2065 FAIL_IF(!inst);
2066 *inst = GROUP_F7;
2067 #endif
2068 return SLJIT_SUCCESS;
2070 else if (FAST_IS_REG(src1)) {
2071 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2072 FAIL_IF(!inst);
2073 *inst = TEST_rm_r;
2074 return SLJIT_SUCCESS;
2078 if (!(src2 & SLJIT_IMM)) {
2079 if (src1 & SLJIT_IMM) {
2080 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2081 if (IS_HALFWORD(src1w) || compiler->mode32) {
2082 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2083 FAIL_IF(!inst);
2084 *inst = GROUP_F7;
2086 else {
2087 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2088 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2089 FAIL_IF(!inst);
2090 *inst = TEST_rm_r;
2092 #else
2093 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2094 FAIL_IF(!inst);
2095 *inst = GROUP_F7;
2096 #endif
2097 return SLJIT_SUCCESS;
2099 else if (FAST_IS_REG(src2)) {
2100 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2101 FAIL_IF(!inst);
2102 *inst = TEST_rm_r;
2103 return SLJIT_SUCCESS;
2107 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2108 if (src2 & SLJIT_IMM) {
2109 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2110 if (IS_HALFWORD(src2w) || compiler->mode32) {
2111 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2112 FAIL_IF(!inst);
2113 *inst = GROUP_F7;
2115 else {
2116 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2117 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2118 FAIL_IF(!inst);
2119 *inst = TEST_rm_r;
2121 #else
2122 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2123 FAIL_IF(!inst);
2124 *inst = GROUP_F7;
2125 #endif
2127 else {
2128 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2129 FAIL_IF(!inst);
2130 *inst = TEST_rm_r;
2132 return SLJIT_SUCCESS;
2135 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2136 sljit_u8 mode,
2137 sljit_s32 dst, sljit_sw dstw,
2138 sljit_s32 src1, sljit_sw src1w,
2139 sljit_s32 src2, sljit_sw src2w)
2141 sljit_u8* inst;
2143 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2144 if (dst == src1 && dstw == src1w) {
2145 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2146 FAIL_IF(!inst);
2147 *inst |= mode;
2148 return SLJIT_SUCCESS;
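/* Both the destination and the shift count are in ecx: shift a copy of src1
   held in TMP_REG1 by cl, then move the result back into ecx. */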
2150 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2151 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2152 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2153 FAIL_IF(!inst);
2154 *inst |= mode;
2155 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2156 return SLJIT_SUCCESS;
2158 if (FAST_IS_REG(dst)) {
2159 EMIT_MOV(compiler, dst, 0, src1, src1w);
2160 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2161 FAIL_IF(!inst);
2162 *inst |= mode;
2163 return SLJIT_SUCCESS;
2166 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2167 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2168 FAIL_IF(!inst);
2169 *inst |= mode;
2170 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2171 return SLJIT_SUCCESS;
2174 if (dst == SLJIT_PREF_SHIFT_REG) {
2175 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2176 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2177 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2178 FAIL_IF(!inst);
2179 *inst |= mode;
2180 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2182 else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2183 if (src1 != dst)
2184 EMIT_MOV(compiler, dst, 0, src1, src1w);
2185 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2186 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2187 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2188 FAIL_IF(!inst);
2189 *inst |= mode;
2190 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2192 else {
2193 /* This case is complex: ecx itself may be used for addressing,
2194 and that must keep working while ecx holds the shift count. */
2195 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2196 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
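/* No second temporary register is available on x86-32, so ecx is spilled to
   the word at [esp] around the shift and restored afterwards. */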
2197 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2198 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2199 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2200 FAIL_IF(!inst);
2201 *inst |= mode;
2202 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2203 #else
2204 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2205 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2206 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2207 FAIL_IF(!inst);
2208 *inst |= mode;
2209 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2210 #endif
2211 if (dst != TMP_REG1)
2212 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2215 return SLJIT_SUCCESS;
2218 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2219 sljit_u8 mode, sljit_s32 set_flags,
2220 sljit_s32 dst, sljit_sw dstw,
2221 sljit_s32 src1, sljit_sw src1w,
2222 sljit_s32 src2, sljit_sw src2w)
2224 /* The CPU does not set flags if the shift count is 0. */
2225 if (src2 & SLJIT_IMM) {
2226 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2227 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2228 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2229 #else
2230 if ((src2w & 0x1f) != 0)
2231 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2232 #endif
2233 if (!set_flags)
2234 return emit_mov(compiler, dst, dstw, src1, src1w);
2235 /* OR dst, src, 0 */
2236 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2237 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2240 if (!set_flags)
2241 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
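/* A run-time shift count of zero would leave the flags untouched, so an
   explicit compare with zero makes sure the flags match the result: before the
   shift (on src1) when the destination is in memory, after the shift (on dst)
   when the destination is a register. */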
2243 if (!FAST_IS_REG(dst))
2244 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2246 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2248 if (FAST_IS_REG(dst))
2249 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2250 return SLJIT_SUCCESS;
2253 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2254 sljit_s32 dst, sljit_sw dstw,
2255 sljit_s32 src1, sljit_sw src1w,
2256 sljit_s32 src2, sljit_sw src2w)
2258 CHECK_ERROR();
2259 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2260 ADJUST_LOCAL_OFFSET(dst, dstw);
2261 ADJUST_LOCAL_OFFSET(src1, src1w);
2262 ADJUST_LOCAL_OFFSET(src2, src2w);
2264 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2265 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2266 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2267 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2268 compiler->mode32 = op & SLJIT_32;
2269 #endif
2271 SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op));
2273 switch (GET_OPCODE(op)) {
2274 case SLJIT_ADD:
2275 if (!HAS_FLAGS(op)) {
2276 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2277 return compiler->error;
2279 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2280 dst, dstw, src1, src1w, src2, src2w);
2281 case SLJIT_ADDC:
2282 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2283 dst, dstw, src1, src1w, src2, src2w);
2284 case SLJIT_SUB:
2285 if (src1 == SLJIT_IMM && src1w == 0)
2286 return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2288 if (!HAS_FLAGS(op)) {
2289 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2290 return compiler->error;
2291 if (FAST_IS_REG(dst) && src2 == dst) {
2292 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2293 return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2297 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2298 dst, dstw, src1, src1w, src2, src2w);
2299 case SLJIT_SUBC:
2300 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2301 dst, dstw, src1, src1w, src2, src2w);
2302 case SLJIT_MUL:
2303 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2304 case SLJIT_AND:
2305 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2306 dst, dstw, src1, src1w, src2, src2w);
2307 case SLJIT_OR:
2308 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2309 dst, dstw, src1, src1w, src2, src2w);
2310 case SLJIT_XOR:
2311 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2312 dst, dstw, src1, src1w, src2, src2w);
2313 case SLJIT_SHL:
2314 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2315 dst, dstw, src1, src1w, src2, src2w);
2316 case SLJIT_LSHR:
2317 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2318 dst, dstw, src1, src1w, src2, src2w);
2319 case SLJIT_ASHR:
2320 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2321 dst, dstw, src1, src1w, src2, src2w);
2324 return SLJIT_SUCCESS;
2327 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2328 sljit_s32 src1, sljit_sw src1w,
2329 sljit_s32 src2, sljit_sw src2w)
2331 sljit_s32 opcode = GET_OPCODE(op);
2333 CHECK_ERROR();
2334 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2336 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2337 SLJIT_SKIP_CHECKS(compiler);
2338 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2341 ADJUST_LOCAL_OFFSET(src1, src1w);
2342 ADJUST_LOCAL_OFFSET(src2, src2w);
2344 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2345 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2346 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2347 compiler->mode32 = op & SLJIT_32;
2348 #endif
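/* A SUB whose result is dropped is emitted as a CMP, and an AND whose result
   is dropped as a TEST; both set the flags without writing a destination. */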
2350 if (opcode == SLJIT_SUB) {
2351 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2353 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2356 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2357 sljit_s32 src, sljit_sw srcw)
2359 CHECK_ERROR();
2360 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2361 ADJUST_LOCAL_OFFSET(src, srcw);
2363 CHECK_EXTRA_REGS(src, srcw, (void)0);
2365 switch (op) {
2366 case SLJIT_FAST_RETURN:
2367 return emit_fast_return(compiler, src, srcw);
2368 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2369 /* Don't adjust shadow stack if it isn't enabled. */
2370 if (!cpu_has_shadow_stack ())
2371 return SLJIT_SUCCESS;
2372 return adjust_shadow_stack(compiler, src, srcw);
2373 case SLJIT_PREFETCH_L1:
2374 case SLJIT_PREFETCH_L2:
2375 case SLJIT_PREFETCH_L3:
2376 case SLJIT_PREFETCH_ONCE:
2377 return emit_prefetch(compiler, op, src, srcw);
2380 return SLJIT_SUCCESS;
2383 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2385 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2386 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2387 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2388 return -1;
2389 #endif
2390 return reg_map[reg];
2393 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2395 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2396 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2397 return reg;
2398 #else
2399 return freg_map[reg];
2400 #endif
2403 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2404 void *instruction, sljit_u32 size)
2406 sljit_u8 *inst;
2408 CHECK_ERROR();
2409 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2411 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2412 FAIL_IF(!inst);
2413 INC_SIZE(size);
2414 SLJIT_MEMCPY(inst, instruction, size);
2415 return SLJIT_SUCCESS;
2418 /* --------------------------------------------------------------------- */
2419 /* Floating point operators */
2420 /* --------------------------------------------------------------------- */
2422 /* 3 words of padding to reach 16 byte alignment + four 16 byte constants. */
2423 static sljit_u32 sse2_data[3 + (4 * 4)];
2424 static sljit_u32 *sse2_buffer;
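/* After init_compiler the aligned buffer holds four 16 byte constants:
     sse2_buffer +  0 : 0x80000000 0 0 0            - sign mask for single precision negation
     sse2_buffer +  4 : 0x7fffffff 0 0 0            - mask for single precision fabs
     sse2_buffer +  8 : 0 0x80000000 0 0            - sign mask for double precision negation
     sse2_buffer + 12 : 0xffffffff 0x7fffffff 0 0   - mask for double precision fabs
   They are used by the XORPD/ANDPD emitted for SLJIT_NEG_F64 / SLJIT_ABS_F64. */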
2426 static void init_compiler(void)
2428 /* Align to 16 bytes. */
2429 sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
2431 /* Single precision constants (each constant is 16 bytes long). */
2432 sse2_buffer[0] = 0x80000000;
2433 sse2_buffer[4] = 0x7fffffff;
2434 /* Double precision constants (each constant is 16 bytes long). */
2435 sse2_buffer[8] = 0;
2436 sse2_buffer[9] = 0x80000000;
2437 sse2_buffer[12] = 0xffffffff;
2438 sse2_buffer[13] = 0x7fffffff;
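/* emit_sse2 emits a scalar instruction: prefix F3 (single) or F2 (double),
   then 0F and the opcode. emit_sse2_logic emits the packed/compare forms,
   where the 66 prefix selects the double precision variant. */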
2441 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2442 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2444 sljit_u8 *inst;
2446 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2447 FAIL_IF(!inst);
2448 *inst++ = GROUP_0F;
2449 *inst = opcode;
2450 return SLJIT_SUCCESS;
2453 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2454 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2456 sljit_u8 *inst;
2458 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2459 FAIL_IF(!inst);
2460 *inst++ = GROUP_0F;
2461 *inst = opcode;
2462 return SLJIT_SUCCESS;
2465 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2466 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2468 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2471 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2472 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2474 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2477 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2478 sljit_s32 dst, sljit_sw dstw,
2479 sljit_s32 src, sljit_sw srcw)
2481 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2482 sljit_u8 *inst;
2484 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2485 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2486 compiler->mode32 = 0;
2487 #endif
2489 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2490 FAIL_IF(!inst);
2491 *inst++ = GROUP_0F;
2492 *inst = CVTTSD2SI_r_xm;
2494 if (dst & SLJIT_MEM)
2495 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2496 return SLJIT_SUCCESS;
2499 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2500 sljit_s32 dst, sljit_sw dstw,
2501 sljit_s32 src, sljit_sw srcw)
2503 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2504 sljit_u8 *inst;
2506 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2507 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2508 compiler->mode32 = 0;
2509 #endif
2511 if (src & SLJIT_IMM) {
2512 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2513 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2514 srcw = (sljit_s32)srcw;
2515 #endif
2516 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2517 src = TMP_REG1;
2518 srcw = 0;
2521 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2522 FAIL_IF(!inst);
2523 *inst++ = GROUP_0F;
2524 *inst = CVTSI2SD_x_rm;
2526 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2527 compiler->mode32 = 1;
2528 #endif
2529 if (dst_r == TMP_FREG)
2530 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2531 return SLJIT_SUCCESS;
2534 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2535 sljit_s32 src1, sljit_sw src1w,
2536 sljit_s32 src2, sljit_sw src2w)
2538 switch (GET_FLAG_TYPE(op)) {
2539 case SLJIT_ORDERED_LESS:
2540 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2541 case SLJIT_UNORDERED_OR_GREATER:
2542 case SLJIT_ORDERED_LESS_EQUAL:
2543 if (!FAST_IS_REG(src2)) {
2544 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
2545 src2 = TMP_FREG;
2548 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src2, src1, src1w);
2551 if (!FAST_IS_REG(src1)) {
2552 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2553 src1 = TMP_FREG;
2556 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w);
2559 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2560 sljit_s32 dst, sljit_sw dstw,
2561 sljit_s32 src, sljit_sw srcw)
2563 sljit_s32 dst_r;
2565 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2566 compiler->mode32 = 1;
2567 #endif
2569 CHECK_ERROR();
2570 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2572 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2573 if (FAST_IS_REG(dst))
2574 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
2575 if (FAST_IS_REG(src))
2576 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
2577 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
2578 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2581 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2582 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2583 if (FAST_IS_REG(src)) {
2584 /* We overwrite the high bits of the source. From the SLJIT point of view,
2585 this is not an issue.
2586 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2587 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0));
2589 else {
2590 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
2591 src = TMP_FREG;
2594 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0));
2595 if (dst_r == TMP_FREG)
2596 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2597 return SLJIT_SUCCESS;
2600 if (FAST_IS_REG(dst)) {
2601 dst_r = dst;
2602 if (dst != src)
2603 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2605 else {
2606 dst_r = TMP_FREG;
2607 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2610 switch (GET_OPCODE(op)) {
2611 case SLJIT_NEG_F64:
2612 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8)));
2613 break;
2615 case SLJIT_ABS_F64:
2616 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12)));
2617 break;
2620 if (dst_r == TMP_FREG)
2621 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2622 return SLJIT_SUCCESS;
2625 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2626 sljit_s32 dst, sljit_sw dstw,
2627 sljit_s32 src1, sljit_sw src1w,
2628 sljit_s32 src2, sljit_sw src2w)
2630 sljit_s32 dst_r;
2632 CHECK_ERROR();
2633 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2634 ADJUST_LOCAL_OFFSET(dst, dstw);
2635 ADJUST_LOCAL_OFFSET(src1, src1w);
2636 ADJUST_LOCAL_OFFSET(src2, src2w);
2638 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2639 compiler->mode32 = 1;
2640 #endif
2642 if (FAST_IS_REG(dst)) {
2643 dst_r = dst;
2644 if (dst == src1)
2645 ; /* Do nothing here. */
2646 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2647 /* Swap arguments. */
2648 src2 = src1;
2649 src2w = src1w;
2651 else if (dst != src2)
2652 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
2653 else {
2654 dst_r = TMP_FREG;
2655 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2658 else {
2659 dst_r = TMP_FREG;
2660 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2663 switch (GET_OPCODE(op)) {
2664 case SLJIT_ADD_F64:
2665 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2666 break;
2668 case SLJIT_SUB_F64:
2669 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2670 break;
2672 case SLJIT_MUL_F64:
2673 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2674 break;
2676 case SLJIT_DIV_F64:
2677 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2678 break;
2681 if (dst_r == TMP_FREG)
2682 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2683 return SLJIT_SUCCESS;
2686 /* --------------------------------------------------------------------- */
2687 /* Conditional instructions */
2688 /* --------------------------------------------------------------------- */
2690 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2692 sljit_u8 *inst;
2693 struct sljit_label *label;
2695 CHECK_ERROR_PTR();
2696 CHECK_PTR(check_sljit_emit_label(compiler));
2698 if (compiler->last_label && compiler->last_label->size == compiler->size)
2699 return compiler->last_label;
2701 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2702 PTR_FAIL_IF(!label);
2703 set_label(label, compiler);
2705 inst = (sljit_u8*)ensure_buf(compiler, 2);
2706 PTR_FAIL_IF(!inst);
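/* The first byte of each record in the code buffer is an instruction length;
   a zero length marks a non-instruction record whose next byte selects the
   type (0: label, 1: jump, 2: const, 3: put_label), resolved while the final
   code is generated. */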
2708 *inst++ = 0;
2709 *inst++ = 0;
2711 return label;
2714 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2716 sljit_u8 *inst;
2717 struct sljit_jump *jump;
2719 CHECK_ERROR_PTR();
2720 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2722 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2723 PTR_FAIL_IF_NULL(jump);
2724 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
2725 type &= 0xff;
2727 /* Worst case size. */
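/* On x86-32 this is jmp rel32 (5 bytes) or jcc rel32 (6 bytes); on x86-64 a
   far target may need mov reg, imm64 (10 bytes) plus an indirect jump
   (3 bytes), preceded by a 2 byte short branch for conditional jumps. */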
2728 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2729 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2730 #else
2731 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2732 #endif
2734 inst = (sljit_u8*)ensure_buf(compiler, 2);
2735 PTR_FAIL_IF_NULL(inst);
2737 *inst++ = 0;
2738 *inst++ = 1;
2739 return jump;
2742 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2744 sljit_u8 *inst;
2745 struct sljit_jump *jump;
2747 CHECK_ERROR();
2748 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2749 ADJUST_LOCAL_OFFSET(src, srcw);
2751 CHECK_EXTRA_REGS(src, srcw, (void)0);
2753 if (src == SLJIT_IMM) {
2754 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2755 FAIL_IF_NULL(jump);
2756 set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
2757 jump->u.target = (sljit_uw)srcw;
2759 /* Worst case size. */
2760 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2761 compiler->size += 5;
2762 #else
2763 compiler->size += 10 + 3;
2764 #endif
2766 inst = (sljit_u8*)ensure_buf(compiler, 2);
2767 FAIL_IF_NULL(inst);
2769 *inst++ = 0;
2770 *inst++ = 1;
2772 else {
2773 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2774 /* REX_W is not necessary (src is not immediate). */
2775 compiler->mode32 = 1;
2776 #endif
2777 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2778 FAIL_IF(!inst);
2779 *inst++ = GROUP_FF;
2780 *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
2782 return SLJIT_SUCCESS;
2785 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2786 sljit_s32 dst, sljit_sw dstw,
2787 sljit_s32 type)
2789 sljit_u8 *inst;
2790 sljit_u8 cond_set = 0;
2791 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2792 sljit_s32 reg;
2793 #endif
2794 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2795 sljit_s32 dst_save = dst;
2796 sljit_sw dstw_save = dstw;
2798 CHECK_ERROR();
2799 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2801 ADJUST_LOCAL_OFFSET(dst, dstw);
2802 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2804 /* setcc = jcc + 0x10. */
2805 cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
2807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2808 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2809 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2810 FAIL_IF(!inst);
2811 INC_SIZE(4 + 3);
2812 /* Set low register to conditional flag. */
2813 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2814 *inst++ = GROUP_0F;
2815 *inst++ = cond_set;
2816 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2817 *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
2818 *inst++ = OR_rm8_r8;
2819 *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
2820 return SLJIT_SUCCESS;
2823 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2825 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2826 FAIL_IF(!inst);
2827 INC_SIZE(4 + 4);
2828 /* Set low register to conditional flag. */
2829 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2830 *inst++ = GROUP_0F;
2831 *inst++ = cond_set;
2832 *inst++ = MOD_REG | reg_lmap[reg];
2833 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2834 /* The movzx instruction does not affect flags. */
2835 *inst++ = GROUP_0F;
2836 *inst++ = MOVZX_r_rm8;
2837 *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
2839 if (reg != TMP_REG1)
2840 return SLJIT_SUCCESS;
2842 if (GET_OPCODE(op) < SLJIT_ADD) {
2843 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2844 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2847 SLJIT_SKIP_CHECKS(compiler);
2848 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2850 #else
2851 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2852 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2853 if (reg_map[dst] <= 4) {
2854 /* Low byte is accessible. */
2855 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2856 FAIL_IF(!inst);
2857 INC_SIZE(3 + 3);
2858 /* Set low byte to conditional flag. */
2859 *inst++ = GROUP_0F;
2860 *inst++ = cond_set;
2861 *inst++ = U8(MOD_REG | reg_map[dst]);
2863 *inst++ = GROUP_0F;
2864 *inst++ = MOVZX_r_rm8;
2865 *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
2866 return SLJIT_SUCCESS;
2869 /* Low byte is not accessible (only EAX..EBX have 8 bit forms on x86-32). */
2870 if (cpu_has_cmov == -1)
2871 get_cpu_features();
2873 if (cpu_has_cmov) {
2874 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2875 /* a xor reg, reg operation would overwrite the flags. */
2876 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2878 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2879 FAIL_IF(!inst);
2880 INC_SIZE(3);
2882 *inst++ = GROUP_0F;
2883 /* cmovcc = setcc - 0x50. */
2884 *inst++ = U8(cond_set - 0x50);
2885 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]);
2886 return SLJIT_SUCCESS;
2889 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2890 FAIL_IF(!inst);
2891 INC_SIZE(1 + 3 + 3 + 1);
2892 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2893 /* Set al to conditional flag. */
2894 *inst++ = GROUP_0F;
2895 *inst++ = cond_set;
2896 *inst++ = MOD_REG | 0 /* eax */;
2898 *inst++ = GROUP_0F;
2899 *inst++ = MOVZX_r_rm8;
2900 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */);
2901 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2902 return SLJIT_SUCCESS;
2905 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2906 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2908 if (dst != SLJIT_R0) {
2909 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2910 FAIL_IF(!inst);
2911 INC_SIZE(1 + 3 + 2 + 1);
2912 /* Set low register to conditional flag. */
2913 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2914 *inst++ = GROUP_0F;
2915 *inst++ = cond_set;
2916 *inst++ = MOD_REG | 0 /* eax */;
2917 *inst++ = OR_rm8_r8;
2918 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2919 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2921 else {
2922 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2923 FAIL_IF(!inst);
2924 INC_SIZE(2 + 3 + 2 + 2);
2925 /* Set low register to conditional flag. */
2926 *inst++ = XCHG_r_rm;
2927 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
2928 *inst++ = GROUP_0F;
2929 *inst++ = cond_set;
2930 *inst++ = MOD_REG | 1 /* ecx */;
2931 *inst++ = OR_rm8_r8;
2932 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2933 *inst++ = XCHG_r_rm;
2934 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
2936 return SLJIT_SUCCESS;
2939 /* Set TMP_REG1 to the condition bit (0 or 1). */
2940 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2941 FAIL_IF(!inst);
2942 INC_SIZE(1 + 3 + 3 + 1);
2943 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2944 /* Set al to conditional flag. */
2945 *inst++ = GROUP_0F;
2946 *inst++ = cond_set;
2947 *inst++ = MOD_REG | 0 /* eax */;
2949 *inst++ = GROUP_0F;
2950 *inst++ = MOVZX_r_rm8;
2951 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2953 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2955 if (GET_OPCODE(op) < SLJIT_ADD)
2956 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2958 SLJIT_SKIP_CHECKS(compiler);
2959 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2960 #endif /* SLJIT_CONFIG_X86_64 */
2963 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2964 sljit_s32 dst_reg,
2965 sljit_s32 src, sljit_sw srcw)
2967 sljit_u8* inst;
2969 CHECK_ERROR();
2970 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2972 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2973 dst_reg &= ~SLJIT_32;
2975 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2976 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2977 #else
2978 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2979 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2980 #endif
2982 /* ADJUST_LOCAL_OFFSET is not needed. */
2983 CHECK_EXTRA_REGS(src, srcw, (void)0);
2985 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2986 compiler->mode32 = dst_reg & SLJIT_32;
2987 dst_reg &= ~SLJIT_32;
2988 #endif
2990 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2991 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2992 src = TMP_REG1;
2993 srcw = 0;
2996 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2997 FAIL_IF(!inst);
2998 *inst++ = GROUP_0F;
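/* cmovcc = jcc - 0x40. */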
2999 *inst = U8(get_jump_code((sljit_uw)type) - 0x40);
3000 return SLJIT_SUCCESS;
3003 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3005 CHECK_ERROR();
3006 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3007 ADJUST_LOCAL_OFFSET(dst, dstw);
3009 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3011 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3012 compiler->mode32 = 0;
3013 #endif
3015 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3017 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3018 if (NOT_HALFWORD(offset)) {
3019 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
3020 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
3021 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
3022 return compiler->error;
3023 #else
3024 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
3025 #endif
3027 #endif
3029 if (offset != 0)
3030 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
3031 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
3034 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3036 sljit_u8 *inst;
3037 struct sljit_const *const_;
3038 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3039 sljit_s32 reg;
3040 #endif
3042 CHECK_ERROR_PTR();
3043 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3044 ADJUST_LOCAL_OFFSET(dst, dstw);
3046 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3048 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3049 PTR_FAIL_IF(!const_);
3050 set_const(const_, compiler);
3052 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3053 compiler->mode32 = 0;
3054 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3056 if (emit_load_imm64(compiler, reg, init_value))
3057 return NULL;
3058 #else
3059 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
3060 return NULL;
3061 #endif
3063 inst = (sljit_u8*)ensure_buf(compiler, 2);
3064 PTR_FAIL_IF(!inst);
3066 *inst++ = 0;
3067 *inst++ = 2;
3069 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3070 if (dst & SLJIT_MEM)
3071 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3072 return NULL;
3073 #endif
3075 return const_;
3078 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3080 struct sljit_put_label *put_label;
3081 sljit_u8 *inst;
3082 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3083 sljit_s32 reg;
3084 sljit_uw start_size;
3085 #endif
3087 CHECK_ERROR_PTR();
3088 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3089 ADJUST_LOCAL_OFFSET(dst, dstw);
3091 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3093 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3094 PTR_FAIL_IF(!put_label);
3095 set_put_label(put_label, compiler, 0);
3097 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3098 compiler->mode32 = 0;
3099 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3101 if (emit_load_imm64(compiler, reg, 0))
3102 return NULL;
3103 #else
3104 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
3105 return NULL;
3106 #endif
3108 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3109 if (dst & SLJIT_MEM) {
3110 start_size = compiler->size;
3111 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3112 return NULL;
3113 put_label->flags = compiler->size - start_size;
3115 #endif
3117 inst = (sljit_u8*)ensure_buf(compiler, 2);
3118 PTR_FAIL_IF(!inst);
3120 *inst++ = 0;
3121 *inst++ = 3;
3123 return put_label;
3126 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3128 SLJIT_UNUSED_ARG(executable_offset);
3130 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
3131 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3132 sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
3133 #else
3134 sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
3135 #endif
3136 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
3139 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3141 SLJIT_UNUSED_ARG(executable_offset);
3143 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
3144 sljit_unaligned_store_sw((void*)addr, new_constant);
3145 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);