1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
30 return "x86" SLJIT_CPUINFO " ABI:fastcall";
31 #else
32 return "x86" SLJIT_CPUINFO;
33 #endif
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - ESP
43 5 - EBP
44 6 - ESI
45 7 - EDI
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - RSP
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - from here on, a REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
68 #define TMP_FREG (0)
70 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
72 /* Last register + 1. */
73 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
75 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
76 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
79 #define CHECK_EXTRA_REGS(p, w, do) \
80 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
81 if (p <= compiler->scratches) \
82 w = compiler->scratches_offset + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
83 else \
84 w = compiler->locals_offset + ((p) - SLJIT_S2) * SSIZE_OF(sw); \
85 p = SLJIT_MEM1(SLJIT_SP); \
86 do; \
89 #else /* SLJIT_CONFIG_X86_32 */
91 /* Last register + 1. */
92 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
93 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
95 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
96 Note: avoid using r12 and r13 for memory addressing;
97 therefore r12 is better used as a higher saved register. */
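/* For illustration (these encodings are not emitted verbatim by this file,
   they only show why r12/r13 are awkward base registers):
     mov rax, [rbx] -> 48 8b 03        ModRM only
     mov rax, [r12] -> 49 8b 04 24     rm == 0b100 forces a SIB byte
     mov rax, [r13] -> 49 8b 45 00     rm == 0b101 with mod == 0 would mean
                                       RIP-relative, so a zero disp8 is needed */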
98 #ifndef _WIN64
99 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
100 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
101 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
103 /* low-map. reg_map & 0x7. */
104 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
105 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
107 #else
108 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
109 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
110 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
112 /* low-map. reg_map & 0x7. */
113 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
114 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
116 #endif
118 /* Args: xmm0-xmm3 */
119 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
120 4, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
122 /* low-map. freg_map & 0x7. */
123 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
124 4, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
127 #define REX_W 0x48
128 #define REX_R 0x44
129 #define REX_X 0x42
130 #define REX_B 0x41
131 #define REX 0x40
133 #ifndef _WIN64
134 #define HALFWORD_MAX 0x7fffffffl
135 #define HALFWORD_MIN -0x80000000l
136 #else
137 #define HALFWORD_MAX 0x7fffffffll
138 #define HALFWORD_MIN -0x80000000ll
139 #endif
141 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
142 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
144 #define CHECK_EXTRA_REGS(p, w, do)
146 #endif /* SLJIT_CONFIG_X86_32 */
148 #define U8(v) ((sljit_u8)(v))
151 /* Size flags for emit_x86_instruction: */
152 #define EX86_BIN_INS 0x0010
153 #define EX86_SHIFT_INS 0x0020
154 #define EX86_REX 0x0040
155 #define EX86_NO_REXW 0x0080
156 #define EX86_BYTE_ARG 0x0100
157 #define EX86_HALF_ARG 0x0200
158 #define EX86_PREF_66 0x0400
159 #define EX86_PREF_F2 0x0800
160 #define EX86_PREF_F3 0x1000
161 #define EX86_SSE2_OP1 0x2000
162 #define EX86_SSE2_OP2 0x4000
163 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
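/* Sketch of how these flags are typically combined (hypothetical call,
   mirroring the patterns used later in this file):
     inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 2, dst, 0);
     FAIL_IF(!inst);
     *inst |= SHL;
   EX86_SHIFT_INS selects the 0xc1/0xd1 shift group, and the /digit (SHL, SHR,
   SAR) is OR-ed into the emitted byte afterwards. */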
165 /* --------------------------------------------------------------------- */
166 /* Instruction forms */
167 /* --------------------------------------------------------------------- */
169 #define ADD (/* BINARY */ 0 << 3)
170 #define ADD_EAX_i32 0x05
171 #define ADD_r_rm 0x03
172 #define ADD_rm_r 0x01
173 #define ADDSD_x_xm 0x58
174 #define ADC (/* BINARY */ 2 << 3)
175 #define ADC_EAX_i32 0x15
176 #define ADC_r_rm 0x13
177 #define ADC_rm_r 0x11
178 #define AND (/* BINARY */ 4 << 3)
179 #define AND_EAX_i32 0x25
180 #define AND_r_rm 0x23
181 #define AND_rm_r 0x21
182 #define ANDPD_x_xm 0x54
183 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
184 #define CALL_i32 0xe8
185 #define CALL_rm (/* GROUP_FF */ 2 << 3)
186 #define CDQ 0x99
187 #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
188 #define CMP (/* BINARY */ 7 << 3)
189 #define CMP_EAX_i32 0x3d
190 #define CMP_r_rm 0x3b
191 #define CMP_rm_r 0x39
192 #define CVTPD2PS_x_xm 0x5a
193 #define CVTSI2SD_x_rm 0x2a
194 #define CVTTSD2SI_r_xm 0x2c
195 #define DIV (/* GROUP_F7 */ 6 << 3)
196 #define DIVSD_x_xm 0x5e
197 #define FSTPS 0xd9
198 #define FSTPD 0xdd
199 #define INT3 0xcc
200 #define IDIV (/* GROUP_F7 */ 7 << 3)
201 #define IMUL (/* GROUP_F7 */ 5 << 3)
202 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
203 #define IMUL_r_rm_i8 0x6b
204 #define IMUL_r_rm_i32 0x69
205 #define JE_i8 0x74
206 #define JNE_i8 0x75
207 #define JMP_i8 0xeb
208 #define JMP_i32 0xe9
209 #define JMP_rm (/* GROUP_FF */ 4 << 3)
210 #define LEA_r_m 0x8d
211 #define LOOP_i8 0xe2
212 #define MOV_r_rm 0x8b
213 #define MOV_r_i32 0xb8
214 #define MOV_rm_r 0x89
215 #define MOV_rm_i32 0xc7
216 #define MOV_rm8_i8 0xc6
217 #define MOV_rm8_r8 0x88
218 #define MOVAPS_x_xm 0x28
219 #define MOVAPS_xm_x 0x29
220 #define MOVSD_x_xm 0x10
221 #define MOVSD_xm_x 0x11
222 #define MOVSXD_r_rm 0x63
223 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
224 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
225 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
226 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
227 #define MUL (/* GROUP_F7 */ 4 << 3)
228 #define MULSD_x_xm 0x59
229 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
230 #define NOP 0x90
231 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
232 #define OR (/* BINARY */ 1 << 3)
233 #define OR_r_rm 0x0b
234 #define OR_EAX_i32 0x0d
235 #define OR_rm_r 0x09
236 #define OR_rm8_r8 0x08
237 #define POP_r 0x58
238 #define POP_rm 0x8f
239 #define POPF 0x9d
240 #define PREFETCH 0x18
241 #define PUSH_i32 0x68
242 #define PUSH_r 0x50
243 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
244 #define PUSHF 0x9c
245 #define RET_near 0xc3
246 #define RET_i16 0xc2
247 #define SBB (/* BINARY */ 3 << 3)
248 #define SBB_EAX_i32 0x1d
249 #define SBB_r_rm 0x1b
250 #define SBB_rm_r 0x19
251 #define SAR (/* SHIFT */ 7 << 3)
252 #define SHL (/* SHIFT */ 4 << 3)
253 #define SHR (/* SHIFT */ 5 << 3)
254 #define SUB (/* BINARY */ 5 << 3)
255 #define SUB_EAX_i32 0x2d
256 #define SUB_r_rm 0x2b
257 #define SUB_rm_r 0x29
258 #define SUBSD_x_xm 0x5c
259 #define TEST_EAX_i32 0xa9
260 #define TEST_rm_r 0x85
261 #define UCOMISD_x_xm 0x2e
262 #define UNPCKLPD_x_xm 0x14
263 #define XCHG_EAX_r 0x90
264 #define XCHG_r_rm 0x87
265 #define XOR (/* BINARY */ 6 << 3)
266 #define XOR_EAX_i32 0x35
267 #define XOR_r_rm 0x33
268 #define XOR_rm_r 0x31
269 #define XORPD_x_xm 0x57
271 #define GROUP_0F 0x0f
272 #define GROUP_F7 0xf7
273 #define GROUP_FF 0xff
274 #define GROUP_BINARY_81 0x81
275 #define GROUP_BINARY_83 0x83
276 #define GROUP_SHIFT_1 0xd1
277 #define GROUP_SHIFT_N 0xc1
278 #define GROUP_SHIFT_CL 0xd3
280 #define MOD_REG 0xc0
281 #define MOD_DISP8 0x40
283 #define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
285 #define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
286 #define POP_REG(r) (*inst++ = U8(POP_r + (r)))
287 #define RET() (*inst++ = RET_near)
288 #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
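/* Typical emit pattern built on these macros (sketch, mirroring the code below):
     inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
     FAIL_IF(!inst);
     INC_SIZE(2);
     PUSH_REG(reg_map[reg]);
     POP_REG(reg_map[reg]);
   ensure_buf reserves "1 + size" bytes because the first byte of each buffer
   record stores the length of the generated instruction bytes. */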
290 /* Multithreading does not affect these static variables, since they store
291 built-in CPU features. Therefore they can safely be overwritten by different
292 threads if they detect the CPU features at the same time. */
293 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
294 static sljit_s32 cpu_has_sse2 = -1;
295 #endif
296 static sljit_s32 cpu_has_cmov = -1;
298 #ifdef _WIN32_WCE
299 #include <cmnintrin.h>
300 #elif defined(_MSC_VER) && _MSC_VER >= 1400
301 #include <intrin.h>
302 #endif
304 /******************************************************/
305 /* Unaligned-store functions */
306 /******************************************************/
308 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
310 SLJIT_MEMCPY(addr, &value, sizeof(value));
313 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
315 SLJIT_MEMCPY(addr, &value, sizeof(value));
318 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
320 SLJIT_MEMCPY(addr, &value, sizeof(value));
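/* These helpers let the patching code below write immediates and displacements
   into the instruction stream at arbitrary (unaligned) byte offsets without
   dereferencing misaligned pointers, e.g.
     sljit_unaligned_store_s32(code_ptr, (sljit_s32)offset);
   On x86 the memcpy normally compiles down to a plain store. */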
323 /******************************************************/
324 /* Utility functions */
325 /******************************************************/
327 static void get_cpu_features(void)
329 sljit_u32 features;
331 #if defined(_MSC_VER) && _MSC_VER >= 1400
333 int CPUInfo[4];
334 __cpuid(CPUInfo, 1);
335 features = (sljit_u32)CPUInfo[3];
337 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
339 /* AT&T syntax. */
340 __asm__ (
341 "movl $0x1, %%eax\n"
342 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
343 /* On x86-32, there is no red zone, so this
344 should work (no need for a local variable). */
345 "push %%ebx\n"
346 #endif
347 "cpuid\n"
348 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
349 "pop %%ebx\n"
350 #endif
351 "movl %%edx, %0\n"
352 : "=g" (features)
354 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
355 : "%eax", "%ecx", "%edx"
356 #else
357 : "%rax", "%rbx", "%rcx", "%rdx"
358 #endif
361 #else /* _MSC_VER && _MSC_VER >= 1400 */
363 /* Intel syntax. */
364 __asm {
365 mov eax, 1
366 cpuid
367 mov features, edx
370 #endif /* _MSC_VER && _MSC_VER >= 1400 */
372 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
373 cpu_has_sse2 = (features >> 26) & 0x1;
374 #endif
375 cpu_has_cmov = (features >> 15) & 0x1;
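/* For reference: CPUID leaf 1 reports feature bits in EDX; bit 15 is CMOV and
   bit 26 is SSE2, which is what the two shifts above extract. */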
378 static sljit_u8 get_jump_code(sljit_uw type)
380 switch (type) {
381 case SLJIT_EQUAL:
382 case SLJIT_F_EQUAL:
383 case SLJIT_UNORDERED_OR_EQUAL:
384 case SLJIT_ORDERED_EQUAL: /* Not supported. */
385 return 0x84 /* je */;
387 case SLJIT_NOT_EQUAL:
388 case SLJIT_F_NOT_EQUAL:
389 case SLJIT_ORDERED_NOT_EQUAL:
390 case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not supported. */
391 return 0x85 /* jne */;
393 case SLJIT_LESS:
394 case SLJIT_CARRY:
395 case SLJIT_F_LESS:
396 case SLJIT_UNORDERED_OR_LESS:
397 case SLJIT_ORDERED_LESS: /* Not supported. */
398 return 0x82 /* jc */;
400 case SLJIT_GREATER_EQUAL:
401 case SLJIT_NOT_CARRY:
402 case SLJIT_F_GREATER_EQUAL:
403 case SLJIT_ORDERED_GREATER_EQUAL:
404 case SLJIT_UNORDERED_OR_GREATER_EQUAL: /* Not supported. */
405 return 0x83 /* jae */;
407 case SLJIT_GREATER:
408 case SLJIT_F_GREATER:
409 case SLJIT_ORDERED_GREATER:
410 case SLJIT_UNORDERED_OR_GREATER: /* Not supported. */
411 return 0x87 /* jnbe */;
413 case SLJIT_LESS_EQUAL:
414 case SLJIT_F_LESS_EQUAL:
415 case SLJIT_UNORDERED_OR_LESS_EQUAL:
416 case SLJIT_ORDERED_LESS_EQUAL: /* Not supported. */
417 return 0x86 /* jbe */;
419 case SLJIT_SIG_LESS:
420 return 0x8c /* jl */;
422 case SLJIT_SIG_GREATER_EQUAL:
423 return 0x8d /* jnl */;
425 case SLJIT_SIG_GREATER:
426 return 0x8f /* jnle */;
428 case SLJIT_SIG_LESS_EQUAL:
429 return 0x8e /* jle */;
431 case SLJIT_OVERFLOW:
432 return 0x80 /* jo */;
434 case SLJIT_NOT_OVERFLOW:
435 return 0x81 /* jno */;
437 case SLJIT_UNORDERED:
438 return 0x8a /* jp */;
440 case SLJIT_ORDERED:
441 return 0x8b /* jpo */;
443 return 0;
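/* Note: the value returned above is the second byte of the two-byte "0f 8x"
   jcc rel32 encoding; subtracting 0x10 (as generate_near_jump_code does) gives
   the matching one-byte "7x" jcc rel8 opcode, e.g. 0x84 (je rel32) -> 0x74 (je rel8). */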
446 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
447 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
448 #else
449 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
450 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
451 #endif
453 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
455 sljit_uw type = jump->flags >> TYPE_SHIFT;
456 sljit_s32 short_jump;
457 sljit_uw label_addr;
459 if (jump->flags & JUMP_LABEL)
460 label_addr = (sljit_uw)(code + jump->u.label->size);
461 else
462 label_addr = jump->u.target - (sljit_uw)executable_offset;
464 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
466 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
467 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
468 return generate_far_jump_code(jump, code_ptr);
469 #endif
471 if (type == SLJIT_JUMP) {
472 if (short_jump)
473 *code_ptr++ = JMP_i8;
474 else
475 *code_ptr++ = JMP_i32;
476 jump->addr++;
478 else if (type >= SLJIT_FAST_CALL) {
479 short_jump = 0;
480 *code_ptr++ = CALL_i32;
481 jump->addr++;
483 else if (short_jump) {
484 *code_ptr++ = U8(get_jump_code(type) - 0x10);
485 jump->addr++;
487 else {
488 *code_ptr++ = GROUP_0F;
489 *code_ptr++ = get_jump_code(type);
490 jump->addr += 2;
493 if (short_jump) {
494 jump->flags |= PATCH_MB;
495 code_ptr += sizeof(sljit_s8);
496 } else {
497 jump->flags |= PATCH_MW;
498 code_ptr += sizeof(sljit_s32);
501 return code_ptr;
504 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
506 struct sljit_memory_fragment *buf;
507 sljit_u8 *code;
508 sljit_u8 *code_ptr;
509 sljit_u8 *buf_ptr;
510 sljit_u8 *buf_end;
511 sljit_u8 len;
512 sljit_sw executable_offset;
513 sljit_uw jump_addr;
515 struct sljit_label *label;
516 struct sljit_jump *jump;
517 struct sljit_const *const_;
518 struct sljit_put_label *put_label;
520 CHECK_ERROR_PTR();
521 CHECK_PTR(check_sljit_generate_code(compiler));
522 reverse_buf(compiler);
524 /* Second code generation pass. */
525 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data);
526 PTR_FAIL_WITH_EXEC_IF(code);
527 buf = compiler->buf;
529 code_ptr = code;
530 label = compiler->labels;
531 jump = compiler->jumps;
532 const_ = compiler->consts;
533 put_label = compiler->put_labels;
534 executable_offset = SLJIT_EXEC_OFFSET(code);
536 do {
537 buf_ptr = buf->memory;
538 buf_end = buf_ptr + buf->used_size;
539 do {
540 len = *buf_ptr++;
541 if (len > 0) {
542 /* The code is already generated. */
543 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
544 code_ptr += len;
545 buf_ptr += len;
547 else {
548 switch (*buf_ptr) {
549 case 0:
550 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
551 label->size = (sljit_uw)(code_ptr - code);
552 label = label->next;
553 break;
554 case 1:
555 jump->addr = (sljit_uw)code_ptr;
556 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
557 code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
558 else {
559 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
560 code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
561 #else
562 code_ptr = generate_far_jump_code(jump, code_ptr);
563 #endif
565 jump = jump->next;
566 break;
567 case 2:
568 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
569 const_ = const_->next;
570 break;
571 default:
572 SLJIT_ASSERT(*buf_ptr == 3);
573 SLJIT_ASSERT(put_label->label);
574 put_label->addr = (sljit_uw)code_ptr;
575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
576 code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size);
577 #endif
578 put_label = put_label->next;
579 break;
581 buf_ptr++;
583 } while (buf_ptr < buf_end);
584 SLJIT_ASSERT(buf_ptr == buf_end);
585 buf = buf->next;
586 } while (buf);
588 SLJIT_ASSERT(!label);
589 SLJIT_ASSERT(!jump);
590 SLJIT_ASSERT(!const_);
591 SLJIT_ASSERT(!put_label);
592 SLJIT_ASSERT(code_ptr <= code + compiler->size);
594 jump = compiler->jumps;
595 while (jump) {
596 jump_addr = jump->addr + (sljit_uw)executable_offset;
598 if (jump->flags & PATCH_MB) {
599 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
600 *(sljit_u8*)jump->addr = U8(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
601 } else if (jump->flags & PATCH_MW) {
602 if (jump->flags & JUMP_LABEL) {
603 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
604 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
605 #else
606 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
607 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
608 #endif
610 else {
611 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
612 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
613 #else
614 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
615 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
616 #endif
619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
620 else if (jump->flags & PATCH_MD)
621 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
622 #endif
624 jump = jump->next;
627 put_label = compiler->put_labels;
628 while (put_label) {
629 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
630 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
631 #else
632 if (put_label->flags & PATCH_MD) {
633 SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
634 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
636 else {
637 SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
638 sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
640 #endif
642 put_label = put_label->next;
645 compiler->error = SLJIT_ERR_COMPILED;
646 compiler->executable_offset = executable_offset;
647 compiler->executable_size = (sljit_uw)(code_ptr - code);
649 code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
651 SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
652 return (void*)code;
655 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
657 switch (feature_type) {
658 case SLJIT_HAS_FPU:
659 #ifdef SLJIT_IS_FPU_AVAILABLE
660 return SLJIT_IS_FPU_AVAILABLE;
661 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
662 if (cpu_has_sse2 == -1)
663 get_cpu_features();
664 return cpu_has_sse2;
665 #else /* SLJIT_DETECT_SSE2 */
666 return 1;
667 #endif /* SLJIT_DETECT_SSE2 */
669 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
670 case SLJIT_HAS_VIRTUAL_REGISTERS:
671 return 1;
672 #endif
674 case SLJIT_HAS_CLZ:
675 case SLJIT_HAS_CMOV:
676 if (cpu_has_cmov == -1)
677 get_cpu_features();
678 return cpu_has_cmov;
680 case SLJIT_HAS_PREFETCH:
681 return 1;
683 case SLJIT_HAS_SSE2:
684 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
685 if (cpu_has_sse2 == -1)
686 get_cpu_features();
687 return cpu_has_sse2;
688 #else
689 return 1;
690 #endif
692 default:
693 return 0;
697 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
699 if (type < SLJIT_UNORDERED || type > SLJIT_ORDERED_LESS_EQUAL)
700 return 0;
702 switch (type) {
703 case SLJIT_ORDERED_EQUAL:
704 case SLJIT_UNORDERED_OR_NOT_EQUAL:
705 case SLJIT_ORDERED_LESS:
706 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
707 case SLJIT_UNORDERED_OR_GREATER:
708 case SLJIT_ORDERED_LESS_EQUAL:
709 return 0;
712 return 1;
715 /* --------------------------------------------------------------------- */
716 /* Operators */
717 /* --------------------------------------------------------------------- */
719 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
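/* For example, BINARY_OPCODE(ADD) packs ADD_EAX_i32, ADD_r_rm, ADD_rm_r and ADD
   into a single sljit_u32 (0x05030100), which emit_cum_binary() and
   emit_non_cum_binary() unpack again. */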
721 #define BINARY_IMM32(op_imm, immw, arg, argw) \
722 do { \
723 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
724 FAIL_IF(!inst); \
725 *(inst + 1) |= (op_imm); \
726 } while (0)
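/* For example, BINARY_IMM32(OR, 0x100, SLJIT_R0, 0) should assemble an
   "or r/m32, imm32" (0x81 /1) instruction: emit_x86_instruction() picks the
   0x83 (imm8) or 0x81 (imm32) group, and "*(inst + 1) |= op_imm" patches the
   /digit into the ModRM byte. */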
728 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
730 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
731 do { \
732 if (IS_HALFWORD(immw) || compiler->mode32) { \
733 BINARY_IMM32(op_imm, immw, arg, argw); \
735 else { \
736 FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
737 inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
738 FAIL_IF(!inst); \
739 *inst = (op_mr); \
741 } while (0)
743 #define BINARY_EAX_IMM(op_eax_imm, immw) \
744 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
746 #else /* !SLJIT_CONFIG_X86_64 */
748 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
749 BINARY_IMM32(op_imm, immw, arg, argw)
751 #define BINARY_EAX_IMM(op_eax_imm, immw) \
752 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
754 #endif /* SLJIT_CONFIG_X86_64 */
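/* The accumulator forms used by BINARY_EAX_IMM save one byte, e.g.
     add eax, 1000 -> 05 e8 03 00 00
     add ecx, 1000 -> 81 c1 e8 03 00 00
   which is why the binary operations below prefer them when the destination is
   R0 and the immediate does not fit into a signed byte. */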
756 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
757 sljit_s32 dst, sljit_sw dstw,
758 sljit_s32 src, sljit_sw srcw);
760 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
761 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
763 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
764 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
766 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
767 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
769 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
770 sljit_s32 src1, sljit_sw src1w,
771 sljit_s32 src2, sljit_sw src2w);
773 static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
775 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
776 /* Emit endbr32/endbr64 when CET is enabled. */
777 sljit_u8 *inst;
778 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
779 FAIL_IF(!inst);
780 INC_SIZE(4);
781 *inst++ = 0xf3;
782 *inst++ = 0x0f;
783 *inst++ = 0x1e;
784 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
785 *inst = 0xfb;
786 #else
787 *inst = 0xfa;
788 #endif
789 #else /* !SLJIT_CONFIG_X86_CET */
790 SLJIT_UNUSED_ARG(compiler);
791 #endif /* SLJIT_CONFIG_X86_CET */
792 return SLJIT_SUCCESS;
795 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
797 static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
799 sljit_u8 *inst;
800 sljit_s32 size;
802 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
803 size = 5;
804 #else
805 size = 4;
806 #endif
808 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
809 FAIL_IF(!inst);
810 INC_SIZE(size);
811 *inst++ = 0xf3;
812 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
813 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
814 #endif
815 *inst++ = 0x0f;
816 *inst++ = 0x1e;
817 *inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
818 return SLJIT_SUCCESS;
821 static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
823 sljit_u8 *inst;
824 sljit_s32 size;
826 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
827 size = 5;
828 #else
829 size = 4;
830 #endif
832 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
833 FAIL_IF(!inst);
834 INC_SIZE(size);
835 *inst++ = 0xf3;
836 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
837 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
838 #endif
839 *inst++ = 0x0f;
840 *inst++ = 0xae;
841 *inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
842 return SLJIT_SUCCESS;
845 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
847 static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
849 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
850 return _get_ssp() != 0;
851 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
852 return 0;
853 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
856 static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
857 sljit_s32 src, sljit_sw srcw)
859 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
860 sljit_u8 *inst, *jz_after_cmp_inst;
861 sljit_uw size_jz_after_cmp_inst;
863 sljit_uw size_before_rdssp_inst = compiler->size;
865 /* Generate "RDSSP TMP_REG1". */
866 FAIL_IF(emit_rdssp(compiler, TMP_REG1));
868 /* Load return address on shadow stack into TMP_REG1. */
869 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
870 SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
872 /* Hand-code the "mov 0x0(%ebp),%ebp" form, which the generic emitter does not support. */
873 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
874 FAIL_IF(!inst);
875 INC_SIZE(3);
876 *inst++ = 0x8b;
877 *inst++ = 0x6d;
878 *inst = 0;
879 #else /* !SLJIT_CONFIG_X86_32 */
880 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
881 #endif /* SLJIT_CONFIG_X86_32 */
883 /* Compare return address against TMP_REG1. */
884 FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
886 /* Generate JZ to skip the shadow stack adjustment when the shadow
887 stack matches the normal stack. */
888 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
889 FAIL_IF(!inst);
890 INC_SIZE(2);
891 *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
892 size_jz_after_cmp_inst = compiler->size;
893 jz_after_cmp_inst = inst;
895 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
896 /* REX_W is not necessary. */
897 compiler->mode32 = 1;
898 #endif
899 /* Load 1 into TMP_REG1. */
900 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
902 /* Generate "INCSSP TMP_REG1". */
903 FAIL_IF(emit_incssp(compiler, TMP_REG1));
905 /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
906 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
907 FAIL_IF(!inst);
908 INC_SIZE(2);
909 *inst++ = JMP_i8;
910 *inst = size_before_rdssp_inst - compiler->size;
912 *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
913 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
914 SLJIT_UNUSED_ARG(compiler);
915 SLJIT_UNUSED_ARG(src);
916 SLJIT_UNUSED_ARG(srcw);
917 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
918 return SLJIT_SUCCESS;
921 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
922 #include "sljitNativeX86_32.c"
923 #else
924 #include "sljitNativeX86_64.c"
925 #endif
927 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
928 sljit_s32 dst, sljit_sw dstw,
929 sljit_s32 src, sljit_sw srcw)
931 sljit_u8* inst;
933 if (FAST_IS_REG(src)) {
934 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
935 FAIL_IF(!inst);
936 *inst = MOV_rm_r;
937 return SLJIT_SUCCESS;
939 if (src & SLJIT_IMM) {
940 if (FAST_IS_REG(dst)) {
941 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
942 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
943 #else
944 if (!compiler->mode32) {
945 if (NOT_HALFWORD(srcw))
946 return emit_load_imm64(compiler, dst, srcw);
948 else
949 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
950 #endif
952 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
953 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
954 /* Immediate to memory move. Only SLJIT_MOV operation copies
955 an immediate directly into memory so TMP_REG1 can be used. */
956 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
957 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
958 FAIL_IF(!inst);
959 *inst = MOV_rm_r;
960 return SLJIT_SUCCESS;
962 #endif
963 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
964 FAIL_IF(!inst);
965 *inst = MOV_rm_i32;
966 return SLJIT_SUCCESS;
968 if (FAST_IS_REG(dst)) {
969 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
970 FAIL_IF(!inst);
971 *inst = MOV_r_rm;
972 return SLJIT_SUCCESS;
975 /* Memory to memory move. Only SLJIT_MOV operation copies
976 data from memory to memory so TMP_REG1 can be used. */
977 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
978 FAIL_IF(!inst);
979 *inst = MOV_r_rm;
980 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
981 FAIL_IF(!inst);
982 *inst = MOV_rm_r;
983 return SLJIT_SUCCESS;
986 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
988 sljit_u8 *inst;
989 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
990 sljit_uw size;
991 #endif
993 CHECK_ERROR();
994 CHECK(check_sljit_emit_op0(compiler, op));
996 switch (GET_OPCODE(op)) {
997 case SLJIT_BREAKPOINT:
998 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
999 FAIL_IF(!inst);
1000 INC_SIZE(1);
1001 *inst = INT3;
1002 break;
1003 case SLJIT_NOP:
1004 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1005 FAIL_IF(!inst);
1006 INC_SIZE(1);
1007 *inst = NOP;
1008 break;
1009 case SLJIT_LMUL_UW:
1010 case SLJIT_LMUL_SW:
1011 case SLJIT_DIVMOD_UW:
1012 case SLJIT_DIVMOD_SW:
1013 case SLJIT_DIV_UW:
1014 case SLJIT_DIV_SW:
1015 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1016 #ifdef _WIN64
1017 SLJIT_ASSERT(
1018 reg_map[SLJIT_R0] == 0
1019 && reg_map[SLJIT_R1] == 2
1020 && reg_map[TMP_REG1] > 7);
1021 #else
1022 SLJIT_ASSERT(
1023 reg_map[SLJIT_R0] == 0
1024 && reg_map[SLJIT_R1] < 7
1025 && reg_map[TMP_REG1] == 2);
1026 #endif
1027 compiler->mode32 = op & SLJIT_32;
1028 #endif
1029 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
1031 op = GET_OPCODE(op);
1032 if ((op | 0x2) == SLJIT_DIV_UW) {
1033 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1034 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1035 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
1036 #else
1037 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1038 #endif
1039 FAIL_IF(!inst);
1040 *inst = XOR_r_rm;
1043 if ((op | 0x2) == SLJIT_DIV_SW) {
1044 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
1045 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1046 #endif
1048 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1049 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1050 FAIL_IF(!inst);
1051 INC_SIZE(1);
1052 *inst = CDQ;
1053 #else
1054 if (compiler->mode32) {
1055 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1056 FAIL_IF(!inst);
1057 INC_SIZE(1);
1058 *inst = CDQ;
1059 } else {
1060 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1061 FAIL_IF(!inst);
1062 INC_SIZE(2);
1063 *inst++ = REX_W;
1064 *inst = CDQ;
1066 #endif
1069 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1070 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1071 FAIL_IF(!inst);
1072 INC_SIZE(2);
1073 *inst++ = GROUP_F7;
1074 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1075 #else
1076 #ifdef _WIN64
1077 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1078 #else
1079 size = (!compiler->mode32) ? 3 : 2;
1080 #endif
1081 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1082 FAIL_IF(!inst);
1083 INC_SIZE(size);
1084 #ifdef _WIN64
1085 if (!compiler->mode32)
1086 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1087 else if (op >= SLJIT_DIVMOD_UW)
1088 *inst++ = REX_B;
1089 *inst++ = GROUP_F7;
1090 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1091 #else
1092 if (!compiler->mode32)
1093 *inst++ = REX_W;
1094 *inst++ = GROUP_F7;
1095 *inst = MOD_REG | reg_map[SLJIT_R1];
1096 #endif
1097 #endif
1098 switch (op) {
1099 case SLJIT_LMUL_UW:
1100 *inst |= MUL;
1101 break;
1102 case SLJIT_LMUL_SW:
1103 *inst |= IMUL;
1104 break;
1105 case SLJIT_DIVMOD_UW:
1106 case SLJIT_DIV_UW:
1107 *inst |= DIV;
1108 break;
1109 case SLJIT_DIVMOD_SW:
1110 case SLJIT_DIV_SW:
1111 *inst |= IDIV;
1112 break;
1114 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1115 if (op <= SLJIT_DIVMOD_SW)
1116 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1117 #else
1118 if (op >= SLJIT_DIV_UW)
1119 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1120 #endif
1121 break;
1122 case SLJIT_ENDBR:
1123 return emit_endbranch(compiler);
1124 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1125 return skip_frames_before_return(compiler);
1128 return SLJIT_SUCCESS;
1131 #define ENCODE_PREFIX(prefix) \
1132 do { \
1133 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
1134 FAIL_IF(!inst); \
1135 INC_SIZE(1); \
1136 *inst = U8(prefix); \
1137 } while (0)
1139 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1140 sljit_s32 dst, sljit_sw dstw,
1141 sljit_s32 src, sljit_sw srcw)
1143 sljit_u8* inst;
1144 sljit_s32 dst_r;
1145 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1146 sljit_s32 work_r;
1147 #endif
1149 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1150 compiler->mode32 = 0;
1151 #endif
1153 if (src & SLJIT_IMM) {
1154 if (FAST_IS_REG(dst)) {
1155 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1156 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1157 #else
1158 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1159 FAIL_IF(!inst);
1160 *inst = MOV_rm_i32;
1161 return SLJIT_SUCCESS;
1162 #endif
1164 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1165 FAIL_IF(!inst);
1166 *inst = MOV_rm8_i8;
1167 return SLJIT_SUCCESS;
1170 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1172 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1173 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1174 if (reg_map[src] >= 4) {
1175 SLJIT_ASSERT(dst_r == TMP_REG1);
1176 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1177 } else
1178 dst_r = src;
1179 #else
1180 dst_r = src;
1181 #endif
1183 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1184 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1185 /* src, dst are registers. */
1186 SLJIT_ASSERT(FAST_IS_REG(dst));
1187 if (reg_map[dst] < 4) {
1188 if (dst != src)
1189 EMIT_MOV(compiler, dst, 0, src, 0);
1190 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
1191 FAIL_IF(!inst);
1192 *inst++ = GROUP_0F;
1193 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1195 else {
1196 if (dst != src)
1197 EMIT_MOV(compiler, dst, 0, src, 0);
1198 if (sign) {
1199 /* shl reg, 24 */
1200 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1201 FAIL_IF(!inst);
1202 *inst |= SHL;
1203 /* sar reg, 24 */
1204 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1205 FAIL_IF(!inst);
1206 *inst |= SAR;
1208 else {
1209 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1210 FAIL_IF(!inst);
1211 *(inst + 1) |= AND;
1214 return SLJIT_SUCCESS;
1216 #endif
1217 else {
1218 /* src is either a memory address or, on x86-32, a register with reg_map[src] < 4. */
1219 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1220 FAIL_IF(!inst);
1221 *inst++ = GROUP_0F;
1222 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1225 if (dst & SLJIT_MEM) {
1226 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1227 if (dst_r == TMP_REG1) {
1228 /* Find an unused register whose reg_map value is < 4. */
1229 if ((dst & REG_MASK) == SLJIT_R0) {
1230 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
1231 work_r = SLJIT_R2;
1232 else
1233 work_r = SLJIT_R1;
1235 else {
1236 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1237 work_r = SLJIT_R0;
1238 else if ((dst & REG_MASK) == SLJIT_R1)
1239 work_r = SLJIT_R2;
1240 else
1241 work_r = SLJIT_R1;
1244 if (work_r == SLJIT_R0) {
1245 ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
1247 else {
1248 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1249 FAIL_IF(!inst);
1250 *inst = XCHG_r_rm;
1253 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1254 FAIL_IF(!inst);
1255 *inst = MOV_rm8_r8;
1257 if (work_r == SLJIT_R0) {
1258 ENCODE_PREFIX(XCHG_EAX_r | reg_map[TMP_REG1]);
1260 else {
1261 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1262 FAIL_IF(!inst);
1263 *inst = XCHG_r_rm;
1266 else {
1267 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1268 FAIL_IF(!inst);
1269 *inst = MOV_rm8_r8;
1271 #else
1272 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1273 FAIL_IF(!inst);
1274 *inst = MOV_rm8_r8;
1275 #endif
1278 return SLJIT_SUCCESS;
1281 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1282 sljit_s32 src, sljit_sw srcw)
1284 sljit_u8* inst;
1286 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1287 compiler->mode32 = 1;
1288 #endif
1290 inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1291 FAIL_IF(!inst);
1292 *inst++ = GROUP_0F;
1293 *inst++ = PREFETCH;
1295 if (op == SLJIT_PREFETCH_L1)
1296 *inst |= (1 << 3);
1297 else if (op == SLJIT_PREFETCH_L2)
1298 *inst |= (2 << 3);
1299 else if (op == SLJIT_PREFETCH_L3)
1300 *inst |= (3 << 3);
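/* The value OR-ed into the ModRM reg field above selects the prefetch hint:
   /0 prefetchnta (the default left by emit_x86_instruction), /1 prefetcht0,
   /2 prefetcht1, /3 prefetcht2. */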
1302 return SLJIT_SUCCESS;
1305 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1306 sljit_s32 dst, sljit_sw dstw,
1307 sljit_s32 src, sljit_sw srcw)
1309 sljit_u8* inst;
1310 sljit_s32 dst_r;
1312 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1313 compiler->mode32 = 0;
1314 #endif
1316 if (src & SLJIT_IMM) {
1317 if (FAST_IS_REG(dst)) {
1318 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1319 return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
1320 #else
1321 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1322 FAIL_IF(!inst);
1323 *inst = MOV_rm_i32;
1324 return SLJIT_SUCCESS;
1325 #endif
1327 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1328 FAIL_IF(!inst);
1329 *inst = MOV_rm_i32;
1330 return SLJIT_SUCCESS;
1333 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1335 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1336 dst_r = src;
1337 else {
1338 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1339 FAIL_IF(!inst);
1340 *inst++ = GROUP_0F;
1341 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1344 if (dst & SLJIT_MEM) {
1345 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1346 FAIL_IF(!inst);
1347 *inst = MOV_rm_r;
1350 return SLJIT_SUCCESS;
1353 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1354 sljit_s32 dst, sljit_sw dstw,
1355 sljit_s32 src, sljit_sw srcw)
1357 sljit_u8* inst;
1359 if (dst == src && dstw == srcw) {
1360 /* Same input and output */
1361 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1362 FAIL_IF(!inst);
1363 *inst++ = GROUP_F7;
1364 *inst |= opcode;
1365 return SLJIT_SUCCESS;
1368 if (FAST_IS_REG(dst)) {
1369 EMIT_MOV(compiler, dst, 0, src, srcw);
1370 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1371 FAIL_IF(!inst);
1372 *inst++ = GROUP_F7;
1373 *inst |= opcode;
1374 return SLJIT_SUCCESS;
1377 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1378 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1379 FAIL_IF(!inst);
1380 *inst++ = GROUP_F7;
1381 *inst |= opcode;
1382 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1383 return SLJIT_SUCCESS;
1386 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1387 sljit_s32 dst, sljit_sw dstw,
1388 sljit_s32 src, sljit_sw srcw)
1390 sljit_u8* inst;
1392 if (FAST_IS_REG(dst)) {
1393 EMIT_MOV(compiler, dst, 0, src, srcw);
1394 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1395 FAIL_IF(!inst);
1396 *inst++ = GROUP_F7;
1397 *inst |= NOT_rm;
1398 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1399 FAIL_IF(!inst);
1400 *inst = OR_r_rm;
1401 return SLJIT_SUCCESS;
1404 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1405 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1406 FAIL_IF(!inst);
1407 *inst++ = GROUP_F7;
1408 *inst |= NOT_rm;
1409 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1410 FAIL_IF(!inst);
1411 *inst = OR_r_rm;
1412 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1413 return SLJIT_SUCCESS;
1416 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1417 static const sljit_sw emit_clz_arg = 32 + 31;
1418 #endif
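/* CMOV only accepts a register or memory source, never an immediate, so the
   32 + 31 (or 64 + 63) fallback used below is materialized either in a spare
   register or, on x86-32 when TMP_REG1 is the destination itself, loaded from
   emit_clz_arg above. */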
1420 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1421 sljit_s32 dst, sljit_sw dstw,
1422 sljit_s32 src, sljit_sw srcw)
1424 sljit_u8* inst;
1425 sljit_s32 dst_r;
1427 SLJIT_UNUSED_ARG(op_flags);
1429 if (cpu_has_cmov == -1)
1430 get_cpu_features();
1432 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1434 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1435 FAIL_IF(!inst);
1436 *inst++ = GROUP_0F;
1437 *inst = BSR_r_rm;
1439 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1440 if (cpu_has_cmov) {
1441 if (dst_r != TMP_REG1) {
1442 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1443 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1445 else
1446 inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1448 FAIL_IF(!inst);
1449 *inst++ = GROUP_0F;
1450 *inst = CMOVE_r_rm;
1452 else
1453 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1455 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1456 #else
1457 if (cpu_has_cmov) {
1458 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31));
1460 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1461 FAIL_IF(!inst);
1462 *inst++ = GROUP_0F;
1463 *inst = CMOVE_r_rm;
1465 else
1466 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_32) ? (64 + 63) : (32 + 31)));
1468 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_32) ? 63 : 31, dst_r, 0);
1469 #endif
1471 FAIL_IF(!inst);
1472 *(inst + 1) |= XOR;
1474 if (dst & SLJIT_MEM)
1475 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1476 return SLJIT_SUCCESS;
1479 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1480 sljit_s32 dst, sljit_sw dstw,
1481 sljit_s32 src, sljit_sw srcw)
1483 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1484 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1485 sljit_s32 dst_is_ereg = 0;
1486 #endif
1488 CHECK_ERROR();
1489 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1490 ADJUST_LOCAL_OFFSET(dst, dstw);
1491 ADJUST_LOCAL_OFFSET(src, srcw);
1493 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1494 CHECK_EXTRA_REGS(src, srcw, (void)0);
1495 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1496 compiler->mode32 = op_flags & SLJIT_32;
1497 #endif
1499 op = GET_OPCODE(op);
1501 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1502 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1503 compiler->mode32 = 0;
1504 #endif
1506 if (FAST_IS_REG(src) && src == dst) {
1507 if (!TYPE_CAST_NEEDED(op))
1508 return SLJIT_SUCCESS;
1511 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1512 if (op_flags & SLJIT_32) {
1513 if (src & SLJIT_MEM) {
1514 if (op == SLJIT_MOV_S32)
1515 op = SLJIT_MOV_U32;
1517 else if (src & SLJIT_IMM) {
1518 if (op == SLJIT_MOV_U32)
1519 op = SLJIT_MOV_S32;
1522 #endif
1524 if (src & SLJIT_IMM) {
1525 switch (op) {
1526 case SLJIT_MOV_U8:
1527 srcw = (sljit_u8)srcw;
1528 break;
1529 case SLJIT_MOV_S8:
1530 srcw = (sljit_s8)srcw;
1531 break;
1532 case SLJIT_MOV_U16:
1533 srcw = (sljit_u16)srcw;
1534 break;
1535 case SLJIT_MOV_S16:
1536 srcw = (sljit_s16)srcw;
1537 break;
1538 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1539 case SLJIT_MOV_U32:
1540 srcw = (sljit_u32)srcw;
1541 break;
1542 case SLJIT_MOV_S32:
1543 srcw = (sljit_s32)srcw;
1544 break;
1545 #endif
1547 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1548 if (SLJIT_UNLIKELY(dst_is_ereg))
1549 return emit_mov(compiler, dst, dstw, src, srcw);
1550 #endif
1553 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1554 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1555 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1556 dst = TMP_REG1;
1558 #endif
1560 switch (op) {
1561 case SLJIT_MOV:
1562 case SLJIT_MOV_P:
1563 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1564 case SLJIT_MOV_U32:
1565 case SLJIT_MOV_S32:
1566 case SLJIT_MOV32:
1567 #endif
1568 EMIT_MOV(compiler, dst, dstw, src, srcw);
1569 break;
1570 case SLJIT_MOV_U8:
1571 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1572 break;
1573 case SLJIT_MOV_S8:
1574 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1575 break;
1576 case SLJIT_MOV_U16:
1577 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1578 break;
1579 case SLJIT_MOV_S16:
1580 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1581 break;
1582 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1583 case SLJIT_MOV_U32:
1584 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1585 break;
1586 case SLJIT_MOV_S32:
1587 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1588 break;
1589 case SLJIT_MOV32:
1590 compiler->mode32 = 1;
1591 EMIT_MOV(compiler, dst, dstw, src, srcw);
1592 compiler->mode32 = 0;
1593 break;
1594 #endif
1597 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1598 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1599 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1600 #endif
1601 return SLJIT_SUCCESS;
1604 switch (op) {
1605 case SLJIT_NOT:
1606 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1607 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1608 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1610 case SLJIT_CLZ:
1611 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1614 return SLJIT_SUCCESS;
1617 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1618 sljit_u32 op_types,
1619 sljit_s32 dst, sljit_sw dstw,
1620 sljit_s32 src1, sljit_sw src1w,
1621 sljit_s32 src2, sljit_sw src2w)
1623 sljit_u8* inst;
1624 sljit_u8 op_eax_imm = U8(op_types >> 24);
1625 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1626 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1627 sljit_u8 op_imm = U8(op_types & 0xff);
1629 if (dst == src1 && dstw == src1w) {
1630 if (src2 & SLJIT_IMM) {
1631 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1632 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1633 #else
1634 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1635 #endif
1636 BINARY_EAX_IMM(op_eax_imm, src2w);
1638 else {
1639 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1642 else if (FAST_IS_REG(dst)) {
1643 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1644 FAIL_IF(!inst);
1645 *inst = op_rm;
1647 else if (FAST_IS_REG(src2)) {
1648 /* Special exception for sljit_emit_op_flags. */
1649 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1650 FAIL_IF(!inst);
1651 *inst = op_mr;
1653 else {
1654 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1655 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1656 FAIL_IF(!inst);
1657 *inst = op_mr;
1659 return SLJIT_SUCCESS;
1662 /* Only for cumulative operations. */
1663 if (dst == src2 && dstw == src2w) {
1664 if (src1 & SLJIT_IMM) {
1665 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1666 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1667 #else
1668 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1669 #endif
1670 BINARY_EAX_IMM(op_eax_imm, src1w);
1672 else {
1673 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1676 else if (FAST_IS_REG(dst)) {
1677 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1678 FAIL_IF(!inst);
1679 *inst = op_rm;
1681 else if (FAST_IS_REG(src1)) {
1682 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1683 FAIL_IF(!inst);
1684 *inst = op_mr;
1686 else {
1687 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1688 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1689 FAIL_IF(!inst);
1690 *inst = op_mr;
1692 return SLJIT_SUCCESS;
1695 /* General version. */
1696 if (FAST_IS_REG(dst)) {
1697 EMIT_MOV(compiler, dst, 0, src1, src1w);
1698 if (src2 & SLJIT_IMM) {
1699 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1701 else {
1702 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1703 FAIL_IF(!inst);
1704 *inst = op_rm;
1707 else {
1708 /* This version requires fewer memory writes. */
1709 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1710 if (src2 & SLJIT_IMM) {
1711 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1713 else {
1714 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1715 FAIL_IF(!inst);
1716 *inst = op_rm;
1718 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1721 return SLJIT_SUCCESS;
1724 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1725 sljit_u32 op_types,
1726 sljit_s32 dst, sljit_sw dstw,
1727 sljit_s32 src1, sljit_sw src1w,
1728 sljit_s32 src2, sljit_sw src2w)
1730 sljit_u8* inst;
1731 sljit_u8 op_eax_imm = U8(op_types >> 24);
1732 sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
1733 sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
1734 sljit_u8 op_imm = U8(op_types & 0xff);
1736 if (dst == src1 && dstw == src1w) {
1737 if (src2 & SLJIT_IMM) {
1738 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1739 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1740 #else
1741 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1742 #endif
1743 BINARY_EAX_IMM(op_eax_imm, src2w);
1745 else {
1746 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1749 else if (FAST_IS_REG(dst)) {
1750 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1751 FAIL_IF(!inst);
1752 *inst = op_rm;
1754 else if (FAST_IS_REG(src2)) {
1755 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1756 FAIL_IF(!inst);
1757 *inst = op_mr;
1759 else {
1760 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1761 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1762 FAIL_IF(!inst);
1763 *inst = op_mr;
1765 return SLJIT_SUCCESS;
1768 /* General version. */
1769 if (FAST_IS_REG(dst) && dst != src2) {
1770 EMIT_MOV(compiler, dst, 0, src1, src1w);
1771 if (src2 & SLJIT_IMM) {
1772 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1774 else {
1775 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1776 FAIL_IF(!inst);
1777 *inst = op_rm;
1780 else {
1781 /* This version requires fewer memory writes. */
1782 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1783 if (src2 & SLJIT_IMM) {
1784 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1786 else {
1787 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1788 FAIL_IF(!inst);
1789 *inst = op_rm;
1791 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1794 return SLJIT_SUCCESS;
1797 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1798 sljit_s32 dst, sljit_sw dstw,
1799 sljit_s32 src1, sljit_sw src1w,
1800 sljit_s32 src2, sljit_sw src2w)
1802 sljit_u8* inst;
1803 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1805 /* Register destination. */
1806 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1807 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1808 FAIL_IF(!inst);
1809 *inst++ = GROUP_0F;
1810 *inst = IMUL_r_rm;
1812 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1813 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1814 FAIL_IF(!inst);
1815 *inst++ = GROUP_0F;
1816 *inst = IMUL_r_rm;
1818 else if (src1 & SLJIT_IMM) {
1819 if (src2 & SLJIT_IMM) {
1820 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1821 src2 = dst_r;
1822 src2w = 0;
1825 if (src1w <= 127 && src1w >= -128) {
1826 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1827 FAIL_IF(!inst);
1828 *inst = IMUL_r_rm_i8;
1829 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1830 FAIL_IF(!inst);
1831 INC_SIZE(1);
1832 *inst = U8(src1w);
1834 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1835 else {
1836 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1837 FAIL_IF(!inst);
1838 *inst = IMUL_r_rm_i32;
1839 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1840 FAIL_IF(!inst);
1841 INC_SIZE(4);
1842 sljit_unaligned_store_sw(inst, src1w);
1844 #else
1845 else if (IS_HALFWORD(src1w)) {
1846 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1847 FAIL_IF(!inst);
1848 *inst = IMUL_r_rm_i32;
1849 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1850 FAIL_IF(!inst);
1851 INC_SIZE(4);
1852 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1854 else {
1855 if (dst_r != src2)
1856 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1857 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1858 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1859 FAIL_IF(!inst);
1860 *inst++ = GROUP_0F;
1861 *inst = IMUL_r_rm;
1863 #endif
1865 else if (src2 & SLJIT_IMM) {
1866 /* Note: src1 is NOT immediate. */
1868 if (src2w <= 127 && src2w >= -128) {
1869 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1870 FAIL_IF(!inst);
1871 *inst = IMUL_r_rm_i8;
1872 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1873 FAIL_IF(!inst);
1874 INC_SIZE(1);
1875 *inst = U8(src2w);
1877 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1878 else {
1879 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1880 FAIL_IF(!inst);
1881 *inst = IMUL_r_rm_i32;
1882 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1883 FAIL_IF(!inst);
1884 INC_SIZE(4);
1885 sljit_unaligned_store_sw(inst, src2w);
1887 #else
1888 else if (IS_HALFWORD(src2w)) {
1889 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1890 FAIL_IF(!inst);
1891 *inst = IMUL_r_rm_i32;
1892 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1893 FAIL_IF(!inst);
1894 INC_SIZE(4);
1895 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1897 else {
1898 if (dst_r != src1)
1899 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1900 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1901 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1902 FAIL_IF(!inst);
1903 *inst++ = GROUP_0F;
1904 *inst = IMUL_r_rm;
1906 #endif
1908 else {
1909 /* Neither argument is immediate. */
1910 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1911 dst_r = TMP_REG1;
1912 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1913 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1914 FAIL_IF(!inst);
1915 *inst++ = GROUP_0F;
1916 *inst = IMUL_r_rm;
1919 if (dst & SLJIT_MEM)
1920 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1922 return SLJIT_SUCCESS;
1925 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1926 sljit_s32 dst, sljit_sw dstw,
1927 sljit_s32 src1, sljit_sw src1w,
1928 sljit_s32 src2, sljit_sw src2w)
1930 sljit_u8* inst;
1931 sljit_s32 dst_r, done = 0;
1933 /* These cases are better left to the normal code path. */
1934 if (dst == src1 && dstw == src1w)
1935 return SLJIT_ERR_UNSUPPORTED;
1936 if (dst == src2 && dstw == src2w)
1937 return SLJIT_ERR_UNSUPPORTED;
1939 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1941 if (FAST_IS_REG(src1)) {
1942 if (FAST_IS_REG(src2)) {
1943 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1944 FAIL_IF(!inst);
1945 *inst = LEA_r_m;
1946 done = 1;
1948 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1949 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1950 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1951 #else
1952 if (src2 & SLJIT_IMM) {
1953 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1954 #endif
1955 FAIL_IF(!inst);
1956 *inst = LEA_r_m;
1957 done = 1;
1960 else if (FAST_IS_REG(src2)) {
1961 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1962 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1963 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1964 #else
1965 if (src1 & SLJIT_IMM) {
1966 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1967 #endif
1968 FAIL_IF(!inst);
1969 *inst = LEA_r_m;
1970 done = 1;
1974 if (done) {
1975 if (dst_r == TMP_REG1)
1976 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1977 return SLJIT_SUCCESS;
1979 return SLJIT_ERR_UNSUPPORTED;
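/* emit_lea_binary above is the flag-preserving path used by SLJIT_ADD (and by
   SLJIT_SUB with an immediate) when no flags are requested: LEA computes
   dst = src1 + src2 through its address expression without touching EFLAGS. */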
1982 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1983 sljit_s32 src1, sljit_sw src1w,
1984 sljit_s32 src2, sljit_sw src2w)
1986 sljit_u8* inst;
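	/* When the first operand is SLJIT_R0 (eax) and the immediate does not fit
	   in a sign-extended byte, the shorter accumulator encoding is used below
	   (CMP EAX, imm32 is opcode 3D); emit_test_binary applies the same trick
	   with TEST EAX, imm32 (opcode A9). */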
1988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1989 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1990 #else
1991 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1992 #endif
1993 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1994 return SLJIT_SUCCESS;
1997 if (FAST_IS_REG(src1)) {
1998 if (src2 & SLJIT_IMM) {
1999 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2001 else {
2002 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2003 FAIL_IF(!inst);
2004 *inst = CMP_r_rm;
2006 return SLJIT_SUCCESS;
2009 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
2010 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2011 FAIL_IF(!inst);
2012 *inst = CMP_rm_r;
2013 return SLJIT_SUCCESS;
2016 if (src2 & SLJIT_IMM) {
2017 if (src1 & SLJIT_IMM) {
2018 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2019 src1 = TMP_REG1;
2020 src1w = 0;
2022 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2024 else {
2025 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2026 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2027 FAIL_IF(!inst);
2028 *inst = CMP_r_rm;
2030 return SLJIT_SUCCESS;
2033 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2034 sljit_s32 src1, sljit_sw src1w,
2035 sljit_s32 src2, sljit_sw src2w)
2037 sljit_u8* inst;
2039 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2040 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2041 #else
2042 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2043 #endif
2044 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2045 return SLJIT_SUCCESS;
2048 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2049 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2050 #else
2051 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
2052 #endif
2053 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2054 return SLJIT_SUCCESS;
2057 if (!(src1 & SLJIT_IMM)) {
2058 if (src2 & SLJIT_IMM) {
2059 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2060 if (IS_HALFWORD(src2w) || compiler->mode32) {
2061 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2062 FAIL_IF(!inst);
2063 *inst = GROUP_F7;
2065 else {
2066 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
2067 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
2068 FAIL_IF(!inst);
2069 *inst = TEST_rm_r;
2071 #else
2072 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2073 FAIL_IF(!inst);
2074 *inst = GROUP_F7;
2075 #endif
2076 return SLJIT_SUCCESS;
2078 else if (FAST_IS_REG(src1)) {
2079 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2080 FAIL_IF(!inst);
2081 *inst = TEST_rm_r;
2082 return SLJIT_SUCCESS;
2086 if (!(src2 & SLJIT_IMM)) {
2087 if (src1 & SLJIT_IMM) {
2088 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2089 if (IS_HALFWORD(src1w) || compiler->mode32) {
2090 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2091 FAIL_IF(!inst);
2092 *inst = GROUP_F7;
2094 else {
2095 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2096 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2097 FAIL_IF(!inst);
2098 *inst = TEST_rm_r;
2100 #else
2101 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2102 FAIL_IF(!inst);
2103 *inst = GROUP_F7;
2104 #endif
2105 return SLJIT_SUCCESS;
2107 else if (FAST_IS_REG(src2)) {
2108 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2109 FAIL_IF(!inst);
2110 *inst = TEST_rm_r;
2111 return SLJIT_SUCCESS;
2115 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2116 if (src2 & SLJIT_IMM) {
2117 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2118 if (IS_HALFWORD(src2w) || compiler->mode32) {
2119 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2120 FAIL_IF(!inst);
2121 *inst = GROUP_F7;
2123 else {
2124 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2125 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2126 FAIL_IF(!inst);
2127 *inst = TEST_rm_r;
2129 #else
2130 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2131 FAIL_IF(!inst);
2132 *inst = GROUP_F7;
2133 #endif
2135 else {
2136 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2137 FAIL_IF(!inst);
2138 *inst = TEST_rm_r;
2140 return SLJIT_SUCCESS;
2143 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2144 sljit_u8 mode,
2145 sljit_s32 dst, sljit_sw dstw,
2146 sljit_s32 src1, sljit_sw src1w,
2147 sljit_s32 src2, sljit_sw src2w)
2149 sljit_u8* inst;
2151 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2152 if (dst == src1 && dstw == src1w) {
2153 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2154 FAIL_IF(!inst);
2155 *inst |= mode;
2156 return SLJIT_SUCCESS;
2158 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2159 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2160 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2161 FAIL_IF(!inst);
2162 *inst |= mode;
2163 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2164 return SLJIT_SUCCESS;
2166 if (FAST_IS_REG(dst)) {
2167 EMIT_MOV(compiler, dst, 0, src1, src1w);
2168 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2169 FAIL_IF(!inst);
2170 *inst |= mode;
2171 return SLJIT_SUCCESS;
2174 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2175 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2176 FAIL_IF(!inst);
2177 *inst |= mode;
2178 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2179 return SLJIT_SUCCESS;
2182 if (dst == SLJIT_PREF_SHIFT_REG) {
2183 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2184 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2185 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2186 FAIL_IF(!inst);
2187 *inst |= mode;
2188 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2190 else if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2191 if (src1 != dst)
2192 EMIT_MOV(compiler, dst, 0, src1, src1w);
2193 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2194 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2195 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2196 FAIL_IF(!inst);
2197 *inst |= mode;
2198 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2200 else {
2201		/* This case is complex: x86 variable shifts take the count only in cl,
2202		   but ecx itself may be used for addressing the operands, and that case
		   must be supported as well. */
2203 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2204 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2205 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2206 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2207 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2208 FAIL_IF(!inst);
2209 *inst |= mode;
2210 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2211 #else
2212 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2213 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2214 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2215 FAIL_IF(!inst);
2216 *inst |= mode;
2217 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2218 #endif
2219 if (dst != TMP_REG1)
2220 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2223 return SLJIT_SUCCESS;
2226 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2227 sljit_u8 mode, sljit_s32 set_flags,
2228 sljit_s32 dst, sljit_sw dstw,
2229 sljit_s32 src1, sljit_sw src1w,
2230 sljit_s32 src2, sljit_sw src2w)
2232 /* The CPU does not set flags if the shift count is 0. */
2233 if (src2 & SLJIT_IMM) {
2234 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2235 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2236 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2237 #else
2238 if ((src2w & 0x1f) != 0)
2239 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2240 #endif
2241 if (!set_flags)
2242 return emit_mov(compiler, dst, dstw, src1, src1w);
2243 /* OR dst, src, 0 */
2244 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2245 dst, dstw, src1, src1w, SLJIT_IMM, 0);
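		/* For a zero count with flags requested the emitted sequence is
		   roughly (a sketch):
		       mov dst, src1   ; only when dst != src1
		       or  dst, 0      ; value unchanged, but ZF/SF now reflect it
		   since an actual shift by zero would leave the flags stale. */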
2248 if (!set_flags)
2249 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2251 if (!FAST_IS_REG(dst))
2252 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2254 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2256 if (FAST_IS_REG(dst))
2257 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2258 return SLJIT_SUCCESS;
2261 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2262 sljit_s32 dst, sljit_sw dstw,
2263 sljit_s32 src1, sljit_sw src1w,
2264 sljit_s32 src2, sljit_sw src2w)
2266 CHECK_ERROR();
2267 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2268 ADJUST_LOCAL_OFFSET(dst, dstw);
2269 ADJUST_LOCAL_OFFSET(src1, src1w);
2270 ADJUST_LOCAL_OFFSET(src2, src2w);
2272 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2273 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2274 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2275 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2276 compiler->mode32 = op & SLJIT_32;
2277 #endif
2279 SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op));
2281 switch (GET_OPCODE(op)) {
2282 case SLJIT_ADD:
2283 if (!HAS_FLAGS(op)) {
2284 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2285 return compiler->error;
2287 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2288 dst, dstw, src1, src1w, src2, src2w);
2289 case SLJIT_ADDC:
2290 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2291 dst, dstw, src1, src1w, src2, src2w);
2292 case SLJIT_SUB:
2293 if (src1 == SLJIT_IMM && src1w == 0)
2294 return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);
2296 if (!HAS_FLAGS(op)) {
2297 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2298 return compiler->error;
2299 if (FAST_IS_REG(dst) && src2 == dst) {
2300 FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2301 return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2305 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2306 dst, dstw, src1, src1w, src2, src2w);
2307 case SLJIT_SUBC:
2308 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2309 dst, dstw, src1, src1w, src2, src2w);
2310 case SLJIT_MUL:
2311 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2312 case SLJIT_AND:
2313 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2314 dst, dstw, src1, src1w, src2, src2w);
2315 case SLJIT_OR:
2316 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2317 dst, dstw, src1, src1w, src2, src2w);
2318 case SLJIT_XOR:
2319 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2320 dst, dstw, src1, src1w, src2, src2w);
2321 case SLJIT_SHL:
2322 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2323 dst, dstw, src1, src1w, src2, src2w);
2324 case SLJIT_LSHR:
2325 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2326 dst, dstw, src1, src1w, src2, src2w);
2327 case SLJIT_ASHR:
2328 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2329 dst, dstw, src1, src1w, src2, src2w);
2332 return SLJIT_SUCCESS;
2335 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2336 sljit_s32 src1, sljit_sw src1w,
2337 sljit_s32 src2, sljit_sw src2w)
2339 sljit_s32 opcode = GET_OPCODE(op);
2341 CHECK_ERROR();
2342 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2344 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2345 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2346 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2347 compiler->skip_checks = 1;
2348 #endif
2349 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2352 ADJUST_LOCAL_OFFSET(src1, src1w);
2353 ADJUST_LOCAL_OFFSET(src2, src2w);
2355 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2356 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2357 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2358 compiler->mode32 = op & SLJIT_32;
2359 #endif
2361 if (opcode == SLJIT_SUB) {
2362 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2364 return emit_test_binary(compiler, src1, src1w, src2, src2w);
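/* Flag-only operations: a SLJIT_SUB with no destination is emitted as a CMP and
   a SLJIT_AND as a TEST; every other opcode falls back to sljit_emit_op2 with
   TMP_REG1 as a discarded destination. */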
2367 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2368 sljit_s32 src, sljit_sw srcw)
2370 CHECK_ERROR();
2371 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2372 ADJUST_LOCAL_OFFSET(src, srcw);
2374 CHECK_EXTRA_REGS(src, srcw, (void)0);
2376 switch (op) {
2377 case SLJIT_FAST_RETURN:
2378 return emit_fast_return(compiler, src, srcw);
2379 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2380 /* Don't adjust shadow stack if it isn't enabled. */
2381 if (!cpu_has_shadow_stack ())
2382 return SLJIT_SUCCESS;
2383 return adjust_shadow_stack(compiler, src, srcw);
2384 case SLJIT_PREFETCH_L1:
2385 case SLJIT_PREFETCH_L2:
2386 case SLJIT_PREFETCH_L3:
2387 case SLJIT_PREFETCH_ONCE:
2388 return emit_prefetch(compiler, op, src, srcw);
2391 return SLJIT_SUCCESS;
2394 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2396 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2397 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2398 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2399 return -1;
2400 #endif
2401 return reg_map[reg];
2404 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2406 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2407 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2408 return reg;
2409 #else
2410 return freg_map[reg];
2411 #endif
2414 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2415 void *instruction, sljit_u32 size)
2417 sljit_u8 *inst;
2419 CHECK_ERROR();
2420 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2422 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2423 FAIL_IF(!inst);
2424 INC_SIZE(size);
2425 SLJIT_MEMCPY(inst, instruction, size);
2426 return SLJIT_SUCCESS;
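/* Usage sketch (not part of the library itself): sljit_emit_op_custom copies
   the given bytes verbatim into the instruction stream, so a raw RDTSC (0F 31)
   could be emitted as:

       sljit_u8 rdtsc_bytes[2] = { 0x0f, 0x31 };
       sljit_emit_op_custom(compiler, rdtsc_bytes, sizeof(rdtsc_bytes));
*/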
2429 /* --------------------------------------------------------------------- */
2430 /* Floating point operators */
2431 /* --------------------------------------------------------------------- */
2433 /* 3 words of padding for 16 byte alignment + 4 constants of 16 bytes each. */
2434 static sljit_u32 sse2_data[3 + (4 * 4)];
2435 static sljit_u32 *sse2_buffer;
2437 static void init_compiler(void)
2439 /* Align to 16 bytes. */
2440 sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);
2442	/* Single precision constants (each constant is 16 bytes long). */
2443 sse2_buffer[0] = 0x80000000;
2444 sse2_buffer[4] = 0x7fffffff;
2445	/* Double precision constants (each constant is 16 bytes long). */
2446 sse2_buffer[8] = 0;
2447 sse2_buffer[9] = 0x80000000;
2448 sse2_buffer[12] = 0xffffffff;
2449 sse2_buffer[13] = 0x7fffffff;
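/* The constants above are the masks used by SLJIT_NEG_F64 and SLJIT_ABS_F64
   below: XORPD with the sign-bit mask flips the sign, ANDPD with its
   complement clears it. Low lane of each 16 byte slot:
     sse2_buffer +  0 : 0x80000000          (float sign bit)
     sse2_buffer +  4 : 0x7fffffff          (float magnitude mask)
     sse2_buffer +  8 : 0x8000000000000000  (double sign bit)
     sse2_buffer + 12 : 0x7fffffffffffffff  (double magnitude mask) */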
2452 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2453 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2455 sljit_u8 *inst;
2457 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2458 FAIL_IF(!inst);
2459 *inst++ = GROUP_0F;
2460 *inst = opcode;
2461 return SLJIT_SUCCESS;
2464 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2465 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2467 sljit_u8 *inst;
2469 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2470 FAIL_IF(!inst);
2471 *inst++ = GROUP_0F;
2472 *inst = opcode;
2473 return SLJIT_SUCCESS;
2476 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2477 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2479 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2482 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2483 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2485 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2488 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2489 sljit_s32 dst, sljit_sw dstw,
2490 sljit_s32 src, sljit_sw srcw)
2492 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2493 sljit_u8 *inst;
2495 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2496 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2497 compiler->mode32 = 0;
2498 #endif
2500 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2501 FAIL_IF(!inst);
2502 *inst++ = GROUP_0F;
2503 *inst = CVTTSD2SI_r_xm;
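	/* The truncating forms (CVTTSD2SI / CVTTSS2SI) always round toward zero,
	   independently of the MXCSR rounding mode. */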
2505 if (dst & SLJIT_MEM)
2506 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2507 return SLJIT_SUCCESS;
2510 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2511 sljit_s32 dst, sljit_sw dstw,
2512 sljit_s32 src, sljit_sw srcw)
2514 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2515 sljit_u8 *inst;
2517 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2518 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2519 compiler->mode32 = 0;
2520 #endif
2522 if (src & SLJIT_IMM) {
2523 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2524 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2525 srcw = (sljit_s32)srcw;
2526 #endif
2527 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2528 src = TMP_REG1;
2529 srcw = 0;
2532 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2533 FAIL_IF(!inst);
2534 *inst++ = GROUP_0F;
2535 *inst = CVTSI2SD_x_rm;
2537 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2538 compiler->mode32 = 1;
2539 #endif
2540 if (dst_r == TMP_FREG)
2541 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2542 return SLJIT_SUCCESS;
2545 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2546 sljit_s32 src1, sljit_sw src1w,
2547 sljit_s32 src2, sljit_sw src2w)
2549 if (!FAST_IS_REG(src1)) {
2550 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2551 src1 = TMP_FREG;
2554 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_32), src1, src2, src2w);
2557 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2558 sljit_s32 dst, sljit_sw dstw,
2559 sljit_s32 src, sljit_sw srcw)
2561 sljit_s32 dst_r;
2563 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2564 compiler->mode32 = 1;
2565 #endif
2567 CHECK_ERROR();
2568 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2570 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2571 if (FAST_IS_REG(dst))
2572 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
2573 if (FAST_IS_REG(src))
2574 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
2575 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
2576 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2579 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2580 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2581 if (FAST_IS_REG(src)) {
2582		/* We overwrite the high bits of the source register; from the SLJIT
2583		   point of view this is not an issue.
2584		   Note: with SSE3 we could also use MOVDDUP or MOVSLDUP. */
2585 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_32, src, src, 0));
2587 else {
2588 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
2589 src = TMP_FREG;
2592 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_32, dst_r, src, 0));
2593 if (dst_r == TMP_FREG)
2594 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2595 return SLJIT_SUCCESS;
2598 if (FAST_IS_REG(dst)) {
2599 dst_r = dst;
2600 if (dst != src)
2601 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2603 else {
2604 dst_r = TMP_FREG;
2605 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src, srcw));
2608 switch (GET_OPCODE(op)) {
2609 case SLJIT_NEG_F64:
2610 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer : sse2_buffer + 8)));
2611 break;
2613 case SLJIT_ABS_F64:
2614 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_32 ? sse2_buffer + 4 : sse2_buffer + 12)));
2615 break;
2618 if (dst_r == TMP_FREG)
2619 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2620 return SLJIT_SUCCESS;
2623 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2624 sljit_s32 dst, sljit_sw dstw,
2625 sljit_s32 src1, sljit_sw src1w,
2626 sljit_s32 src2, sljit_sw src2w)
2628 sljit_s32 dst_r;
2630 CHECK_ERROR();
2631 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2632 ADJUST_LOCAL_OFFSET(dst, dstw);
2633 ADJUST_LOCAL_OFFSET(src1, src1w);
2634 ADJUST_LOCAL_OFFSET(src2, src2w);
2636 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2637 compiler->mode32 = 1;
2638 #endif
2640 if (FAST_IS_REG(dst)) {
2641 dst_r = dst;
2642 if (dst == src1)
2643 ; /* Do nothing here. */
2644 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2645 /* Swap arguments. */
2646 src2 = src1;
2647 src2w = src1w;
2649 else if (dst != src2)
2650 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
2651 else {
2652 dst_r = TMP_FREG;
2653 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2656 else {
2657 dst_r = TMP_FREG;
2658 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
2661 switch (GET_OPCODE(op)) {
2662 case SLJIT_ADD_F64:
2663 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2664 break;
2666 case SLJIT_SUB_F64:
2667 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2668 break;
2670 case SLJIT_MUL_F64:
2671 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2672 break;
2674 case SLJIT_DIV_F64:
2675 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_32, dst_r, src2, src2w));
2676 break;
2679 if (dst_r == TMP_FREG)
2680 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
2681 return SLJIT_SUCCESS;
2684 /* --------------------------------------------------------------------- */
2685 /* Conditional instructions */
2686 /* --------------------------------------------------------------------- */
2688 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2690 sljit_u8 *inst;
2691 struct sljit_label *label;
2693 CHECK_ERROR_PTR();
2694 CHECK_PTR(check_sljit_emit_label(compiler));
2696 if (compiler->last_label && compiler->last_label->size == compiler->size)
2697 return compiler->last_label;
2699 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2700 PTR_FAIL_IF(!label);
2701 set_label(label, compiler);
2703 inst = (sljit_u8*)ensure_buf(compiler, 2);
2704 PTR_FAIL_IF(!inst);
2706 *inst++ = 0;
2707 *inst++ = 0;
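	/* The leading byte of each buffered chunk is its code length; the zero
	   written here marks a non-code record, and the next byte selects its
	   kind: label (0), jump (1), const (2) or put_label (3). These records
	   are resolved later by sljit_generate_code. */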
2709 return label;
2712 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2714 sljit_u8 *inst;
2715 struct sljit_jump *jump;
2717 CHECK_ERROR_PTR();
2718 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2720 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2721 PTR_FAIL_IF_NULL(jump);
2722 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
2723 type &= 0xff;
2725 /* Worst case size. */
2726 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2727 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2728 #else
2729 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2730 #endif
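	/* Encodings behind the worst case sizes above: on x86-32 a rel32 jump or
	   call is 5 bytes (E9/E8 rel32) and a long conditional jump is 6 bytes
	   (0F 8x rel32); on x86-64 an absolute jump may need a 10 byte
	   mov reg, imm64 plus a 3 byte indirect jump, preceded by a 2 byte
	   inverted short jcc when the jump is conditional. */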
2732 inst = (sljit_u8*)ensure_buf(compiler, 2);
2733 PTR_FAIL_IF_NULL(inst);
2735 *inst++ = 0;
2736 *inst++ = 1;
2737 return jump;
2740 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2742 sljit_u8 *inst;
2743 struct sljit_jump *jump;
2745 CHECK_ERROR();
2746 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2747 ADJUST_LOCAL_OFFSET(src, srcw);
2749 CHECK_EXTRA_REGS(src, srcw, (void)0);
2751 if (src == SLJIT_IMM) {
2752 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2753 FAIL_IF_NULL(jump);
2754 set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
2755 jump->u.target = (sljit_uw)srcw;
2757 /* Worst case size. */
2758 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2759 compiler->size += 5;
2760 #else
2761 compiler->size += 10 + 3;
2762 #endif
2764 inst = (sljit_u8*)ensure_buf(compiler, 2);
2765 FAIL_IF_NULL(inst);
2767 *inst++ = 0;
2768 *inst++ = 1;
2770 else {
2771 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2772 /* REX_W is not necessary (src is not immediate). */
2773 compiler->mode32 = 1;
2774 #endif
2775 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2776 FAIL_IF(!inst);
2777 *inst++ = GROUP_FF;
2778 *inst = U8(*inst | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
2780 return SLJIT_SUCCESS;
2783 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2784 sljit_s32 dst, sljit_sw dstw,
2785 sljit_s32 type)
2787 sljit_u8 *inst;
2788 sljit_u8 cond_set = 0;
2789 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2790 sljit_s32 reg;
2791 #endif
2792 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2793 sljit_s32 dst_save = dst;
2794 sljit_sw dstw_save = dstw;
2796 CHECK_ERROR();
2797 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2799 ADJUST_LOCAL_OFFSET(dst, dstw);
2800 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2802 type &= 0xff;
2803 /* setcc = jcc + 0x10. */
2804 cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);
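	/* Worked example: for an equal comparison get_jump_code() returns 0x84,
	   the second byte of the long form JE (0F 84); adding 0x10 gives 0x94,
	   the second byte of SETE (0F 94). */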
2806 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2807 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2808 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2809 FAIL_IF(!inst);
2810 INC_SIZE(4 + 3);
2811 /* Set low register to conditional flag. */
2812 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2813 *inst++ = GROUP_0F;
2814 *inst++ = cond_set;
2815 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2816 *inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
2817 *inst++ = OR_rm8_r8;
2818 *inst++ = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
2819 return SLJIT_SUCCESS;
2822 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2824 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2825 FAIL_IF(!inst);
2826 INC_SIZE(4 + 4);
2827 /* Set low register to conditional flag. */
2828 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2829 *inst++ = GROUP_0F;
2830 *inst++ = cond_set;
2831 *inst++ = MOD_REG | reg_lmap[reg];
2832 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2833 /* The movzx instruction does not affect flags. */
2834 *inst++ = GROUP_0F;
2835 *inst++ = MOVZX_r_rm8;
2836 *inst = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);
2838 if (reg != TMP_REG1)
2839 return SLJIT_SUCCESS;
2841 if (GET_OPCODE(op) < SLJIT_ADD) {
2842 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2843 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2846 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2847 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2848 compiler->skip_checks = 1;
2849 #endif
2850 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2852 #else
2853 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2854 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2855 if (reg_map[dst] <= 4) {
2856 /* Low byte is accessible. */
2857 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2858 FAIL_IF(!inst);
2859 INC_SIZE(3 + 3);
2860 /* Set low byte to conditional flag. */
2861 *inst++ = GROUP_0F;
2862 *inst++ = cond_set;
2863 *inst++ = U8(MOD_REG | reg_map[dst]);
2865 *inst++ = GROUP_0F;
2866 *inst++ = MOVZX_r_rm8;
2867 *inst = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
2868 return SLJIT_SUCCESS;
2871 /* Low byte is not accessible. */
2872 if (cpu_has_cmov == -1)
2873 get_cpu_features();
2875 if (cpu_has_cmov) {
2876 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2877		/* A xor reg, reg operation would overwrite the flags, so clear dst with a mov instead. */
2878 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2880 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2881 FAIL_IF(!inst);
2882 INC_SIZE(3);
2884 *inst++ = GROUP_0F;
2885 /* cmovcc = setcc - 0x50. */
2886 *inst++ = U8(cond_set - 0x50);
2887 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1]);
2888 return SLJIT_SUCCESS;
2891 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2892 FAIL_IF(!inst);
2893 INC_SIZE(1 + 3 + 3 + 1);
2894 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2895 /* Set al to conditional flag. */
2896 *inst++ = GROUP_0F;
2897 *inst++ = cond_set;
2898 *inst++ = MOD_REG | 0 /* eax */;
2900 *inst++ = GROUP_0F;
2901 *inst++ = MOVZX_r_rm8;
2902 *inst++ = U8(MOD_REG | (reg_map[dst] << 3) | 0 /* eax */);
2903 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2904 return SLJIT_SUCCESS;
2907 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2908 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2910 if (dst != SLJIT_R0) {
2911 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2912 FAIL_IF(!inst);
2913 INC_SIZE(1 + 3 + 2 + 1);
2914 /* Set low register to conditional flag. */
2915 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2916 *inst++ = GROUP_0F;
2917 *inst++ = cond_set;
2918 *inst++ = MOD_REG | 0 /* eax */;
2919 *inst++ = OR_rm8_r8;
2920 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2921 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2923 else {
2924 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2925 FAIL_IF(!inst);
2926 INC_SIZE(2 + 3 + 2 + 2);
2927 /* Set low register to conditional flag. */
2928 *inst++ = XCHG_r_rm;
2929 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
2930 *inst++ = GROUP_0F;
2931 *inst++ = cond_set;
2932 *inst++ = MOD_REG | 1 /* ecx */;
2933 *inst++ = OR_rm8_r8;
2934 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2935 *inst++ = XCHG_r_rm;
2936 *inst++ = U8(MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1]);
2938 return SLJIT_SUCCESS;
2941	/* Set TMP_REG1 to the conditional bit (0 or 1). */
2942 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2943 FAIL_IF(!inst);
2944 INC_SIZE(1 + 3 + 3 + 1);
2945 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2946 /* Set al to conditional flag. */
2947 *inst++ = GROUP_0F;
2948 *inst++ = cond_set;
2949 *inst++ = MOD_REG | 0 /* eax */;
2951 *inst++ = GROUP_0F;
2952 *inst++ = MOVZX_r_rm8;
2953 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2955 *inst++ = U8(XCHG_EAX_r | reg_map[TMP_REG1]);
2957 if (GET_OPCODE(op) < SLJIT_ADD)
2958 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2960 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2961 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2962 compiler->skip_checks = 1;
2963 #endif
2964 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2965 #endif /* SLJIT_CONFIG_X86_64 */
2968 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2969 sljit_s32 dst_reg,
2970 sljit_s32 src, sljit_sw srcw)
2972 sljit_u8* inst;
2974 CHECK_ERROR();
2975 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2977 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2978 dst_reg &= ~SLJIT_32;
2980 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2981 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2982 #else
2983 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2984 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2985 #endif
2987 /* ADJUST_LOCAL_OFFSET is not needed. */
2988 CHECK_EXTRA_REGS(src, srcw, (void)0);
2990 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2991 compiler->mode32 = dst_reg & SLJIT_32;
2992 dst_reg &= ~SLJIT_32;
2993 #endif
2995 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2996 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2997 src = TMP_REG1;
2998 srcw = 0;
3001 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3002 FAIL_IF(!inst);
3003 *inst++ = GROUP_0F;
3004 *inst = U8(get_jump_code(type & 0xff) - 0x40);
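	/* CMOVcc is encoded as 0F 40+cc while the long form Jcc is 0F 80+cc, so
	   subtracting 0x40 from the jump code yields the CMOV opcode byte
	   (e.g. 0x84 for equal becomes 0x44, CMOVE). */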
3005 return SLJIT_SUCCESS;
3008 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3010 CHECK_ERROR();
3011 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3012 ADJUST_LOCAL_OFFSET(dst, dstw);
3014 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3016 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3017 compiler->mode32 = 0;
3018 #endif
3020 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3022 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3023 if (NOT_HALFWORD(offset)) {
3024 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
3025 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
3026 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
3027 return compiler->error;
3028 #else
3029 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
3030 #endif
3032 #endif
3034 if (offset != 0)
3035 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
3036 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
3039 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3041 sljit_u8 *inst;
3042 struct sljit_const *const_;
3043 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3044 sljit_s32 reg;
3045 #endif
3047 CHECK_ERROR_PTR();
3048 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3049 ADJUST_LOCAL_OFFSET(dst, dstw);
3051 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3053 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3054 PTR_FAIL_IF(!const_);
3055 set_const(const_, compiler);
3057 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3058 compiler->mode32 = 0;
3059 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3061 if (emit_load_imm64(compiler, reg, init_value))
3062 return NULL;
3063 #else
3064 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
3065 return NULL;
3066 #endif
3068 inst = (sljit_u8*)ensure_buf(compiler, 2);
3069 PTR_FAIL_IF(!inst);
3071 *inst++ = 0;
3072 *inst++ = 2;
3074 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3075 if (dst & SLJIT_MEM)
3076 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3077 return NULL;
3078 #endif
3080 return const_;
3083 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3085 struct sljit_put_label *put_label;
3086 sljit_u8 *inst;
3087 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3088 sljit_s32 reg;
3089 sljit_uw start_size;
3090 #endif
3092 CHECK_ERROR_PTR();
3093 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3094 ADJUST_LOCAL_OFFSET(dst, dstw);
3096 CHECK_EXTRA_REGS(dst, dstw, (void)0);
3098 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3099 PTR_FAIL_IF(!put_label);
3100 set_put_label(put_label, compiler, 0);
3102 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3103 compiler->mode32 = 0;
3104 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3106 if (emit_load_imm64(compiler, reg, 0))
3107 return NULL;
3108 #else
3109 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
3110 return NULL;
3111 #endif
3113 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3114 if (dst & SLJIT_MEM) {
3115 start_size = compiler->size;
3116 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3117 return NULL;
3118 put_label->flags = compiler->size - start_size;
3120 #endif
3122 inst = (sljit_u8*)ensure_buf(compiler, 2);
3123 PTR_FAIL_IF(!inst);
3125 *inst++ = 0;
3126 *inst++ = 3;
3128 return put_label;
3131 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3133 SLJIT_UNUSED_ARG(executable_offset);
3135 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
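	/* On x86-32 the patched word is the rel32 displacement of the jump/call,
	   measured from the end of the 4 byte field, hence the (addr + 4) and
	   executable_offset adjustment; on x86-64 it is the absolute 64 bit
	   immediate of the mov that loads the target register. */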
3136 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3137 sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
3138 #else
3139 sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
3140 #endif
3141 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
3144 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3146 SLJIT_UNUSED_ARG(executable_offset);
3148 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
3149 sljit_unaligned_store_sw((void*)addr, new_constant);
3150 SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);