sljit_src/sljitNativeX86_common.c
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
30 return "x86" SLJIT_CPUINFO " ABI:fastcall";
31 #else
32 return "x86" SLJIT_CPUINFO;
33 #endif
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - ESP
43 5 - EBP
44 6 - ESI
45 7 - EDI
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - RSP
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - From now on REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
70 /* Last register + 1. */
71 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
73 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
74 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
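/* SLJIT_R3-SLJIT_S3 have no dedicated hardware register on x86-32; the
   CHECK_EXTRA_REGS macro below rewrites such operands into SLJIT_MEM1(SLJIT_SP)
   accesses to their stack slots. */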
77 #define CHECK_EXTRA_REGS(p, w, do) \
78 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
79 if (p <= compiler->scratches) \
80 w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
81 else \
82 w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
83 p = SLJIT_MEM1(SLJIT_SP); \
84 do; \
87 #else /* SLJIT_CONFIG_X86_32 */
89 /* Last register + 1. */
90 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
91 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
93 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
94 Note: avoid using r12 and r13 for memory addressing;
95 therefore r12 is better used as a higher saved register. */
96 #ifndef _WIN64
97 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
98 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
99 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
101 /* low-map. reg_map & 0x7. */
102 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
103 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
105 #else
106 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
108 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
110 /* low-map. reg_map & 0x7. */
111 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
112 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
114 #endif
116 /* Args: xmm0-xmm3 */
117 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
118 4, 0, 1, 2, 3, 5, 6
120 /* low-map. freg_map & 0x7. */
121 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
122 4, 0, 1, 2, 3, 5, 6
125 #define REX_W 0x48
126 #define REX_R 0x44
127 #define REX_X 0x42
128 #define REX_B 0x41
129 #define REX 0x40
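/* REX prefix layout is 0100WRXB: REX_W selects a 64 bit operand size, REX_R
   extends the ModRM reg field, REX_X the SIB index field, and REX_B the
   ModRM rm / SIB base field (needed to reach r8-r15). */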
131 #ifndef _WIN64
132 #define HALFWORD_MAX 0x7fffffffl
133 #define HALFWORD_MIN -0x80000000l
134 #else
135 #define HALFWORD_MAX 0x7fffffffll
136 #define HALFWORD_MIN -0x80000000ll
137 #endif
139 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
140 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
142 #define CHECK_EXTRA_REGS(p, w, do)
144 #endif /* SLJIT_CONFIG_X86_32 */
146 #define TMP_FREG (0)
148 /* Size flags for emit_x86_instruction: */
149 #define EX86_BIN_INS 0x0010
150 #define EX86_SHIFT_INS 0x0020
151 #define EX86_REX 0x0040
152 #define EX86_NO_REXW 0x0080
153 #define EX86_BYTE_ARG 0x0100
154 #define EX86_HALF_ARG 0x0200
155 #define EX86_PREF_66 0x0400
156 #define EX86_PREF_F2 0x0800
157 #define EX86_PREF_F3 0x1000
158 #define EX86_SSE2_OP1 0x2000
159 #define EX86_SSE2_OP2 0x4000
160 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
162 /* --------------------------------------------------------------------- */
163 /* Instruction forms */
164 /* --------------------------------------------------------------------- */
166 #define ADD (/* BINARY */ 0 << 3)
167 #define ADD_EAX_i32 0x05
168 #define ADD_r_rm 0x03
169 #define ADD_rm_r 0x01
170 #define ADDSD_x_xm 0x58
171 #define ADC (/* BINARY */ 2 << 3)
172 #define ADC_EAX_i32 0x15
173 #define ADC_r_rm 0x13
174 #define ADC_rm_r 0x11
175 #define AND (/* BINARY */ 4 << 3)
176 #define AND_EAX_i32 0x25
177 #define AND_r_rm 0x23
178 #define AND_rm_r 0x21
179 #define ANDPD_x_xm 0x54
180 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
181 #define CALL_i32 0xe8
182 #define CALL_rm (/* GROUP_FF */ 2 << 3)
183 #define CDQ 0x99
184 #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
185 #define CMP (/* BINARY */ 7 << 3)
186 #define CMP_EAX_i32 0x3d
187 #define CMP_r_rm 0x3b
188 #define CMP_rm_r 0x39
189 #define CVTPD2PS_x_xm 0x5a
190 #define CVTSI2SD_x_rm 0x2a
191 #define CVTTSD2SI_r_xm 0x2c
192 #define DIV (/* GROUP_F7 */ 6 << 3)
193 #define DIVSD_x_xm 0x5e
194 #define FSTPS 0xd9
195 #define FSTPD 0xdd
196 #define INT3 0xcc
197 #define IDIV (/* GROUP_F7 */ 7 << 3)
198 #define IMUL (/* GROUP_F7 */ 5 << 3)
199 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
200 #define IMUL_r_rm_i8 0x6b
201 #define IMUL_r_rm_i32 0x69
202 #define JE_i8 0x74
203 #define JNE_i8 0x75
204 #define JMP_i8 0xeb
205 #define JMP_i32 0xe9
206 #define JMP_rm (/* GROUP_FF */ 4 << 3)
207 #define LEA_r_m 0x8d
208 #define MOV_r_rm 0x8b
209 #define MOV_r_i32 0xb8
210 #define MOV_rm_r 0x89
211 #define MOV_rm_i32 0xc7
212 #define MOV_rm8_i8 0xc6
213 #define MOV_rm8_r8 0x88
214 #define MOVSD_x_xm 0x10
215 #define MOVSD_xm_x 0x11
216 #define MOVSXD_r_rm 0x63
217 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
218 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
219 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
220 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
221 #define MUL (/* GROUP_F7 */ 4 << 3)
222 #define MULSD_x_xm 0x59
223 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
224 #define NOP 0x90
225 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
226 #define OR (/* BINARY */ 1 << 3)
227 #define OR_r_rm 0x0b
228 #define OR_EAX_i32 0x0d
229 #define OR_rm_r 0x09
230 #define OR_rm8_r8 0x08
231 #define POP_r 0x58
232 #define POP_rm 0x8f
233 #define POPF 0x9d
234 #define PREFETCH 0x18
235 #define PUSH_i32 0x68
236 #define PUSH_r 0x50
237 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
238 #define PUSHF 0x9c
239 #define RET_near 0xc3
240 #define RET_i16 0xc2
241 #define SBB (/* BINARY */ 3 << 3)
242 #define SBB_EAX_i32 0x1d
243 #define SBB_r_rm 0x1b
244 #define SBB_rm_r 0x19
245 #define SAR (/* SHIFT */ 7 << 3)
246 #define SHL (/* SHIFT */ 4 << 3)
247 #define SHR (/* SHIFT */ 5 << 3)
248 #define SUB (/* BINARY */ 5 << 3)
249 #define SUB_EAX_i32 0x2d
250 #define SUB_r_rm 0x2b
251 #define SUB_rm_r 0x29
252 #define SUBSD_x_xm 0x5c
253 #define TEST_EAX_i32 0xa9
254 #define TEST_rm_r 0x85
255 #define UCOMISD_x_xm 0x2e
256 #define UNPCKLPD_x_xm 0x14
257 #define XCHG_EAX_r 0x90
258 #define XCHG_r_rm 0x87
259 #define XOR (/* BINARY */ 6 << 3)
260 #define XOR_EAX_i32 0x35
261 #define XOR_r_rm 0x33
262 #define XOR_rm_r 0x31
263 #define XORPD_x_xm 0x57
265 #define GROUP_0F 0x0f
266 #define GROUP_F7 0xf7
267 #define GROUP_FF 0xff
268 #define GROUP_BINARY_81 0x81
269 #define GROUP_BINARY_83 0x83
270 #define GROUP_SHIFT_1 0xd1
271 #define GROUP_SHIFT_N 0xc1
272 #define GROUP_SHIFT_CL 0xd3
274 #define MOD_REG 0xc0
275 #define MOD_DISP8 0x40
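/* ModRM mod field values: MOD_REG (0xc0) selects a register-direct operand,
   MOD_DISP8 (0x40) a memory operand with an 8 bit displacement. */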
277 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
279 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
280 #define POP_REG(r) (*inst++ = (POP_r + (r)))
281 #define RET() (*inst++ = (RET_near))
282 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
283 /* r32, r/m32 */
284 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
286 /* Multithreading does not affect these static variables, since they store
287 built-in CPU features. They may safely be overwritten by different threads
288 that detect the CPU features at the same time. */
289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
290 static sljit_s32 cpu_has_sse2 = -1;
291 #endif
292 static sljit_s32 cpu_has_cmov = -1;
294 #ifdef _WIN32_WCE
295 #include <cmnintrin.h>
296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
297 #include <intrin.h>
298 #endif
300 /******************************************************/
301 /* Unaligned-store functions */
302 /******************************************************/
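/* SLJIT_MEMCPY is used so that stores to possibly unaligned or aliased
   addresses stay well defined C; optimizing compilers typically lower these
   calls to single mov instructions. */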
304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
306 SLJIT_MEMCPY(addr, &value, sizeof(value));
309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
311 SLJIT_MEMCPY(addr, &value, sizeof(value));
314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
316 SLJIT_MEMCPY(addr, &value, sizeof(value));
319 /******************************************************/
320 /* Utility functions */
321 /******************************************************/
323 static void get_cpu_features(void)
325 sljit_u32 features;
327 #if defined(_MSC_VER) && _MSC_VER >= 1400
329 int CPUInfo[4];
330 __cpuid(CPUInfo, 1);
331 features = (sljit_u32)CPUInfo[3];
333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
335 /* AT&T syntax. */
336 __asm__ (
337 "movl $0x1, %%eax\n"
338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
339 /* On x86-32, there is no red zone, so this
340 should work (no need for a local variable). */
341 "push %%ebx\n"
342 #endif
343 "cpuid\n"
344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
345 "pop %%ebx\n"
346 #endif
347 "movl %%edx, %0\n"
348 : "=g" (features)
350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
351 : "%eax", "%ecx", "%edx"
352 #else
353 : "%rax", "%rbx", "%rcx", "%rdx"
354 #endif
357 #else /* _MSC_VER && _MSC_VER >= 1400 */
359 /* Intel syntax. */
360 __asm {
361 mov eax, 1
362 cpuid
363 mov features, edx
366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
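/* CPUID leaf 1 reports the feature flags in EDX: bit 26 indicates SSE2
   and bit 15 indicates CMOV support. */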
368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
369 cpu_has_sse2 = (features >> 26) & 0x1;
370 #endif
371 cpu_has_cmov = (features >> 15) & 0x1;
374 static sljit_u8 get_jump_code(sljit_s32 type)
376 switch (type) {
377 case SLJIT_EQUAL:
378 case SLJIT_EQUAL_F64:
379 return 0x84 /* je */;
381 case SLJIT_NOT_EQUAL:
382 case SLJIT_NOT_EQUAL_F64:
383 return 0x85 /* jne */;
385 case SLJIT_LESS:
386 case SLJIT_LESS_F64:
387 return 0x82 /* jc */;
389 case SLJIT_GREATER_EQUAL:
390 case SLJIT_GREATER_EQUAL_F64:
391 return 0x83 /* jae */;
393 case SLJIT_GREATER:
394 case SLJIT_GREATER_F64:
395 return 0x87 /* jnbe */;
397 case SLJIT_LESS_EQUAL:
398 case SLJIT_LESS_EQUAL_F64:
399 return 0x86 /* jbe */;
401 case SLJIT_SIG_LESS:
402 return 0x8c /* jl */;
404 case SLJIT_SIG_GREATER_EQUAL:
405 return 0x8d /* jnl */;
407 case SLJIT_SIG_GREATER:
408 return 0x8f /* jnle */;
410 case SLJIT_SIG_LESS_EQUAL:
411 return 0x8e /* jle */;
413 case SLJIT_OVERFLOW:
414 case SLJIT_MUL_OVERFLOW:
415 return 0x80 /* jo */;
417 case SLJIT_NOT_OVERFLOW:
418 case SLJIT_MUL_NOT_OVERFLOW:
419 return 0x81 /* jno */;
421 case SLJIT_UNORDERED_F64:
422 return 0x8a /* jp */;
424 case SLJIT_ORDERED_F64:
425 return 0x8b /* jpo */;
427 return 0;
430 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
431 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
432 #else
433 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
434 #endif
436 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
438 sljit_s32 short_jump;
439 sljit_uw label_addr;
441 if (jump->flags & JUMP_LABEL)
442 label_addr = (sljit_uw)(code + jump->u.label->size);
443 else
444 label_addr = jump->u.target - executable_offset;
446 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
448 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
449 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
450 return generate_far_jump_code(jump, code_ptr, type);
451 #endif
453 if (type == SLJIT_JUMP) {
454 if (short_jump)
455 *code_ptr++ = JMP_i8;
456 else
457 *code_ptr++ = JMP_i32;
458 jump->addr++;
460 else if (type >= SLJIT_FAST_CALL) {
461 short_jump = 0;
462 *code_ptr++ = CALL_i32;
463 jump->addr++;
465 else if (short_jump) {
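/* Short conditional jumps use opcodes 0x70-0x7f; get_jump_code returns the
   second byte of the 0x0f 0x80-0x8f near form, which is exactly 0x10 larger. */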
466 *code_ptr++ = get_jump_code(type) - 0x10;
467 jump->addr++;
469 else {
470 *code_ptr++ = GROUP_0F;
471 *code_ptr++ = get_jump_code(type);
472 jump->addr += 2;
475 if (short_jump) {
476 jump->flags |= PATCH_MB;
477 code_ptr += sizeof(sljit_s8);
478 } else {
479 jump->flags |= PATCH_MW;
480 code_ptr += sizeof(sljit_s32);
483 return code_ptr;
486 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
488 struct sljit_memory_fragment *buf;
489 sljit_u8 *code;
490 sljit_u8 *code_ptr;
491 sljit_u8 *buf_ptr;
492 sljit_u8 *buf_end;
493 sljit_u8 len;
494 sljit_sw executable_offset;
495 sljit_sw jump_addr;
497 struct sljit_label *label;
498 struct sljit_jump *jump;
499 struct sljit_const *const_;
501 CHECK_ERROR_PTR();
502 CHECK_PTR(check_sljit_generate_code(compiler));
503 reverse_buf(compiler);
505 /* Second code generation pass. */
506 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
507 PTR_FAIL_WITH_EXEC_IF(code);
508 buf = compiler->buf;
510 code_ptr = code;
511 label = compiler->labels;
512 jump = compiler->jumps;
513 const_ = compiler->consts;
514 executable_offset = SLJIT_EXEC_OFFSET(code);
516 do {
517 buf_ptr = buf->memory;
518 buf_end = buf_ptr + buf->used_size;
519 do {
520 len = *buf_ptr++;
521 if (len > 0) {
522 /* The code is already generated. */
523 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
524 code_ptr += len;
525 buf_ptr += len;
527 else {
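/* A zero length byte is followed by a control record: 0 marks a label,
   1 a constant, and a value >= 2 a jump whose type is that value minus 2. */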
528 if (*buf_ptr >= 2) {
529 jump->addr = (sljit_uw)code_ptr;
530 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
531 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
532 else {
533 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
534 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
535 #else
536 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
537 #endif
539 jump = jump->next;
541 else if (*buf_ptr == 0) {
542 label->addr = ((sljit_uw)code_ptr) + executable_offset;
543 label->size = code_ptr - code;
544 label = label->next;
546 else { /* *buf_ptr is 1 */
547 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
548 const_ = const_->next;
550 buf_ptr++;
552 } while (buf_ptr < buf_end);
553 SLJIT_ASSERT(buf_ptr == buf_end);
554 buf = buf->next;
555 } while (buf);
557 SLJIT_ASSERT(!label);
558 SLJIT_ASSERT(!jump);
559 SLJIT_ASSERT(!const_);
561 jump = compiler->jumps;
562 while (jump) {
563 jump_addr = jump->addr + executable_offset;
565 if (jump->flags & PATCH_MB) {
566 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
567 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
568 } else if (jump->flags & PATCH_MW) {
569 if (jump->flags & JUMP_LABEL) {
570 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
571 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
572 #else
573 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
574 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
575 #endif
577 else {
578 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
579 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
580 #else
581 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
582 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
583 #endif
586 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
587 else if (jump->flags & PATCH_MD)
588 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
589 #endif
591 jump = jump->next;
594 /* Some space may be wasted because of short jumps. */
595 SLJIT_ASSERT(code_ptr <= code + compiler->size);
596 compiler->error = SLJIT_ERR_COMPILED;
597 compiler->executable_offset = executable_offset;
598 compiler->executable_size = code_ptr - code;
599 return (void*)(code + executable_offset);
602 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
604 switch (feature_type) {
605 case SLJIT_HAS_FPU:
606 #ifdef SLJIT_IS_FPU_AVAILABLE
607 return SLJIT_IS_FPU_AVAILABLE;
608 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
609 if (cpu_has_sse2 == -1)
610 get_cpu_features();
611 return cpu_has_sse2;
612 #else /* SLJIT_DETECT_SSE2 */
613 return 1;
614 #endif /* SLJIT_DETECT_SSE2 */
616 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
617 case SLJIT_HAS_VIRTUAL_REGISTERS:
618 return 1;
619 #endif
621 case SLJIT_HAS_CLZ:
622 case SLJIT_HAS_CMOV:
623 if (cpu_has_cmov == -1)
624 get_cpu_features();
625 return cpu_has_cmov;
627 case SLJIT_HAS_SSE2:
628 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
629 if (cpu_has_sse2 == -1)
630 get_cpu_features();
631 return cpu_has_sse2;
632 #else
633 return 1;
634 #endif
636 default:
637 return 0;
641 /* --------------------------------------------------------------------- */
642 /* Operators */
643 /* --------------------------------------------------------------------- */
645 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
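/* BINARY_OPCODE packs the four encodings of a binary ALU operation into one
   sljit_u32: bits 31-24 hold the EAX, imm32 opcode, bits 23-16 the r, r/m
   opcode, bits 15-8 the r/m, r opcode, and bits 7-0 the pre-shifted /digit
   ORed into the ModRM byte of the GROUP_BINARY_81 / GROUP_BINARY_83 immediate
   forms. emit_cum_binary and emit_non_cum_binary unpack these fields from
   their op_types argument. */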
647 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
648 sljit_u32 op_types,
649 sljit_s32 dst, sljit_sw dstw,
650 sljit_s32 src1, sljit_sw src1w,
651 sljit_s32 src2, sljit_sw src2w);
653 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
654 sljit_u32 op_types,
655 sljit_s32 dst, sljit_sw dstw,
656 sljit_s32 src1, sljit_sw src1w,
657 sljit_s32 src2, sljit_sw src2w);
659 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
660 sljit_s32 dst, sljit_sw dstw,
661 sljit_s32 src, sljit_sw srcw);
663 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
664 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
666 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
667 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
669 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
670 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
672 #ifdef _WIN32
673 #include <malloc.h>
675 static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
677 /* Workaround for calling the internal _chkstk() function on Windows.
678 This function touches all 4k pages belonging to the requested stack space,
679 whose size is passed in local_size. This is necessary on Windows, where
680 the stack can only grow in 4k steps. The function just burns CPU cycles
681 if the stack is already large enough, but since that cannot be known in
682 advance, it must always be called. I think this is a bad design in
683 general, even if it has its reasons. */
684 *(volatile sljit_s32*)alloca(local_size) = 0;
687 #endif
689 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
690 #include "sljitNativeX86_32.c"
691 #else
692 #include "sljitNativeX86_64.c"
693 #endif
695 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
696 sljit_s32 dst, sljit_sw dstw,
697 sljit_s32 src, sljit_sw srcw)
699 sljit_u8* inst;
701 SLJIT_ASSERT(dst != SLJIT_UNUSED);
703 if (FAST_IS_REG(src)) {
704 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
705 FAIL_IF(!inst);
706 *inst = MOV_rm_r;
707 return SLJIT_SUCCESS;
709 if (src & SLJIT_IMM) {
710 if (FAST_IS_REG(dst)) {
711 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
712 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
713 #else
714 if (!compiler->mode32) {
715 if (NOT_HALFWORD(srcw))
716 return emit_load_imm64(compiler, dst, srcw);
718 else
719 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
720 #endif
722 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
723 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
724 /* Immediate to memory move. Only the SLJIT_MOV operation copies
725 an immediate directly into memory, so TMP_REG1 can be used. */
726 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
727 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
728 FAIL_IF(!inst);
729 *inst = MOV_rm_r;
730 return SLJIT_SUCCESS;
732 #endif
733 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
734 FAIL_IF(!inst);
735 *inst = MOV_rm_i32;
736 return SLJIT_SUCCESS;
738 if (FAST_IS_REG(dst)) {
739 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
740 FAIL_IF(!inst);
741 *inst = MOV_r_rm;
742 return SLJIT_SUCCESS;
745 /* Memory to memory move. Only the SLJIT_MOV operation copies
746 data from memory to memory, so TMP_REG1 can be used. */
747 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
748 FAIL_IF(!inst);
749 *inst = MOV_r_rm;
750 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
751 FAIL_IF(!inst);
752 *inst = MOV_rm_r;
753 return SLJIT_SUCCESS;
756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
758 sljit_u8 *inst;
759 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
760 sljit_s32 size;
761 #endif
763 CHECK_ERROR();
764 CHECK(check_sljit_emit_op0(compiler, op));
766 switch (GET_OPCODE(op)) {
767 case SLJIT_BREAKPOINT:
768 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
769 FAIL_IF(!inst);
770 INC_SIZE(1);
771 *inst = INT3;
772 break;
773 case SLJIT_NOP:
774 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
775 FAIL_IF(!inst);
776 INC_SIZE(1);
777 *inst = NOP;
778 break;
779 case SLJIT_LMUL_UW:
780 case SLJIT_LMUL_SW:
781 case SLJIT_DIVMOD_UW:
782 case SLJIT_DIVMOD_SW:
783 case SLJIT_DIV_UW:
784 case SLJIT_DIV_SW:
785 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
786 #ifdef _WIN64
787 SLJIT_ASSERT(
788 reg_map[SLJIT_R0] == 0
789 && reg_map[SLJIT_R1] == 2
790 && reg_map[TMP_REG1] > 7);
791 #else
792 SLJIT_ASSERT(
793 reg_map[SLJIT_R0] == 0
794 && reg_map[SLJIT_R1] < 7
795 && reg_map[TMP_REG1] == 2);
796 #endif
797 compiler->mode32 = op & SLJIT_I32_OP;
798 #endif
799 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
801 op = GET_OPCODE(op);
802 if ((op | 0x2) == SLJIT_DIV_UW) {
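/* Unsigned division and remainder: clear the register holding the upper half
   of the dividend (EDX, or the register standing in for it) with a self-xor. */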
803 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
804 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
805 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
806 #else
807 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
808 #endif
809 FAIL_IF(!inst);
810 *inst = XOR_r_rm;
813 if ((op | 0x2) == SLJIT_DIV_SW) {
814 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
815 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
816 #endif
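/* Signed division: sign extend EAX into EDX with CDQ (prefixed with REX_W,
   i.e. CQO, for 64 bit operands). */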
818 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
819 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
820 FAIL_IF(!inst);
821 INC_SIZE(1);
822 *inst = CDQ;
823 #else
824 if (compiler->mode32) {
825 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
826 FAIL_IF(!inst);
827 INC_SIZE(1);
828 *inst = CDQ;
829 } else {
830 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
831 FAIL_IF(!inst);
832 INC_SIZE(2);
833 *inst++ = REX_W;
834 *inst = CDQ;
836 #endif
839 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
840 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
841 FAIL_IF(!inst);
842 INC_SIZE(2);
843 *inst++ = GROUP_F7;
844 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
845 #else
846 #ifdef _WIN64
847 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
848 #else
849 size = (!compiler->mode32) ? 3 : 2;
850 #endif
851 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
852 FAIL_IF(!inst);
853 INC_SIZE(size);
854 #ifdef _WIN64
855 if (!compiler->mode32)
856 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
857 else if (op >= SLJIT_DIVMOD_UW)
858 *inst++ = REX_B;
859 *inst++ = GROUP_F7;
860 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
861 #else
862 if (!compiler->mode32)
863 *inst++ = REX_W;
864 *inst++ = GROUP_F7;
865 *inst = MOD_REG | reg_map[SLJIT_R1];
866 #endif
867 #endif
868 switch (op) {
869 case SLJIT_LMUL_UW:
870 *inst |= MUL;
871 break;
872 case SLJIT_LMUL_SW:
873 *inst |= IMUL;
874 break;
875 case SLJIT_DIVMOD_UW:
876 case SLJIT_DIV_UW:
877 *inst |= DIV;
878 break;
879 case SLJIT_DIVMOD_SW:
880 case SLJIT_DIV_SW:
881 *inst |= IDIV;
882 break;
884 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
885 if (op <= SLJIT_DIVMOD_SW)
886 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
887 #else
888 if (op >= SLJIT_DIV_UW)
889 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
890 #endif
891 break;
894 return SLJIT_SUCCESS;
897 #define ENCODE_PREFIX(prefix) \
898 do { \
899 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
900 FAIL_IF(!inst); \
901 INC_SIZE(1); \
902 *inst = (prefix); \
903 } while (0)
905 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
906 sljit_s32 dst, sljit_sw dstw,
907 sljit_s32 src, sljit_sw srcw)
909 sljit_u8* inst;
910 sljit_s32 dst_r;
911 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
912 sljit_s32 work_r;
913 #endif
915 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
916 compiler->mode32 = 0;
917 #endif
919 if (src & SLJIT_IMM) {
920 if (FAST_IS_REG(dst)) {
921 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
922 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
923 #else
924 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
925 FAIL_IF(!inst);
926 *inst = MOV_rm_i32;
927 return SLJIT_SUCCESS;
928 #endif
930 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
931 FAIL_IF(!inst);
932 *inst = MOV_rm8_i8;
933 return SLJIT_SUCCESS;
936 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
938 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
939 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
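/* On x86-32 only registers mapped to EAX, ECX, EDX or EBX (reg_map value < 4)
   have byte-addressable low parts, so other sources go through TMP_REG1. */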
940 if (reg_map[src] >= 4) {
941 SLJIT_ASSERT(dst_r == TMP_REG1);
942 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
943 } else
944 dst_r = src;
945 #else
946 dst_r = src;
947 #endif
949 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
950 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
951 /* src, dst are registers. */
952 SLJIT_ASSERT(SLOW_IS_REG(dst));
953 if (reg_map[dst] < 4) {
954 if (dst != src)
955 EMIT_MOV(compiler, dst, 0, src, 0);
956 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
957 FAIL_IF(!inst);
958 *inst++ = GROUP_0F;
959 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
961 else {
962 if (dst != src)
963 EMIT_MOV(compiler, dst, 0, src, 0);
964 if (sign) {
965 /* shl reg, 24 */
966 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
967 FAIL_IF(!inst);
968 *inst |= SHL;
969 /* sar reg, 24 */
970 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
971 FAIL_IF(!inst);
972 *inst |= SAR;
974 else {
975 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
976 FAIL_IF(!inst);
977 *(inst + 1) |= AND;
980 return SLJIT_SUCCESS;
982 #endif
983 else {
984 /* src can be a memory address, or a register with reg_map[src] < 4 on x86-32. */
985 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
986 FAIL_IF(!inst);
987 *inst++ = GROUP_0F;
988 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
991 if (dst & SLJIT_MEM) {
992 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
993 if (dst_r == TMP_REG1) {
994 /* Find an unused register whose reg_map value is < 4. */
995 if ((dst & REG_MASK) == SLJIT_R0) {
996 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
997 work_r = SLJIT_R2;
998 else
999 work_r = SLJIT_R1;
1001 else {
1002 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1003 work_r = SLJIT_R0;
1004 else if ((dst & REG_MASK) == SLJIT_R1)
1005 work_r = SLJIT_R2;
1006 else
1007 work_r = SLJIT_R1;
1010 if (work_r == SLJIT_R0) {
1011 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1013 else {
1014 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1015 FAIL_IF(!inst);
1016 *inst = XCHG_r_rm;
1019 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1020 FAIL_IF(!inst);
1021 *inst = MOV_rm8_r8;
1023 if (work_r == SLJIT_R0) {
1024 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1026 else {
1027 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1028 FAIL_IF(!inst);
1029 *inst = XCHG_r_rm;
1032 else {
1033 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1034 FAIL_IF(!inst);
1035 *inst = MOV_rm8_r8;
1037 #else
1038 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1039 FAIL_IF(!inst);
1040 *inst = MOV_rm8_r8;
1041 #endif
1044 return SLJIT_SUCCESS;
1047 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1048 sljit_s32 src, sljit_sw srcw)
1050 sljit_u8* inst;
1052 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1053 compiler->mode32 = 1;
1054 #endif
1056 inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1057 FAIL_IF(!inst);
1058 *inst++ = GROUP_0F;
1059 *inst++ = PREFETCH;
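/* The ModRM reg field selects the locality hint: /1 = prefetcht0,
   /2 = prefetcht1, /3 = prefetcht2. */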
1061 if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
1062 *inst |= (3 << 3);
1063 else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
1064 *inst |= (2 << 3);
1065 else
1066 *inst |= (1 << 3);
1068 return SLJIT_SUCCESS;
1071 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1072 sljit_s32 dst, sljit_sw dstw,
1073 sljit_s32 src, sljit_sw srcw)
1075 sljit_u8* inst;
1076 sljit_s32 dst_r;
1078 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1079 compiler->mode32 = 0;
1080 #endif
1082 if (src & SLJIT_IMM) {
1083 if (FAST_IS_REG(dst)) {
1084 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1085 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1086 #else
1087 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1088 FAIL_IF(!inst);
1089 *inst = MOV_rm_i32;
1090 return SLJIT_SUCCESS;
1091 #endif
1093 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1094 FAIL_IF(!inst);
1095 *inst = MOV_rm_i32;
1096 return SLJIT_SUCCESS;
1099 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1101 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1102 dst_r = src;
1103 else {
1104 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1105 FAIL_IF(!inst);
1106 *inst++ = GROUP_0F;
1107 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1110 if (dst & SLJIT_MEM) {
1111 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1112 FAIL_IF(!inst);
1113 *inst = MOV_rm_r;
1116 return SLJIT_SUCCESS;
1119 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1120 sljit_s32 dst, sljit_sw dstw,
1121 sljit_s32 src, sljit_sw srcw)
1123 sljit_u8* inst;
1125 if (dst == src && dstw == srcw) {
1126 /* Same input and output */
1127 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1128 FAIL_IF(!inst);
1129 *inst++ = GROUP_F7;
1130 *inst |= opcode;
1131 return SLJIT_SUCCESS;
1134 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
1135 dst = TMP_REG1;
1137 if (FAST_IS_REG(dst)) {
1138 EMIT_MOV(compiler, dst, 0, src, srcw);
1139 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1140 FAIL_IF(!inst);
1141 *inst++ = GROUP_F7;
1142 *inst |= opcode;
1143 return SLJIT_SUCCESS;
1146 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1147 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1148 FAIL_IF(!inst);
1149 *inst++ = GROUP_F7;
1150 *inst |= opcode;
1151 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1152 return SLJIT_SUCCESS;
1155 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1156 sljit_s32 dst, sljit_sw dstw,
1157 sljit_s32 src, sljit_sw srcw)
1159 sljit_u8* inst;
1161 if (dst == SLJIT_UNUSED)
1162 dst = TMP_REG1;
1164 if (FAST_IS_REG(dst)) {
1165 EMIT_MOV(compiler, dst, 0, src, srcw);
1166 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1167 FAIL_IF(!inst);
1168 *inst++ = GROUP_F7;
1169 *inst |= NOT_rm;
1170 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1171 FAIL_IF(!inst);
1172 *inst = OR_r_rm;
1173 return SLJIT_SUCCESS;
1176 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1177 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1178 FAIL_IF(!inst);
1179 *inst++ = GROUP_F7;
1180 *inst |= NOT_rm;
1181 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1182 FAIL_IF(!inst);
1183 *inst = OR_r_rm;
1184 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1185 return SLJIT_SUCCESS;
1188 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1189 static const sljit_sw emit_clz_arg = 32 + 31;
1190 #endif
1192 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1193 sljit_s32 dst, sljit_sw dstw,
1194 sljit_s32 src, sljit_sw srcw)
1196 sljit_u8* inst;
1197 sljit_s32 dst_r;
1199 SLJIT_UNUSED_ARG(op_flags);
1201 if (cpu_has_cmov == -1)
1202 get_cpu_features();
1204 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
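/* CLZ is computed as bsr(src) ^ 31 (^ 63 for 64 bit operands). BSR leaves its
   destination undefined for a zero source, so that case is first patched to
   32 + 31 (or 64 + 63) with CMOV or a generic fallback; the final xor then
   yields 32 (or 64). */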
1206 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1207 FAIL_IF(!inst);
1208 *inst++ = GROUP_0F;
1209 *inst = BSR_r_rm;
1211 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1212 if (cpu_has_cmov) {
1213 if (dst_r != TMP_REG1) {
1214 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1215 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1217 else
1218 inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1220 FAIL_IF(!inst);
1221 *inst++ = GROUP_0F;
1222 *inst = CMOVE_r_rm;
1224 else
1225 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1227 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1228 #else
1229 if (cpu_has_cmov) {
1230 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
1232 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1233 FAIL_IF(!inst);
1234 *inst++ = GROUP_0F;
1235 *inst = CMOVE_r_rm;
1237 else
1238 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
1240 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1241 #endif
1243 FAIL_IF(!inst);
1244 *(inst + 1) |= XOR;
1246 if (dst & SLJIT_MEM)
1247 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1248 return SLJIT_SUCCESS;
1251 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1252 sljit_s32 dst, sljit_sw dstw,
1253 sljit_s32 src, sljit_sw srcw)
1255 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1256 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1257 sljit_s32 dst_is_ereg = 0;
1258 #endif
1260 CHECK_ERROR();
1261 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1262 ADJUST_LOCAL_OFFSET(dst, dstw);
1263 ADJUST_LOCAL_OFFSET(src, srcw);
1265 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1266 CHECK_EXTRA_REGS(src, srcw, (void)0);
1267 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1268 compiler->mode32 = op_flags & SLJIT_I32_OP;
1269 #endif
1271 if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
1272 if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
1273 return emit_prefetch(compiler, op, src, srcw);
1274 return SLJIT_SUCCESS;
1277 op = GET_OPCODE(op);
1279 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1280 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1281 compiler->mode32 = 0;
1282 #endif
1284 if (FAST_IS_REG(src) && src == dst) {
1285 if (!TYPE_CAST_NEEDED(op))
1286 return SLJIT_SUCCESS;
1289 if (op_flags & SLJIT_I32_OP) {
1290 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1291 if (src & SLJIT_MEM) {
1292 if (op == SLJIT_MOV_S32)
1293 op = SLJIT_MOV_U32;
1295 else if (src & SLJIT_IMM) {
1296 if (op == SLJIT_MOV_U32)
1297 op = SLJIT_MOV_S32;
1299 #endif
1302 if (src & SLJIT_IMM) {
1303 switch (op) {
1304 case SLJIT_MOV_U8:
1305 srcw = (sljit_u8)srcw;
1306 break;
1307 case SLJIT_MOV_S8:
1308 srcw = (sljit_s8)srcw;
1309 break;
1310 case SLJIT_MOV_U16:
1311 srcw = (sljit_u16)srcw;
1312 break;
1313 case SLJIT_MOV_S16:
1314 srcw = (sljit_s16)srcw;
1315 break;
1316 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1317 case SLJIT_MOV_U32:
1318 srcw = (sljit_u32)srcw;
1319 break;
1320 case SLJIT_MOV_S32:
1321 srcw = (sljit_s32)srcw;
1322 break;
1323 #endif
1325 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1326 if (SLJIT_UNLIKELY(dst_is_ereg))
1327 return emit_mov(compiler, dst, dstw, src, srcw);
1328 #endif
1331 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1332 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1333 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1334 dst = TMP_REG1;
1336 #endif
1338 switch (op) {
1339 case SLJIT_MOV:
1340 case SLJIT_MOV_P:
1341 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1342 case SLJIT_MOV_U32:
1343 case SLJIT_MOV_S32:
1344 #endif
1345 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1346 break;
1347 case SLJIT_MOV_U8:
1348 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1349 break;
1350 case SLJIT_MOV_S8:
1351 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1352 break;
1353 case SLJIT_MOV_U16:
1354 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1355 break;
1356 case SLJIT_MOV_S16:
1357 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1358 break;
1359 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1360 case SLJIT_MOV_U32:
1361 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1362 break;
1363 case SLJIT_MOV_S32:
1364 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1365 break;
1366 #endif
1369 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1370 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1371 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1372 #endif
1373 return SLJIT_SUCCESS;
1376 switch (op) {
1377 case SLJIT_NOT:
1378 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1379 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1380 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1382 case SLJIT_NEG:
1383 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1385 case SLJIT_CLZ:
1386 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1389 return SLJIT_SUCCESS;
1392 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1394 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1395 if (IS_HALFWORD(immw) || compiler->mode32) { \
1396 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1397 FAIL_IF(!inst); \
1398 *(inst + 1) |= (op_imm); \
1400 else { \
1401 FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
1402 inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1403 FAIL_IF(!inst); \
1404 *inst = (op_mr); \
1407 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1408 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1410 #else
1412 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1413 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1414 FAIL_IF(!inst); \
1415 *(inst + 1) |= (op_imm);
1417 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1418 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1420 #endif
1422 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1423 sljit_u32 op_types,
1424 sljit_s32 dst, sljit_sw dstw,
1425 sljit_s32 src1, sljit_sw src1w,
1426 sljit_s32 src2, sljit_sw src2w)
1428 sljit_u8* inst;
1429 sljit_u8 op_eax_imm = (op_types >> 24);
1430 sljit_u8 op_rm = (op_types >> 16) & 0xff;
1431 sljit_u8 op_mr = (op_types >> 8) & 0xff;
1432 sljit_u8 op_imm = op_types & 0xff;
1434 if (dst == SLJIT_UNUSED) {
1435 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1436 if (src2 & SLJIT_IMM) {
1437 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1439 else {
1440 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1441 FAIL_IF(!inst);
1442 *inst = op_rm;
1444 return SLJIT_SUCCESS;
1447 if (dst == src1 && dstw == src1w) {
1448 if (src2 & SLJIT_IMM) {
1449 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1450 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1451 #else
1452 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1453 #endif
1454 BINARY_EAX_IMM(op_eax_imm, src2w);
1456 else {
1457 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1460 else if (FAST_IS_REG(dst)) {
1461 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1462 FAIL_IF(!inst);
1463 *inst = op_rm;
1465 else if (FAST_IS_REG(src2)) {
1466 /* Special exception for sljit_emit_op_flags. */
1467 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1468 FAIL_IF(!inst);
1469 *inst = op_mr;
1471 else {
1472 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1473 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1474 FAIL_IF(!inst);
1475 *inst = op_mr;
1477 return SLJIT_SUCCESS;
1480 /* Only for cumulative operations. */
1481 if (dst == src2 && dstw == src2w) {
1482 if (src1 & SLJIT_IMM) {
1483 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1484 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1485 #else
1486 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1487 #endif
1488 BINARY_EAX_IMM(op_eax_imm, src1w);
1490 else {
1491 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1494 else if (FAST_IS_REG(dst)) {
1495 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1496 FAIL_IF(!inst);
1497 *inst = op_rm;
1499 else if (FAST_IS_REG(src1)) {
1500 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1501 FAIL_IF(!inst);
1502 *inst = op_mr;
1504 else {
1505 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1506 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1507 FAIL_IF(!inst);
1508 *inst = op_mr;
1510 return SLJIT_SUCCESS;
1513 /* General version. */
1514 if (FAST_IS_REG(dst)) {
1515 EMIT_MOV(compiler, dst, 0, src1, src1w);
1516 if (src2 & SLJIT_IMM) {
1517 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1519 else {
1520 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1521 FAIL_IF(!inst);
1522 *inst = op_rm;
1525 else {
1526 /* This version requires fewer memory writes. */
1527 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1528 if (src2 & SLJIT_IMM) {
1529 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1531 else {
1532 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1533 FAIL_IF(!inst);
1534 *inst = op_rm;
1536 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1539 return SLJIT_SUCCESS;
1542 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1543 sljit_u32 op_types,
1544 sljit_s32 dst, sljit_sw dstw,
1545 sljit_s32 src1, sljit_sw src1w,
1546 sljit_s32 src2, sljit_sw src2w)
1548 sljit_u8* inst;
1549 sljit_u8 op_eax_imm = (op_types >> 24);
1550 sljit_u8 op_rm = (op_types >> 16) & 0xff;
1551 sljit_u8 op_mr = (op_types >> 8) & 0xff;
1552 sljit_u8 op_imm = op_types & 0xff;
1554 if (dst == SLJIT_UNUSED) {
1555 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1556 if (src2 & SLJIT_IMM) {
1557 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1559 else {
1560 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1561 FAIL_IF(!inst);
1562 *inst = op_rm;
1564 return SLJIT_SUCCESS;
1567 if (dst == src1 && dstw == src1w) {
1568 if (src2 & SLJIT_IMM) {
1569 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1570 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1571 #else
1572 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1573 #endif
1574 BINARY_EAX_IMM(op_eax_imm, src2w);
1576 else {
1577 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1580 else if (FAST_IS_REG(dst)) {
1581 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1582 FAIL_IF(!inst);
1583 *inst = op_rm;
1585 else if (FAST_IS_REG(src2)) {
1586 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1587 FAIL_IF(!inst);
1588 *inst = op_mr;
1590 else {
1591 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1592 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1593 FAIL_IF(!inst);
1594 *inst = op_mr;
1596 return SLJIT_SUCCESS;
1599 /* General version. */
1600 if (FAST_IS_REG(dst) && dst != src2) {
1601 EMIT_MOV(compiler, dst, 0, src1, src1w);
1602 if (src2 & SLJIT_IMM) {
1603 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1605 else {
1606 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1607 FAIL_IF(!inst);
1608 *inst = op_rm;
1611 else {
1612 /* This version requires fewer memory writes. */
1613 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1614 if (src2 & SLJIT_IMM) {
1615 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1617 else {
1618 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1619 FAIL_IF(!inst);
1620 *inst = op_rm;
1622 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1625 return SLJIT_SUCCESS;
1628 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1629 sljit_s32 dst, sljit_sw dstw,
1630 sljit_s32 src1, sljit_sw src1w,
1631 sljit_s32 src2, sljit_sw src2w)
1633 sljit_u8* inst;
1634 sljit_s32 dst_r;
1636 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1638 /* Register destination. */
1639 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1640 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1641 FAIL_IF(!inst);
1642 *inst++ = GROUP_0F;
1643 *inst = IMUL_r_rm;
1645 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1646 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1647 FAIL_IF(!inst);
1648 *inst++ = GROUP_0F;
1649 *inst = IMUL_r_rm;
1651 else if (src1 & SLJIT_IMM) {
1652 if (src2 & SLJIT_IMM) {
1653 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1654 src2 = dst_r;
1655 src2w = 0;
1658 if (src1w <= 127 && src1w >= -128) {
1659 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1660 FAIL_IF(!inst);
1661 *inst = IMUL_r_rm_i8;
1662 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1663 FAIL_IF(!inst);
1664 INC_SIZE(1);
1665 *inst = (sljit_s8)src1w;
1667 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1668 else {
1669 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1670 FAIL_IF(!inst);
1671 *inst = IMUL_r_rm_i32;
1672 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1673 FAIL_IF(!inst);
1674 INC_SIZE(4);
1675 sljit_unaligned_store_sw(inst, src1w);
1677 #else
1678 else if (IS_HALFWORD(src1w)) {
1679 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1680 FAIL_IF(!inst);
1681 *inst = IMUL_r_rm_i32;
1682 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1683 FAIL_IF(!inst);
1684 INC_SIZE(4);
1685 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1687 else {
1688 if (dst_r != src2)
1689 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1690 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1691 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1692 FAIL_IF(!inst);
1693 *inst++ = GROUP_0F;
1694 *inst = IMUL_r_rm;
1696 #endif
1698 else if (src2 & SLJIT_IMM) {
1699 /* Note: src1 is NOT immediate. */
1701 if (src2w <= 127 && src2w >= -128) {
1702 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1703 FAIL_IF(!inst);
1704 *inst = IMUL_r_rm_i8;
1705 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1706 FAIL_IF(!inst);
1707 INC_SIZE(1);
1708 *inst = (sljit_s8)src2w;
1710 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1711 else {
1712 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1713 FAIL_IF(!inst);
1714 *inst = IMUL_r_rm_i32;
1715 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1716 FAIL_IF(!inst);
1717 INC_SIZE(4);
1718 sljit_unaligned_store_sw(inst, src2w);
1720 #else
1721 else if (IS_HALFWORD(src2w)) {
1722 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1723 FAIL_IF(!inst);
1724 *inst = IMUL_r_rm_i32;
1725 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1726 FAIL_IF(!inst);
1727 INC_SIZE(4);
1728 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1730 else {
1731 if (dst_r != src1)
1732 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1733 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1734 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1735 FAIL_IF(!inst);
1736 *inst++ = GROUP_0F;
1737 *inst = IMUL_r_rm;
1739 #endif
1741 else {
1742 /* Neither argument is immediate. */
1743 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1744 dst_r = TMP_REG1;
1745 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1746 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1747 FAIL_IF(!inst);
1748 *inst++ = GROUP_0F;
1749 *inst = IMUL_r_rm;
1752 if (dst & SLJIT_MEM)
1753 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1755 return SLJIT_SUCCESS;
1758 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1759 sljit_s32 dst, sljit_sw dstw,
1760 sljit_s32 src1, sljit_sw src1w,
1761 sljit_s32 src2, sljit_sw src2w)
1763 sljit_u8* inst;
1764 sljit_s32 dst_r, done = 0;
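/* Adding two registers, or a register and an immediate, can be emitted as a
   single LEA, which leaves the flags untouched and allows a destination
   different from both sources. */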
1766 /* These cases are better left to be handled the normal way. */
1767 if (dst == src1 && dstw == src1w)
1768 return SLJIT_ERR_UNSUPPORTED;
1769 if (dst == src2 && dstw == src2w)
1770 return SLJIT_ERR_UNSUPPORTED;
1772 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1774 if (FAST_IS_REG(src1)) {
1775 if (FAST_IS_REG(src2)) {
1776 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1777 FAIL_IF(!inst);
1778 *inst = LEA_r_m;
1779 done = 1;
1781 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1782 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1783 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1784 #else
1785 if (src2 & SLJIT_IMM) {
1786 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1787 #endif
1788 FAIL_IF(!inst);
1789 *inst = LEA_r_m;
1790 done = 1;
1793 else if (FAST_IS_REG(src2)) {
1794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1795 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1796 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1797 #else
1798 if (src1 & SLJIT_IMM) {
1799 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1800 #endif
1801 FAIL_IF(!inst);
1802 *inst = LEA_r_m;
1803 done = 1;
1807 if (done) {
1808 if (dst_r == TMP_REG1)
1809 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1810 return SLJIT_SUCCESS;
1812 return SLJIT_ERR_UNSUPPORTED;
1815 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1816 sljit_s32 src1, sljit_sw src1w,
1817 sljit_s32 src2, sljit_sw src2w)
1819 sljit_u8* inst;
1821 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1822 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1823 #else
1824 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1825 #endif
1826 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1827 return SLJIT_SUCCESS;
1830 if (FAST_IS_REG(src1)) {
1831 if (src2 & SLJIT_IMM) {
1832 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1834 else {
1835 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1836 FAIL_IF(!inst);
1837 *inst = CMP_r_rm;
1839 return SLJIT_SUCCESS;
1842 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1843 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1844 FAIL_IF(!inst);
1845 *inst = CMP_rm_r;
1846 return SLJIT_SUCCESS;
1849 if (src2 & SLJIT_IMM) {
1850 if (src1 & SLJIT_IMM) {
1851 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1852 src1 = TMP_REG1;
1853 src1w = 0;
1855 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1857 else {
1858 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1859 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1860 FAIL_IF(!inst);
1861 *inst = CMP_r_rm;
1863 return SLJIT_SUCCESS;
1866 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1867 sljit_s32 src1, sljit_sw src1w,
1868 sljit_s32 src2, sljit_sw src2w)
1870 sljit_u8* inst;
1872 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1873 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1874 #else
1875 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1876 #endif
1877 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1878 return SLJIT_SUCCESS;
1881 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1882 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1883 #else
1884 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1885 #endif
1886 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1887 return SLJIT_SUCCESS;
1890 if (!(src1 & SLJIT_IMM)) {
1891 if (src2 & SLJIT_IMM) {
1892 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1893 if (IS_HALFWORD(src2w) || compiler->mode32) {
1894 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1895 FAIL_IF(!inst);
1896 *inst = GROUP_F7;
1898 else {
1899 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
1900 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
1901 FAIL_IF(!inst);
1902 *inst = TEST_rm_r;
1904 #else
1905 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1906 FAIL_IF(!inst);
1907 *inst = GROUP_F7;
1908 #endif
1909 return SLJIT_SUCCESS;
1911 else if (FAST_IS_REG(src1)) {
1912 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1913 FAIL_IF(!inst);
1914 *inst = TEST_rm_r;
1915 return SLJIT_SUCCESS;
1919 if (!(src2 & SLJIT_IMM)) {
1920 if (src1 & SLJIT_IMM) {
1921 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1922 if (IS_HALFWORD(src1w) || compiler->mode32) {
1923 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1924 FAIL_IF(!inst);
1925 *inst = GROUP_F7;
1927 else {
1928 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
1929 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1930 FAIL_IF(!inst);
1931 *inst = TEST_rm_r;
1933 #else
1934 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1935 FAIL_IF(!inst);
1936 *inst = GROUP_F7;
1937 #endif
1938 return SLJIT_SUCCESS;
1940 else if (FAST_IS_REG(src2)) {
1941 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1942 FAIL_IF(!inst);
1943 *inst = TEST_rm_r;
1944 return SLJIT_SUCCESS;
1948 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1949 if (src2 & SLJIT_IMM) {
1950 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1951 if (IS_HALFWORD(src2w) || compiler->mode32) {
1952 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1953 FAIL_IF(!inst);
1954 *inst = GROUP_F7;
1956 else {
1957 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1958 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1959 FAIL_IF(!inst);
1960 *inst = TEST_rm_r;
1962 #else
1963 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1964 FAIL_IF(!inst);
1965 *inst = GROUP_F7;
1966 #endif
1968 else {
1969 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1970 FAIL_IF(!inst);
1971 *inst = TEST_rm_r;
1973 return SLJIT_SUCCESS;
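/* Variable shift counts must be in ecx (SLJIT_PREF_SHIFT_REG); the cases below
   shuffle values through TMP_REG1 (and the stack on x86-32) so that ecx can hold
   the count while its previous value is preserved. */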
1976 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
1977 sljit_u8 mode,
1978 sljit_s32 dst, sljit_sw dstw,
1979 sljit_s32 src1, sljit_sw src1w,
1980 sljit_s32 src2, sljit_sw src2w)
1982 sljit_u8* inst;
1984 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
1985 if (dst == src1 && dstw == src1w) {
1986 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
1987 FAIL_IF(!inst);
1988 *inst |= mode;
1989 return SLJIT_SUCCESS;
1991 if (dst == SLJIT_UNUSED) {
1992 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1993 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
1994 FAIL_IF(!inst);
1995 *inst |= mode;
1996 return SLJIT_SUCCESS;
1998 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
1999 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2000 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2001 FAIL_IF(!inst);
2002 *inst |= mode;
2003 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2004 return SLJIT_SUCCESS;
2006 if (FAST_IS_REG(dst)) {
2007 EMIT_MOV(compiler, dst, 0, src1, src1w);
2008 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2009 FAIL_IF(!inst);
2010 *inst |= mode;
2011 return SLJIT_SUCCESS;
2014 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2015 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2016 FAIL_IF(!inst);
2017 *inst |= mode;
2018 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2019 return SLJIT_SUCCESS;
2022 if (dst == SLJIT_PREF_SHIFT_REG) {
2023 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2024 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2025 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2026 FAIL_IF(!inst);
2027 *inst |= mode;
2028 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2030 else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2031 if (src1 != dst)
2032 EMIT_MOV(compiler, dst, 0, src1, src1w);
2033 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2034 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2035 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2036 FAIL_IF(!inst);
2037 *inst |= mode;
2038 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2040 else {
2041 /* This case is complex since ecx itself may be used for
2042 addressing, and that addressing form must be supported as well. */
2043 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2044 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2045 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2046 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2047 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2048 FAIL_IF(!inst);
2049 *inst |= mode;
2050 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2051 #else
2052 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055 FAIL_IF(!inst);
2056 *inst |= mode;
2057 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2058 #endif
2059 if (dst != SLJIT_UNUSED)
2060 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2063 return SLJIT_SUCCESS;
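/* Shift that must also produce flags. A zero immediate count degrades to a MOV
   or an OR with 0, and a variable count is combined with an explicit compare
   against 0, because the shift itself may leave the flags untouched. */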
2066 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2067 sljit_u8 mode, sljit_s32 set_flags,
2068 sljit_s32 dst, sljit_sw dstw,
2069 sljit_s32 src1, sljit_sw src1w,
2070 sljit_s32 src2, sljit_sw src2w)
2072 /* The CPU does not modify the flags when the shift count is 0. */
2073 if (src2 & SLJIT_IMM) {
2074 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2075 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2076 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2077 #else
2078 if ((src2w & 0x1f) != 0)
2079 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2080 #endif
2081 if (!set_flags)
2082 return emit_mov(compiler, dst, dstw, src1, src1w);
2083 /* OR dst, src, 0 */
2084 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2085 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2088 if (!set_flags)
2089 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2091 if (!FAST_IS_REG(dst))
2092 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2094 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2096 if (FAST_IS_REG(dst))
2097 return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
2098 return SLJIT_SUCCESS;
2101 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2102 sljit_s32 dst, sljit_sw dstw,
2103 sljit_s32 src1, sljit_sw src1w,
2104 sljit_s32 src2, sljit_sw src2w)
2106 CHECK_ERROR();
2107 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2108 ADJUST_LOCAL_OFFSET(dst, dstw);
2109 ADJUST_LOCAL_OFFSET(src1, src1w);
2110 ADJUST_LOCAL_OFFSET(src2, src2w);
2112 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2113 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2114 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2115 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2116 compiler->mode32 = op & SLJIT_I32_OP;
2117 #endif
2119 if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
2120 return SLJIT_SUCCESS;
2122 switch (GET_OPCODE(op)) {
2123 case SLJIT_ADD:
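/* Without flag requirements ADD can be emitted as LEA, which has a three
   operand form and does not modify the flags. */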
2124 if (!HAS_FLAGS(op)) {
2125 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2126 return compiler->error;
2128 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2129 dst, dstw, src1, src1w, src2, src2w);
2130 case SLJIT_ADDC:
2131 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2132 dst, dstw, src1, src1w, src2, src2w);
2133 case SLJIT_SUB:
2134 if (!HAS_FLAGS(op)) {
2135 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2136 return compiler->error;
2139 if (dst == SLJIT_UNUSED)
2140 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2141 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2142 dst, dstw, src1, src1w, src2, src2w);
2143 case SLJIT_SUBC:
2144 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2145 dst, dstw, src1, src1w, src2, src2w);
2146 case SLJIT_MUL:
2147 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2148 case SLJIT_AND:
2149 if (dst == SLJIT_UNUSED)
2150 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2151 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2152 dst, dstw, src1, src1w, src2, src2w);
2153 case SLJIT_OR:
2154 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2155 dst, dstw, src1, src1w, src2, src2w);
2156 case SLJIT_XOR:
2157 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2158 dst, dstw, src1, src1w, src2, src2w);
2159 case SLJIT_SHL:
2160 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2161 dst, dstw, src1, src1w, src2, src2w);
2162 case SLJIT_LSHR:
2163 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2164 dst, dstw, src1, src1w, src2, src2w);
2165 case SLJIT_ASHR:
2166 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2167 dst, dstw, src1, src1w, src2, src2w);
2170 return SLJIT_SUCCESS;
2173 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2175 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2176 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
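/* R3-R8 are virtual (stack based) registers on x86-32, so they have no machine register index. */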
2177 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2178 return -1;
2179 #endif
2180 return reg_map[reg];
2183 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2185 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2186 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2187 return reg;
2188 #else
2189 return freg_map[reg];
2190 #endif
2193 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2194 void *instruction, sljit_s32 size)
2196 sljit_u8 *inst;
2198 CHECK_ERROR();
2199 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2201 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2202 FAIL_IF(!inst);
2203 INC_SIZE(size);
2204 SLJIT_MEMCPY(inst, instruction, size);
2205 return SLJIT_SUCCESS;
2208 /* --------------------------------------------------------------------- */
2209 /* Floating point operators */
2210 /* --------------------------------------------------------------------- */
2212 /* 3 words of alignment padding + 4 constants of 16 bytes each. */
2213 static sljit_s32 sse2_data[3 + (4 * 4)];
2214 static sljit_s32 *sse2_buffer;
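/* Constants used by SLJIT_NEG_F64 and SLJIT_ABS_F64: sign bit masks for XORPD
   and inverted sign masks for ANDPD, in single and double precision variants. */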
2216 static void init_compiler(void)
2218 /* Align to 16 bytes. */
2219 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2221 /* Single precision constants (each constant is 16 bytes long). */
2222 sse2_buffer[0] = 0x80000000;
2223 sse2_buffer[4] = 0x7fffffff;
2224 /* Double precision constants (each constant is 16 bytes long). */
2225 sse2_buffer[8] = 0;
2226 sse2_buffer[9] = 0x80000000;
2227 sse2_buffer[12] = 0xffffffff;
2228 sse2_buffer[13] = 0x7fffffff;
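/* Emits a 0F-prefixed scalar SSE2 instruction; the F3/F2 prefix selects the
   single or double precision form. */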
2231 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2232 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2234 sljit_u8 *inst;
2236 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2237 FAIL_IF(!inst);
2238 *inst++ = GROUP_0F;
2239 *inst = opcode;
2240 return SLJIT_SUCCESS;
2243 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2244 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2246 sljit_u8 *inst;
2248 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2249 FAIL_IF(!inst);
2250 *inst++ = GROUP_0F;
2251 *inst = opcode;
2252 return SLJIT_SUCCESS;
2255 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2256 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2258 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2261 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2262 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2264 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
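/* Float to integer conversion: CVTTSD2SI / CVTTSS2SI truncates toward zero;
   a memory destination goes through TMP_REG1. */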
2267 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2268 sljit_s32 dst, sljit_sw dstw,
2269 sljit_s32 src, sljit_sw srcw)
2271 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2272 sljit_u8 *inst;
2274 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2275 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2276 compiler->mode32 = 0;
2277 #endif
2279 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2280 FAIL_IF(!inst);
2281 *inst++ = GROUP_0F;
2282 *inst = CVTTSD2SI_r_xm;
2284 if (dst & SLJIT_MEM)
2285 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2286 return SLJIT_SUCCESS;
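/* Integer to float conversion with CVTSI2SD / CVTSI2SS; an immediate source is
   first loaded into TMP_REG1 since the instruction has no immediate form. */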
2289 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2290 sljit_s32 dst, sljit_sw dstw,
2291 sljit_s32 src, sljit_sw srcw)
2293 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2294 sljit_u8 *inst;
2296 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2297 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2298 compiler->mode32 = 0;
2299 #endif
2301 if (src & SLJIT_IMM) {
2302 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2303 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2304 srcw = (sljit_s32)srcw;
2305 #endif
2306 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2307 src = TMP_REG1;
2308 srcw = 0;
2311 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2312 FAIL_IF(!inst);
2313 *inst++ = GROUP_0F;
2314 *inst = CVTSI2SD_x_rm;
2316 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2317 compiler->mode32 = 1;
2318 #endif
2319 if (dst_r == TMP_FREG)
2320 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2321 return SLJIT_SUCCESS;
2324 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2325 sljit_s32 src1, sljit_sw src1w,
2326 sljit_s32 src2, sljit_sw src2w)
2328 if (!FAST_IS_REG(src1)) {
2329 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2330 src1 = TMP_FREG;
2333 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2336 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2337 sljit_s32 dst, sljit_sw dstw,
2338 sljit_s32 src, sljit_sw srcw)
2340 sljit_s32 dst_r;
2342 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2343 compiler->mode32 = 1;
2344 #endif
2346 CHECK_ERROR();
2347 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2349 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2350 if (FAST_IS_REG(dst))
2351 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2352 if (FAST_IS_REG(src))
2353 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2354 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2355 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2358 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2359 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2360 if (FAST_IS_REG(src)) {
2361 /* We overwrite the high bits of the source register. From the SLJIT
2362 point of view, this is not an issue.
2363 Note: In SSE3, MOVDDUP and MOVSLDUP could also be used. */
2364 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2366 else {
2367 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2368 src = TMP_FREG;
2371 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2372 if (dst_r == TMP_FREG)
2373 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2374 return SLJIT_SUCCESS;
2377 if (FAST_IS_REG(dst)) {
2378 dst_r = dst;
2379 if (dst != src)
2380 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2382 else {
2383 dst_r = TMP_FREG;
2384 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2387 switch (GET_OPCODE(op)) {
2388 case SLJIT_NEG_F64:
2389 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2390 break;
2392 case SLJIT_ABS_F64:
2393 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2394 break;
2397 if (dst_r == TMP_FREG)
2398 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2399 return SLJIT_SUCCESS;
2402 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2403 sljit_s32 dst, sljit_sw dstw,
2404 sljit_s32 src1, sljit_sw src1w,
2405 sljit_s32 src2, sljit_sw src2w)
2407 sljit_s32 dst_r;
2409 CHECK_ERROR();
2410 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2411 ADJUST_LOCAL_OFFSET(dst, dstw);
2412 ADJUST_LOCAL_OFFSET(src1, src1w);
2413 ADJUST_LOCAL_OFFSET(src2, src2w);
2415 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2416 compiler->mode32 = 1;
2417 #endif
2419 if (FAST_IS_REG(dst)) {
2420 dst_r = dst;
2421 if (dst == src1)
2422 ; /* Do nothing here. */
2423 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2424 /* Swap arguments. */
2425 src2 = src1;
2426 src2w = src1w;
2428 else if (dst != src2)
2429 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2430 else {
2431 dst_r = TMP_FREG;
2432 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2435 else {
2436 dst_r = TMP_FREG;
2437 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2440 switch (GET_OPCODE(op)) {
2441 case SLJIT_ADD_F64:
2442 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2443 break;
2445 case SLJIT_SUB_F64:
2446 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2447 break;
2449 case SLJIT_MUL_F64:
2450 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2451 break;
2453 case SLJIT_DIV_F64:
2454 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2455 break;
2458 if (dst_r == TMP_FREG)
2459 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2460 return SLJIT_SUCCESS;
2463 /* --------------------------------------------------------------------- */
2464 /* Conditional instructions */
2465 /* --------------------------------------------------------------------- */
2467 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2469 sljit_u8 *inst;
2470 struct sljit_label *label;
2472 CHECK_ERROR_PTR();
2473 CHECK_PTR(check_sljit_emit_label(compiler));
2475 if (compiler->last_label && compiler->last_label->size == compiler->size)
2476 return compiler->last_label;
2478 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2479 PTR_FAIL_IF(!label);
2480 set_label(label, compiler);
2482 inst = (sljit_u8*)ensure_buf(compiler, 2);
2483 PTR_FAIL_IF(!inst);
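/* A record starting with a zero size byte marks a label (0), a constant (1)
   or a jump (type + 2) for the code generation pass. */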
2485 *inst++ = 0;
2486 *inst++ = 0;
2488 return label;
2491 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2493 sljit_u8 *inst;
2494 struct sljit_jump *jump;
2496 CHECK_ERROR_PTR();
2497 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2499 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2500 PTR_FAIL_IF_NULL(jump);
2501 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2502 type &= 0xff;
2504 /* Worst case size. */
2505 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2506 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2507 #else
2508 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2509 #endif
2511 inst = (sljit_u8*)ensure_buf(compiler, 2);
2512 PTR_FAIL_IF_NULL(inst);
2514 *inst++ = 0;
2515 *inst++ = type + 2;
2516 return jump;
2519 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2521 sljit_u8 *inst;
2522 struct sljit_jump *jump;
2524 CHECK_ERROR();
2525 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2526 ADJUST_LOCAL_OFFSET(src, srcw);
2528 CHECK_EXTRA_REGS(src, srcw, (void)0);
2530 if (src == SLJIT_IMM) {
2531 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2532 FAIL_IF_NULL(jump);
2533 set_jump(jump, compiler, JUMP_ADDR);
2534 jump->u.target = srcw;
2536 /* Worst case size. */
2537 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2538 compiler->size += 5;
2539 #else
2540 compiler->size += 10 + 3;
2541 #endif
2543 inst = (sljit_u8*)ensure_buf(compiler, 2);
2544 FAIL_IF_NULL(inst);
2546 *inst++ = 0;
2547 *inst++ = type + 2;
2549 else {
2550 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2551 /* REX_W is not necessary (src is not immediate). */
2552 compiler->mode32 = 1;
2553 #endif
2554 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2555 FAIL_IF(!inst);
2556 *inst++ = GROUP_FF;
2557 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2559 return SLJIT_SUCCESS;
2562 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2563 sljit_s32 dst, sljit_sw dstw,
2564 sljit_s32 type)
2566 sljit_u8 *inst;
2567 sljit_u8 cond_set = 0;
2568 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2569 sljit_s32 reg;
2570 #endif
2571 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2572 sljit_s32 dst_save = dst;
2573 sljit_sw dstw_save = dstw;
2575 CHECK_ERROR();
2576 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2578 ADJUST_LOCAL_OFFSET(dst, dstw);
2579 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2581 type &= 0xff;
2582 /* setcc = jcc + 0x10. */
2583 cond_set = get_jump_code(type) + 0x10;
2585 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
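/* Fast path: when only the OR result is needed in a register, the flag byte
   can be OR-ed directly into the destination. */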
2586 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2587 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2588 FAIL_IF(!inst);
2589 INC_SIZE(4 + 3);
2590 /* Set low register to conditional flag. */
2591 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2592 *inst++ = GROUP_0F;
2593 *inst++ = cond_set;
2594 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2595 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2596 *inst++ = OR_rm8_r8;
2597 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2598 return SLJIT_SUCCESS;
2601 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2603 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2604 FAIL_IF(!inst);
2605 INC_SIZE(4 + 4);
2606 /* Set low register to conditional flag. */
2607 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2608 *inst++ = GROUP_0F;
2609 *inst++ = cond_set;
2610 *inst++ = MOD_REG | reg_lmap[reg];
2611 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2612 /* The movzx instruction does not affect flags. */
2613 *inst++ = GROUP_0F;
2614 *inst++ = MOVZX_r_rm8;
2615 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2617 if (reg != TMP_REG1)
2618 return SLJIT_SUCCESS;
2620 if (GET_OPCODE(op) < SLJIT_ADD) {
2621 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2622 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2625 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2626 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2627 compiler->skip_checks = 1;
2628 #endif
2629 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2631 #else
2632 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2633 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2634 if (reg_map[dst] <= 4) {
2635 /* Low byte is accessible. */
2636 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2637 FAIL_IF(!inst);
2638 INC_SIZE(3 + 3);
2639 /* Set low byte to conditional flag. */
2640 *inst++ = GROUP_0F;
2641 *inst++ = cond_set;
2642 *inst++ = MOD_REG | reg_map[dst];
2644 *inst++ = GROUP_0F;
2645 *inst++ = MOVZX_r_rm8;
2646 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2647 return SLJIT_SUCCESS;
2650 /* Low byte is not accessible. */
2651 if (cpu_has_cmov == -1)
2652 get_cpu_features();
2654 if (cpu_has_cmov) {
2655 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2656 /* A "xor reg, reg" would zero the register but also overwrite the flags. */
2657 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2659 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2660 FAIL_IF(!inst);
2661 INC_SIZE(3);
2663 *inst++ = GROUP_0F;
2664 /* cmovcc = setcc - 0x50. */
2665 *inst++ = cond_set - 0x50;
2666 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2667 return SLJIT_SUCCESS;
2670 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2671 FAIL_IF(!inst);
2672 INC_SIZE(1 + 3 + 3 + 1);
2673 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2674 /* Set al to conditional flag. */
2675 *inst++ = GROUP_0F;
2676 *inst++ = cond_set;
2677 *inst++ = MOD_REG | 0 /* eax */;
2679 *inst++ = GROUP_0F;
2680 *inst++ = MOVZX_r_rm8;
2681 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2682 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2683 return SLJIT_SUCCESS;
2686 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2687 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2689 if (dst != SLJIT_R0) {
2690 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2691 FAIL_IF(!inst);
2692 INC_SIZE(1 + 3 + 2 + 1);
2693 /* Set low register to conditional flag. */
2694 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2695 *inst++ = GROUP_0F;
2696 *inst++ = cond_set;
2697 *inst++ = MOD_REG | 0 /* eax */;
2698 *inst++ = OR_rm8_r8;
2699 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2700 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2702 else {
2703 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2704 FAIL_IF(!inst);
2705 INC_SIZE(2 + 3 + 2 + 2);
2706 /* Set low register to conditional flag. */
2707 *inst++ = XCHG_r_rm;
2708 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2709 *inst++ = GROUP_0F;
2710 *inst++ = cond_set;
2711 *inst++ = MOD_REG | 1 /* ecx */;
2712 *inst++ = OR_rm8_r8;
2713 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2714 *inst++ = XCHG_r_rm;
2715 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2717 return SLJIT_SUCCESS;
2720 /* Store the conditional flag as a 0/1 value in TMP_REG1. */
2721 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2722 FAIL_IF(!inst);
2723 INC_SIZE(1 + 3 + 3 + 1);
2724 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2725 /* Set al to conditional flag. */
2726 *inst++ = GROUP_0F;
2727 *inst++ = cond_set;
2728 *inst++ = MOD_REG | 0 /* eax */;
2730 *inst++ = GROUP_0F;
2731 *inst++ = MOVZX_r_rm8;
2732 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2734 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2736 if (GET_OPCODE(op) < SLJIT_ADD)
2737 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2739 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2740 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2741 compiler->skip_checks = 1;
2742 #endif
2743 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2744 #endif /* SLJIT_CONFIG_X86_64 */
2747 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2748 sljit_s32 dst_reg,
2749 sljit_s32 src, sljit_sw srcw)
2751 sljit_u8* inst;
2753 CHECK_ERROR();
2754 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2756 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2757 dst_reg &= ~SLJIT_I32_OP;
2759 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2760 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2761 #else
2762 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2763 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2764 #endif
2766 /* ADJUST_LOCAL_OFFSET is not needed. */
2767 CHECK_EXTRA_REGS(src, srcw, (void)0);
2769 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2770 compiler->mode32 = dst_reg & SLJIT_I32_OP;
2771 dst_reg &= ~SLJIT_I32_OP;
2772 #endif
2774 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2775 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2776 src = TMP_REG1;
2777 srcw = 0;
2780 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2781 FAIL_IF(!inst);
2782 *inst++ = GROUP_0F;
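/* cmovcc = jcc - 0x40. */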
2783 *inst = get_jump_code(type & 0xff) - 0x40;
2784 return SLJIT_SUCCESS;
2787 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2789 CHECK_ERROR();
2790 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2791 ADJUST_LOCAL_OFFSET(dst, dstw);
2793 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2795 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2796 compiler->mode32 = 0;
2797 #endif
2799 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2801 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2802 if (NOT_HALFWORD(offset)) {
2803 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2804 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2805 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2806 return compiler->error;
2807 #else
2808 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2809 #endif
2811 #endif
2813 if (offset != 0)
2814 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2815 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2818 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2820 sljit_u8 *inst;
2821 struct sljit_const *const_;
2822 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2823 sljit_s32 reg;
2824 #endif
2826 CHECK_ERROR_PTR();
2827 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2828 ADJUST_LOCAL_OFFSET(dst, dstw);
2830 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2832 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2833 PTR_FAIL_IF(!const_);
2834 set_const(const_, compiler);
2836 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2837 compiler->mode32 = 0;
2838 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
2840 if (emit_load_imm64(compiler, reg, init_value))
2841 return NULL;
2842 #else
2843 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2844 return NULL;
2845 #endif
2847 inst = (sljit_u8*)ensure_buf(compiler, 2);
2848 PTR_FAIL_IF(!inst);
2850 *inst++ = 0;
2851 *inst++ = 1;
2853 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2854 if (dst & SLJIT_MEM)
2855 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2856 return NULL;
2857 #endif
2859 return const_;
2862 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2864 SLJIT_UNUSED_ARG(executable_offset);
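/* On x86-32 the target is stored as an offset relative to the end of the 4 byte
   operand; on x86-64 the absolute address is written into the 64 bit immediate
   of the corresponding mov. */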
2865 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2866 sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
2867 #else
2868 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
2869 #endif
2872 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2874 SLJIT_UNUSED_ARG(executable_offset);
2875 sljit_unaligned_store_sw((void*)addr, new_constant);