 * Stack-less Just-In-Time compiler
 *
 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 *    conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 *    of conditions and the following disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
    return "x86" SLJIT_CPUINFO " ABI:fastcall";
    return "x86" SLJIT_CPUINFO;
   8 - R8   - From now on, the REX prefix is required
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
    0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
#define CHECK_EXTRA_REGS(p, w, do) \
    if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
        if (p <= compiler->scratches) \
            w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
        else \
            w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
        p = SLJIT_MEM1(SLJIT_SP); \
#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1    (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2    (SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
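/* Background for the note above (general x86-64 encoding rules, not specific to this file):
   in a ModRM byte an r/m field of 0b100 normally requests a following SIB byte, and with
   mod == 00 an r/m field of 0b101 selects disp32/RIP-relative addressing. Since the low
   three bits of r12 and r13 are 100 and 101, using them as base registers costs an extra
   SIB or displacement byte, so they are kept out of the most frequently used slots. */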
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS    0x0010
#define EX86_SHIFT_INS  0x0020
#define EX86_REX        0x0040
#define EX86_NO_REXW    0x0080
#define EX86_BYTE_ARG   0x0100
#define EX86_HALF_ARG   0x0200
#define EX86_PREF_66    0x0400
#define EX86_PREF_F2    0x0800
#define EX86_PREF_F3    0x1000
#define EX86_SSE2_OP1   0x2000
#define EX86_SSE2_OP2   0x4000
#define EX86_SSE2       (EX86_SSE2_OP1 | EX86_SSE2_OP2)
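/* Usage sketch, based on the call sites later in this file: the low bits of the size
   argument of emit_x86_instruction() carry the opcode length (1 or 2 bytes) and these
   EX86_* flags are OR-ed on top of it. For example, 2 | EX86_PREF_F2 | EX86_SSE2 (see
   emit_sse2() below) requests an F2-prefixed, 0F-escaped scalar-double SSE2 form, while
   1 | EX86_BIN_INS marks a one-byte ALU opcode that may instead be emitted through the
   0x81/0x83 immediate groups. */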
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */
#define ADD             (/* BINARY */ 0 << 3)
#define ADD_EAX_i32     0x05
#define ADD_r_rm        0x03
#define ADD_rm_r        0x01
#define ADDSD_x_xm      0x58
#define ADC             (/* BINARY */ 2 << 3)
#define ADC_EAX_i32     0x15
#define ADC_r_rm        0x13
#define ADC_rm_r        0x11
#define AND             (/* BINARY */ 4 << 3)
#define AND_EAX_i32     0x25
#define AND_r_rm        0x23
#define AND_rm_r        0x21
#define ANDPD_x_xm      0x54
#define BSR_r_rm        (/* GROUP_0F */ 0xbd)
#define CALL_i32        0xe8
#define CALL_rm         (/* GROUP_FF */ 2 << 3)
#define CMOVE_r_rm      (/* GROUP_0F */ 0x44)
#define CMP             (/* BINARY */ 7 << 3)
#define CMP_EAX_i32     0x3d
#define CMP_r_rm        0x3b
#define CMP_rm_r        0x39
#define CVTPD2PS_x_xm   0x5a
#define CVTSI2SD_x_rm   0x2a
#define CVTTSD2SI_r_xm  0x2c
#define DIV             (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm      0x5e
#define IDIV            (/* GROUP_F7 */ 7 << 3)
#define IMUL            (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm       (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8    0x6b
#define IMUL_r_rm_i32   0x69
#define JMP_rm          (/* GROUP_FF */ 4 << 3)
#define MOV_r_rm        0x8b
#define MOV_r_i32       0xb8
#define MOV_rm_r        0x89
#define MOV_rm_i32      0xc7
#define MOV_rm8_i8      0xc6
#define MOV_rm8_r8      0x88
#define MOVSD_x_xm      0x10
#define MOVSD_xm_x      0x11
#define MOVSXD_r_rm     0x63
#define MOVSX_r_rm8     (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16    (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8     (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16    (/* GROUP_0F */ 0xb7)
#define MUL             (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm      0x59
#define NEG_rm          (/* GROUP_F7 */ 3 << 3)
#define NOT_rm          (/* GROUP_F7 */ 2 << 3)
#define OR              (/* BINARY */ 1 << 3)
#define OR_EAX_i32      0x0d
#define OR_rm8_r8       0x08
#define PREFETCH        0x18
#define PUSH_i32        0x68
#define PUSH_rm         (/* GROUP_FF */ 6 << 3)
#define RET_near        0xc3
#define SBB             (/* BINARY */ 3 << 3)
#define SBB_EAX_i32     0x1d
#define SBB_r_rm        0x1b
#define SBB_rm_r        0x19
#define SAR             (/* SHIFT */ 7 << 3)
#define SHL             (/* SHIFT */ 4 << 3)
#define SHR             (/* SHIFT */ 5 << 3)
#define SUB             (/* BINARY */ 5 << 3)
#define SUB_EAX_i32     0x2d
#define SUB_r_rm        0x2b
#define SUB_rm_r        0x29
#define SUBSD_x_xm      0x5c
#define TEST_EAX_i32    0xa9
#define TEST_rm_r       0x85
#define UCOMISD_x_xm    0x2e
#define UNPCKLPD_x_xm   0x14
#define XCHG_EAX_r      0x90
#define XCHG_r_rm       0x87
#define XOR             (/* BINARY */ 6 << 3)
#define XOR_EAX_i32     0x35
#define XOR_r_rm        0x33
#define XOR_rm_r        0x31
#define XORPD_x_xm      0x57

#define GROUP_0F        0x0f
#define GROUP_F7        0xf7
#define GROUP_FF        0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1   0xd1
#define GROUP_SHIFT_N   0xc1
#define GROUP_SHIFT_CL  0xd3
#define MOD_DISP8       0x40

#define INC_SIZE(s)     (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)     (*inst++ = (PUSH_r + (r)))
#define POP_REG(r)      (*inst++ = (POP_r + (r)))
#define RET()           (*inst++ = (RET_near))
#define RET_I16(n)      (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)

#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
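/* The second byte emitted by MOV_RM is a standard ModRM byte: mod in bits 7-6, reg in
   bits 5-3, r/m in bits 2-0. For example MOV_RM(0x3, 0, 5) emits the bytes 0x8b 0xc5,
   i.e. "mov eax, ebp" (mod == 3 selects the register-direct form). */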
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different threads
   if they detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
static sljit_s32 cpu_has_cmov = -1;
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
/******************************************************/
/*    Unaligned-store functions                        */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
    SLJIT_MEMCPY(addr, &value, sizeof(value));

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
    SLJIT_MEMCPY(addr, &value, sizeof(value));

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
    SLJIT_MEMCPY(addr, &value, sizeof(value));
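/* Why memcpy: x86 tolerates unaligned accesses, but storing through a cast pointer would
   be undefined behaviour in C. With a compile-time constant size, SLJIT_MEMCPY (normally
   plain memcpy) is typically folded by mainstream compilers into a single unaligned store,
   so these helpers usually cost nothing at run time. */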
/******************************************************/
/*    Utility functions                                */
/******************************************************/

static void get_cpu_features(void)
#if defined(_MSC_VER) && _MSC_VER >= 1400
    features = (sljit_u32)CPUInfo[3];
#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    /* On x86-32, there is no red zone, so this
       should work (no need for a local variable). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        : "%eax", "%ecx", "%edx"
        : "%rax", "%rbx", "%rcx", "%rdx"
#else /* _MSC_VER && _MSC_VER >= 1400 */
#endif /* _MSC_VER && _MSC_VER >= 1400 */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    cpu_has_sse2 = (features >> 26) & 0x1;
    cpu_has_cmov = (features >> 15) & 0x1;
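/* The feature word above is EDX of CPUID leaf 1: bit 26 reports SSE2 support and
   bit 15 reports CMOV/FCMOV support, which is why exactly those bits are tested here. */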
static sljit_u8 get_jump_code(sljit_s32 type)
    case SLJIT_EQUAL_F64:
        return 0x84 /* je */;

    case SLJIT_NOT_EQUAL:
    case SLJIT_NOT_EQUAL_F64:
        return 0x85 /* jne */;

        return 0x82 /* jc */;

    case SLJIT_GREATER_EQUAL:
    case SLJIT_GREATER_EQUAL_F64:
        return 0x83 /* jae */;

    case SLJIT_GREATER_F64:
        return 0x87 /* jnbe */;

    case SLJIT_LESS_EQUAL:
    case SLJIT_LESS_EQUAL_F64:
        return 0x86 /* jbe */;

        return 0x8c /* jl */;

    case SLJIT_SIG_GREATER_EQUAL:
        return 0x8d /* jnl */;

    case SLJIT_SIG_GREATER:
        return 0x8f /* jnle */;

    case SLJIT_SIG_LESS_EQUAL:
        return 0x8e /* jle */;

    case SLJIT_MUL_OVERFLOW:
        return 0x80 /* jo */;

    case SLJIT_NOT_OVERFLOW:
    case SLJIT_MUL_NOT_OVERFLOW:
        return 0x81 /* jno */;

    case SLJIT_UNORDERED_F64:
        return 0x8a /* jp */;

    case SLJIT_ORDERED_F64:
        return 0x8b /* jpo */;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
    sljit_s32 type = jump->flags >> TYPE_SHIFT;
    sljit_s32 short_jump;

    if (jump->flags & JUMP_LABEL)
        label_addr = (sljit_uw)(code + jump->u.label->size);
        label_addr = jump->u.target - executable_offset;

    short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
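    /* The "+ 2" models the size of the short jump form (one opcode byte plus an 8-bit
       displacement): the rel8 offset is relative to the end of the instruction, so the
       target has to lie within [-128, 127] bytes of jump->addr + 2 for the short form. */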
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
        return generate_far_jump_code(jump, code_ptr);

    if (type == SLJIT_JUMP) {
            *code_ptr++ = JMP_i8;
            *code_ptr++ = JMP_i32;
    else if (type >= SLJIT_FAST_CALL) {
        *code_ptr++ = CALL_i32;
    else if (short_jump) {
        *code_ptr++ = get_jump_code(type) - 0x10;
        *code_ptr++ = GROUP_0F;
        *code_ptr++ = get_jump_code(type);

        jump->flags |= PATCH_MB;
        code_ptr += sizeof(sljit_s8);
        jump->flags |= PATCH_MW;
        code_ptr += sizeof(sljit_s32);
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    struct sljit_memory_fragment *buf;
    sljit_sw executable_offset;
    struct sljit_label *label;
    struct sljit_jump *jump;
    struct sljit_const *const_;
    struct sljit_put_label *put_label;
    CHECK_PTR(check_sljit_generate_code(compiler));
    reverse_buf(compiler);

    /* Second code generation pass. */
    code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
    PTR_FAIL_WITH_EXEC_IF(code);

    label = compiler->labels;
    jump = compiler->jumps;
    const_ = compiler->consts;
    put_label = compiler->put_labels;
    executable_offset = SLJIT_EXEC_OFFSET(code);
        buf_ptr = buf->memory;
        buf_end = buf_ptr + buf->used_size;
            /* The code is already generated. */
            SLJIT_MEMCPY(code_ptr, buf_ptr, len);
                label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
                label->size = code_ptr - code;
                jump->addr = (sljit_uw)code_ptr;
                if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
                    code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                    code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
                    code_ptr = generate_far_jump_code(jump, code_ptr);
                const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
                const_ = const_->next;
                SLJIT_ASSERT(*buf_ptr == 3);
                SLJIT_ASSERT(put_label->label);
                put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
                put_label = put_label->next;
        } while (buf_ptr < buf_end);
        SLJIT_ASSERT(buf_ptr == buf_end);

    SLJIT_ASSERT(!label);
    SLJIT_ASSERT(!const_);
    SLJIT_ASSERT(!put_label);
    SLJIT_ASSERT(code_ptr <= code + compiler->size);
    jump = compiler->jumps;
        jump_addr = jump->addr + executable_offset;

        if (jump->flags & PATCH_MB) {
            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
        } else if (jump->flags & PATCH_MW) {
            if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        else if (jump->flags & PATCH_MD)
            sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
    put_label = compiler->put_labels;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
        if (put_label->flags & PATCH_MD) {
            SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
            sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
            SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
            sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
        put_label = put_label->next;
    compiler->error = SLJIT_ERR_COMPILED;
    compiler->executable_offset = executable_offset;
    compiler->executable_size = code_ptr - code;
    return (void*)(code + executable_offset);
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
    switch (feature_type) {
#ifdef SLJIT_IS_FPU_AVAILABLE
        return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        if (cpu_has_sse2 == -1)
#else /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    case SLJIT_HAS_VIRTUAL_REGISTERS:
        if (cpu_has_cmov == -1)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        if (cpu_has_sse2 == -1)
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */

#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
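/* Worked example using the opcode table above: BINARY_OPCODE(ADD) evaluates to
   (0x05 << 24) | (0x03 << 16) | (0x01 << 8) | 0x00 == 0x05030100, i.e. the EAX+imm32
   form, the r <- r/m form, the r/m <- r form and the /digit for the 0x81/0x83 immediate
   group, packed into one word that emit_cum_binary()/emit_non_cum_binary() unpack below. */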
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
    FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
    sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
    sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#include "sljitNativeX86_64.c"
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
    SLJIT_ASSERT(dst != SLJIT_UNUSED);

    if (FAST_IS_REG(src)) {
        inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
        return SLJIT_SUCCESS;

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
            if (!compiler->mode32) {
                if (NOT_HALFWORD(srcw))
                    return emit_load_imm64(compiler, dst, srcw);
            return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
            /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
               an immediate directly into memory, so TMP_REG1 can be used. */
            FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
            return SLJIT_SUCCESS;

        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
        return SLJIT_SUCCESS;

    if (FAST_IS_REG(dst)) {
        inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
        return SLJIT_SUCCESS;

    /* Memory-to-memory move. Only the SLJIT_MOV operation copies
       data from memory to memory, so TMP_REG1 can be used. */
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    return SLJIT_SUCCESS;
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    CHECK(check_sljit_emit_op0(compiler, op));

    switch (GET_OPCODE(op)) {
    case SLJIT_BREAKPOINT:
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
    case SLJIT_DIVMOD_UW:
    case SLJIT_DIVMOD_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            reg_map[SLJIT_R0] == 0
            && reg_map[SLJIT_R1] == 2
            && reg_map[TMP_REG1] > 7);
            reg_map[SLJIT_R0] == 0
            && reg_map[SLJIT_R1] < 7
            && reg_map[TMP_REG1] == 2);
        compiler->mode32 = op & SLJIT_I32_OP;
        SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

        if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
            inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);

        if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            if (compiler->mode32) {
                inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
                inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
        *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
        size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
        size = (!compiler->mode32) ? 3 : 2;
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);

        if (!compiler->mode32)
            *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
        else if (op >= SLJIT_DIVMOD_UW)
        *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
        if (!compiler->mode32)
        *inst = MOD_REG | reg_map[SLJIT_R1];
    case SLJIT_DIVMOD_UW:
    case SLJIT_DIVMOD_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
        if (op <= SLJIT_DIVMOD_SW)
            EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
        if (op >= SLJIT_DIV_UW)
            EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
    return SLJIT_SUCCESS;
#define ENCODE_PREFIX(prefix) \
    inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 0;

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
            return SLJIT_SUCCESS;
        inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
        return SLJIT_SUCCESS;

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (reg_map[src] >= 4) {
            SLJIT_ASSERT(dst_r == TMP_REG1);
            EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
        /* Both src and dst are registers. */
        SLJIT_ASSERT(SLOW_IS_REG(dst));
        if (reg_map[dst] < 4) {
            EMIT_MOV(compiler, dst, 0, src, 0);
            inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
            *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
            EMIT_MOV(compiler, dst, 0, src, 0);
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
            inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
        return SLJIT_SUCCESS;
        /* src can be a memory address, or reg_map[src] < 4 on the x86-32 architecture. */
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
        *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;

    if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (dst_r == TMP_REG1) {
            /* Find an unused register whose reg_map[] value is < 4. */
            if ((dst & REG_MASK) == SLJIT_R0) {
                if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
                if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
            else if ((dst & REG_MASK) == SLJIT_R1)

            if (work_r == SLJIT_R0) {
                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
            inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);

            if (work_r == SLJIT_R0) {
                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
        inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);

    return SLJIT_SUCCESS;
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
    sljit_s32 src, sljit_sw srcw)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 1;

    inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);

    if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
    else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)

    return SLJIT_SUCCESS;
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 0;

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
            return SLJIT_SUCCESS;
        inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
        return SLJIT_SUCCESS;

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
        *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;

    if (dst & SLJIT_MEM) {
        inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);

    return SLJIT_SUCCESS;
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
    if (dst == src && dstw == srcw) {
        /* Same input and output. */
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
        return SLJIT_SUCCESS;

    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))

    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
        return SLJIT_SUCCESS;

    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
    if (dst == SLJIT_UNUSED)

    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
        inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
        return SLJIT_SUCCESS;

    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
    SLJIT_UNUSED_ARG(op_flags);

    if (cpu_has_cmov == -1)

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (dst_r != TMP_REG1) {
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
        FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
        EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
        FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);

    if (dst & SLJIT_MEM)
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
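/* How the sequence above computes CLZ: BSR returns the index of the highest set bit, so
   for a non-zero input XOR-ing that index with 31 (or 63 in 64-bit mode) yields the
   leading-zero count. BSR leaves its destination undefined for a zero input, which is why
   the destination is first set (via CMOV or the memory operand) to 32 + 31 (resp. 64 + 63):
   after the final XOR that value becomes exactly 32 (resp. 64), the correct result for 0. */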
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
    sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    sljit_s32 dst_is_ereg = 0;

    CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src, srcw);

    CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
    CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = op_flags & SLJIT_I32_OP;

    if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
        if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
            return emit_prefetch(compiler, op, src, srcw);
        return SLJIT_SUCCESS;

    op = GET_OPCODE(op);

    if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        compiler->mode32 = 0;

        if (FAST_IS_REG(src) && src == dst) {
            if (!TYPE_CAST_NEEDED(op))
                return SLJIT_SUCCESS;

        if (op_flags & SLJIT_I32_OP) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if (src & SLJIT_MEM) {
                if (op == SLJIT_MOV_S32)
            else if (src & SLJIT_IMM) {
                if (op == SLJIT_MOV_U32)

        if (src & SLJIT_IMM) {
                srcw = (sljit_u8)srcw;
                srcw = (sljit_s8)srcw;
                srcw = (sljit_u16)srcw;
                srcw = (sljit_s16)srcw;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
                srcw = (sljit_u32)srcw;
                srcw = (sljit_s32)srcw;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            if (SLJIT_UNLIKELY(dst_is_ereg))
                return emit_mov(compiler, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
            SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
            FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
            FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
            FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
            FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
            FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
            return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;

        if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
            return emit_not_with_flags(compiler, dst, dstw, src, srcw);
        return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
        return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
        return emit_clz(compiler, op_flags, dst, dstw, src, srcw);

    return SLJIT_SUCCESS;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    if (IS_HALFWORD(immw) || compiler->mode32) { \
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
        *(inst + 1) |= (op_imm); \
        FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
        inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \

#define BINARY_EAX_IMM(op_eax_imm, immw) \
    FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
    *(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
    FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    sljit_u8 op_eax_imm = (op_types >> 24);
    sljit_u8 op_rm = (op_types >> 16) & 0xff;
    sljit_u8 op_mr = (op_types >> 8) & 0xff;
    sljit_u8 op_imm = op_types & 0xff;

    if (dst == SLJIT_UNUSED) {
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
        return SLJIT_SUCCESS;

    if (dst == src1 && dstw == src1w) {
        if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
                BINARY_EAX_IMM(op_eax_imm, src2w);
                BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
        else if (FAST_IS_REG(src2)) {
            /* Special exception for sljit_emit_op_flags. */
            inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
            EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
        return SLJIT_SUCCESS;

    /* Only for cumulative operations. */
    if (dst == src2 && dstw == src2w) {
        if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
                BINARY_EAX_IMM(op_eax_imm, src1w);
                BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
        else if (FAST_IS_REG(src1)) {
            inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
            EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
        return SLJIT_SUCCESS;

    /* General version. */
    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
            inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
        /* This version requires fewer memory writes. */
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

    return SLJIT_SUCCESS;
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    sljit_u8 op_eax_imm = (op_types >> 24);
    sljit_u8 op_rm = (op_types >> 16) & 0xff;
    sljit_u8 op_mr = (op_types >> 8) & 0xff;
    sljit_u8 op_imm = op_types & 0xff;

    if (dst == SLJIT_UNUSED) {
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
        return SLJIT_SUCCESS;

    if (dst == src1 && dstw == src1w) {
        if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
                BINARY_EAX_IMM(op_eax_imm, src2w);
                BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
        else if (FAST_IS_REG(src2)) {
            inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
            EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
        return SLJIT_SUCCESS;

    /* General version. */
    if (FAST_IS_REG(dst) && dst != src2) {
        EMIT_MOV(compiler, dst, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
            inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
        /* This version requires fewer memory writes. */
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

    return SLJIT_SUCCESS;
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

    /* Register destination. */
    if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
    else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
    else if (src1 & SLJIT_IMM) {
        if (src2 & SLJIT_IMM) {
            EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);

        if (src1w <= 127 && src1w >= -128) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            *inst = IMUL_r_rm_i8;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            *inst = (sljit_s8)src1w;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            sljit_unaligned_store_sw(inst, src1w);
        else if (IS_HALFWORD(src1w)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
            EMIT_MOV(compiler, dst_r, 0, src2, src2w);
            FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
    else if (src2 & SLJIT_IMM) {
        /* Note: src1 is NOT immediate. */

        if (src2w <= 127 && src2w >= -128) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            *inst = IMUL_r_rm_i8;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            *inst = (sljit_s8)src2w;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            sljit_unaligned_store_sw(inst, src2w);
        else if (IS_HALFWORD(src2w)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
            EMIT_MOV(compiler, dst_r, 0, src1, src1w);
            FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);

    /* Neither argument is immediate. */
    if (ADDRESSING_DEPENDS_ON(src2, dst_r))
    EMIT_MOV(compiler, dst_r, 0, src1, src1w);
    inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);

    if (dst & SLJIT_MEM)
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

    return SLJIT_SUCCESS;
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    sljit_s32 dst_r, done = 0;

    /* These cases are better left to be handled the normal way. */
    if (dst == src1 && dstw == src1w)
        return SLJIT_ERR_UNSUPPORTED;
    if (dst == src2 && dstw == src2w)
        return SLJIT_ERR_UNSUPPORTED;

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if (FAST_IS_REG(src1)) {
        if (FAST_IS_REG(src2)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
        if (src2 & SLJIT_IMM) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
    else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
        if (src1 & SLJIT_IMM) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);

        if (dst_r == TMP_REG1)
            return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
    return SLJIT_ERR_UNSUPPORTED;
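/* Why LEA is used here: LEA evaluates an addressing expression (base + index, or
   base + displacement) without touching the status flags and with a destination that can
   differ from both sources, so it can stand in for a flag-less three-operand add. This is
   why sljit_emit_op2() below only tries emit_lea_binary() when HAS_FLAGS(op) is false,
   and handles SUB-with-immediate by passing the negated constant. */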
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
        BINARY_EAX_IMM(CMP_EAX_i32, src2w);
        return SLJIT_SUCCESS;

    if (FAST_IS_REG(src1)) {
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
            inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
        return SLJIT_SUCCESS;

    if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
        inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
        return SLJIT_SUCCESS;

    if (src2 & SLJIT_IMM) {
        if (src1 & SLJIT_IMM) {
            EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    return SLJIT_SUCCESS;
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
    if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
        BINARY_EAX_IMM(TEST_EAX_i32, src2w);
        return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
    if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
        BINARY_EAX_IMM(TEST_EAX_i32, src1w);
        return SLJIT_SUCCESS;

    if (!(src1 & SLJIT_IMM)) {
        if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if (IS_HALFWORD(src2w) || compiler->mode32) {
                inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
                FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
                inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
            return SLJIT_SUCCESS;
        else if (FAST_IS_REG(src1)) {
            inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
            return SLJIT_SUCCESS;

    if (!(src2 & SLJIT_IMM)) {
        if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if (IS_HALFWORD(src1w) || compiler->mode32) {
                inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
                FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
                inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
            inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
            return SLJIT_SUCCESS;
        else if (FAST_IS_REG(src2)) {
            inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
            return SLJIT_SUCCESS;

    EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
    if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if (IS_HALFWORD(src2w) || compiler->mode32) {
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
            FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
            inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
        inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
    return SLJIT_SUCCESS;
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
        if (dst == src1 && dstw == src1w) {
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
            return SLJIT_SUCCESS;
        if (dst == SLJIT_UNUSED) {
            EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
            return SLJIT_SUCCESS;
        if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
            EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
            EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
            return SLJIT_SUCCESS;
        if (FAST_IS_REG(dst)) {
            EMIT_MOV(compiler, dst, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
            return SLJIT_SUCCESS;

        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;

    if (dst == SLJIT_PREF_SHIFT_REG) {
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
    else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
        EMIT_MOV(compiler, dst, 0, src1, src1w);
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
        /* This case is complex, since ecx itself may be used for
           addressing, and that case must be supported as well. */
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
        EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
        inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
        EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);

        if (dst != SLJIT_UNUSED)
            return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

    return SLJIT_SUCCESS;
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
    sljit_u8 mode, sljit_s32 set_flags,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    /* The CPU does not set flags if the shift count is 0. */
    if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
            return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
        if ((src2w & 0x1f) != 0)
            return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
            return emit_mov(compiler, dst, dstw, src1, src1w);
        /* OR dst, src, 0 */
        return emit_cum_binary(compiler, BINARY_OPCODE(OR),
            dst, dstw, src1, src1w, SLJIT_IMM, 0);

        return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

    if (!FAST_IS_REG(dst))
        FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

    FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

    if (FAST_IS_REG(dst))
        return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
    return SLJIT_SUCCESS;
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
    CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src1, src1w);
    ADJUST_LOCAL_OFFSET(src2, src2w);

    CHECK_EXTRA_REGS(dst, dstw, (void)0);
    CHECK_EXTRA_REGS(src1, src1w, (void)0);
    CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = op & SLJIT_I32_OP;

    if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
        return SLJIT_SUCCESS;

    switch (GET_OPCODE(op)) {
        if (!HAS_FLAGS(op)) {
            if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
                return compiler->error;
        return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
            dst, dstw, src1, src1w, src2, src2w);
        if (!HAS_FLAGS(op)) {
            if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
                return compiler->error;
        if (dst == SLJIT_UNUSED)
            return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
        return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
        if (dst == SLJIT_UNUSED)
            return emit_test_binary(compiler, src1, src1w, src2, src2w);
        return emit_cum_binary(compiler, BINARY_OPCODE(AND),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_cum_binary(compiler, BINARY_OPCODE(OR),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
            dst, dstw, src1, src1w, src2, src2w);
        return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
            dst, dstw, src1, src1w, src2, src2w);

    return SLJIT_SUCCESS;
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
    CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
    return reg_map[reg];

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
    CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    return freg_map[reg];
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
    void *instruction, sljit_s32 size)
    CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

    inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
    SLJIT_MEMCPY(inst, instruction, size);
    return SLJIT_SUCCESS;
/* --------------------------------------------------------------------- */
/*  Floating point operators                                              */
/* --------------------------------------------------------------------- */

/* Alignment(3) + 4 * 16 bytes. */
static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;

static void init_compiler(void)
    /* Align to 16 bytes. */
    sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);

    /* Single precision constants (each constant is 16 bytes long). */
    sse2_buffer[0] = 0x80000000;
    sse2_buffer[4] = 0x7fffffff;
    /* Double precision constants (each constant is 16 bytes long). */
    sse2_buffer[9] = 0x80000000;
    sse2_buffer[12] = 0xffffffff;
    sse2_buffer[13] = 0x7fffffff;
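/* Layout after init_compiler(): the aligned buffer holds four 16-byte constants, used as
   sign and absolute-value bit masks (single-precision sign mask in words 0-3, single abs
   mask in words 4-7, double sign mask in words 8-11, double abs mask in words 12-15).
   Only the non-zero words need to be written because static data starts zeroed; the
   XORPD_x_xm and ANDPD_x_xm opcodes defined earlier suggest these masks back the float
   negate and abs operations. */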
static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
    sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
    inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
    return SLJIT_SUCCESS;

static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
    sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
    inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
    return SLJIT_SUCCESS;
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif
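	/* CVTTSD2SI (F2 0F 2C) / CVTTSS2SI (F3 0F 2C) converts with truncation toward zero.
	   On x86-64, clearing mode32 above makes emit_x86_instruction emit REX.W, so the
	   whole 64 bit result register is written for SLJIT_CONV_SW_FROM_F64. */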
	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = CVTTSD2SI_r_xm;

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}
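	/* CVTSI2SD (F2 0F 2A) / CVTSI2SS (F3 0F 2A) accepts a register or memory source but
	   has no immediate form, hence the move into TMP_REG1 above. */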
	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
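	/* UCOMISD (66 0F 2E) / UCOMISS (0F 2E) compares src1 with src2 and sets ZF, PF and
	   CF; the pref66 argument below selects the double precision form. */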
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of source. From SLJIT point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
	type &= 0xff;

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif
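	/* These sizes are upper bounds: a jmp rel32 is 5 bytes and a 0F 8x rel32 conditional
	   jump is 6 bytes on x86-32, while on x86-64 a target outside the rel32 range needs
	   a 10 byte mov r64, imm64 plus a 3 byte indirect jump, with 2 more bytes for the
	   short branch that skips this sequence when the jump is conditional. */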
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = 1;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = 1;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;
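	/* get_jump_code() returns the second opcode byte of the Jcc rel32 form (0x80 + cc);
	   SETcc is encoded as 0F (0x90 + cc), so adding 0x10 yields the setcc opcode, and
	   the 32 bit path below derives cmovcc (0x40 + cc) from it by subtracting 0x50. */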
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
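	/* Note: a REX prefix is emitted below even for encodings 0-7, because a byte
	   operation without REX would address AH..BH for encodings 4-7 instead of
	   SPL..DIL; REX_B / REX_R supply the extra encoding bit for r8-r15. */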
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else
	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	dst_reg &= ~SLJIT_I32_OP;

	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#else
	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#endif

	/* ADJUST_LOCAL_OFFSET is not needed. */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
	dst_reg &= ~SLJIT_I32_OP;
#endif

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
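	/* CMOVcc is encoded as 0F (0x40 + condition code), while get_jump_code() returns
	   the Jcc opcode byte (0x80 + condition code), hence the -0x40 below. */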
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 2;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
	sljit_uw start_size;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, 0))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
		return NULL;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM) {
		start_size = compiler->size;
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
		put_label->flags = compiler->size - start_size;
	}
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 3;

	return put_label;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
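	/* The patched location holds a rel32 displacement, relative to the end of the
	   4 byte immediate in the executable mapping (addr + 4 + executable_offset),
	   hence the subtraction below. */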
	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
	sljit_unaligned_store_sw((void*)addr, new_constant);
}