/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	return "x86" SLJIT_CPUINFO " ABI:fastcall";
#else
	return "x86" SLJIT_CPUINFO;
#endif
}
/* 8 - R8 - From now on REX prefix is required. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};
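
/* Illustrative note (assuming the usual SLJIT register numbering): reg_map is
   indexed by the SLJIT register number, so reg_map[SLJIT_R0] == 0 selects eax,
   reg_map[SLJIT_R1] == 2 selects edx and reg_map[SLJIT_R2] == 1 selects ecx.
   The zero entries in the middle belong to registers that have no hardware
   counterpart on x86-32; CHECK_EXTRA_REGS below rewrites such operands into
   SLJIT_MEM1(SLJIT_SP) stack accesses. */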
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		if (p <= compiler->scratches) \
			w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
		else \
			w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}
#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else /* _WIN64 */
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif /* _WIN64 */
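
/* Illustrative note (standard x86-64 encoding rule): the *_lmap tables hold
   reg_map & 0x7, i.e. the three bits that fit into the ModRM/SIB register
   fields; whether the REX.B/REX.R extension bit is also needed is decided
   separately from reg_map >= 8. */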
/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
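
/* Worked example (illustrative): IS_HALFWORD(0x12345678) holds, so such a value
   can be emitted as a sign-extended 32-bit immediate, while 0x123456789 fails
   the check and must first be loaded into a register (see the emit_load_imm64
   usage below). */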
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
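
/* Illustrative note (assumption: the flag meanings follow their names): these
   bits are OR-ed into the size argument of emit_x86_instruction(); e.g.
   "1 | EX86_BIN_INS" requests the 0x81/0x83 style binary-immediate encoding,
   while "2 | EX86_PREF_F2 | EX86_SSE2" requests a two-byte 0x0F opcode with an
   F2 prefix operating on XMM registers. */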
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */
#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm	(/* GROUP_FF */ 2 << 3)
#define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_EAX_i32	0x0d
#define OR_rm8_r8	0x08
#define PREFETCH	0x18
#define PUSH_i32	0x68
#define PUSH_rm	(/* GROUP_FF */ 6 << 3)
#define RET_near	0xc3
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3
#define MOD_DISP8	0x40

#define INC_SIZE(s)	(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)	(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)	(*inst++ = (POP_r + (r)))
#define RET()		(*inst++ = (RET_near))
#define RET_I16(n)	(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)

#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
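
/* Worked example (illustrative, standard ModRM encoding): MOV_RM(0x3, 2, 1)
   emits 0x8b 0xd1, i.e. "mov edx, ecx" - mod bits 11 select register-direct
   mode, the reg field (2) names the destination and the rm field (1) the source
   of the 0x8b (MOV r, r/m) form. */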
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can safely be overwritten by different
   threads if they detect the CPU features at the same time. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;
#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif
/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/
static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}
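
/* Illustrative note (typical compiler behaviour assumed): going through
   SLJIT_MEMCPY lets the code patcher write immediates and displacements into
   the instruction stream at arbitrary byte offsets without undefined-behaviour
   pointer casts; compilers reduce the fixed-size copy to a single unaligned
   store on x86. */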
/******************************************************/
/*    Utility functions                               */
/******************************************************/
static void get_cpu_features(void)
{
	sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On x86-32, there is no red zone, so this
	   should work (no need for a local variable). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif

#else /* _MSC_VER && _MSC_VER >= 1400 */

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
static sljit_u8 get_jump_code(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED_F64:
		return 0x8a /* jp */;

	case SLJIT_ORDERED_F64:
		return 0x8b /* jpo */;
	}
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
#endif
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
{
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target - executable_offset;

	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	}
	else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return code_ptr;
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_sw executable_offset;
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	executable_offset = SLJIT_EXEC_OFFSET(code);

		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;

				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);

					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
#else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
#endif
					}
				else if (*buf_ptr == 0) {
					label->addr = ((sljit_uw)code_ptr) + executable_offset;
					label->size = code_ptr - code;
				}
				else { /* *buf_ptr is 1 */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
		jump_addr = jump->addr + executable_offset;

		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

	/* Some space may be wasted because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = code_ptr - code;
	return (void*)(code + executable_offset);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
#ifdef SLJIT_IS_FPU_AVAILABLE
		return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_has_sse2 == -1)
#else /* SLJIT_DETECT_SSE2 */
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	case SLJIT_HAS_VIRTUAL_REGISTERS:

		if (cpu_has_cmov == -1)

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_has_sse2 == -1)
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
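
/* Illustrative example (values taken from the defines above): BINARY_OPCODE(ADD)
   packs ADD_EAX_i32 (0x05), ADD_r_rm (0x03), ADD_rm_r (0x01) and the ADD group
   code into one 32-bit constant, which emit_cum_binary() and
   emit_non_cum_binary() later unpack into op_eax_imm, op_rm, op_mr and op_imm. */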
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages that belong to the requested stack
	   space, whose size is passed in local_size. This is necessary on Windows,
	   where the stack can only grow in 4k steps. However, this function just
	   burns CPU cycles if the stack is already large enough. Unfortunately,
	   you don't know that in advance, so it must always be called. I think
	   this is a bad design in general, even if it has its reasons. */
	*(volatile sljit_s32*)alloca(local_size) = 0;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_ASSERT(dst != SLJIT_UNUSED);

	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only SLJIT_MOV operation copies
			   an immediate directly into memory so TMP_REG1 can be used. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Only SLJIT_MOV operation copies
	   data from memory to memory so TMP_REG1 can be used. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
		compiler->mode32 = op & SLJIT_I32_OP;

		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);

		if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			if (compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
		size = (!compiler->mode32) ? 3 : 2;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
		if (!compiler->mode32)
		*inst = MOD_REG | reg_map[SLJIT_R1];
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
#define ENCODE_PREFIX(prefix) \
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		}
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
			inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be a memory address or reg_map[src] < 4 on x86_32 architectures. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			}
			else if ((dst & REG_MASK) == SLJIT_R1)

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
		}
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
#endif
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);

	if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
	else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))

	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
	*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))

	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (dst == SLJIT_UNUSED)

	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;
#endif

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(op_flags);

	if (cpu_has_cmov == -1)

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst_r != TMP_REG1) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
	}
	else
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);

	FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));

	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
	inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);

	FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));

	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
#endif

	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
		if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
			return emit_prefetch(compiler, op, src, srcw);
		return SLJIT_SUCCESS;
	}

	op = GET_OPCODE(op);

	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (FAST_IS_REG(src) && src == dst) {
			if (!TYPE_CAST_NEEDED(op))
				return SLJIT_SUCCESS;
		}

		if (op_flags & SLJIT_I32_OP) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (src & SLJIT_MEM) {
				if (op == SLJIT_MOV_S32)
			}
			else if (src & SLJIT_IMM) {
				if (op == SLJIT_MOV_U32)
			}
#endif
		}

		if (src & SLJIT_IMM) {
				srcw = (sljit_u8)srcw;
				srcw = (sljit_s8)srcw;
				srcw = (sljit_u16)srcw;
				srcw = (sljit_s16)srcw;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				srcw = (sljit_u32)srcw;
				srcw = (sljit_s32)srcw;
#endif
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
		}
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif
		return SLJIT_SUCCESS;
	}

		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);

	return SLJIT_SUCCESS;
}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		*(inst + 1) |= (op_imm); \
		FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
		inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else /* !SLJIT_CONFIG_X86_64 */

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif /* SLJIT_CONFIG_X86_64 */
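
/* Illustrative note (standard x86 encoding detail): the *_EAX_i32 opcodes are
   the short "op eax/rax, imm32" forms without a ModRM byte, so BINARY_EAX_IMM
   saves one byte whenever the destination is SLJIT_R0 and the immediate does
   not fit the sign-extended 8-bit form; the callers below test exactly that. */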
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8 op_eax_imm = (op_types >> 24);
	sljit_u8 op_rm = (op_types >> 16) & 0xff;
	sljit_u8 op_mr = (op_types >> 8) & 0xff;
	sljit_u8 op_imm = op_types & 0xff;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8 op_eax_imm = (op_types >> 24);
	sljit_u8 op_rm = (op_types >> 16) & 0xff;
	sljit_u8 op_mr = (op_types >> 8) & 0xff;
	sljit_u8 op_imm = op_types & 0xff;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			*inst = (sljit_s8)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			*inst = (sljit_s8)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		}
		else {
			EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
	}

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, done = 0;

	/* These cases are better left to be handled in the normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 & SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
		}
	}

	if (dst_r == TMP_REG1)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;

	return SLJIT_ERR_UNSUPPORTED;
}
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
	}
	return SLJIT_SUCCESS;
}
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			return SLJIT_SUCCESS;
		}
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
	}
	return SLJIT_SUCCESS;
}
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is complex since ecx itself may be used for
		   addressing, and this case must be supported as well. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#else
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#endif
		if (dst != SLJIT_UNUSED)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
		return SLJIT_SUCCESS;

	switch (GET_OPCODE(op)) {
		if (!HAS_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
			dst, dstw, src1, src1w, src2, src2w);
		if (!HAS_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}

		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, BINARY_OPCODE(AND),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);

	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	return freg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/*  Floating point operators                                              */
/* --------------------------------------------------------------------- */

/* Alignment(3) + 4 * 16 bytes. */
static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;

static void init_compiler(void)
{
	/* Align to 16 bytes. */
	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);

	/* Single precision constants (each constant is 16 bytes long). */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants (each constant is 16 bytes long). */
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}
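
/* Illustrative note (assumption: the constants are used as XORPD/ANDPD masks):
   0x80000000 placed at the sign-bit position of the scalar lets a float or
   double be negated with a single xorps/xorpd, while the 0x7fffffff (and
   0xffffffff, 0x7fffffff) masks clear the sign bit for an absolute-value
   operation with andps/andpd. */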
2231 static sljit_s32
emit_sse2(struct sljit_compiler
*compiler
, sljit_u8 opcode
,
2232 sljit_s32 single
, sljit_s32 xmm1
, sljit_s32 xmm2
, sljit_sw xmm2w
)
2236 inst
= emit_x86_instruction(compiler
, 2 | (single
? EX86_PREF_F3
: EX86_PREF_F2
) | EX86_SSE2
, xmm1
, 0, xmm2
, xmm2w
);
2240 return SLJIT_SUCCESS
;
2243 static sljit_s32
emit_sse2_logic(struct sljit_compiler
*compiler
, sljit_u8 opcode
,
2244 sljit_s32 pref66
, sljit_s32 xmm1
, sljit_s32 xmm2
, sljit_sw xmm2w
)
2248 inst
= emit_x86_instruction(compiler
, 2 | (pref66
? EX86_PREF_66
: 0) | EX86_SSE2
, xmm1
, 0, xmm2
, xmm2w
);
2252 return SLJIT_SUCCESS
;
2255 static SLJIT_INLINE sljit_s32
emit_sse2_load(struct sljit_compiler
*compiler
,
2256 sljit_s32 single
, sljit_s32 dst
, sljit_s32 src
, sljit_sw srcw
)
2258 return emit_sse2(compiler
, MOVSD_x_xm
, single
, dst
, src
, srcw
);
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;
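	/* CVTTSD2SI (or CVTTSS2SI, selected by the F3/F2 prefix above) truncates
	   toward zero and can only write a general purpose register, so a memory
	   destination has to be staged through TMP_REG1. */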

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}

	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of source. From SLJIT point of view,
			   this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}
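	/* At this point dst_r holds the first operand: the scalar SSE2 arithmetic
	   instructions below overwrite their destination register, so src1 must
	   already be in dst_r. When dst == src2, ADD and MUL simply swap the
	   operands (they are commutative), while SUB and DIV fall back to
	   computing the result in TMP_FREG. */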
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif
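	/* On x86-32 an unconditional near jump is E9 + rel32 (5 bytes) and a
	   conditional one is 0F 8x + rel32 (6 bytes). On x86-64 the worst case is
	   a 10 byte MOV reg, imm64 followed by a 3 byte (REX + FF /4 or /2)
	   indirect jump or call through that register; conditional jumps may also
	   need a 2 byte short jump of the opposite condition to skip it. */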

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = 1;
	return jump;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = 1;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;
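	/* get_jump_code() returns the second opcode byte of the two byte
	   0F 8x jcc rel32 form. The setcc forms are 0F 90..0F 9F and the cmovcc
	   forms are 0F 40..0F 4F with the same condition code in the low nibble,
	   which is why 0x10 is added here and 0x50 (or 0x40 in sljit_emit_cmov())
	   is subtracted below to derive the other encodings. */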
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else
	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}
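		/* Without a REX prefix (which does not exist on x86-32) setcc can only
		   target AL, CL, DL or BL, so destinations mapped to other registers
		   either use CMOV when the CPU supports it or route the flag byte
		   through EAX with a pair of XCHG instructions. */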
		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	dst_reg &= ~SLJIT_I32_OP;

	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#else
	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#endif

	/* ADJUST_LOCAL_OFFSET is not needed. */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
	dst_reg &= ~SLJIT_I32_OP;
#endif

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 2;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
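	/* On x86-32 rewritable jumps are emitted as jmp/call rel32, so the patched
	   field holds a displacement relative to the end of that 4 byte field
	   (hence the addr + 4 below), adjusted by the executable offset. On x86-64
	   the target is loaded with a MOV reg, imm64, so the absolute address is
	   stored instead. */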
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
	sljit_unaligned_store_sw((void*)addr, new_constant);
}