/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}
/*   8 - R8     - From now on REX prefix is required */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};
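/* Added note (not part of the original source): reg_map is indexed by the
   SLJIT register number and holds the IA-32 machine encoding of the mapped
   hardware register (0 = eax, 1 = ecx, 2 = edx, 3 = ebx, 4 = esp, 5 = ebp,
   6 = esi, 7 = edi); entries that map to no hardware register stay 0 and
   are handled through memory (see CHECK_EXTRA_REGS below). */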
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}
#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
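/* Added illustration (not from the original source): in a ModRM byte,
   rm == 0b100 means "a SIB byte follows", and rm == 0b101 with mod == 00
   means "disp32 / RIP-relative". Because r12 and r13 share these low three
   bits (12 & 0x7 == 4, 13 & 0x7 == 5), addressing through them always costs
   an extra SIB or displacement byte, which is why they are the least
   attractive base registers for memory operands. */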
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
};
#endif
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_SSE2		0x0800
#define EX86_PREF_F2		0x1000
#define EX86_PREF_F3		0x2000
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */
#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_EAX_i32	0x0d
#define OR_rm8_r8	0x08
#define PUSH_i32	0x68
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define RET_near	0xc3
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3
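/* Added illustration (not from the original source): these constants are
   combined by emit_x86_instruction() elsewhere in this file. As a hand-worked
   example, "add edx, 5" can be encoded by emitting GROUP_BINARY_83 (0x83),
   a ModRM byte whose reg field carries ADD (0 << 3, hence the shifted
   definitions above) and whose rm field selects edx, followed by the imm8
   value:
       0x83, 0xC2, 0x05   ->   add edx, 5
   The byte sequence is an illustration only; no single macro above emits it
   verbatim. */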
#define MOD_DISP8	0x40

#define INC_SIZE(s)		(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)		(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)		(*inst++ = (POP_r + (r)))
#define RET()			(*inst++ = (RET_near))
#define RET_I16(n)		(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)

#define MOV_RM(mod, reg, rm)	(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Different threads that detect the CPU features at
   the same time may overwrite them, but they all write the same values. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;
#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif

#else /* _MSC_VER && _MSC_VER >= 1400 */

#endif /* _MSC_VER && _MSC_VER >= 1400 */
#if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
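/* Added note: the bit positions above follow the CPUID.01H:EDX layout,
   where bit 26 reports SSE2 support and bit 15 reports CMOV support. */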
static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84 /* je */;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82 /* jc */;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_C_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_C_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_C_FLOAT_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_C_FLOAT_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
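/* Added note: the values returned above are the second opcode byte of the
   near (0x0F 0x8x, rel32) form of Jcc. The short (rel8) form of the same
   condition is the single byte 0x7x, i.e. the near opcode minus 0x10; this
   is the relationship generate_near_jump_code() relies on when it emits
   "get_jump_code(type) - 0x10" for short jumps. */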
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;
	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
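	/* Added note: the "+ 2" above accounts for the smallest encoding of a
	   short jump (one opcode byte plus a rel8 displacement); the displacement
	   is relative to the end of that two-byte instruction, hence the
	   -128..127 range check. */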
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_sw)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif
	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
	}
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	check_sljit_generate_code(compiler);
	reverse_buf(compiler);
	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	buf_ptr = buf->memory;
	buf_end = buf_ptr + buf->used_size;
			/* The code is already generated. */
			SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				jump->addr = (sljit_uw)code_ptr;
				if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
					code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
				else
					code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
			else if (*buf_ptr == 0) {
				label->addr = (sljit_uw)code_ptr;
				label->size = code_ptr - code;
			}
			else if (*buf_ptr == 1) {
				const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
				const_ = const_->next;
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
				*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
				code_ptr += sizeof(sljit_sw);
				buf_ptr += sizeof(sljit_sw) - 1;
#else
				code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
				buf_ptr += sizeof(sljit_sw);
#endif
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!const_);
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}
	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);
static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches every 4K page that belongs to the requested stack
	   space, whose size is passed in local_size. This is necessary on Windows,
	   where the stack can only grow in 4K steps. If the stack is already large
	   enough, the call just burns CPU cycles; but since that cannot be known
	   in advance, it must always be called. I think this is a bad design in
	   general, even if it has its reasons. */
	*(sljit_si*)alloca(local_size) = 0;
}
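/* Added rough sketch (illustration only, not code used by SLJIT) of what the
   probing achieves: committing a large stack area page by page so each guard
   page is hit in order. The identifier names below are made up for the
   example.

   static void probe_stack_pages(char *top, sljit_sw size)
   {
       sljit_sw offset;
       for (offset = 0; offset < size; offset += 4096)
           top[-offset] = 0;   // touch one byte in every 4K page, top-down
   }
*/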
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif
647 static sljit_si
emit_mov(struct sljit_compiler
*compiler
,
648 sljit_si dst
, sljit_sw dstw
,
649 sljit_si src
, sljit_sw srcw
)
653 if (dst
== SLJIT_UNUSED
) {
		/* No destination: no need to set up flags. */
655 if (src
& SLJIT_MEM
) {
656 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src
, srcw
);
660 return SLJIT_SUCCESS
;
662 if (src
<= TMP_REGISTER
) {
663 inst
= emit_x86_instruction(compiler
, 1, src
, 0, dst
, dstw
);
666 return SLJIT_SUCCESS
;
668 if (src
& SLJIT_IMM
) {
669 if (dst
<= TMP_REGISTER
) {
670 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
671 return emit_do_imm(compiler
, MOV_r_i32
+ reg_map
[dst
], srcw
);
673 if (!compiler
->mode32
) {
674 if (NOT_HALFWORD(srcw
))
675 return emit_load_imm64(compiler
, dst
, srcw
);
678 return emit_do_imm32(compiler
, (reg_map
[dst
] >= 8) ? REX_B
: 0, MOV_r_i32
+ reg_lmap
[dst
], srcw
);
681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
682 if (!compiler
->mode32
&& NOT_HALFWORD(srcw
)) {
683 FAIL_IF(emit_load_imm64(compiler
, TMP_REG2
, srcw
));
684 inst
= emit_x86_instruction(compiler
, 1, TMP_REG2
, 0, dst
, dstw
);
687 return SLJIT_SUCCESS
;
690 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, srcw
, dst
, dstw
);
693 return SLJIT_SUCCESS
;
695 if (dst
<= TMP_REGISTER
) {
696 inst
= emit_x86_instruction(compiler
, 1, dst
, 0, src
, srcw
);
699 return SLJIT_SUCCESS
;
	/* Memory to memory move. Requires two instructions. */
703 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src
, srcw
);
706 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, dst
, dstw
);
709 return SLJIT_SUCCESS
;
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
715 SLJIT_API_FUNC_ATTRIBUTE sljit_si
sljit_emit_op0(struct sljit_compiler
*compiler
, sljit_si op
)
718 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
723 check_sljit_emit_op0(compiler
, op
);
725 switch (GET_OPCODE(op
)) {
726 case SLJIT_BREAKPOINT
:
727 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
733 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
742 compiler
->flags_saved
= 0;
743 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
745 SLJIT_COMPILE_ASSERT(
746 reg_map
[SLJIT_SCRATCH_REG1
] == 0
747 && reg_map
[SLJIT_SCRATCH_REG2
] == 2
748 && reg_map
[TMP_REGISTER
] > 7,
749 invalid_register_assignment_for_div_mul
);
751 SLJIT_COMPILE_ASSERT(
752 reg_map
[SLJIT_SCRATCH_REG1
] == 0
753 && reg_map
[SLJIT_SCRATCH_REG2
] < 7
754 && reg_map
[TMP_REGISTER
] == 2,
755 invalid_register_assignment_for_div_mul
);
757 compiler
->mode32
= op
& SLJIT_INT_OP
;
761 if (op
== SLJIT_UDIV
) {
762 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
763 EMIT_MOV(compiler
, TMP_REGISTER
, 0, SLJIT_SCRATCH_REG2
, 0);
764 inst
= emit_x86_instruction(compiler
, 1, SLJIT_SCRATCH_REG2
, 0, SLJIT_SCRATCH_REG2
, 0);
766 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, TMP_REGISTER
, 0);
772 if (op
== SLJIT_SDIV
) {
773 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
774 EMIT_MOV(compiler
, TMP_REGISTER
, 0, SLJIT_SCRATCH_REG2
, 0);
777 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
778 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
783 if (compiler
->mode32
) {
784 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
789 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 2);
798 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
799 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 2);
803 *inst
= MOD_REG
| ((op
>= SLJIT_UDIV
) ? reg_map
[TMP_REGISTER
] : reg_map
[SLJIT_SCRATCH_REG2
]);
806 size
= (!compiler
->mode32
|| op
>= SLJIT_UDIV
) ? 3 : 2;
808 size
= (!compiler
->mode32
) ? 3 : 2;
810 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + size
);
814 if (!compiler
->mode32
)
815 *inst
++ = REX_W
| ((op
>= SLJIT_UDIV
) ? REX_B
: 0);
816 else if (op
>= SLJIT_UDIV
)
819 *inst
= MOD_REG
| ((op
>= SLJIT_UDIV
) ? reg_lmap
[TMP_REGISTER
] : reg_lmap
[SLJIT_SCRATCH_REG2
]);
821 if (!compiler
->mode32
)
824 *inst
= MOD_REG
| reg_map
[SLJIT_SCRATCH_REG2
];
841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
842 EMIT_MOV(compiler
, SLJIT_SCRATCH_REG2
, 0, TMP_REGISTER
, 0);
847 return SLJIT_SUCCESS
;
850 #define ENCODE_PREFIX(prefix) \
852 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
858 static sljit_si
emit_mov_byte(struct sljit_compiler
*compiler
, sljit_si sign
,
859 sljit_si dst
, sljit_sw dstw
,
860 sljit_si src
, sljit_sw srcw
)
864 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
868 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
869 compiler
->mode32
= 0;
872 if (dst
== SLJIT_UNUSED
&& !(src
& SLJIT_MEM
))
873 return SLJIT_SUCCESS
; /* Empty instruction. */
875 if (src
& SLJIT_IMM
) {
876 if (dst
<= TMP_REGISTER
) {
877 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
878 return emit_do_imm(compiler
, MOV_r_i32
+ reg_map
[dst
], srcw
);
880 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, srcw
, dst
, 0);
883 return SLJIT_SUCCESS
;
886 inst
= emit_x86_instruction(compiler
, 1 | EX86_BYTE_ARG
| EX86_NO_REXW
, SLJIT_IMM
, srcw
, dst
, dstw
);
889 return SLJIT_SUCCESS
;
892 dst_r
= (dst
<= TMP_REGISTER
) ? dst
: TMP_REGISTER
;
894 if ((dst
& SLJIT_MEM
) && src
<= TMP_REGISTER
) {
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 if (reg_map
[src
] >= 4) {
897 SLJIT_ASSERT(dst_r
== TMP_REGISTER
);
898 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, 0);
905 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
906 else if (src
<= TMP_REGISTER
&& reg_map
[src
] >= 4) {
907 /* src, dst are registers. */
908 SLJIT_ASSERT(dst
>= SLJIT_SCRATCH_REG1
&& dst
<= TMP_REGISTER
);
909 if (reg_map
[dst
] < 4) {
911 EMIT_MOV(compiler
, dst
, 0, src
, 0);
912 inst
= emit_x86_instruction(compiler
, 2, dst
, 0, dst
, 0);
915 *inst
= sign
? MOVSX_r_rm8
: MOVZX_r_rm8
;
919 EMIT_MOV(compiler
, dst
, 0, src
, 0);
922 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_IMM
, 24, dst
, 0);
926 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_IMM
, 24, dst
, 0);
931 inst
= emit_x86_instruction(compiler
, 1 | EX86_BIN_INS
, SLJIT_IMM
, 0xff, dst
, 0);
936 return SLJIT_SUCCESS
;
	/* src can be a memory address, or a register with reg_map[src] < 4 on x86-32. */
941 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, src
, srcw
);
944 *inst
= sign
? MOVSX_r_rm8
: MOVZX_r_rm8
;
947 if (dst
& SLJIT_MEM
) {
948 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
949 if (dst_r
== TMP_REGISTER
) {
			/* Find an unused register whose reg_map value is < 4. */
951 if ((dst
& 0xf) == SLJIT_SCRATCH_REG1
) {
952 if ((dst
& 0xf0) == (SLJIT_SCRATCH_REG2
<< 4))
953 work_r
= SLJIT_SCRATCH_REG3
;
955 work_r
= SLJIT_SCRATCH_REG2
;
958 if ((dst
& 0xf0) != (SLJIT_SCRATCH_REG1
<< 4))
959 work_r
= SLJIT_SCRATCH_REG1
;
960 else if ((dst
& 0xf) == SLJIT_SCRATCH_REG2
)
961 work_r
= SLJIT_SCRATCH_REG3
;
963 work_r
= SLJIT_SCRATCH_REG2
;
966 if (work_r
== SLJIT_SCRATCH_REG1
) {
967 ENCODE_PREFIX(XCHG_EAX_r
+ reg_map
[TMP_REGISTER
]);
970 inst
= emit_x86_instruction(compiler
, 1, work_r
, 0, dst_r
, 0);
975 inst
= emit_x86_instruction(compiler
, 1, work_r
, 0, dst
, dstw
);
979 if (work_r
== SLJIT_SCRATCH_REG1
) {
980 ENCODE_PREFIX(XCHG_EAX_r
+ reg_map
[TMP_REGISTER
]);
983 inst
= emit_x86_instruction(compiler
, 1, work_r
, 0, dst_r
, 0);
989 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, dst
, dstw
);
994 inst
= emit_x86_instruction(compiler
, 1 | EX86_REX
| EX86_NO_REXW
, dst_r
, 0, dst
, dstw
);
1000 return SLJIT_SUCCESS
;
1003 static sljit_si
emit_mov_half(struct sljit_compiler
*compiler
, sljit_si sign
,
1004 sljit_si dst
, sljit_sw dstw
,
1005 sljit_si src
, sljit_sw srcw
)
1010 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1011 compiler
->mode32
= 0;
1014 if (dst
== SLJIT_UNUSED
&& !(src
& SLJIT_MEM
))
1015 return SLJIT_SUCCESS
; /* Empty instruction. */
1017 if (src
& SLJIT_IMM
) {
1018 if (dst
<= TMP_REGISTER
) {
1019 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1020 return emit_do_imm(compiler
, MOV_r_i32
+ reg_map
[dst
], srcw
);
1022 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, srcw
, dst
, 0);
1025 return SLJIT_SUCCESS
;
1028 inst
= emit_x86_instruction(compiler
, 1 | EX86_HALF_ARG
| EX86_NO_REXW
| EX86_PREF_66
, SLJIT_IMM
, srcw
, dst
, dstw
);
1031 return SLJIT_SUCCESS
;
1034 dst_r
= (dst
<= TMP_REGISTER
) ? dst
: TMP_REGISTER
;
1036 if ((dst
& SLJIT_MEM
) && src
<= TMP_REGISTER
)
1039 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, src
, srcw
);
1042 *inst
= sign
? MOVSX_r_rm16
: MOVZX_r_rm16
;
1045 if (dst
& SLJIT_MEM
) {
1046 inst
= emit_x86_instruction(compiler
, 1 | EX86_NO_REXW
| EX86_PREF_66
, dst_r
, 0, dst
, dstw
);
1051 return SLJIT_SUCCESS
;
1054 static sljit_si
emit_unary(struct sljit_compiler
*compiler
, sljit_ub opcode
,
1055 sljit_si dst
, sljit_sw dstw
,
1056 sljit_si src
, sljit_sw srcw
)
1060 if (dst
== SLJIT_UNUSED
) {
1061 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, srcw
);
1062 inst
= emit_x86_instruction(compiler
, 1, 0, 0, TMP_REGISTER
, 0);
1066 return SLJIT_SUCCESS
;
1068 if (dst
== src
&& dstw
== srcw
) {
1069 /* Same input and output */
1070 inst
= emit_x86_instruction(compiler
, 1, 0, 0, dst
, dstw
);
1074 return SLJIT_SUCCESS
;
1076 if (dst
<= TMP_REGISTER
) {
1077 EMIT_MOV(compiler
, dst
, 0, src
, srcw
);
1078 inst
= emit_x86_instruction(compiler
, 1, 0, 0, dst
, dstw
);
1082 return SLJIT_SUCCESS
;
1084 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, srcw
);
1085 inst
= emit_x86_instruction(compiler
, 1, 0, 0, TMP_REGISTER
, 0);
1089 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1090 return SLJIT_SUCCESS
;
1093 static sljit_si
emit_not_with_flags(struct sljit_compiler
*compiler
,
1094 sljit_si dst
, sljit_sw dstw
,
1095 sljit_si src
, sljit_sw srcw
)
1099 if (dst
== SLJIT_UNUSED
) {
1100 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, srcw
);
1101 inst
= emit_x86_instruction(compiler
, 1, 0, 0, TMP_REGISTER
, 0);
1105 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, TMP_REGISTER
, 0);
1108 return SLJIT_SUCCESS
;
1110 if (dst
<= TMP_REGISTER
) {
1111 EMIT_MOV(compiler
, dst
, 0, src
, srcw
);
1112 inst
= emit_x86_instruction(compiler
, 1, 0, 0, dst
, dstw
);
1116 inst
= emit_x86_instruction(compiler
, 1, dst
, 0, dst
, 0);
1119 return SLJIT_SUCCESS
;
1121 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, srcw
);
1122 inst
= emit_x86_instruction(compiler
, 1, 0, 0, TMP_REGISTER
, 0);
1126 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, TMP_REGISTER
, 0);
1129 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1130 return SLJIT_SUCCESS
;
1133 static sljit_si
emit_clz(struct sljit_compiler
*compiler
, sljit_si op_flags
,
1134 sljit_si dst
, sljit_sw dstw
,
1135 sljit_si src
, sljit_sw srcw
)
1140 SLJIT_UNUSED_ARG(op_flags
);
1141 if (SLJIT_UNLIKELY(dst
== SLJIT_UNUSED
)) {
1142 /* Just set the zero flag. */
1143 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src
, srcw
);
1144 inst
= emit_x86_instruction(compiler
, 1, 0, 0, TMP_REGISTER
, 0);
1148 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1149 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_IMM
, 31, TMP_REGISTER
, 0);
1151 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_IMM
, !(op_flags
& SLJIT_INT_OP
) ? 63 : 31, TMP_REGISTER
, 0);
1155 return SLJIT_SUCCESS
;
1158 if (SLJIT_UNLIKELY(src
& SLJIT_IMM
)) {
1159 EMIT_MOV(compiler
, TMP_REGISTER
, 0, SLJIT_IMM
, srcw
);
1164 inst
= emit_x86_instruction(compiler
, 2, TMP_REGISTER
, 0, src
, srcw
);
1169 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1170 if (dst
<= TMP_REGISTER
)
1173 /* Find an unused temporary register. */
1174 if ((dst
& 0xf) != SLJIT_SCRATCH_REG1
&& (dst
& 0xf0) != (SLJIT_SCRATCH_REG1
<< 4))
1175 dst_r
= SLJIT_SCRATCH_REG1
;
1176 else if ((dst
& 0xf) != SLJIT_SCRATCH_REG2
&& (dst
& 0xf0) != (SLJIT_SCRATCH_REG2
<< 4))
1177 dst_r
= SLJIT_SCRATCH_REG2
;
1179 dst_r
= SLJIT_SCRATCH_REG3
;
1180 EMIT_MOV(compiler
, dst
, dstw
, dst_r
, 0);
1182 EMIT_MOV(compiler
, dst_r
, 0, SLJIT_IMM
, 32 + 31);
1184 dst_r
= (dst
<= TMP_REGISTER
) ? dst
: TMP_REG2
;
1185 compiler
->mode32
= 0;
1186 EMIT_MOV(compiler
, dst_r
, 0, SLJIT_IMM
, !(op_flags
& SLJIT_INT_OP
) ? 64 + 63 : 32 + 31);
1187 compiler
->mode32
= op_flags
& SLJIT_INT_OP
;
1190 if (cpu_has_cmov
== -1)
1194 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, TMP_REGISTER
, 0);
1197 *inst
= CMOVNE_r_rm
;
1199 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1200 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 4);
1207 *inst
++ = MOD_REG
| (reg_map
[dst_r
] << 3) | reg_map
[TMP_REGISTER
];
1209 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 5);
1215 *inst
++ = REX_W
| (reg_map
[dst_r
] >= 8 ? REX_R
: 0) | (reg_map
[TMP_REGISTER
] >= 8 ? REX_B
: 0);
1217 *inst
++ = MOD_REG
| (reg_lmap
[dst_r
] << 3) | reg_lmap
[TMP_REGISTER
];
1221 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1222 inst
= emit_x86_instruction(compiler
, 1 | EX86_BIN_INS
, SLJIT_IMM
, 31, dst_r
, 0);
1224 inst
= emit_x86_instruction(compiler
, 1 | EX86_BIN_INS
, SLJIT_IMM
, !(op_flags
& SLJIT_INT_OP
) ? 63 : 31, dst_r
, 0);
1229 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1230 if (dst
& SLJIT_MEM
) {
1231 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, dst
, dstw
);
1236 if (dst
& SLJIT_MEM
)
1237 EMIT_MOV(compiler
, dst
, dstw
, TMP_REG2
, 0);
1239 return SLJIT_SUCCESS
;
1242 SLJIT_API_FUNC_ATTRIBUTE sljit_si
sljit_emit_op1(struct sljit_compiler
*compiler
, sljit_si op
,
1243 sljit_si dst
, sljit_sw dstw
,
1244 sljit_si src
, sljit_sw srcw
)
1247 sljit_si update
= 0;
1248 sljit_si op_flags
= GET_ALL_FLAGS(op
);
1249 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1250 sljit_si dst_is_ereg
= 0;
1251 sljit_si src_is_ereg
= 0;
1253 # define src_is_ereg 0
1257 check_sljit_emit_op1(compiler
, op
, dst
, dstw
, src
, srcw
);
1258 ADJUST_LOCAL_OFFSET(dst
, dstw
);
1259 ADJUST_LOCAL_OFFSET(src
, srcw
);
1261 CHECK_EXTRA_REGS(dst
, dstw
, dst_is_ereg
= 1);
1262 CHECK_EXTRA_REGS(src
, srcw
, src_is_ereg
= 1);
1263 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1264 compiler
->mode32
= op_flags
& SLJIT_INT_OP
;
1267 op
= GET_OPCODE(op
);
1268 if (op
>= SLJIT_MOV
&& op
<= SLJIT_MOVU_P
) {
1269 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1270 compiler
->mode32
= 0;
1273 if (op_flags
& SLJIT_INT_OP
) {
1274 if (src
<= TMP_REGISTER
&& src
== dst
) {
1275 if (!TYPE_CAST_NEEDED(op
))
1276 return SLJIT_SUCCESS
;
1278 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1279 if (op
== SLJIT_MOV_SI
&& (src
& SLJIT_MEM
))
1281 if (op
== SLJIT_MOVU_SI
&& (src
& SLJIT_MEM
))
1283 if (op
== SLJIT_MOV_UI
&& (src
& SLJIT_IMM
))
1285 if (op
== SLJIT_MOVU_UI
&& (src
& SLJIT_IMM
))
1290 SLJIT_COMPILE_ASSERT(SLJIT_MOV
+ 8 == SLJIT_MOVU
, movu_offset
);
1291 if (op
>= SLJIT_MOVU
) {
1296 if (src
& SLJIT_IMM
) {
1299 srcw
= (sljit_ub
)srcw
;
1302 srcw
= (sljit_sb
)srcw
;
1305 srcw
= (sljit_uh
)srcw
;
1308 srcw
= (sljit_sh
)srcw
;
1310 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1312 srcw
= (sljit_ui
)srcw
;
1315 srcw
= (sljit_si
)srcw
;
1319 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1320 if (SLJIT_UNLIKELY(dst_is_ereg
))
1321 return emit_mov(compiler
, dst
, dstw
, src
, srcw
);
1325 if (SLJIT_UNLIKELY(update
) && (src
& SLJIT_MEM
) && !src_is_ereg
&& (src
& 0xf) && (srcw
!= 0 || (src
& 0xf0) != 0)) {
1326 inst
= emit_x86_instruction(compiler
, 1, src
& 0xf, 0, src
, srcw
);
1329 src
&= SLJIT_MEM
| 0xf;
1333 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1334 if (SLJIT_UNLIKELY(dst_is_ereg
) && (!(op
== SLJIT_MOV
|| op
== SLJIT_MOV_UI
|| op
== SLJIT_MOV_SI
|| op
== SLJIT_MOV_P
) || (src
& SLJIT_MEM
))) {
1335 SLJIT_ASSERT(dst
== SLJIT_MEM1(SLJIT_LOCALS_REG
));
1343 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1347 FAIL_IF(emit_mov(compiler
, dst
, dstw
, src
, srcw
));
1350 FAIL_IF(emit_mov_byte(compiler
, 0, dst
, dstw
, src
, srcw
));
1353 FAIL_IF(emit_mov_byte(compiler
, 1, dst
, dstw
, src
, srcw
));
1356 FAIL_IF(emit_mov_half(compiler
, 0, dst
, dstw
, src
, srcw
));
1359 FAIL_IF(emit_mov_half(compiler
, 1, dst
, dstw
, src
, srcw
));
1361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1363 FAIL_IF(emit_mov_int(compiler
, 0, dst
, dstw
, src
, srcw
));
1366 FAIL_IF(emit_mov_int(compiler
, 1, dst
, dstw
, src
, srcw
));
1371 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1372 if (SLJIT_UNLIKELY(dst_is_ereg
) && dst
== TMP_REGISTER
)
1373 return emit_mov(compiler
, SLJIT_MEM1(SLJIT_LOCALS_REG
), dstw
, TMP_REGISTER
, 0);
1376 if (SLJIT_UNLIKELY(update
) && (dst
& SLJIT_MEM
) && (dst
& 0xf) && (dstw
!= 0 || (dst
& 0xf0) != 0)) {
1377 inst
= emit_x86_instruction(compiler
, 1, dst
& 0xf, 0, dst
, dstw
);
1381 return SLJIT_SUCCESS
;
1384 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags
)))
1385 compiler
->flags_saved
= 0;
1389 if (SLJIT_UNLIKELY(op_flags
& SLJIT_SET_E
))
1390 return emit_not_with_flags(compiler
, dst
, dstw
, src
, srcw
);
1391 return emit_unary(compiler
, NOT_rm
, dst
, dstw
, src
, srcw
);
1394 if (SLJIT_UNLIKELY(op_flags
& SLJIT_KEEP_FLAGS
) && !compiler
->flags_saved
)
1395 FAIL_IF(emit_save_flags(compiler
));
1396 return emit_unary(compiler
, NEG_rm
, dst
, dstw
, src
, srcw
);
1399 if (SLJIT_UNLIKELY(op_flags
& SLJIT_KEEP_FLAGS
) && !compiler
->flags_saved
)
1400 FAIL_IF(emit_save_flags(compiler
));
1401 return emit_clz(compiler
, op_flags
, dst
, dstw
, src
, srcw
);
1404 return SLJIT_SUCCESS
;
1406 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1411 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1441 static sljit_si
emit_cum_binary(struct sljit_compiler
*compiler
,
1442 sljit_ub op_rm
, sljit_ub op_mr
, sljit_ub op_imm
, sljit_ub op_eax_imm
,
1443 sljit_si dst
, sljit_sw dstw
,
1444 sljit_si src1
, sljit_sw src1w
,
1445 sljit_si src2
, sljit_sw src2w
)
1449 if (dst
== SLJIT_UNUSED
) {
1450 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1451 if (src2
& SLJIT_IMM
) {
1452 BINARY_IMM(op_imm
, op_mr
, src2w
, TMP_REGISTER
, 0);
1455 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1459 return SLJIT_SUCCESS
;
1462 if (dst
== src1
&& dstw
== src1w
) {
1463 if (src2
& SLJIT_IMM
) {
1464 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1465 if ((dst
== SLJIT_SCRATCH_REG1
) && (src2w
> 127 || src2w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src2w
))) {
1467 if ((dst
== SLJIT_SCRATCH_REG1
) && (src2w
> 127 || src2w
< -128)) {
1469 BINARY_EAX_IMM(op_eax_imm
, src2w
);
1472 BINARY_IMM(op_imm
, op_mr
, src2w
, dst
, dstw
);
1475 else if (dst
<= TMP_REGISTER
) {
1476 inst
= emit_x86_instruction(compiler
, 1, dst
, dstw
, src2
, src2w
);
1480 else if (src2
<= TMP_REGISTER
) {
1481 /* Special exception for sljit_emit_op_flags. */
1482 inst
= emit_x86_instruction(compiler
, 1, src2
, src2w
, dst
, dstw
);
1487 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src2
, src2w
);
1488 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, dst
, dstw
);
1492 return SLJIT_SUCCESS
;
1495 /* Only for cumulative operations. */
1496 if (dst
== src2
&& dstw
== src2w
) {
1497 if (src1
& SLJIT_IMM
) {
1498 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1499 if ((dst
== SLJIT_SCRATCH_REG1
) && (src1w
> 127 || src1w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src1w
))) {
1501 if ((dst
== SLJIT_SCRATCH_REG1
) && (src1w
> 127 || src1w
< -128)) {
1503 BINARY_EAX_IMM(op_eax_imm
, src1w
);
1506 BINARY_IMM(op_imm
, op_mr
, src1w
, dst
, dstw
);
1509 else if (dst
<= TMP_REGISTER
) {
1510 inst
= emit_x86_instruction(compiler
, 1, dst
, dstw
, src1
, src1w
);
1514 else if (src1
<= TMP_REGISTER
) {
1515 inst
= emit_x86_instruction(compiler
, 1, src1
, src1w
, dst
, dstw
);
1520 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1521 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, dst
, dstw
);
1525 return SLJIT_SUCCESS
;
1528 /* General version. */
1529 if (dst
<= TMP_REGISTER
) {
1530 EMIT_MOV(compiler
, dst
, 0, src1
, src1w
);
1531 if (src2
& SLJIT_IMM
) {
1532 BINARY_IMM(op_imm
, op_mr
, src2w
, dst
, 0);
1535 inst
= emit_x86_instruction(compiler
, 1, dst
, 0, src2
, src2w
);
	/* This version requires fewer memory writes. */
1542 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1543 if (src2
& SLJIT_IMM
) {
1544 BINARY_IMM(op_imm
, op_mr
, src2w
, TMP_REGISTER
, 0);
1547 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1551 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1554 return SLJIT_SUCCESS
;
1557 static sljit_si
emit_non_cum_binary(struct sljit_compiler
*compiler
,
1558 sljit_ub op_rm
, sljit_ub op_mr
, sljit_ub op_imm
, sljit_ub op_eax_imm
,
1559 sljit_si dst
, sljit_sw dstw
,
1560 sljit_si src1
, sljit_sw src1w
,
1561 sljit_si src2
, sljit_sw src2w
)
1565 if (dst
== SLJIT_UNUSED
) {
1566 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1567 if (src2
& SLJIT_IMM
) {
1568 BINARY_IMM(op_imm
, op_mr
, src2w
, TMP_REGISTER
, 0);
1571 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1575 return SLJIT_SUCCESS
;
1578 if (dst
== src1
&& dstw
== src1w
) {
1579 if (src2
& SLJIT_IMM
) {
1580 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1581 if ((dst
== SLJIT_SCRATCH_REG1
) && (src2w
> 127 || src2w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src2w
))) {
1583 if ((dst
== SLJIT_SCRATCH_REG1
) && (src2w
> 127 || src2w
< -128)) {
1585 BINARY_EAX_IMM(op_eax_imm
, src2w
);
1588 BINARY_IMM(op_imm
, op_mr
, src2w
, dst
, dstw
);
1591 else if (dst
<= TMP_REGISTER
) {
1592 inst
= emit_x86_instruction(compiler
, 1, dst
, dstw
, src2
, src2w
);
1596 else if (src2
<= TMP_REGISTER
) {
1597 inst
= emit_x86_instruction(compiler
, 1, src2
, src2w
, dst
, dstw
);
1602 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src2
, src2w
);
1603 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, dst
, dstw
);
1607 return SLJIT_SUCCESS
;
1610 /* General version. */
1611 if (dst
<= TMP_REGISTER
&& dst
!= src2
) {
1612 EMIT_MOV(compiler
, dst
, 0, src1
, src1w
);
1613 if (src2
& SLJIT_IMM
) {
1614 BINARY_IMM(op_imm
, op_mr
, src2w
, dst
, 0);
1617 inst
= emit_x86_instruction(compiler
, 1, dst
, 0, src2
, src2w
);
	/* This version requires fewer memory writes. */
1624 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1625 if (src2
& SLJIT_IMM
) {
1626 BINARY_IMM(op_imm
, op_mr
, src2w
, TMP_REGISTER
, 0);
1629 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1633 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1636 return SLJIT_SUCCESS
;
1639 static sljit_si
emit_mul(struct sljit_compiler
*compiler
,
1640 sljit_si dst
, sljit_sw dstw
,
1641 sljit_si src1
, sljit_sw src1w
,
1642 sljit_si src2
, sljit_sw src2w
)
1647 dst_r
= (dst
<= TMP_REGISTER
) ? dst
: TMP_REGISTER
;
1649 /* Register destination. */
1650 if (dst_r
== src1
&& !(src2
& SLJIT_IMM
)) {
1651 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, src2
, src2w
);
1656 else if (dst_r
== src2
&& !(src1
& SLJIT_IMM
)) {
1657 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, src1
, src1w
);
1662 else if (src1
& SLJIT_IMM
) {
1663 if (src2
& SLJIT_IMM
) {
1664 EMIT_MOV(compiler
, dst_r
, 0, SLJIT_IMM
, src2w
);
1669 if (src1w
<= 127 && src1w
>= -128) {
1670 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src2
, src2w
);
1672 *inst
= IMUL_r_rm_i8
;
1673 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
1676 *inst
= (sljit_sb
)src1w
;
1678 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1680 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src2
, src2w
);
1682 *inst
= IMUL_r_rm_i32
;
1683 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 4);
1686 *(sljit_sw
*)inst
= src1w
;
1689 else if (IS_HALFWORD(src1w
)) {
1690 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src2
, src2w
);
1692 *inst
= IMUL_r_rm_i32
;
1693 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 4);
1696 *(sljit_si
*)inst
= (sljit_si
)src1w
;
1699 EMIT_MOV(compiler
, TMP_REG2
, 0, SLJIT_IMM
, src1w
);
1701 EMIT_MOV(compiler
, dst_r
, 0, src2
, src2w
);
1702 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, TMP_REG2
, 0);
1709 else if (src2
& SLJIT_IMM
) {
1710 /* Note: src1 is NOT immediate. */
1712 if (src2w
<= 127 && src2w
>= -128) {
1713 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src1
, src1w
);
1715 *inst
= IMUL_r_rm_i8
;
1716 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 1);
1719 *inst
= (sljit_sb
)src2w
;
1721 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1723 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src1
, src1w
);
1725 *inst
= IMUL_r_rm_i32
;
1726 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 4);
1729 *(sljit_sw
*)inst
= src2w
;
1732 else if (IS_HALFWORD(src2w
)) {
1733 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, src1
, src1w
);
1735 *inst
= IMUL_r_rm_i32
;
1736 inst
= (sljit_ub
*)ensure_buf(compiler
, 1 + 4);
1739 *(sljit_si
*)inst
= (sljit_si
)src2w
;
1742 EMIT_MOV(compiler
, TMP_REG2
, 0, SLJIT_IMM
, src1w
);
1744 EMIT_MOV(compiler
, dst_r
, 0, src1
, src1w
);
1745 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, TMP_REG2
, 0);
1753 /* Neither argument is immediate. */
1754 if (ADDRESSING_DEPENDS_ON(src2
, dst_r
))
1755 dst_r
= TMP_REGISTER
;
1756 EMIT_MOV(compiler
, dst_r
, 0, src1
, src1w
);
1757 inst
= emit_x86_instruction(compiler
, 2, dst_r
, 0, src2
, src2w
);
1763 if (dst_r
== TMP_REGISTER
)
1764 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1766 return SLJIT_SUCCESS
;
1769 static sljit_si
emit_lea_binary(struct sljit_compiler
*compiler
, sljit_si keep_flags
,
1770 sljit_si dst
, sljit_sw dstw
,
1771 sljit_si src1
, sljit_sw src1w
,
1772 sljit_si src2
, sljit_sw src2w
)
1775 sljit_si dst_r
, done
= 0;
	/* These cases are better left to be handled the normal way. */
1779 if (dst
== src1
&& dstw
== src1w
)
1780 return SLJIT_ERR_UNSUPPORTED
;
1781 if (dst
== src2
&& dstw
== src2w
)
1782 return SLJIT_ERR_UNSUPPORTED
;
1785 dst_r
= (dst
<= TMP_REGISTER
) ? dst
: TMP_REGISTER
;
1787 if (src1
<= TMP_REGISTER
) {
1788 if (src2
<= TMP_REGISTER
|| src2
== TMP_REGISTER
) {
1789 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, SLJIT_MEM2(src1
, src2
), 0);
1794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1795 if ((src2
& SLJIT_IMM
) && (compiler
->mode32
|| IS_HALFWORD(src2w
))) {
1796 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, SLJIT_MEM1(src1
), (sljit_si
)src2w
);
1798 if (src2
& SLJIT_IMM
) {
1799 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, SLJIT_MEM1(src1
), src2w
);
1806 else if (src2
<= TMP_REGISTER
) {
1807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1808 if ((src1
& SLJIT_IMM
) && (compiler
->mode32
|| IS_HALFWORD(src1w
))) {
1809 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, SLJIT_MEM1(src2
), (sljit_si
)src1w
);
1811 if (src1
& SLJIT_IMM
) {
1812 inst
= emit_x86_instruction(compiler
, 1, dst_r
, 0, SLJIT_MEM1(src2
), src1w
);
1821 if (dst_r
== TMP_REGISTER
)
1822 return emit_mov(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
1823 return SLJIT_SUCCESS
;
1825 return SLJIT_ERR_UNSUPPORTED
;
1828 static sljit_si
emit_cmp_binary(struct sljit_compiler
*compiler
,
1829 sljit_si src1
, sljit_sw src1w
,
1830 sljit_si src2
, sljit_sw src2w
)
1834 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1835 if (src1
== SLJIT_SCRATCH_REG1
&& (src2
& SLJIT_IMM
) && (src2w
> 127 || src2w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src2w
))) {
1837 if (src1
== SLJIT_SCRATCH_REG1
&& (src2
& SLJIT_IMM
) && (src2w
> 127 || src2w
< -128)) {
1839 BINARY_EAX_IMM(CMP_EAX_i32
, src2w
);
1840 return SLJIT_SUCCESS
;
1843 if (src1
<= TMP_REGISTER
) {
1844 if (src2
& SLJIT_IMM
) {
1845 BINARY_IMM(CMP
, CMP_rm_r
, src2w
, src1
, 0);
1848 inst
= emit_x86_instruction(compiler
, 1, src1
, 0, src2
, src2w
);
1852 return SLJIT_SUCCESS
;
1855 if (src2
<= TMP_REGISTER
&& !(src1
& SLJIT_IMM
)) {
1856 inst
= emit_x86_instruction(compiler
, 1, src2
, 0, src1
, src1w
);
1859 return SLJIT_SUCCESS
;
1862 if (src2
& SLJIT_IMM
) {
1863 if (src1
& SLJIT_IMM
) {
1864 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1865 src1
= TMP_REGISTER
;
1868 BINARY_IMM(CMP
, CMP_rm_r
, src2w
, src1
, src1w
);
1871 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1872 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1876 return SLJIT_SUCCESS
;
1879 static sljit_si
emit_test_binary(struct sljit_compiler
*compiler
,
1880 sljit_si src1
, sljit_sw src1w
,
1881 sljit_si src2
, sljit_sw src2w
)
1885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1886 if (src1
== SLJIT_SCRATCH_REG1
&& (src2
& SLJIT_IMM
) && (src2w
> 127 || src2w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src2w
))) {
1888 if (src1
== SLJIT_SCRATCH_REG1
&& (src2
& SLJIT_IMM
) && (src2w
> 127 || src2w
< -128)) {
1890 BINARY_EAX_IMM(TEST_EAX_i32
, src2w
);
1891 return SLJIT_SUCCESS
;
1894 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1895 if (src2
== SLJIT_SCRATCH_REG1
&& (src2
& SLJIT_IMM
) && (src1w
> 127 || src1w
< -128) && (compiler
->mode32
|| IS_HALFWORD(src1w
))) {
1897 if (src2
== SLJIT_SCRATCH_REG1
&& (src1
& SLJIT_IMM
) && (src1w
> 127 || src1w
< -128)) {
1899 BINARY_EAX_IMM(TEST_EAX_i32
, src1w
);
1900 return SLJIT_SUCCESS
;
1903 if (src1
<= TMP_REGISTER
) {
1904 if (src2
& SLJIT_IMM
) {
1905 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1906 if (IS_HALFWORD(src2w
) || compiler
->mode32
) {
1907 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, src2w
, src1
, 0);
1912 FAIL_IF(emit_load_imm64(compiler
, TMP_REG2
, src2w
));
1913 inst
= emit_x86_instruction(compiler
, 1, TMP_REG2
, 0, src1
, 0);
1918 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, src2w
, src1
, 0);
1924 inst
= emit_x86_instruction(compiler
, 1, src1
, 0, src2
, src2w
);
1928 return SLJIT_SUCCESS
;
1931 if (src2
<= TMP_REGISTER
) {
1932 if (src1
& SLJIT_IMM
) {
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 if (IS_HALFWORD(src1w
) || compiler
->mode32
) {
1935 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, src1w
, src2
, 0);
1940 FAIL_IF(emit_load_imm64(compiler
, TMP_REG2
, src1w
));
1941 inst
= emit_x86_instruction(compiler
, 1, TMP_REG2
, 0, src2
, 0);
1946 inst
= emit_x86_instruction(compiler
, 1, src1
, src1w
, src2
, 0);
1952 inst
= emit_x86_instruction(compiler
, 1, src2
, 0, src1
, src1w
);
1956 return SLJIT_SUCCESS
;
1959 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
1960 if (src2
& SLJIT_IMM
) {
1961 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1962 if (IS_HALFWORD(src2w
) || compiler
->mode32
) {
1963 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, src2w
, TMP_REGISTER
, 0);
1968 FAIL_IF(emit_load_imm64(compiler
, TMP_REG2
, src2w
));
1969 inst
= emit_x86_instruction(compiler
, 1, TMP_REG2
, 0, TMP_REGISTER
, 0);
1974 inst
= emit_x86_instruction(compiler
, 1, SLJIT_IMM
, src2w
, TMP_REGISTER
, 0);
1980 inst
= emit_x86_instruction(compiler
, 1, TMP_REGISTER
, 0, src2
, src2w
);
1984 return SLJIT_SUCCESS
;
1987 static sljit_si
emit_shift(struct sljit_compiler
*compiler
,
1989 sljit_si dst
, sljit_sw dstw
,
1990 sljit_si src1
, sljit_sw src1w
,
1991 sljit_si src2
, sljit_sw src2w
)
1995 if ((src2
& SLJIT_IMM
) || (src2
== SLJIT_PREF_SHIFT_REG
)) {
1996 if (dst
== src1
&& dstw
== src1w
) {
1997 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, src2
, src2w
, dst
, dstw
);
2000 return SLJIT_SUCCESS
;
2002 if (dst
== SLJIT_UNUSED
) {
2003 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
2004 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, src2
, src2w
, TMP_REGISTER
, 0);
2007 return SLJIT_SUCCESS
;
2009 if (dst
== SLJIT_PREF_SHIFT_REG
&& src2
== SLJIT_PREF_SHIFT_REG
) {
2010 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
2011 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
2014 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
2015 return SLJIT_SUCCESS
;
2017 if (dst
<= TMP_REGISTER
) {
2018 EMIT_MOV(compiler
, dst
, 0, src1
, src1w
);
2019 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, src2
, src2w
, dst
, 0);
2022 return SLJIT_SUCCESS
;
2025 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
2026 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, src2
, src2w
, TMP_REGISTER
, 0);
2029 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
2030 return SLJIT_SUCCESS
;
2033 if (dst
== SLJIT_PREF_SHIFT_REG
) {
2034 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
2035 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, src2
, src2w
);
2036 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
2039 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
2041 else if (dst
<= TMP_REGISTER
&& dst
!= src2
&& !ADDRESSING_DEPENDS_ON(src2
, dst
)) {
2043 EMIT_MOV(compiler
, dst
, 0, src1
, src1w
);
2044 EMIT_MOV(compiler
, TMP_REGISTER
, 0, SLJIT_PREF_SHIFT_REG
, 0);
2045 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, src2
, src2w
);
2046 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_PREF_SHIFT_REG
, 0, dst
, 0);
2049 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must make sure everything works even then. */
2054 EMIT_MOV(compiler
, TMP_REGISTER
, 0, src1
, src1w
);
2055 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2056 EMIT_MOV(compiler
, TMP_REG2
, 0, SLJIT_PREF_SHIFT_REG
, 0);
2058 /* [esp+0] contains the flags. */
2059 EMIT_MOV(compiler
, SLJIT_MEM1(SLJIT_LOCALS_REG
), sizeof(sljit_sw
), SLJIT_PREF_SHIFT_REG
, 0);
2061 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, src2
, src2w
);
2062 inst
= emit_x86_instruction(compiler
, 1 | EX86_SHIFT_INS
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REGISTER
, 0);
2065 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2066 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, TMP_REG2
, 0);
2068 EMIT_MOV(compiler
, SLJIT_PREF_SHIFT_REG
, 0, SLJIT_MEM1(SLJIT_LOCALS_REG
), sizeof(sljit_sw
));
2070 EMIT_MOV(compiler
, dst
, dstw
, TMP_REGISTER
, 0);
2073 return SLJIT_SUCCESS
;
2076 static sljit_si
emit_shift_with_flags(struct sljit_compiler
*compiler
,
2077 sljit_ub mode
, sljit_si set_flags
,
2078 sljit_si dst
, sljit_sw dstw
,
2079 sljit_si src1
, sljit_sw src1w
,
2080 sljit_si src2
, sljit_sw src2w
)
2082 /* The CPU does not set flags if the shift count is 0. */
2083 if (src2
& SLJIT_IMM
) {
2084 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2085 if ((src2w
& 0x3f) != 0 || (compiler
->mode32
&& (src2w
& 0x1f) != 0))
2086 return emit_shift(compiler
, mode
, dst
, dstw
, src1
, src1w
, src2
, src2w
);
2088 if ((src2w
& 0x1f) != 0)
2089 return emit_shift(compiler
, mode
, dst
, dstw
, src1
, src1w
, src2
, src2w
);
2092 return emit_mov(compiler
, dst
, dstw
, src1
, src1w
);
2093 /* OR dst, src, 0 */
2094 return emit_cum_binary(compiler
, OR_r_rm
, OR_rm_r
, OR
, OR_EAX_i32
,
2095 dst
, dstw
, src1
, src1w
, SLJIT_IMM
, 0);
2099 return emit_shift(compiler
, mode
, dst
, dstw
, src1
, src1w
, src2
, src2w
);
2101 if (!(dst
<= TMP_REGISTER
))
2102 FAIL_IF(emit_cmp_binary(compiler
, src1
, src1w
, SLJIT_IMM
, 0));
2104 FAIL_IF(emit_shift(compiler
,mode
, dst
, dstw
, src1
, src1w
, src2
, src2w
));
2106 if (dst
<= TMP_REGISTER
)
2107 return emit_cmp_binary(compiler
, dst
, dstw
, SLJIT_IMM
, 0);
2108 return SLJIT_SUCCESS
;
2111 SLJIT_API_FUNC_ATTRIBUTE sljit_si
sljit_emit_op2(struct sljit_compiler
*compiler
, sljit_si op
,
2112 sljit_si dst
, sljit_sw dstw
,
2113 sljit_si src1
, sljit_sw src1w
,
2114 sljit_si src2
, sljit_sw src2w
)
2117 check_sljit_emit_op2(compiler
, op
, dst
, dstw
, src1
, src1w
, src2
, src2w
);
2118 ADJUST_LOCAL_OFFSET(dst
, dstw
);
2119 ADJUST_LOCAL_OFFSET(src1
, src1w
);
2120 ADJUST_LOCAL_OFFSET(src2
, src2w
);
2122 CHECK_EXTRA_REGS(dst
, dstw
, (void)0);
2123 CHECK_EXTRA_REGS(src1
, src1w
, (void)0);
2124 CHECK_EXTRA_REGS(src2
, src2w
, (void)0);
2125 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2126 compiler
->mode32
= op
& SLJIT_INT_OP
;
2129 if (GET_OPCODE(op
) >= SLJIT_MUL
) {
2130 if (SLJIT_UNLIKELY(GET_FLAGS(op
)))
2131 compiler
->flags_saved
= 0;
2132 else if (SLJIT_UNLIKELY(op
& SLJIT_KEEP_FLAGS
) && !compiler
->flags_saved
)
2133 FAIL_IF(emit_save_flags(compiler
));
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

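/* Usage sketch (illustrative only, not part of the library): the bytes passed
   to sljit_emit_op_custom() are copied verbatim into the code stream, e.g. a
   two byte PAUSE instruction:

     sljit_ub pause_ins[] = { 0xf3, 0x90 };
     sljit_emit_op_custom(compiler, pause_ins, 2);
*/
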
/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;
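
/* The aligned sse2_buffer holds the masks used by SLJIT_NEGD / SLJIT_ABSD
   below: a single precision sign mask at +0, a single precision absolute
   value mask at +4, and the double precision equivalents at +8 and +12
   (offsets in sljit_si units). */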

static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
#else /* SLJIT_SSE2 */
	return 0;
#endif /* SLJIT_SSE2 */
}

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

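/* Prefix selection in the two helpers above: the F3/F2 prefixes pick the
   scalar single/double precision form of an opcode (e.g. MOVSS vs. MOVSD),
   while the 66 prefix selects the packed double forms used by the logic
   helper, such as ANDPD, XORPD and UCOMISD. */
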
static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		compiler->flags_saved = 0;
		if (dst <= SLJIT_FLOAT_REG6)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
	}

	if (op == SLJIT_MOVD) {
		if (dst <= SLJIT_FLOAT_REG6)
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (src <= SLJIT_FLOAT_REG6)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

#else

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

#endif

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

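	/* Byte counts: on x86-32 an unconditional jmp rel32 takes 5 bytes and a
	   conditional jcc rel32 takes 6. On x86-64 the worst case is a 10 byte
	   mov reg, imm64 plus a 3 byte indirect jump/call, with 2 more bytes for
	   the short conditional jump that skips it. */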
	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

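	/* get_jump_code() returns the second opcode byte of the jcc encoding
	   (0F 8x); adding 0x10 yields the matching setcc byte (0F 9x), e.g.
	   JE 0F 84 becomes SETE 0F 94. */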
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REGISTER];
		*inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REGISTER)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && dst <= TMP_REGISTER) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
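			/* Only EAX, ECX, EDX and EBX have byte-sized forms (AL..BL) on
			   x86-32, so setcc can target the destination directly; other
			   destinations are handled through EAX or CMOV below. */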
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_SCRATCH_REG1) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REGISTER to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
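		/* Immediates in lea/arithmetic encodings on x86-64 are sign-extended
		   32 bit values, so an offset outside the +-2GB range is first
		   loaded into TMP_REGISTER and added as a register operand. */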
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
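	/* On x86-32 the patched location is the rel32 field of a call/jmp, which
	   is relative to the end of that 4 byte field, hence the (addr + 4). */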
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}
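
/* Illustrative patching flow (a sketch, not part of this file): a constant
   emitted with sljit_emit_const() can be rewritten in place after the code
   has been generated, assuming the sljit_get_const_addr() helper declared in
   sljitLir.h:

     struct sljit_const *c = sljit_emit_const(compiler, SLJIT_SCRATCH_REG1, 0, 0);
     ... code = sljit_generate_code(compiler); ...
     sljit_set_const(sljit_get_const_addr(c), new_value);
*/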