The missing sljit_get_float_register_index function is added.
[sljit.git] / sljit_src / sljitNativeX86_common.c
blob ceb3d675b72a40615a33399b553de55a30088218
1 /*
2 * Stack-less Just-In-Time compiler
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
29 return "x86" SLJIT_CPUINFO;
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
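   Indexes 8-15 do not fit in the 3 bit reg/rm fields of the ModRM
   byte, so a REX prefix supplies the missing fourth bit for each
   field. For example, mov rax, r15 is encoded as 4C 89 F8:
   REX.W | REX.R (0x4c), then MOV_rm_r (0x89), then the ModRM byte
   0xf8 == MOD_REG | (7 << 3) | 0.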
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
66 /* Last register + 1. */
67 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
70 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
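/* The array index is the SLJIT register number and the value is the
   machine encoding listed above. A zero entry is either EAX or a
   virtual register with no hardware mapping: the extra (EREG)
   registers live in stack slots and are rewritten into memory
   operands by CHECK_EXTRA_REGS below. */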
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
75 w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
76 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
77 do; \
78 } \
79 else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
80 w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
81 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
82 do; \
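/* CHECK_EXTRA_REGS rewrites an access to one of the virtual "extra"
   registers into a stack slot access, since x86-32 does not have
   enough hardware registers: e.g. SLJIT_TEMPORARY_EREG1 becomes
   SLJIT_MEM1(SLJIT_LOCALS_REG) with offset compiler->scratches_start,
   and the "do" argument runs whenever such a rewrite happens. */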
85 #else /* SLJIT_CONFIG_X86_32 */
87 /* Last register + 1. */
88 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
89 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
90 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
93 Note: avoid using r12 and r13 for memory addressing;
94 therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
95 #ifndef _WIN64
96 /* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
98 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
100 /* low-map. reg_map & 0x7. */
101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
102 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
104 #else
105 /* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
107 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
109 /* low-map. reg_map & 0x7. */
110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
111 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
113 #endif
115 #define REX_W 0x48
116 #define REX_R 0x44
117 #define REX_X 0x42
118 #define REX_B 0x41
119 #define REX 0x40
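/* A REX prefix has the bit layout 0100WRXB: W selects a 64 bit operand
   size, while R, X and B extend the ModRM reg field, the SIB index and
   the rm/base field, respectively. The values above are OR-ed together,
   e.g. REX_W | REX_B == 0x49. */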
121 #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
122 #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
124 #define CHECK_EXTRA_REGS(p, w, do)
126 #endif /* SLJIT_CONFIG_X86_32 */
128 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
129 #define TMP_FREG (0)
130 #endif
132 /* Size flags for emit_x86_instruction: */
133 #define EX86_BIN_INS 0x0010
134 #define EX86_SHIFT_INS 0x0020
135 #define EX86_REX 0x0040
136 #define EX86_NO_REXW 0x0080
137 #define EX86_BYTE_ARG 0x0100
138 #define EX86_HALF_ARG 0x0200
139 #define EX86_PREF_66 0x0400
141 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
142 #define EX86_SSE2 0x0800
143 #define EX86_PREF_F2 0x1000
144 #define EX86_PREF_F3 0x2000
145 #endif
147 /* --------------------------------------------------------------------- */
148 /* Instruction forms */
149 /* --------------------------------------------------------------------- */
151 #define ADD (/* BINARY */ 0 << 3)
152 #define ADD_EAX_i32 0x05
153 #define ADD_r_rm 0x03
154 #define ADD_rm_r 0x01
155 #define ADDSD_x_xm 0x58
156 #define ADC (/* BINARY */ 2 << 3)
157 #define ADC_EAX_i32 0x15
158 #define ADC_r_rm 0x13
159 #define ADC_rm_r 0x11
160 #define AND (/* BINARY */ 4 << 3)
161 #define AND_EAX_i32 0x25
162 #define AND_r_rm 0x23
163 #define AND_rm_r 0x21
164 #define ANDPD_x_xm 0x54
165 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
166 #define CALL_i32 0xe8
167 #define CALL_rm (/* GROUP_FF */ 2 << 3)
168 #define CDQ 0x99
169 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
170 #define CMP (/* BINARY */ 7 << 3)
171 #define CMP_EAX_i32 0x3d
172 #define CMP_r_rm 0x3b
173 #define CMP_rm_r 0x39
174 #define DIV (/* GROUP_F7 */ 6 << 3)
175 #define DIVSD_x_xm 0x5e
176 #define INT3 0xcc
177 #define IDIV (/* GROUP_F7 */ 7 << 3)
178 #define IMUL (/* GROUP_F7 */ 5 << 3)
179 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
180 #define IMUL_r_rm_i8 0x6b
181 #define IMUL_r_rm_i32 0x69
182 #define JE_i8 0x74
183 #define JMP_i8 0xeb
184 #define JMP_i32 0xe9
185 #define JMP_rm (/* GROUP_FF */ 4 << 3)
186 #define LEA_r_m 0x8d
187 #define MOV_r_rm 0x8b
188 #define MOV_r_i32 0xb8
189 #define MOV_rm_r 0x89
190 #define MOV_rm_i32 0xc7
191 #define MOV_rm8_i8 0xc6
192 #define MOV_rm8_r8 0x88
193 #define MOVSD_x_xm 0x10
194 #define MOVSD_xm_x 0x11
195 #define MOVSXD_r_rm 0x63
196 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
197 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
198 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
199 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
200 #define MUL (/* GROUP_F7 */ 4 << 3)
201 #define MULSD_x_xm 0x59
202 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
203 #define NOP 0x90
204 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
205 #define OR (/* BINARY */ 1 << 3)
206 #define OR_r_rm 0x0b
207 #define OR_EAX_i32 0x0d
208 #define OR_rm_r 0x09
209 #define OR_rm8_r8 0x08
210 #define POP_r 0x58
211 #define POP_rm 0x8f
212 #define POPF 0x9d
213 #define PUSH_i32 0x68
214 #define PUSH_r 0x50
215 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
216 #define PUSHF 0x9c
217 #define RET_near 0xc3
218 #define RET_i16 0xc2
219 #define SBB (/* BINARY */ 3 << 3)
220 #define SBB_EAX_i32 0x1d
221 #define SBB_r_rm 0x1b
222 #define SBB_rm_r 0x19
223 #define SAR (/* SHIFT */ 7 << 3)
224 #define SHL (/* SHIFT */ 4 << 3)
225 #define SHR (/* SHIFT */ 5 << 3)
226 #define SUB (/* BINARY */ 5 << 3)
227 #define SUB_EAX_i32 0x2d
228 #define SUB_r_rm 0x2b
229 #define SUB_rm_r 0x29
230 #define SUBSD_x_xm 0x5c
231 #define TEST_EAX_i32 0xa9
232 #define TEST_rm_r 0x85
233 #define UCOMISD_x_xm 0x2e
234 #define XCHG_EAX_r 0x90
235 #define XCHG_r_rm 0x87
236 #define XOR (/* BINARY */ 6 << 3)
237 #define XOR_EAX_i32 0x35
238 #define XOR_r_rm 0x33
239 #define XOR_rm_r 0x31
240 #define XORPD_x_xm 0x57
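/* Naming convention: _r = register operand, _rm = register or memory
   operand encoded in ModRM, _i8/_i32 = immediate size, _x/_xm = SSE2
   register or memory. Values written as (N << 3) are not opcodes but
   /N opcode extensions stored in the reg field of the ModRM byte; they
   are used together with the GROUP_xx base opcodes below. */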
242 #define GROUP_0F 0x0f
243 #define GROUP_F7 0xf7
244 #define GROUP_FF 0xff
245 #define GROUP_BINARY_81 0x81
246 #define GROUP_BINARY_83 0x83
247 #define GROUP_SHIFT_1 0xd1
248 #define GROUP_SHIFT_N 0xc1
249 #define GROUP_SHIFT_CL 0xd3
251 #define MOD_REG 0xc0
252 #define MOD_DISP8 0x40
254 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
256 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
257 #define POP_REG(r) (*inst++ = (POP_r + (r)))
258 #define RET() (*inst++ = (RET_near))
259 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
260 /* r32, r/m32 */
261 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
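/* Example: MOV_RM(MOD_REG, reg_map[dst], reg_map[src]) emits the two
   byte sequence 8B /r, i.e. a plain register to register
   "mov dst, src" without any prefix. */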
263 /* Multithreading does not affect these static variables, since they store
264 built-in CPU features. Therefore it is harmless if several threads
265 overwrite them while detecting the CPU features at the same time. */
266 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
267 static sljit_si cpu_has_sse2 = -1;
268 #endif
269 static sljit_si cpu_has_cmov = -1;
271 #if defined(_MSC_VER) && _MSC_VER >= 1400
272 #include <intrin.h>
273 #endif
275 static void get_cpu_features(void)
277 sljit_ui features;
279 #if defined(_MSC_VER) && _MSC_VER >= 1400
281 int CPUInfo[4];
282 __cpuid(CPUInfo, 1);
283 features = (sljit_ui)CPUInfo[3];
285 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
287 /* AT&T syntax. */
288 __asm__ (
289 "movl $0x1, %%eax\n"
290 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
291 /* On x86-32, there is no red zone, so this
292 should work (no need for a local variable). */
293 "push %%ebx\n"
294 #endif
295 "cpuid\n"
296 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
297 "pop %%ebx\n"
298 #endif
299 "movl %%edx, %0\n"
300 : "=g" (features)
302 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
303 : "%eax", "%ecx", "%edx"
304 #else
305 : "%rax", "%rbx", "%rcx", "%rdx"
306 #endif
309 #else /* _MSC_VER && _MSC_VER >= 1400 */
311 /* Intel syntax. */
312 __asm {
313 mov eax, 1
314 cpuid
315 mov features, edx
318 #endif /* _MSC_VER && _MSC_VER >= 1400 */
320 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
321 cpu_has_sse2 = (features >> 26) & 0x1;
322 #endif
323 cpu_has_cmov = (features >> 15) & 0x1;
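/* The feature word is EDX of CPUID leaf 1: bit 26 indicates SSE2
   support and bit 15 indicates CMOV support. */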
326 static sljit_ub get_jump_code(sljit_si type)
328 switch (type) {
329 case SLJIT_C_EQUAL:
330 case SLJIT_C_FLOAT_EQUAL:
331 return 0x84 /* je */;
333 case SLJIT_C_NOT_EQUAL:
334 case SLJIT_C_FLOAT_NOT_EQUAL:
335 return 0x85 /* jne */;
337 case SLJIT_C_LESS:
338 case SLJIT_C_FLOAT_LESS:
339 return 0x82 /* jc */;
341 case SLJIT_C_GREATER_EQUAL:
342 case SLJIT_C_FLOAT_GREATER_EQUAL:
343 return 0x83 /* jae */;
345 case SLJIT_C_GREATER:
346 case SLJIT_C_FLOAT_GREATER:
347 return 0x87 /* jnbe */;
349 case SLJIT_C_LESS_EQUAL:
350 case SLJIT_C_FLOAT_LESS_EQUAL:
351 return 0x86 /* jbe */;
353 case SLJIT_C_SIG_LESS:
354 return 0x8c /* jl */;
356 case SLJIT_C_SIG_GREATER_EQUAL:
357 return 0x8d /* jnl */;
359 case SLJIT_C_SIG_GREATER:
360 return 0x8f /* jnle */;
362 case SLJIT_C_SIG_LESS_EQUAL:
363 return 0x8e /* jle */;
365 case SLJIT_C_OVERFLOW:
366 case SLJIT_C_MUL_OVERFLOW:
367 return 0x80 /* jo */;
369 case SLJIT_C_NOT_OVERFLOW:
370 case SLJIT_C_MUL_NOT_OVERFLOW:
371 return 0x81 /* jno */;
373 case SLJIT_C_FLOAT_UNORDERED:
374 return 0x8a /* jp */;
376 case SLJIT_C_FLOAT_ORDERED:
377 return 0x8b /* jpo */;
379 return 0;
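/* The returned values are the second byte of the two byte 0F 8x near
   conditional jumps. The one byte short forms (7x) are obtained by
   subtracting 0x10, which generate_near_jump_code relies on. */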
382 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
384 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
385 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
386 #endif
388 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
390 sljit_si short_jump;
391 sljit_uw label_addr;
393 if (jump->flags & JUMP_LABEL)
394 label_addr = (sljit_uw)(code + jump->u.label->size);
395 else
396 label_addr = jump->u.target;
397 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
399 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
400 if ((sljit_sw)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_sw)(label_addr - (jump->addr + 1)) < -0x80000000ll)
401 return generate_far_jump_code(jump, code_ptr, type);
402 #endif
404 if (type == SLJIT_JUMP) {
405 if (short_jump)
406 *code_ptr++ = JMP_i8;
407 else
408 *code_ptr++ = JMP_i32;
409 jump->addr++;
411 else if (type >= SLJIT_FAST_CALL) {
412 short_jump = 0;
413 *code_ptr++ = CALL_i32;
414 jump->addr++;
416 else if (short_jump) {
417 *code_ptr++ = get_jump_code(type) - 0x10;
418 jump->addr++;
420 else {
421 *code_ptr++ = GROUP_0F;
422 *code_ptr++ = get_jump_code(type);
423 jump->addr += 2;
426 if (short_jump) {
427 jump->flags |= PATCH_MB;
428 code_ptr += sizeof(sljit_sb);
429 } else {
430 jump->flags |= PATCH_MW;
431 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
432 code_ptr += sizeof(sljit_sw);
433 #else
434 code_ptr += sizeof(sljit_si);
435 #endif
438 return code_ptr;
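/* On return, jump->addr points at the (still empty) displacement field
   and PATCH_MB/PATCH_MW record its size; the actual displacement is
   filled in by the patching loop of sljit_generate_code below. */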
441 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
443 struct sljit_memory_fragment *buf;
444 sljit_ub *code;
445 sljit_ub *code_ptr;
446 sljit_ub *buf_ptr;
447 sljit_ub *buf_end;
448 sljit_ub len;
450 struct sljit_label *label;
451 struct sljit_jump *jump;
452 struct sljit_const *const_;
454 CHECK_ERROR_PTR();
455 check_sljit_generate_code(compiler);
456 reverse_buf(compiler);
458 /* Second code generation pass. */
459 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
460 PTR_FAIL_WITH_EXEC_IF(code);
461 buf = compiler->buf;
463 code_ptr = code;
464 label = compiler->labels;
465 jump = compiler->jumps;
466 const_ = compiler->consts;
467 do {
468 buf_ptr = buf->memory;
469 buf_end = buf_ptr + buf->used_size;
470 do {
471 len = *buf_ptr++;
472 if (len > 0) {
473 /* The code is already generated. */
474 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
475 code_ptr += len;
476 buf_ptr += len;
478 else {
479 if (*buf_ptr >= 4) {
480 jump->addr = (sljit_uw)code_ptr;
481 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
482 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
483 else
484 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
485 jump = jump->next;
487 else if (*buf_ptr == 0) {
488 label->addr = (sljit_uw)code_ptr;
489 label->size = code_ptr - code;
490 label = label->next;
492 else if (*buf_ptr == 1) {
493 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
494 const_ = const_->next;
496 else {
497 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
498 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
499 buf_ptr++;
500 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
501 code_ptr += sizeof(sljit_sw);
502 buf_ptr += sizeof(sljit_sw) - 1;
503 #else
504 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
505 buf_ptr += sizeof(sljit_sw);
506 #endif
508 buf_ptr++;
510 } while (buf_ptr < buf_end);
511 SLJIT_ASSERT(buf_ptr == buf_end);
512 buf = buf->next;
513 } while (buf);
515 SLJIT_ASSERT(!label);
516 SLJIT_ASSERT(!jump);
517 SLJIT_ASSERT(!const_);
519 jump = compiler->jumps;
520 while (jump) {
521 if (jump->flags & PATCH_MB) {
522 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
523 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
524 } else if (jump->flags & PATCH_MW) {
525 if (jump->flags & JUMP_LABEL) {
526 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
527 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
528 #else
529 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
530 *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
531 #endif
533 else {
534 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
535 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
536 #else
537 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
538 *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
539 #endif
542 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
543 else if (jump->flags & PATCH_MD)
544 *(sljit_sw*)jump->addr = jump->u.label->addr;
545 #endif
547 jump = jump->next;
550 /* Maybe we waste some space because of short jumps. */
551 SLJIT_ASSERT(code_ptr <= code + compiler->size);
552 compiler->error = SLJIT_ERR_COMPILED;
553 compiler->executable_size = code_ptr - code;
554 return (void*)code;
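/* Code generation is a two pass process: the first pass (the various
   sljit_emit_* calls) measured the worst case size of every instruction,
   and this second pass copies the buffered machine code while resolving
   labels, constants and jumps. Short jumps may make the final code
   smaller than compiler->size, hence the <= assertion above. */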
557 /* --------------------------------------------------------------------- */
558 /* Operators */
559 /* --------------------------------------------------------------------- */
561 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
562 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
563 sljit_si dst, sljit_sw dstw,
564 sljit_si src1, sljit_sw src1w,
565 sljit_si src2, sljit_sw src2w);
567 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
568 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
569 sljit_si dst, sljit_sw dstw,
570 sljit_si src1, sljit_sw src1w,
571 sljit_si src2, sljit_sw src2w);
573 static sljit_si emit_mov(struct sljit_compiler *compiler,
574 sljit_si dst, sljit_sw dstw,
575 sljit_si src, sljit_sw srcw);
577 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
579 sljit_ub *inst;
581 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
582 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
583 FAIL_IF(!inst);
584 INC_SIZE(5);
585 #else
586 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
587 FAIL_IF(!inst);
588 INC_SIZE(6);
589 *inst++ = REX_W;
590 #endif
591 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
592 *inst++ = 0x64;
593 *inst++ = 0x24;
594 *inst++ = (sljit_ub)sizeof(sljit_sw);
595 *inst++ = PUSHF;
596 compiler->flags_saved = 1;
597 return SLJIT_SUCCESS;
600 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
602 sljit_ub *inst;
604 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
605 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
606 FAIL_IF(!inst);
607 INC_SIZE(5);
608 *inst++ = POPF;
609 #else
610 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
611 FAIL_IF(!inst);
612 INC_SIZE(6);
613 *inst++ = POPF;
614 *inst++ = REX_W;
615 #endif
616 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
617 *inst++ = 0x64;
618 *inst++ = 0x24;
619 *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
620 compiler->flags_saved = keep_flags;
621 return SLJIT_SUCCESS;
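/* In both helpers the lea/pushf (or popf/lea) pair leaves the stack
   pointer net unchanged: the word at the top of the stack serves as a
   reserved scratch slot, and EFLAGS is stored into or reloaded from
   that slot without disturbing anything else on the stack. */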
624 #ifdef _WIN32
625 #include <malloc.h>
627 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
629 /* Workaround for calling the internal _chkstk() function on Windows.
630 This function touches every 4k page belonging to the requested stack
631 space, whose size is passed in local_size. This is necessary on Windows,
632 where the stack can only grow in 4k steps. If the stack is already large
633 enough, this function just burns CPU cycles; but since that cannot be
634 known in advance, it must always be called. I think this is a bad design
635 in general, even if it has its reasons. */
636 *(sljit_si*)alloca(local_size) = 0;
639 #endif
641 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
642 #include "sljitNativeX86_32.c"
643 #else
644 #include "sljitNativeX86_64.c"
645 #endif
647 static sljit_si emit_mov(struct sljit_compiler *compiler,
648 sljit_si dst, sljit_sw dstw,
649 sljit_si src, sljit_sw srcw)
651 sljit_ub* inst;
653 if (dst == SLJIT_UNUSED) {
654 /* No destination, doesn't need to setup flags. */
655 if (src & SLJIT_MEM) {
656 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
657 FAIL_IF(!inst);
658 *inst = MOV_r_rm;
660 return SLJIT_SUCCESS;
662 if (src <= TMP_REGISTER) {
663 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
664 FAIL_IF(!inst);
665 *inst = MOV_rm_r;
666 return SLJIT_SUCCESS;
668 if (src & SLJIT_IMM) {
669 if (dst <= TMP_REGISTER) {
670 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
671 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
672 #else
673 if (!compiler->mode32) {
674 if (NOT_HALFWORD(srcw))
675 return emit_load_imm64(compiler, dst, srcw);
677 else
678 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
679 #endif
681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
682 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
683 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
684 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
685 FAIL_IF(!inst);
686 *inst = MOV_rm_r;
687 return SLJIT_SUCCESS;
689 #endif
690 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
691 FAIL_IF(!inst);
692 *inst = MOV_rm_i32;
693 return SLJIT_SUCCESS;
695 if (dst <= TMP_REGISTER) {
696 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
697 FAIL_IF(!inst);
698 *inst = MOV_r_rm;
699 return SLJIT_SUCCESS;
702 /* Memory to memory move. Requires two instructions. */
703 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
704 FAIL_IF(!inst);
705 *inst = MOV_r_rm;
706 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
707 FAIL_IF(!inst);
708 *inst = MOV_rm_r;
709 return SLJIT_SUCCESS;
712 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
713 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
715 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
717 sljit_ub *inst;
718 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
719 sljit_si size;
720 #endif
722 CHECK_ERROR();
723 check_sljit_emit_op0(compiler, op);
725 switch (GET_OPCODE(op)) {
726 case SLJIT_BREAKPOINT:
727 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
728 FAIL_IF(!inst);
729 INC_SIZE(1);
730 *inst = INT3;
731 break;
732 case SLJIT_NOP:
733 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
734 FAIL_IF(!inst);
735 INC_SIZE(1);
736 *inst = NOP;
737 break;
738 case SLJIT_UMUL:
739 case SLJIT_SMUL:
740 case SLJIT_UDIV:
741 case SLJIT_SDIV:
742 compiler->flags_saved = 0;
743 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
744 #ifdef _WIN64
745 SLJIT_COMPILE_ASSERT(
746 reg_map[SLJIT_SCRATCH_REG1] == 0
747 && reg_map[SLJIT_SCRATCH_REG2] == 2
748 && reg_map[TMP_REGISTER] > 7,
749 invalid_register_assignment_for_div_mul);
750 #else
751 SLJIT_COMPILE_ASSERT(
752 reg_map[SLJIT_SCRATCH_REG1] == 0
753 && reg_map[SLJIT_SCRATCH_REG2] < 7
754 && reg_map[TMP_REGISTER] == 2,
755 invalid_register_assignment_for_div_mul);
756 #endif
757 compiler->mode32 = op & SLJIT_INT_OP;
758 #endif
760 op = GET_OPCODE(op);
761 if (op == SLJIT_UDIV) {
762 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
763 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0);
764 inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
765 #else
766 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
767 #endif
768 FAIL_IF(!inst);
769 *inst = XOR_r_rm;
772 if (op == SLJIT_SDIV) {
773 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
774 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0);
775 #endif
777 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
778 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
779 FAIL_IF(!inst);
780 INC_SIZE(1);
781 *inst = CDQ;
782 #else
783 if (compiler->mode32) {
784 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
785 FAIL_IF(!inst);
786 INC_SIZE(1);
787 *inst = CDQ;
788 } else {
789 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
790 FAIL_IF(!inst);
791 INC_SIZE(2);
792 *inst++ = REX_W;
793 *inst = CDQ;
795 #endif
798 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
799 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
800 FAIL_IF(!inst);
801 INC_SIZE(2);
802 *inst++ = GROUP_F7;
803 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_SCRATCH_REG2]);
804 #else
805 #ifdef _WIN64
806 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
807 #else
808 size = (!compiler->mode32) ? 3 : 2;
809 #endif
810 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
811 FAIL_IF(!inst);
812 INC_SIZE(size);
813 #ifdef _WIN64
814 if (!compiler->mode32)
815 *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
816 else if (op >= SLJIT_UDIV)
817 *inst++ = REX_B;
818 *inst++ = GROUP_F7;
819 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_SCRATCH_REG2]);
820 #else
821 if (!compiler->mode32)
822 *inst++ = REX_W;
823 *inst++ = GROUP_F7;
824 *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
825 #endif
826 #endif
827 switch (op) {
828 case SLJIT_UMUL:
829 *inst |= MUL;
830 break;
831 case SLJIT_SMUL:
832 *inst |= IMUL;
833 break;
834 case SLJIT_UDIV:
835 *inst |= DIV;
836 break;
837 case SLJIT_SDIV:
838 *inst |= IDIV;
839 break;
841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
842 EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REGISTER, 0);
843 #endif
844 break;
847 return SLJIT_SUCCESS;
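/* The one operand F7 /4../7 mul/div forms used above implicitly take
   EAX (RAX) as the low operand and EDX (RDX) as the high half or
   remainder, which is what the SLJIT_COMPILE_ASSERTs about the
   register map guarantee. */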
850 #define ENCODE_PREFIX(prefix) \
851 do { \
852 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
853 FAIL_IF(!inst); \
854 INC_SIZE(1); \
855 *inst = (prefix); \
856 } while (0)
858 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
859 sljit_si dst, sljit_sw dstw,
860 sljit_si src, sljit_sw srcw)
862 sljit_ub* inst;
863 sljit_si dst_r;
864 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
865 sljit_si work_r;
866 #endif
868 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
869 compiler->mode32 = 0;
870 #endif
872 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
873 return SLJIT_SUCCESS; /* Empty instruction. */
875 if (src & SLJIT_IMM) {
876 if (dst <= TMP_REGISTER) {
877 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
878 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
879 #else
880 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
881 FAIL_IF(!inst);
882 *inst = MOV_rm_i32;
883 return SLJIT_SUCCESS;
884 #endif
886 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
887 FAIL_IF(!inst);
888 *inst = MOV_rm8_i8;
889 return SLJIT_SUCCESS;
892 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
894 if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) {
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 if (reg_map[src] >= 4) {
897 SLJIT_ASSERT(dst_r == TMP_REGISTER);
898 EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
899 } else
900 dst_r = src;
901 #else
902 dst_r = src;
903 #endif
905 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
906 else if (src <= TMP_REGISTER && reg_map[src] >= 4) {
907 /* src, dst are registers. */
908 SLJIT_ASSERT(dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REGISTER);
909 if (reg_map[dst] < 4) {
910 if (dst != src)
911 EMIT_MOV(compiler, dst, 0, src, 0);
912 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
913 FAIL_IF(!inst);
914 *inst++ = GROUP_0F;
915 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
917 else {
918 if (dst != src)
919 EMIT_MOV(compiler, dst, 0, src, 0);
920 if (sign) {
921 /* shl reg, 24 */
922 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
923 FAIL_IF(!inst);
924 *inst |= SHL;
925 /* sar reg, 24 */
926 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
927 FAIL_IF(!inst);
928 *inst |= SAR;
930 else {
931 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
932 FAIL_IF(!inst);
933 *(inst + 1) |= AND;
936 return SLJIT_SUCCESS;
938 #endif
939 else {
940 /* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
941 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
942 FAIL_IF(!inst);
943 *inst++ = GROUP_0F;
944 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
947 if (dst & SLJIT_MEM) {
948 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
949 if (dst_r == TMP_REGISTER) {
950 /* Find an unused register whose reg_map value is < 4. */
951 if ((dst & 0xf) == SLJIT_SCRATCH_REG1) {
952 if ((dst & 0xf0) == (SLJIT_SCRATCH_REG2 << 4))
953 work_r = SLJIT_SCRATCH_REG3;
954 else
955 work_r = SLJIT_SCRATCH_REG2;
957 else {
958 if ((dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4))
959 work_r = SLJIT_SCRATCH_REG1;
960 else if ((dst & 0xf) == SLJIT_SCRATCH_REG2)
961 work_r = SLJIT_SCRATCH_REG3;
962 else
963 work_r = SLJIT_SCRATCH_REG2;
966 if (work_r == SLJIT_SCRATCH_REG1) {
967 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]);
969 else {
970 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
971 FAIL_IF(!inst);
972 *inst = XCHG_r_rm;
975 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
976 FAIL_IF(!inst);
977 *inst = MOV_rm8_r8;
979 if (work_r == SLJIT_SCRATCH_REG1) {
980 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]);
982 else {
983 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
984 FAIL_IF(!inst);
985 *inst = XCHG_r_rm;
988 else {
989 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
990 FAIL_IF(!inst);
991 *inst = MOV_rm8_r8;
993 #else
994 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
995 FAIL_IF(!inst);
996 *inst = MOV_rm8_r8;
997 #endif
1000 return SLJIT_SUCCESS;
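/* On x86-32 only AL, CL, DL and BL (encodings 0-3) are addressable as
   byte registers, hence the reg_map[...] >= 4 shuffling and the XCHG
   based fallback above. On x86-64 the EX86_REX flag forces a REX
   prefix, which makes the low byte of every register reachable. */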
1003 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
1004 sljit_si dst, sljit_sw dstw,
1005 sljit_si src, sljit_sw srcw)
1007 sljit_ub* inst;
1008 sljit_si dst_r;
1010 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1011 compiler->mode32 = 0;
1012 #endif
1014 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1015 return SLJIT_SUCCESS; /* Empty instruction. */
1017 if (src & SLJIT_IMM) {
1018 if (dst <= TMP_REGISTER) {
1019 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1020 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1021 #else
1022 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1023 FAIL_IF(!inst);
1024 *inst = MOV_rm_i32;
1025 return SLJIT_SUCCESS;
1026 #endif
1028 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1029 FAIL_IF(!inst);
1030 *inst = MOV_rm_i32;
1031 return SLJIT_SUCCESS;
1034 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1036 if ((dst & SLJIT_MEM) && src <= TMP_REGISTER)
1037 dst_r = src;
1038 else {
1039 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1040 FAIL_IF(!inst);
1041 *inst++ = GROUP_0F;
1042 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1045 if (dst & SLJIT_MEM) {
1046 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1047 FAIL_IF(!inst);
1048 *inst = MOV_rm_r;
1051 return SLJIT_SUCCESS;
1054 static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
1055 sljit_si dst, sljit_sw dstw,
1056 sljit_si src, sljit_sw srcw)
1058 sljit_ub* inst;
1060 if (dst == SLJIT_UNUSED) {
1061 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
1062 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
1063 FAIL_IF(!inst);
1064 *inst++ = GROUP_F7;
1065 *inst |= opcode;
1066 return SLJIT_SUCCESS;
1068 if (dst == src && dstw == srcw) {
1069 /* Same input and output */
1070 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1071 FAIL_IF(!inst);
1072 *inst++ = GROUP_F7;
1073 *inst |= opcode;
1074 return SLJIT_SUCCESS;
1076 if (dst <= TMP_REGISTER) {
1077 EMIT_MOV(compiler, dst, 0, src, srcw);
1078 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1079 FAIL_IF(!inst);
1080 *inst++ = GROUP_F7;
1081 *inst |= opcode;
1082 return SLJIT_SUCCESS;
1084 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
1085 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
1086 FAIL_IF(!inst);
1087 *inst++ = GROUP_F7;
1088 *inst |= opcode;
1089 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1090 return SLJIT_SUCCESS;
1093 static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
1094 sljit_si dst, sljit_sw dstw,
1095 sljit_si src, sljit_sw srcw)
1097 sljit_ub* inst;
1099 if (dst == SLJIT_UNUSED) {
1100 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
1101 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
1102 FAIL_IF(!inst);
1103 *inst++ = GROUP_F7;
1104 *inst |= NOT_rm;
1105 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
1106 FAIL_IF(!inst);
1107 *inst = OR_r_rm;
1108 return SLJIT_SUCCESS;
1110 if (dst <= TMP_REGISTER) {
1111 EMIT_MOV(compiler, dst, 0, src, srcw);
1112 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1113 FAIL_IF(!inst);
1114 *inst++ = GROUP_F7;
1115 *inst |= NOT_rm;
1116 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1117 FAIL_IF(!inst);
1118 *inst = OR_r_rm;
1119 return SLJIT_SUCCESS;
1121 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
1122 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
1123 FAIL_IF(!inst);
1124 *inst++ = GROUP_F7;
1125 *inst |= NOT_rm;
1126 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
1127 FAIL_IF(!inst);
1128 *inst = OR_r_rm;
1129 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1130 return SLJIT_SUCCESS;
1133 static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
1134 sljit_si dst, sljit_sw dstw,
1135 sljit_si src, sljit_sw srcw)
1137 sljit_ub* inst;
1138 sljit_si dst_r;
1140 SLJIT_UNUSED_ARG(op_flags);
1141 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1142 /* Just set the zero flag. */
1143 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
1144 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
1145 FAIL_IF(!inst);
1146 *inst++ = GROUP_F7;
1147 *inst |= NOT_rm;
1148 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1149 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
1150 #else
1151 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
1152 #endif
1153 FAIL_IF(!inst);
1154 *inst |= SHR;
1155 return SLJIT_SUCCESS;
1158 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1159 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, srcw);
1160 src = TMP_REGISTER;
1161 srcw = 0;
1164 inst = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
1165 FAIL_IF(!inst);
1166 *inst++ = GROUP_0F;
1167 *inst = BSR_r_rm;
1169 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1170 if (dst <= TMP_REGISTER)
1171 dst_r = dst;
1172 else {
1173 /* Find an unused temporary register. */
1174 if ((dst & 0xf) != SLJIT_SCRATCH_REG1 && (dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4))
1175 dst_r = SLJIT_SCRATCH_REG1;
1176 else if ((dst & 0xf) != SLJIT_SCRATCH_REG2 && (dst & 0xf0) != (SLJIT_SCRATCH_REG2 << 4))
1177 dst_r = SLJIT_SCRATCH_REG2;
1178 else
1179 dst_r = SLJIT_SCRATCH_REG3;
1180 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1182 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1183 #else
1184 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REG2;
1185 compiler->mode32 = 0;
1186 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1187 compiler->mode32 = op_flags & SLJIT_INT_OP;
1188 #endif
1190 if (cpu_has_cmov == -1)
1191 get_cpu_features();
1193 if (cpu_has_cmov) {
1194 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
1195 FAIL_IF(!inst);
1196 *inst++ = GROUP_0F;
1197 *inst = CMOVNE_r_rm;
1198 } else {
1199 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1200 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1201 FAIL_IF(!inst);
1202 INC_SIZE(4);
1204 *inst++ = JE_i8;
1205 *inst++ = 2;
1206 *inst++ = MOV_r_rm;
1207 *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REGISTER];
1208 #else
1209 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
1210 FAIL_IF(!inst);
1211 INC_SIZE(5);
1213 *inst++ = JE_i8;
1214 *inst++ = 3;
1215 *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REGISTER] >= 8 ? REX_B : 0);
1216 *inst++ = MOV_r_rm;
1217 *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REGISTER];
1218 #endif
1221 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1222 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1223 #else
1224 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1225 #endif
1226 FAIL_IF(!inst);
1227 *(inst + 1) |= XOR;
1229 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1230 if (dst & SLJIT_MEM) {
1231 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1232 FAIL_IF(!inst);
1233 *inst = XCHG_r_rm;
1235 #else
1236 if (dst & SLJIT_MEM)
1237 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1238 #endif
1239 return SLJIT_SUCCESS;
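/* BSR returns the index of the highest set bit, so CLZ is computed as
   (bits - 1) - index, i.e. an XOR with 31 (or 63). Since BSR leaves
   its destination undefined when the source is zero, dst_r is
   preloaded with 32 + 31 (or 64 + 63), which the final XOR turns into
   32 (or 64); the preload survives either through CMOVNE or through
   the JE-guarded mov fallback. */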
1242 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1243 sljit_si dst, sljit_sw dstw,
1244 sljit_si src, sljit_sw srcw)
1246 sljit_ub* inst;
1247 sljit_si update = 0;
1248 sljit_si op_flags = GET_ALL_FLAGS(op);
1249 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1250 sljit_si dst_is_ereg = 0;
1251 sljit_si src_is_ereg = 0;
1252 #else
1253 # define src_is_ereg 0
1254 #endif
1256 CHECK_ERROR();
1257 check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1258 ADJUST_LOCAL_OFFSET(dst, dstw);
1259 ADJUST_LOCAL_OFFSET(src, srcw);
1261 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1262 CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1263 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1264 compiler->mode32 = op_flags & SLJIT_INT_OP;
1265 #endif
1267 op = GET_OPCODE(op);
1268 if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1269 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1270 compiler->mode32 = 0;
1271 #endif
1273 if (op_flags & SLJIT_INT_OP) {
1274 if (src <= TMP_REGISTER && src == dst) {
1275 if (!TYPE_CAST_NEEDED(op))
1276 return SLJIT_SUCCESS;
1278 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1279 if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1280 op = SLJIT_MOV_UI;
1281 if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1282 op = SLJIT_MOVU_UI;
1283 if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1284 op = SLJIT_MOV_SI;
1285 if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1286 op = SLJIT_MOVU_SI;
1287 #endif
1290 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1291 if (op >= SLJIT_MOVU) {
1292 update = 1;
1293 op -= 8;
1296 if (src & SLJIT_IMM) {
1297 switch (op) {
1298 case SLJIT_MOV_UB:
1299 srcw = (sljit_ub)srcw;
1300 break;
1301 case SLJIT_MOV_SB:
1302 srcw = (sljit_sb)srcw;
1303 break;
1304 case SLJIT_MOV_UH:
1305 srcw = (sljit_uh)srcw;
1306 break;
1307 case SLJIT_MOV_SH:
1308 srcw = (sljit_sh)srcw;
1309 break;
1310 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1311 case SLJIT_MOV_UI:
1312 srcw = (sljit_ui)srcw;
1313 break;
1314 case SLJIT_MOV_SI:
1315 srcw = (sljit_si)srcw;
1316 break;
1317 #endif
1319 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1320 if (SLJIT_UNLIKELY(dst_is_ereg))
1321 return emit_mov(compiler, dst, dstw, src, srcw);
1322 #endif
1325 if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
1326 inst = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
1327 FAIL_IF(!inst);
1328 *inst = LEA_r_m;
1329 src &= SLJIT_MEM | 0xf;
1330 srcw = 0;
1333 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1334 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1335 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
1336 dst = TMP_REGISTER;
1338 #endif
1340 switch (op) {
1341 case SLJIT_MOV:
1342 case SLJIT_MOV_P:
1343 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1344 case SLJIT_MOV_UI:
1345 case SLJIT_MOV_SI:
1346 #endif
1347 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1348 break;
1349 case SLJIT_MOV_UB:
1350 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1351 break;
1352 case SLJIT_MOV_SB:
1353 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1354 break;
1355 case SLJIT_MOV_UH:
1356 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1357 break;
1358 case SLJIT_MOV_SH:
1359 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1360 break;
1361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1362 case SLJIT_MOV_UI:
1363 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1364 break;
1365 case SLJIT_MOV_SI:
1366 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1367 break;
1368 #endif
1371 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1372 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
1373 return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
1374 #endif
1376 if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
1377 inst = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
1378 FAIL_IF(!inst);
1379 *inst = LEA_r_m;
1381 return SLJIT_SUCCESS;
1384 if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
1385 compiler->flags_saved = 0;
1387 switch (op) {
1388 case SLJIT_NOT:
1389 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
1390 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1391 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1393 case SLJIT_NEG:
1394 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1395 FAIL_IF(emit_save_flags(compiler));
1396 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1398 case SLJIT_CLZ:
1399 if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1400 FAIL_IF(emit_save_flags(compiler));
1401 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1404 return SLJIT_SUCCESS;
1406 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1407 # undef src_is_ereg
1408 #endif
1411 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1413 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1414 if (IS_HALFWORD(immw) || compiler->mode32) { \
1415 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1416 FAIL_IF(!inst); \
1417 *(inst + 1) |= (op_imm); \
1419 else { \
1420 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
1421 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
1422 FAIL_IF(!inst); \
1423 *inst = (op_mr); \
1426 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1427 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1429 #else
1431 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1432 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1433 FAIL_IF(!inst); \
1434 *(inst + 1) |= (op_imm);
1436 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1437 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1439 #endif
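/* BINARY_IMM emits the GROUP_BINARY_81/83 immediate forms (the /N
   extension in the ModRM reg field selects the operation); on x86-64
   an immediate that does not fit in 32 bits is first loaded into
   TMP_REG2. BINARY_EAX_IMM uses the shorter EAX specific encodings
   (e.g. ADD_EAX_i32) that omit the ModRM byte. */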
1441 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
1442 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1443 sljit_si dst, sljit_sw dstw,
1444 sljit_si src1, sljit_sw src1w,
1445 sljit_si src2, sljit_sw src2w)
1447 sljit_ub* inst;
1449 if (dst == SLJIT_UNUSED) {
1450 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1451 if (src2 & SLJIT_IMM) {
1452 BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1454 else {
1455 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1456 FAIL_IF(!inst);
1457 *inst = op_rm;
1459 return SLJIT_SUCCESS;
1462 if (dst == src1 && dstw == src1w) {
1463 if (src2 & SLJIT_IMM) {
1464 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1465 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1466 #else
1467 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
1468 #endif
1469 BINARY_EAX_IMM(op_eax_imm, src2w);
1471 else {
1472 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1475 else if (dst <= TMP_REGISTER) {
1476 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1477 FAIL_IF(!inst);
1478 *inst = op_rm;
1480 else if (src2 <= TMP_REGISTER) {
1481 /* Special exception for sljit_emit_op_flags. */
1482 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1483 FAIL_IF(!inst);
1484 *inst = op_mr;
1486 else {
1487 EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1488 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1489 FAIL_IF(!inst);
1490 *inst = op_mr;
1492 return SLJIT_SUCCESS;
1495 /* Only for cumulative operations. */
1496 if (dst == src2 && dstw == src2w) {
1497 if (src1 & SLJIT_IMM) {
1498 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1499 if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1500 #else
1501 if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
1502 #endif
1503 BINARY_EAX_IMM(op_eax_imm, src1w);
1505 else {
1506 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1509 else if (dst <= TMP_REGISTER) {
1510 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1511 FAIL_IF(!inst);
1512 *inst = op_rm;
1514 else if (src1 <= TMP_REGISTER) {
1515 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1516 FAIL_IF(!inst);
1517 *inst = op_mr;
1519 else {
1520 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1521 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1522 FAIL_IF(!inst);
1523 *inst = op_mr;
1525 return SLJIT_SUCCESS;
1528 /* General version. */
1529 if (dst <= TMP_REGISTER) {
1530 EMIT_MOV(compiler, dst, 0, src1, src1w);
1531 if (src2 & SLJIT_IMM) {
1532 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1534 else {
1535 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1536 FAIL_IF(!inst);
1537 *inst = op_rm;
1540 else {
1541 /* This version requires fewer memory writes. */
1542 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1543 if (src2 & SLJIT_IMM) {
1544 BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1546 else {
1547 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1548 FAIL_IF(!inst);
1549 *inst = op_rm;
1551 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1554 return SLJIT_SUCCESS;
1557 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
1558 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1559 sljit_si dst, sljit_sw dstw,
1560 sljit_si src1, sljit_sw src1w,
1561 sljit_si src2, sljit_sw src2w)
1563 sljit_ub* inst;
1565 if (dst == SLJIT_UNUSED) {
1566 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1567 if (src2 & SLJIT_IMM) {
1568 BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1570 else {
1571 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1572 FAIL_IF(!inst);
1573 *inst = op_rm;
1575 return SLJIT_SUCCESS;
1578 if (dst == src1 && dstw == src1w) {
1579 if (src2 & SLJIT_IMM) {
1580 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1581 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1582 #else
1583 if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
1584 #endif
1585 BINARY_EAX_IMM(op_eax_imm, src2w);
1587 else {
1588 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1591 else if (dst <= TMP_REGISTER) {
1592 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1593 FAIL_IF(!inst);
1594 *inst = op_rm;
1596 else if (src2 <= TMP_REGISTER) {
1597 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1598 FAIL_IF(!inst);
1599 *inst = op_mr;
1601 else {
1602 EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1603 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1604 FAIL_IF(!inst);
1605 *inst = op_mr;
1607 return SLJIT_SUCCESS;
1610 /* General version. */
1611 if (dst <= TMP_REGISTER && dst != src2) {
1612 EMIT_MOV(compiler, dst, 0, src1, src1w);
1613 if (src2 & SLJIT_IMM) {
1614 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1616 else {
1617 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1618 FAIL_IF(!inst);
1619 *inst = op_rm;
1622 else {
1623 /* This version requires fewer memory writes. */
1624 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1625 if (src2 & SLJIT_IMM) {
1626 BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1628 else {
1629 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1630 FAIL_IF(!inst);
1631 *inst = op_rm;
1633 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1636 return SLJIT_SUCCESS;
1639 static sljit_si emit_mul(struct sljit_compiler *compiler,
1640 sljit_si dst, sljit_sw dstw,
1641 sljit_si src1, sljit_sw src1w,
1642 sljit_si src2, sljit_sw src2w)
1644 sljit_ub* inst;
1645 sljit_si dst_r;
1647 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1649 /* Register destination. */
1650 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1651 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1652 FAIL_IF(!inst);
1653 *inst++ = GROUP_0F;
1654 *inst = IMUL_r_rm;
1656 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1657 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1658 FAIL_IF(!inst);
1659 *inst++ = GROUP_0F;
1660 *inst = IMUL_r_rm;
1662 else if (src1 & SLJIT_IMM) {
1663 if (src2 & SLJIT_IMM) {
1664 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1665 src2 = dst_r;
1666 src2w = 0;
1669 if (src1w <= 127 && src1w >= -128) {
1670 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1671 FAIL_IF(!inst);
1672 *inst = IMUL_r_rm_i8;
1673 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1674 FAIL_IF(!inst);
1675 INC_SIZE(1);
1676 *inst = (sljit_sb)src1w;
1678 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1679 else {
1680 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1681 FAIL_IF(!inst);
1682 *inst = IMUL_r_rm_i32;
1683 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1684 FAIL_IF(!inst);
1685 INC_SIZE(4);
1686 *(sljit_sw*)inst = src1w;
1688 #else
1689 else if (IS_HALFWORD(src1w)) {
1690 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1691 FAIL_IF(!inst);
1692 *inst = IMUL_r_rm_i32;
1693 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1694 FAIL_IF(!inst);
1695 INC_SIZE(4);
1696 *(sljit_si*)inst = (sljit_si)src1w;
1698 else {
1699 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1700 if (dst_r != src2)
1701 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1702 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1703 FAIL_IF(!inst);
1704 *inst++ = GROUP_0F;
1705 *inst = IMUL_r_rm;
1707 #endif
1709 else if (src2 & SLJIT_IMM) {
1710 /* Note: src1 is NOT immediate. */
1712 if (src2w <= 127 && src2w >= -128) {
1713 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1714 FAIL_IF(!inst);
1715 *inst = IMUL_r_rm_i8;
1716 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1717 FAIL_IF(!inst);
1718 INC_SIZE(1);
1719 *inst = (sljit_sb)src2w;
1721 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1722 else {
1723 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1724 FAIL_IF(!inst);
1725 *inst = IMUL_r_rm_i32;
1726 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1727 FAIL_IF(!inst);
1728 INC_SIZE(4);
1729 *(sljit_sw*)inst = src2w;
1731 #else
1732 else if (IS_HALFWORD(src2w)) {
1733 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1734 FAIL_IF(!inst);
1735 *inst = IMUL_r_rm_i32;
1736 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1737 FAIL_IF(!inst);
1738 INC_SIZE(4);
1739 *(sljit_si*)inst = (sljit_si)src2w;
1741 else {
1742 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
1743 if (dst_r != src1)
1744 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1745 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1746 FAIL_IF(!inst);
1747 *inst++ = GROUP_0F;
1748 *inst = IMUL_r_rm;
1750 #endif
1752 else {
1753 /* Neither argument is immediate. */
1754 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1755 dst_r = TMP_REGISTER;
1756 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1757 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1758 FAIL_IF(!inst);
1759 *inst++ = GROUP_0F;
1760 *inst = IMUL_r_rm;
1763 if (dst_r == TMP_REGISTER)
1764 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1766 return SLJIT_SUCCESS;
1769 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1770 sljit_si dst, sljit_sw dstw,
1771 sljit_si src1, sljit_sw src1w,
1772 sljit_si src2, sljit_sw src2w)
1774 sljit_ub* inst;
1775 sljit_si dst_r, done = 0;
1777 /* These cases are better left to be handled the normal way. */
1778 if (!keep_flags) {
1779 if (dst == src1 && dstw == src1w)
1780 return SLJIT_ERR_UNSUPPORTED;
1781 if (dst == src2 && dstw == src2w)
1782 return SLJIT_ERR_UNSUPPORTED;
1785 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1787 if (src1 <= TMP_REGISTER) {
1788 if (src2 <= TMP_REGISTER || src2 == TMP_REGISTER) {
1789 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1790 FAIL_IF(!inst);
1791 *inst = LEA_r_m;
1792 done = 1;
1794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1795 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1796 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1797 #else
1798 if (src2 & SLJIT_IMM) {
1799 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1800 #endif
1801 FAIL_IF(!inst);
1802 *inst = LEA_r_m;
1803 done = 1;
1806 else if (src2 <= TMP_REGISTER) {
1807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1808 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1809 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1810 #else
1811 if (src1 & SLJIT_IMM) {
1812 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1813 #endif
1814 FAIL_IF(!inst);
1815 *inst = LEA_r_m;
1816 done = 1;
1820 if (done) {
1821 if (dst_r == TMP_REGISTER)
1822 return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
1823 return SLJIT_SUCCESS;
1825 return SLJIT_ERR_UNSUPPORTED;
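/* LEA performs the addition without modifying the flags, so it is used
   for add when the current flags must be kept. When no suitable
   addressing form exists, SLJIT_ERR_UNSUPPORTED tells the caller to
   fall back to the ordinary ADD path. */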
1828 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1829 sljit_si src1, sljit_sw src1w,
1830 sljit_si src2, sljit_sw src2w)
1832 sljit_ub* inst;
1834 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1835 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1836 #else
1837 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1838 #endif
1839 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1840 return SLJIT_SUCCESS;
1843 if (src1 <= TMP_REGISTER) {
1844 if (src2 & SLJIT_IMM) {
1845 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1847 else {
1848 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1849 FAIL_IF(!inst);
1850 *inst = CMP_r_rm;
1852 return SLJIT_SUCCESS;
1855 if (src2 <= TMP_REGISTER && !(src1 & SLJIT_IMM)) {
1856 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1857 FAIL_IF(!inst);
1858 *inst = CMP_rm_r;
1859 return SLJIT_SUCCESS;
1862 if (src2 & SLJIT_IMM) {
1863 if (src1 & SLJIT_IMM) {
1864 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1865 src1 = TMP_REGISTER;
1866 src1w = 0;
1868 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1870 else {
1871 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1872 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1873 FAIL_IF(!inst);
1874 *inst = CMP_r_rm;
1876 return SLJIT_SUCCESS;
1879 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1880 sljit_si src1, sljit_sw src1w,
1881 sljit_si src2, sljit_sw src2w)
1883 sljit_ub* inst;
1885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1886 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1887 #else
1888 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1889 #endif
1890 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1891 return SLJIT_SUCCESS;
1894 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1895 if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1896 #else
1897 if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1898 #endif
1899 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1900 return SLJIT_SUCCESS;
1903 if (src1 <= TMP_REGISTER) {
1904 if (src2 & SLJIT_IMM) {
1905 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1906 if (IS_HALFWORD(src2w) || compiler->mode32) {
1907 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1908 FAIL_IF(!inst);
1909 *inst = GROUP_F7;
1911 else {
1912 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1913 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1914 FAIL_IF(!inst);
1915 *inst = TEST_rm_r;
1917 #else
1918 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1919 FAIL_IF(!inst);
1920 *inst = GROUP_F7;
1921 #endif
1923 else {
1924 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1925 FAIL_IF(!inst);
1926 *inst = TEST_rm_r;
1928 return SLJIT_SUCCESS;
1931 if (src2 <= TMP_REGISTER) {
1932 if (src1 & SLJIT_IMM) {
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 if (IS_HALFWORD(src1w) || compiler->mode32) {
1935 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1936 FAIL_IF(!inst);
1937 *inst = GROUP_F7;
1939 else {
1940 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1941 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1942 FAIL_IF(!inst);
1943 *inst = TEST_rm_r;
1945 #else
1946 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1947 FAIL_IF(!inst);
1948 *inst = GROUP_F7;
1949 #endif
1951 else {
1952 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1953 FAIL_IF(!inst);
1954 *inst = TEST_rm_r;
1956 return SLJIT_SUCCESS;
1959 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1960 if (src2 & SLJIT_IMM) {
1961 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1962 if (IS_HALFWORD(src2w) || compiler->mode32) {
1963 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1964 FAIL_IF(!inst);
1965 *inst = GROUP_F7;
1967 else {
1968 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1969 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1970 FAIL_IF(!inst);
1971 *inst = TEST_rm_r;
1973 #else
1974 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1975 FAIL_IF(!inst);
1976 *inst = GROUP_F7;
1977 #endif
1979 else {
1980 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1981 FAIL_IF(!inst);
1982 *inst = TEST_rm_r;
1984 return SLJIT_SUCCESS;
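
/* Emits a shift. On x86 the count of a non-immediate shift must be in CL,
   so whenever src2 is neither an immediate nor SLJIT_PREF_SHIFT_REG (ecx),
   the code below has to shuffle the operands through ecx and restore it
   afterwards. */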
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst <= TMP_REGISTER) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst <= TMP_REGISTER && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used for
		   addressing, and we must ensure it works even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
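
/* x86 shifts leave the status flags untouched when the count is zero, so
   a shift alone cannot be trusted to set them. When flags are requested,
   the helper below adds an explicit compare (or an OR with zero for a
   zero immediate count). */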
static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!(dst <= TMP_REGISTER))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (dst <= TMP_REGISTER)
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
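
/* Usage sketch (illustrative only, assuming an initialized compiler whose
   scratch registers already hold the operands): a word sized addition is
   requested as

     sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_SCRATCH_REG1, 0,
         SLJIT_SCRATCH_REG1, 0, SLJIT_SCRATCH_REG2, 0);

   When no flags are requested, the SLJIT_ADD case below first tries
   emit_lea_binary, since LEA computes the sum without touching the status
   flags. */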
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
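
/* reg_map (see the register tables at the top of this file) translates
   virtual register numbers to machine register indexes. On x86-32 the
   extra registers live in memory (see CHECK_EXTRA_REGS), so they have no
   machine register index and -1 is returned for them. */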
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	check_sljit_get_register_index(reg);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
			|| reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	check_sljit_get_float_register_index(reg);
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}
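
/* sljit_emit_op_custom copies the given bytes into the instruction stream
   verbatim. Usage sketch (illustrative only, assuming an initialized
   compiler): injecting a one byte NOP looks like

     sljit_ub nop = 0x90;
     sljit_emit_op_custom(compiler, &nop, 1);
*/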

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;
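
/* sse2_buffer holds two 16 byte aligned mask pairs. Offsets 0 and 4 are
   the single precision sign mask (0x80000000) and absolute value mask
   (0x7fffffff); offsets 8 and 12 are the double precision equivalents.
   SLJIT_NEGD XORs with the sign mask and SLJIT_ABSD ANDs with the
   absolute value mask in sljit_emit_fop1 below. */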
static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
#else /* SLJIT_SSE2 */
	return 0;
#endif
}

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}
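
/* emit_sse2 above selects the scalar forms: the mandatory F3 prefix for
   single and F2 for double precision. The logic and compare instructions
   used below (ANDPD, XORPD, UCOMISD) instead take a 66 prefix for their
   double precision variants, which is what the pref66 argument of
   emit_sse2_logic controls. */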
static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
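
/* sljit_emit_fop1 covers three cases: SLJIT_CMPD compares with UCOMISD
   once its first operand is in a register, SLJIT_MOVD goes through
   TMP_FREG only when neither operand is a register, and SLJIT_NEGD /
   SLJIT_ABSD apply the constant masks prepared by init_compiler. */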
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		compiler->flags_saved = 0;
		if (dst <= SLJIT_FLOAT_REG6)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
	}

	if (op == SLJIT_MOVD) {
		if (dst <= SLJIT_FLOAT_REG6)
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (src <= SLJIT_FLOAT_REG6)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
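
/* For the two operand float instructions the destination also acts as the
   first source whenever possible. SLJIT_ADDD and SLJIT_MULD are
   commutative, so their sources can be swapped to save a load when the
   destination register equals src2. */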
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

#else

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	/* Should cause an assertion fail. */
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	compiler->error = SLJIT_ERR_UNSUPPORTED;
	return SLJIT_ERR_UNSUPPORTED;
}

#endif

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */
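
/* Labels, jumps and constants are recorded in the instruction stream as
   two byte markers. The leading 0 takes the place of the usual instruction
   length prefix (real instructions are 1-15 bytes long), and the second
   byte distinguishes labels (0), jumps (type + 4) and constants (1) when
   the machine code is generated. */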

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
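
/* A conditional flag is materialized with setcc, which only writes a byte
   register. Without a REX prefix only AL, CL, DL and BL are encodable, so
   the 32 bit path below needs special cases when the destination has no
   byte form, while the 64 bit path always emits a REX prefix. */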
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REGISTER];
		*inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REGISTER)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && dst <= TMP_REGISTER) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1);
			/* An xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_SCRATCH_REG1) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REGISTER to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}
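
/* A constant emitted here can be patched later through sljit_set_const.
   Usage sketch (illustrative only, assuming an initialized compiler, a
   generated function and sljit_get_const_addr from sljitLir.h):

     struct sljit_const *c = sljit_emit_const(compiler, SLJIT_SCRATCH_REG1, 0, 0);
     ... sljit_generate_code(compiler) ...
     sljit_set_const(sljit_get_const_addr(c), 42);
*/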
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
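
/* On x86-32 the patched word is the rel32 operand of a near jump or call,
   so the stored value is the distance from the end of the four byte field
   to the target. On x86-64 jumps go through a mov r64, imm64 followed by
   an indirect jump, so the absolute address is stored instead. */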
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}