/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "ARM-Thumb2" SLJIT_CPUINFO;
}
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;
/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 5)

#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15
};
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to))
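/* Example: COPY_BITS(x, 11, 26, 1) takes bit 11 of x and moves it to bit 26
   (x << 15, masked with 1 << 26); the encoding macros below use it to scatter
   immediate bits into their Thumb-2 instruction fields. */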
/* Thumb16 encodings. */
#define RD3(rd)		(reg_map[rd])
#define RN3(rn)		(reg_map[rn] << 3)
#define RM3(rm)		(reg_map[rm] << 6)
#define RDN3(rdn)	(reg_map[rdn] << 8)
#define IMM3(imm)	(imm << 6)
#define IMM8(imm)	(imm)

/* Thumb16 helpers. */
#define SET_REGS44(rd, rn) \
	((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
/* Thumb32 encodings. */
#define RD4(rd)		(reg_map[rd] << 8)
#define RN4(rn)		(reg_map[rn] << 16)
#define RM4(rm)		(reg_map[rm])
#define RT4(rt)		(reg_map[rt] << 12)
#define DD4(dd)		((dd) << 12)
#define DN4(dn)		((dn) << 16)
#define DM4(dm)		(dm)
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))
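/* Example: IMM12(0xabc) splits a 12-bit constant into the T32 i:imm3:imm8
   layout -- bit 11 goes to bit 26, bits 10..8 to bits 14..12, and bits 7..0
   stay in place -- as used by the ADDWI/SUBWI forms below. */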
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits). */
#define ADCI		0xf1400000
#define ADCS		0x4140
#define ADC_W		0xeb400000
#define ADD		0x4400
#define ADDS		0x1800
#define ADDSI3		0x1c00
#define ADDSI8		0x3000
#define ADD_W		0xeb000000
#define ADDWI		0xf2000000
#define ADD_SP		0xb000
#define ADD_WI		0xf1000000
#define ANDI		0xf0000000
#define ANDS		0x4000
#define AND_W		0xea000000
#define ASRS		0x4100
#define ASRSI		0x1000
#define ASR_W		0xfa40f000
#define ASR_WI		0xea4f0020
#define BICI		0xf0200000
#define BKPT		0xbe00
#define BLX		0x4780
#define BX		0x4700
#define CLZ		0xfab0f080
#define CMPI		0x2800
#define CMP_W		0xebb00f00
#define EORI		0xf0800000
#define EORS		0x4040
#define EOR_W		0xea800000
#define IT		0xbf00
#define LSLS		0x4080
#define LSLSI		0x0000
#define LSL_W		0xfa00f000
#define LSL_WI		0xea4f0000
#define LSRS		0x40c0
#define LSRSI		0x0800
#define LSR_W		0xfa20f000
#define LSR_WI		0xea4f0010
#define MOV		0x4600
#define MOVS		0x0000
#define MOVSI		0x2000
#define MOVT		0xf2c00000
#define MOVW		0xf2400000
#define MOV_W		0xea4f0000
#define MOV_WI		0xf04f0000
#define MUL		0xfb00f000
#define MVNS		0x43c0
#define MVN_W		0xea6f0000
#define MVN_WI		0xf06f0000
#define NOP		0xbf00
#define ORNI		0xf0600000
#define ORRI		0xf0400000
#define ORRS		0x4300
#define ORR_W		0xea400000
#define POP		0xbc00
#define POP_W		0xe8bd0000
#define PUSH		0xb400
#define PUSH_W		0xe92d0000
#define RSBSI		0x4240
#define RSB_WI		0xf1c00000
#define SBCI		0xf1600000
#define SBCS		0x4180
#define SBC_W		0xeb600000
#define SMULL		0xfb800000
#define STR_SP		0x9000
#define SUBS		0x1a00
#define SUBSI3		0x1e00
#define SUBSI8		0x3800
#define SUB_W		0xeba00000
#define SUBWI		0xf2a00000
#define SUB_SP		0xb080
#define SUB_WI		0xf1a00000
#define SXTB		0xb240
#define SXTB_W		0xfa4ff080
#define SXTH		0xb200
#define SXTH_W		0xfa0ff080
#define TST		0x4200
#define UMULL		0xfba00000
#define UXTB		0xb2c0
#define UXTB_W		0xfa5ff080
#define UXTH		0xb280
#define UXTH_W		0xfa1ff080
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_u16 *ptr;
	SLJIT_ASSERT(!(inst & 0xffff0000));

	ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
	FAIL_IF(!ptr);
	*ptr = inst;
	compiler->size++;
	return SLJIT_SUCCESS;
}
static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr++ = inst >> 16;
	*ptr = inst;
	compiler->size += 2;
	return SLJIT_SUCCESS;
}
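/* A 32 bit Thumb-2 instruction is stored as two 16 bit halfwords with the
   most significant halfword first, which is why push_inst32 writes inst >> 16
   before the low half. */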
static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
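/* MOVW zeroes the upper halfword while loading the lower one, and MOVT then
   fills the upper halfword, so the pair above materializes any 32 bit constant
   in exactly four halfwords -- the fixed shape that modify_imm32_const and the
   jump patching code below rely on. */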
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	sljit_s32 dst = inst[1] & 0x0f00;
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
	inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
	inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
	inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
}
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			return 0;
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
	}

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		if (diff <= 127 && diff >= -128) {
			jump->flags |= PATCH_TYPE1;
			return 5;
		}
		if (diff <= 524287 && diff >= -524288) {
			jump->flags |= PATCH_TYPE2;
			return 4;
		}
		/* +1 comes from the prefix IT instruction. */
		diff--;
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE3;
			return 3;
		}
	}
	else if (jump->flags & IS_BL) {
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_BL;
			return 3;
		}
	}
	else {
		if (diff <= 1023 && diff >= -1024) {
			jump->flags |= PATCH_TYPE4;
			return 4;
		}
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE5;
			return 3;
		}
	}

	return 0;
}
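/* The value returned above is the number of halfwords saved by replacing the
   worst-case MOVW/MOVT/BX(+IT) sequence with a direct branch; the caller in
   sljit_generate_code subtracts it from code_ptr. */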
static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_sw diff;
	sljit_u16 *jump_inst;
	sljit_s32 s, j1, j2;

	if (SLJIT_UNLIKELY(type == 0)) {
		modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
		return;
	}

	if (jump->flags & JUMP_ADDR) {
		SLJIT_ASSERT(jump->u.target & 0x1);
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->u.label->addr & 0x1);
		diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	jump_inst = (sljit_u16*)jump->addr;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
		jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
		jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
		jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
		return;
	case 3:
		SLJIT_ASSERT(jump->flags & IS_COND);
		*jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
		diff--;
		type = 5;
		break;
	case 4:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
		jump_inst[0] = 0xe000 | (diff & 0x7ff);
		return;
	}

	SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);

	/* Really complex instruction form for branches. */
	s = (diff >> 23) & 0x1;
	j1 = (~(diff >> 21) ^ s) & 0x1;
	j2 = (~(diff >> 22) ^ s) & 0x1;
	jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
	jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);

	/* The others have a common form. */
	if (type == 5) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else if (type == 6) /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
	else
		SLJIT_ASSERT_STOP();
}
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			/* These structures are ordered by their address. */
			SLJIT_ASSERT(!label || label->size >= half_count);
			SLJIT_ASSERT(!jump || jump->addr >= half_count);
			SLJIT_ASSERT(!const_ || const_->addr >= half_count);
			if (label && label->size == half_count) {
				label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == half_count) {
				jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
				code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
				jump = jump->next;
			}
			if (const_ && const_->addr == half_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	if (label && label->size == half_count) {
		label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	jump = compiler->jumps;
	while (jump) {
		set_jump_instruction(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
/* --------------------------------------------------------------------- */
/*  Core code generator functions.                                        */
/* --------------------------------------------------------------------- */

#define INVALID_IMM	0x80000000
static sljit_uw get_imm(sljit_uw imm)
{
	/* Thumb immediate form. */
	sljit_s32 counter;

	if (imm <= 0xff)
		return imm;

	if ((imm & 0xffff) == (imm >> 16)) {
		/* Some special cases. */
		if (!(imm & 0xff00))
			return (1 << 12) | (imm & 0xff);
		if (!(imm & 0xff))
			return (2 << 12) | ((imm >> 8) & 0xff);
		if ((imm & 0xff00) == ((imm & 0xff) << 8))
			return (3 << 12) | (imm & 0xff);
	}

	/* Assembly optimization: count leading zeroes? */
	counter = 8;
	if (!(imm & 0xffff0000)) {
		counter += 16;
		imm <<= 16;
	}
	if (!(imm & 0xff000000)) {
		counter += 8;
		imm <<= 8;
	}
	if (!(imm & 0xf0000000)) {
		counter += 4;
		imm <<= 4;
	}
	if (!(imm & 0xc0000000)) {
		counter += 2;
		imm <<= 2;
	}
	if (!(imm & 0x80000000)) {
		counter += 1;
		imm <<= 1;
	}
	/* Since imm >= 128, this must be true. */
	SLJIT_ASSERT(counter <= 31);

	if (imm & 0x00ffffff)
		return INVALID_IMM; /* Cannot be encoded. */

	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}
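/* Example: get_imm(0x00ab00ab) matches the "00XY00XY" special case and
   returns (1 << 12) | 0xab, while a value such as 0xab000000 is encoded as
   the byte 0xab plus the rotation computed in counter above. */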
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw tmp;

	if (imm >= 0x10000) {
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* set low 16 bits, set hi 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* set hi 16 bit if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst)
			| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
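/* Selection order: a single MOV_WI/MVN_WI when the constant or its complement
   fits the Thumb modified-immediate form, otherwise MOVW, plus MOVT only when
   the upper halfword is nonzero. */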
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
#define KEEP_FLAGS	0x0040000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
#define UNUSED_RETURN	0x0200000
#define SLOW_DEST	0x0400000
#define SLOW_SRC1	0x0800000
#define SLOW_SRC2	0x1000000
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
{
	/* dst must be register, TMP_REG1
	   arg1 must be register, TMP_REG1, imm
	   arg2 must be register, TMP_REG2, imm */
	sljit_s32 reg;
	sljit_uw imm, nimm;

	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
		/* Both are immediates. */
		flags &= ~ARG1_IMM;
		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
		arg1 = TMP_REG1;
	}

	if (flags & (ARG1_IMM | ARG2_IMM)) {
		reg = (flags & ARG2_IMM) ? arg1 : arg2;
		imm = (flags & ARG2_IMM) ? arg2 : arg1;

		switch (flags & 0xffff) {
		case SLJIT_CLZ:
		case SLJIT_MUL:
			/* No form with immediate operand. */
			break;
		case SLJIT_MOV:
			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
			return load_immediate(compiler, dst, imm);
		case SLJIT_NOT:
			if (!(flags & SET_FLAGS))
				return load_immediate(compiler, dst, ~imm);
			/* Since the flags should be set, we just fall back to the register mode.
			   Although some clever things could be done here, "NOT IMM" is not worth the effort. */
			break;
		case SLJIT_ADD:
			nimm = -imm;
			if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (nimm <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
					if (nimm <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(nimm) | RDN3(dst));
				}
			}
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (nimm <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm));
			}
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_ADDC:
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SUB:
			if (flags & ARG1_IMM) {
				if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
				imm = get_imm(imm);
				if (imm != INVALID_IMM)
					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
				break;
			}
			nimm = -imm;
			if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (nimm <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(nimm) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
					if (nimm <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst));
				}
				if (imm <= 0xff && (flags & UNUSED_RETURN))
					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
			}
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (nimm <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm));
			}
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SUBC:
			if (flags & ARG1_IMM)
				break;
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_AND:
			nimm = get_imm(imm);
			if (nimm != INVALID_IMM)
				return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_OR:
			nimm = get_imm(imm);
			if (nimm != INVALID_IMM)
				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm);
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_XOR:
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SHL:
		case SLJIT_LSHR:
		case SLJIT_ASHR:
			if (flags & ARG1_IMM)
				break;
			imm &= 0x1f;
			if (imm == 0) {
				if (!(flags & SET_FLAGS))
					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
			}
			switch (flags & 0xffff) {
			case SLJIT_SHL:
				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_LSHR:
				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			default: /* SLJIT_ASHR */
				if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			}
		default:
			SLJIT_ASSERT_STOP();
			break;
		}

		if (flags & ARG2_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
			arg2 = TMP_REG2;
		}
		else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
			arg1 = TMP_REG1;
		}
	}

	/* Both arguments are registers. */
	switch (flags & 0xffff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV_P:
	case SLJIT_MOVU:
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
	case SLJIT_MOVU_P:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (dst == arg2)
			return SLJIT_SUCCESS;
		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
	case SLJIT_MOV_U8:
	case SLJIT_MOVU_U8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S8:
	case SLJIT_MOVU_S8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_U16:
	case SLJIT_MOVU_U16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S16:
	case SLJIT_MOVU_S16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_NOT:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
	case SLJIT_CLZ:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
		if (flags & SET_FLAGS) {
			if (reg_map[dst] <= 7)
				return push_inst16(compiler, CMPI | RDN3(dst));
			return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
		}
		return SLJIT_SUCCESS;
	case SLJIT_ADD:
		if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
		if (dst == arg1 && !(flags & SET_FLAGS))
			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ADDC:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUB:
		if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUBC:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MUL:
		if (!(flags & SET_FLAGS))
			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
		SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
		/* cmp TMP_REG2, dst asr #31. */
		return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
	case SLJIT_AND:
		if (!(flags & KEEP_FLAGS)) {
			if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
				return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
			if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
				return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
		}
		return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_OR:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_XOR:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SHL:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_LSHR:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ASHR:
		if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	}

	SLJIT_ASSERT_STOP();
	return SLJIT_SUCCESS;
}
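/* Throughout emit_op_imm the 16 bit encodings are preferred whenever both
   operands are low registers and the caller allows the flags to be updated
   (no KEEP_FLAGS); everything else falls back to the 32 bit forms with the
   S bit taken directly from SET_FLAGS. */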
#define STORE		0x01
#define SIGNED		0x02

#define WORD_SIZE	0x00
#define BYTE_SIZE	0x04
#define HALF_SIZE	0x08

#define UPDATE		0x10
#define ARG_TEST	0x20

#define IS_WORD_SIZE(flags)		(!(flags & (BYTE_SIZE | HALF_SIZE)))
#define OFFSET_CHECK(imm, shift)	(!(argw & ~(imm << shift)))
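/* Example: OFFSET_CHECK(0x1f, 2) accepts any argw in [0, 124] whose two low
   bits are clear -- exactly the word-aligned imm5 range of the 16 bit
   LDR/STR encodings used below. */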
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
#define MEM_IMM8	0xc00
#define MEM_IMM12	0x800000
static const sljit_ins sljit_mem32[12] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,
};
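/* All three tables are indexed by the same flag combination: bit 0 selects
   store over load, bit 1 signed over unsigned loads, and bits 2-3 the
   operand size, matching the STORE/SIGNED/BYTE_SIZE/HALF_SIZE values above. */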
/* Helper function. Dst should be reg + value, using at most 1 instruction; flags are not set. */
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
{
	if (value >= 0) {
		if (value <= 0xfff)
			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
		value = get_imm(value);
		if (value != INVALID_IMM)
			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
	}
	else {
		value = -value;
		if (value <= 0xfff)
			return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
		value = get_imm(value);
		if (value != INVALID_IMM)
			return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
	}
	return SLJIT_ERR_UNSUPPORTED;
}
/* Can perform an operation using at most 1 instruction. */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_s32 other_r, shift;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (SLJIT_UNLIKELY(flags & UPDATE)) {
		if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) {
			if (SLJIT_UNLIKELY(flags & ARG_TEST))
				return 1;

			flags &= ~UPDATE;
			arg &= 0xf;
			if (argw >= 0)
				argw |= 0x200;
			else
				argw = -argw;

			SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
			return -1;
		}
		return 0;
	}

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		if (SLJIT_UNLIKELY(flags & ARG_TEST))
			return 1;

		argw &= 0x3;
		other_r = OFFS_REG(arg);
		arg &= 0xf;

		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
		else
			FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
		return -1;
	}

	if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff)
		return 0;

	if (SLJIT_UNLIKELY(flags & ARG_TEST))
		return 1;

	arg &= 0xf;
	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
		shift = 3;
		if (IS_WORD_SIZE(flags)) {
			if (OFFSET_CHECK(0x1f, 2))
				shift = 2;
		}
		else if (flags & BYTE_SIZE) {
			if (OFFSET_CHECK(0x1f, 0))
				shift = 0;
		}
		else {
			SLJIT_ASSERT(flags & HALF_SIZE);
			if (OFFSET_CHECK(0x1f, 1))
				shift = 1;
		}

		if (shift != 3) {
			FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift))));
			return -1;
		}
	}

	/* SP based immediate. */
	if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
		FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
		return -1;
	}

	if (argw >= 0)
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
	else
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
	return -1;
}
/* See getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always use word arguments without write back. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_sw diff;

	if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
		return 0;

	if (!(arg & REG_MASK)) {
		diff = argw - next_argw;
		if (diff <= 0xfff && diff >= -0xfff)
			return 1;
		return 0;
	}

	if (argw == next_argw)
		return 1;

	diff = argw - next_argw;
	if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
		return 1;

	return 0;
}
/* Emit the necessary instructions. See can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r, other_r;
	sljit_sw diff;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	if (!(next_arg & SLJIT_MEM)) {
		next_arg = 0;
		next_argw = 0;
	}

	tmp_r = (flags & STORE) ? TMP_REG3 : reg;

	if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
		/* Update only applies if a base register exists. */
		/* There is no caching here. */
		other_r = OFFS_REG(arg);
		arg &= 0xf;
		flags &= ~UPDATE;

		if (!other_r) {
			if (!(argw & ~0xfff)) {
				FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
				return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
			}

			if (compiler->cache_arg == SLJIT_MEM) {
				if (argw == compiler->cache_argw) {
					other_r = TMP_REG3;
					argw = 0;
				}
				else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
					FAIL_IF(compiler->error);
					compiler->cache_argw = argw;
					other_r = TMP_REG3;
					argw = 0;
				}
			}

			if (argw) {
				FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
				compiler->cache_arg = SLJIT_MEM;
				compiler->cache_argw = argw;
				other_r = TMP_REG3;
				argw = 0;
			}
		}

		argw &= 0x3;
		if (!argw && IS_3_LO_REGS(reg, arg, other_r)) {
			FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)));
			return push_inst16(compiler, ADD | SET_REGS44(arg, other_r));
		}
		FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)));
		return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6));
	}

	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));

	if (compiler->cache_arg == arg) {
		diff = argw - compiler->cache_argw;
		if (!(diff & ~0xfff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff);
		if (!((compiler->cache_argw - argw) & ~0xff))
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
		}
	}

	next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw);
	arg &= 0xf;
	if (arg && compiler->cache_arg == SLJIT_MEM) {
		if (compiler->cache_argw == argw)
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
		}
	}

	compiler->cache_argw = argw;
	if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
		FAIL_IF(compiler->error);
		compiler->cache_arg = SLJIT_MEM | arg;
		arg = 0;
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		compiler->cache_arg = SLJIT_MEM;

		diff = argw - next_argw;
		if (next_arg && diff <= 0xfff && diff >= -0xfff) {
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
			compiler->cache_arg = SLJIT_MEM | arg;
			arg = 0;
		}
	}

	if (arg)
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
	return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
}
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	if (getput_arg_fast(compiler, flags, reg, arg, argw))
		return compiler->error;
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
}
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
{
	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
		return compiler->error;
	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}
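/* Access pattern: getput_arg_fast() handles everything that fits in a single
   instruction (returning nonzero when it consumed the access), while
   getput_arg() is the slow path that builds the address in TMP_REG3;
   emit_op_mem2() passes the next argument so the cached address can be
   primed for it. */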
/* --------------------------------------------------------------------- */
/*  Entry, exit                                                           */
/* --------------------------------------------------------------------- */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp;
	sljit_ins push;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	push = (1 << 4);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		push |= 1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		push |= 1 << reg_map[i];

	FAIL_IF((push & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | push)
		: push_inst16(compiler, PUSH | (1 << 8) | push));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	local_size = ((size + local_size + 7) & ~7) - size;
	compiler->local_size = local_size;
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
	}

	if (args >= 1)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0, SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S1, SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));

	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	compiler->local_size = ((size + local_size + 7) & ~7) - size;
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp;
	sljit_ins pop;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	if (compiler->local_size > 0) {
		if (compiler->local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
	}

	pop = (1 << 4);

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		pop |= 1 << reg_map[i];

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		pop |= 1 << reg_map[i];

	return (pop & 0xff00)
		? push_inst32(compiler, POP_W | (1 << 15) | pop)
		: push_inst16(compiler, POP | (1 << 8) | pop);
}
/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_sw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 8)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 16)
			| reg_map[SLJIT_R1]);
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping);

		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 12;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		case SLJIT_MOVU:
		case SLJIT_MOVU_U32:
		case SLJIT_MOVU_S32:
		case SLJIT_MOVU_P:
			flags = WORD_SIZE | UPDATE;
			break;
		case SLJIT_MOVU_U8:
			flags = BYTE_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOVU_S8:
			flags = BYTE_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOVU_U16:
			flags = HALF_SIZE | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOVU_S16:
			flags = HALF_SIZE | SIGNED | UPDATE;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_ASSERT_STOP();
			flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
		else if (src & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
				FAIL_IF(compiler->error);
			else
				FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
		}
		else {
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG1, src);
			dst_r = src;
		}

		if (dst & SLJIT_MEM) {
			if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
				return compiler->error;
			else
				return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
		}
		return SLJIT_SUCCESS;
	}

	if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
	}

	flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
	if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
			FAIL_IF(compiler->error);
		else
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
		src = TMP_REG2;
	}

	if (src & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		srcw = src;

	emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);

	if (dst & SLJIT_MEM) {
		if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
			return compiler->error;
		else
			return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);

	if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
	}
	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
	}

	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));

	if (src1 & SLJIT_MEM)
		src1 = TMP_REG1;
	if (src2 & SLJIT_MEM)
		src2 = TMP_REG2;

	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else
		src1w = src1;
	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else
		src2w = src2;

	if (dst == SLJIT_UNUSED)
		flags |= UNUSED_RETURN;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);

	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
	}
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	if (size == 2)
		return push_inst16(compiler, *(sljit_u16*)instruction);
	return push_inst32(compiler, *(sljit_ins*)instruction);
}
/* --------------------------------------------------------------------- */
/*  Floating point operators                                               */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
#define FPU_LOAD (1 << 20)

static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_sw tmp;
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG2;
		argw = 0;
	}

	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
	}

	SLJIT_ASSERT(!(arg & OFFS_REG_MASK));
	if (compiler->cache_arg == arg) {
		tmp = argw - compiler->cache_argw;
		if (!(tmp & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
		if (!(-tmp & ~0x3fc))
			return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			compiler->cache_argw = argw;
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
		}
	}

	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
		}
		imm = get_imm(argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
	}

	compiler->cache_arg = arg;
	compiler->cache_argw = argw;

	FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
}
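/* In the VFP load/store encodings used above, bit 20 (FPU_LOAD) turns VSTR
   into VLDR, and bit 23 (the 0x800000 added for non-negative offsets) selects
   adding rather than subtracting the scaled imm8 offset. */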
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_F32_OP) | DD4(src1) | DM4(src2)));
	return push_inst32(compiler, VMRS);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw);
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
			else
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DM4(src)));
		op ^= SLJIT_F32_OP;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_F32_OP) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
}
/* --------------------------------------------------------------------- */
/*  Other instructions                                                     */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));

	/* Memory. */
	if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
		return compiler->error;
	/* TMP_REG3 is used for caching. */
	FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
	else if (src & SLJIT_MEM) {
		if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
			FAIL_IF(compiler->error);
		else {
			compiler->cache_arg = 0;
			compiler->cache_argw = 0;
			FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
		}
	}
	else if (src & SLJIT_IMM)
		FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
	/* A fast return must branch without linking; BLX here would clobber the
	   saved return address, so BX is the correct instruction. */
	return push_inst16(compiler, BX | RN3(TMP_REG3));
}
/* --------------------------------------------------------------------- */
/*  Conditional instructions                                               */
/* --------------------------------------------------------------------- */

static sljit_uw get_cc(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_MUL_NOT_OVERFLOW:
	case SLJIT_EQUAL_F64:
		return 0x0;

	case SLJIT_NOT_EQUAL:
	case SLJIT_MUL_OVERFLOW:
	case SLJIT_NOT_EQUAL_F64:
		return 0x1;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x3;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x2;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x8;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x9;

	case SLJIT_SIG_LESS:
		return 0xb;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xa;

	case SLJIT_SIG_GREATER:
		return 0xc;

	case SLJIT_SIG_LESS_EQUAL:
		return 0xd;

	case SLJIT_OVERFLOW:
	case SLJIT_UNORDERED_F64:
		return 0x6;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_ORDERED_F64:
		return 0x7;

	default: /* SLJIT_JUMP */
		SLJIT_ASSERT_STOP();
		return 0xe;
	}
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In ARM, we don't need to touch the arguments. */
	PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(type);
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	return jump;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* In ARM, we don't need to touch the arguments. */
	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(src))
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
		return SLJIT_SUCCESS;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = srcw;

	FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	cc = get_cc(type & 0xff);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op < SLJIT_ADD) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		}
		else {
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (dst_r != TMP_REG2)
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
	}

	ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI));
	if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
		/* Does not change the other bits. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1));
		if (flags & SLJIT_SET_E) {
			/* The condition must always be set, even if the ORRI/EORI is not executed above. */
			if (reg_map[dst] <= 7)
				return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst));
			return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst));
		}
		return SLJIT_SUCCESS;
	}

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
		src = TMP_REG2;
		srcw = 0;
	} else if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		src = TMP_REG2;
		srcw = 0;
	}

	if (op == SLJIT_AND || src != dst_r) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0));
	}
	else {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1));
	}

	if (dst_r == TMP_REG2)
		FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0));

	if (flags & SLJIT_SET_E) {
		/* The condition must always be set, even if the ORR/EORI is not executed above. */
		if (reg_map[dst_r] <= 7)
			return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r));
		return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
	}
	return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
	return const_;
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	modify_imm32_const(inst, new_target);
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	modify_imm32_const(inst, new_constant);
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}