2 * Stack-less Just-In-Time compiler
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 SLJIT_API_FUNC_ATTRIBUTE
const char* sljit_get_platform_name(void)
30 return "ARM-Thumb2" SLJIT_CPUINFO
" ABI:softfp";
32 return "ARM-Thumb2" SLJIT_CPUINFO
" ABI:hardfp";
36 /* Length of an instruction word. */
37 typedef sljit_u32 sljit_ins
;
39 /* Last register + 1. */
40 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
41 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
42 #define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
44 #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45 #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
47 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
48 static const sljit_u8 reg_map
[SLJIT_NUMBER_OF_REGISTERS
+ 5] = {
49 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
52 static const sljit_u8 freg_map
[SLJIT_NUMBER_OF_FLOAT_REGISTERS
+ 3] = {
53 0, 0, 1, 2, 3, 4, 5, 6, 7
/* Copies a 'bits' wide field of 'src' starting at bit 'from' to bit
   position 'to' (works in either shift direction). */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to))

/* Thumb16 encodings. */
#define RD3(rd) (reg_map[rd])
#define RN3(rn) (reg_map[rn] << 3)
#define RM3(rm) (reg_map[rm] << 6)
#define RDN3(rdn) (reg_map[rdn] << 8)
#define IMM3(imm) (imm << 6)
#define IMM8(imm) (imm)

/* Thumb16 helpers. */
#define SET_REGS44(rd, rn) \
	((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
#define RD4(rd) (reg_map[rd] << 8)
#define RN4(rn) (reg_map[rn] << 16)
#define RM4(rm) (reg_map[rm])
#define RT4(rt) (reg_map[rt] << 12)
#define DD4(dd) (freg_map[dd] << 12)
#define DN4(dn) (freg_map[dn] << 16)
#define DM4(dm) (freg_map[dm])
/* Scatters a 5 bit immediate into the Thumb32 shift-amount fields. */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
/* Scatters a 12 bit immediate into the Thumb32 i:imm3:imm8 fields. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))
/* --------------------------------------------------------------------- */
/*  Instrucion forms                                                     */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits). */
/* NOTE(review): the define list was damaged; the Thumb16 opcodes below
   were restored from the ARM Thumb-2 encodings used elsewhere in this
   file, and a redundant duplicate ADD_W definition was removed. */
#define ADCI		0xf1400000
#define ADCS		0x4140
#define ADC_W		0xeb400000
#define ADD		0x4400
#define ADDS		0x1800
#define ADDSI3		0x1c00
#define ADDSI8		0x3000
#define ADD_W		0xeb000000
#define ADDWI		0xf2000000
#define ADD_SP		0xb000
#define ADD_WI		0xf1000000
#define ANDI		0xf0000000
#define ANDS		0x4000
#define AND_W		0xea000000
#define ASRS		0x4100
#define ASRSI		0x1000
#define ASR_W		0xfa40f000
#define ASR_WI		0xea4f0020
#define BCC		0xd000
#define BICI		0xf0200000
#define BKPT		0xbe00
#define BLX		0x4780
#define BX		0x4700
#define CLZ		0xfab0f080
#define CMNI_W		0xf1100f00
#define CMP		0x4280
#define CMPI		0x2800
#define CMPI_W		0xf1b00f00
#define CMP_X		0x4500
#define CMP_W		0xebb00f00
#define EORI		0xf0800000
#define EORS		0x4040
#define EOR_W		0xea800000
#define IT		0xbf00
#define LDR_SP		0x9800
#define LDRI		0xf8500800
#define LSLS		0x4080
#define LSLSI		0x0000
#define LSL_W		0xfa00f000
#define LSL_WI		0xea4f0000
#define LSRS		0x40c0
#define LSRSI		0x0800
#define LSR_W		0xfa20f000
#define LSR_WI		0xea4f0010
#define MOV		0x4600
#define MOVS		0x0000
#define MOVSI		0x2000
#define MOVT		0xf2c00000
#define MOVW		0xf2400000
#define MOV_W		0xea4f0000
#define MOV_WI		0xf04f0000
#define MUL		0xfb00f000
#define MVNS		0x43c0
#define MVN_W		0xea6f0000
#define MVN_WI		0xf06f0000
#define NOP		0xbf00
#define ORNI		0xf0600000
#define ORRI		0xf0400000
#define ORRS		0x4300
#define ORR_W		0xea400000
#define POP		0xbc00
#define POP_W		0xe8bd0000
#define PUSH		0xb400
#define PUSH_W		0xe92d0000
#define RSB_WI		0xf1c00000
#define RSBSI		0x4240
#define SBCI		0xf1600000
#define SBCS		0x4180
#define SBC_W		0xeb600000
#define SDIV		0xfb90f0f0
#define SMULL		0xfb800000
#define STR_SP		0x9000
#define SUBS		0x1a00
#define SUBSI3		0x1e00
#define SUBSI8		0x3800
#define SUB_W		0xeba00000
#define SUBWI		0xf2a00000
#define SUB_SP		0xb080
#define SUB_WI		0xf1a00000
#define SXTB		0xb240
#define SXTB_W		0xfa4ff080
#define SXTH		0xb200
#define SXTH_W		0xfa0ff080
#define TST		0x4200
#define TSTI		0xf0000f00
#define TST_W		0xea000f00
#define UDIV		0xfbb0f0f0
#define UMULL		0xfba00000
#define UXTB		0xb2c0
#define UXTB_W		0xfa5ff080
#define UXTH		0xb280
#define UXTH_W		0xfa1ff080
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VLDR_F32	0xed100a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMOV2		0xec400a10
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40
204 static sljit_s32
push_inst16(struct sljit_compiler
*compiler
, sljit_ins inst
)
207 SLJIT_ASSERT(!(inst
& 0xffff0000));
209 ptr
= (sljit_u16
*)ensure_buf(compiler
, sizeof(sljit_u16
));
213 return SLJIT_SUCCESS
;
216 static sljit_s32
push_inst32(struct sljit_compiler
*compiler
, sljit_ins inst
)
218 sljit_u16
*ptr
= (sljit_u16
*)ensure_buf(compiler
, sizeof(sljit_ins
));
223 return SLJIT_SUCCESS
;
226 static SLJIT_INLINE sljit_s32
emit_imm32_const(struct sljit_compiler
*compiler
, sljit_s32 dst
, sljit_uw imm
)
228 FAIL_IF(push_inst32(compiler
, MOVW
| RD4(dst
)
229 | COPY_BITS(imm
, 12, 16, 4) | COPY_BITS(imm
, 11, 26, 1) | COPY_BITS(imm
, 8, 12, 3) | (imm
& 0xff)));
230 return push_inst32(compiler
, MOVT
| RD4(dst
)
231 | COPY_BITS(imm
, 12 + 16, 16, 4) | COPY_BITS(imm
, 11 + 16, 26, 1) | COPY_BITS(imm
, 8 + 16, 12, 3) | ((imm
& 0xff0000) >> 16));
234 static SLJIT_INLINE
void modify_imm32_const(sljit_u16
*inst
, sljit_uw new_imm
)
236 sljit_s32 dst
= inst
[1] & 0x0f00;
237 SLJIT_ASSERT(((inst
[0] & 0xfbf0) == (MOVW
>> 16)) && ((inst
[2] & 0xfbf0) == (MOVT
>> 16)) && dst
== (inst
[3] & 0x0f00));
238 inst
[0] = (MOVW
>> 16) | COPY_BITS(new_imm
, 12, 0, 4) | COPY_BITS(new_imm
, 11, 10, 1);
239 inst
[1] = dst
| COPY_BITS(new_imm
, 8, 12, 3) | (new_imm
& 0xff);
240 inst
[2] = (MOVT
>> 16) | COPY_BITS(new_imm
, 12 + 16, 0, 4) | COPY_BITS(new_imm
, 11 + 16, 10, 1);
241 inst
[3] = dst
| COPY_BITS(new_imm
, 8 + 16, 12, 3) | ((new_imm
& 0xff0000) >> 16);
244 static SLJIT_INLINE sljit_s32
detect_jump_type(struct sljit_jump
*jump
, sljit_u16
*code_ptr
, sljit_u16
*code
, sljit_sw executable_offset
)
248 if (jump
->flags
& SLJIT_REWRITABLE_JUMP
)
251 if (jump
->flags
& JUMP_ADDR
) {
252 /* Branch to ARM code is not optimized yet. */
253 if (!(jump
->u
.target
& 0x1))
255 diff
= ((sljit_sw
)jump
->u
.target
- (sljit_sw
)(code_ptr
+ 2) - executable_offset
) >> 1;
258 SLJIT_ASSERT(jump
->flags
& JUMP_LABEL
);
259 diff
= ((sljit_sw
)(code
+ jump
->u
.label
->size
) - (sljit_sw
)(code_ptr
+ 2)) >> 1;
262 if (jump
->flags
& IS_COND
) {
263 SLJIT_ASSERT(!(jump
->flags
& IS_BL
));
264 if (diff
<= 127 && diff
>= -128) {
265 jump
->flags
|= PATCH_TYPE1
;
268 if (diff
<= 524287 && diff
>= -524288) {
269 jump
->flags
|= PATCH_TYPE2
;
272 /* +1 comes from the prefix IT instruction. */
274 if (diff
<= 8388607 && diff
>= -8388608) {
275 jump
->flags
|= PATCH_TYPE3
;
279 else if (jump
->flags
& IS_BL
) {
280 if (diff
<= 8388607 && diff
>= -8388608) {
281 jump
->flags
|= PATCH_BL
;
286 if (diff
<= 1023 && diff
>= -1024) {
287 jump
->flags
|= PATCH_TYPE4
;
290 if (diff
<= 8388607 && diff
>= -8388608) {
291 jump
->flags
|= PATCH_TYPE5
;
299 static SLJIT_INLINE
void set_jump_instruction(struct sljit_jump
*jump
, sljit_sw executable_offset
)
301 sljit_s32 type
= (jump
->flags
>> 4) & 0xf;
303 sljit_u16
*jump_inst
;
306 if (SLJIT_UNLIKELY(type
== 0)) {
307 modify_imm32_const((sljit_u16
*)jump
->addr
, (jump
->flags
& JUMP_LABEL
) ? jump
->u
.label
->addr
: jump
->u
.target
);
311 if (jump
->flags
& JUMP_ADDR
) {
312 SLJIT_ASSERT(jump
->u
.target
& 0x1);
313 diff
= ((sljit_sw
)jump
->u
.target
- (sljit_sw
)(jump
->addr
+ sizeof(sljit_u32
)) - executable_offset
) >> 1;
316 SLJIT_ASSERT(jump
->u
.label
->addr
& 0x1);
317 diff
= ((sljit_sw
)(jump
->u
.label
->addr
) - (sljit_sw
)(jump
->addr
+ sizeof(sljit_u32
)) - executable_offset
) >> 1;
319 jump_inst
= (sljit_u16
*)jump
->addr
;
323 /* Encoding T1 of 'B' instruction */
324 SLJIT_ASSERT(diff
<= 127 && diff
>= -128 && (jump
->flags
& IS_COND
));
325 jump_inst
[0] = 0xd000 | (jump
->flags
& 0xf00) | (diff
& 0xff);
328 /* Encoding T3 of 'B' instruction */
329 SLJIT_ASSERT(diff
<= 524287 && diff
>= -524288 && (jump
->flags
& IS_COND
));
330 jump_inst
[0] = 0xf000 | COPY_BITS(jump
->flags
, 8, 6, 4) | COPY_BITS(diff
, 11, 0, 6) | COPY_BITS(diff
, 19, 10, 1);
331 jump_inst
[1] = 0x8000 | COPY_BITS(diff
, 17, 13, 1) | COPY_BITS(diff
, 18, 11, 1) | (diff
& 0x7ff);
334 SLJIT_ASSERT(jump
->flags
& IS_COND
);
335 *jump_inst
++ = IT
| ((jump
->flags
>> 4) & 0xf0) | 0x8;
340 /* Encoding T2 of 'B' instruction */
341 SLJIT_ASSERT(diff
<= 1023 && diff
>= -1024 && !(jump
->flags
& IS_COND
));
342 jump_inst
[0] = 0xe000 | (diff
& 0x7ff);
346 SLJIT_ASSERT(diff
<= 8388607 && diff
>= -8388608);
348 /* Really complex instruction form for branches. */
349 s
= (diff
>> 23) & 0x1;
350 j1
= (~(diff
>> 22) ^ s
) & 0x1;
351 j2
= (~(diff
>> 21) ^ s
) & 0x1;
352 jump_inst
[0] = 0xf000 | (s
<< 10) | COPY_BITS(diff
, 11, 0, 10);
353 jump_inst
[1] = (j1
<< 13) | (j2
<< 11) | (diff
& 0x7ff);
355 /* The others have a common form. */
356 if (type
== 5) /* Encoding T4 of 'B' instruction */
357 jump_inst
[1] |= 0x9000;
358 else if (type
== 6) /* Encoding T1 of 'BL' instruction */
359 jump_inst
[1] |= 0xd000;
364 SLJIT_API_FUNC_ATTRIBUTE
void* sljit_generate_code(struct sljit_compiler
*compiler
)
366 struct sljit_memory_fragment
*buf
;
373 sljit_sw executable_offset
;
375 struct sljit_label
*label
;
376 struct sljit_jump
*jump
;
377 struct sljit_const
*const_
;
378 struct sljit_put_label
*put_label
;
381 CHECK_PTR(check_sljit_generate_code(compiler
));
382 reverse_buf(compiler
);
384 code
= (sljit_u16
*)SLJIT_MALLOC_EXEC(compiler
->size
* sizeof(sljit_u16
), compiler
->exec_allocator_data
);
385 PTR_FAIL_WITH_EXEC_IF(code
);
391 executable_offset
= SLJIT_EXEC_OFFSET(code
);
393 label
= compiler
->labels
;
394 jump
= compiler
->jumps
;
395 const_
= compiler
->consts
;
396 put_label
= compiler
->put_labels
;
399 buf_ptr
= (sljit_u16
*)buf
->memory
;
400 buf_end
= buf_ptr
+ (buf
->used_size
>> 1);
402 *code_ptr
= *buf_ptr
++;
403 if (next_addr
== half_count
) {
404 SLJIT_ASSERT(!label
|| label
->size
>= half_count
);
405 SLJIT_ASSERT(!jump
|| jump
->addr
>= half_count
);
406 SLJIT_ASSERT(!const_
|| const_
->addr
>= half_count
);
407 SLJIT_ASSERT(!put_label
|| put_label
->addr
>= half_count
);
409 /* These structures are ordered by their address. */
410 if (label
&& label
->size
== half_count
) {
411 label
->addr
= ((sljit_uw
)SLJIT_ADD_EXEC_OFFSET(code_ptr
, executable_offset
)) | 0x1;
412 label
->size
= code_ptr
- code
;
415 if (jump
&& jump
->addr
== half_count
) {
416 jump
->addr
= (sljit_uw
)code_ptr
- ((jump
->flags
& IS_COND
) ? 10 : 8);
417 code_ptr
-= detect_jump_type(jump
, code_ptr
, code
, executable_offset
);
420 if (const_
&& const_
->addr
== half_count
) {
421 const_
->addr
= (sljit_uw
)code_ptr
;
422 const_
= const_
->next
;
424 if (put_label
&& put_label
->addr
== half_count
) {
425 SLJIT_ASSERT(put_label
->label
);
426 put_label
->addr
= (sljit_uw
)code_ptr
;
427 put_label
= put_label
->next
;
429 next_addr
= compute_next_addr(label
, jump
, const_
, put_label
);
433 } while (buf_ptr
< buf_end
);
438 if (label
&& label
->size
== half_count
) {
439 label
->addr
= ((sljit_uw
)SLJIT_ADD_EXEC_OFFSET(code_ptr
, executable_offset
)) | 0x1;
440 label
->size
= code_ptr
- code
;
444 SLJIT_ASSERT(!label
);
446 SLJIT_ASSERT(!const_
);
447 SLJIT_ASSERT(!put_label
);
448 SLJIT_ASSERT(code_ptr
- code
<= (sljit_sw
)compiler
->size
);
450 jump
= compiler
->jumps
;
452 set_jump_instruction(jump
, executable_offset
);
456 put_label
= compiler
->put_labels
;
458 modify_imm32_const((sljit_u16
*)put_label
->addr
, put_label
->label
->addr
);
459 put_label
= put_label
->next
;
462 compiler
->error
= SLJIT_ERR_COMPILED
;
463 compiler
->executable_offset
= executable_offset
;
464 compiler
->executable_size
= (code_ptr
- code
) * sizeof(sljit_u16
);
466 code
= (sljit_u16
*)SLJIT_ADD_EXEC_OFFSET(code
, executable_offset
);
467 code_ptr
= (sljit_u16
*)SLJIT_ADD_EXEC_OFFSET(code_ptr
, executable_offset
);
469 SLJIT_CACHE_FLUSH(code
, code_ptr
);
470 SLJIT_UPDATE_WX_FLAGS(code
, code_ptr
, 1);
472 /* Set thumb mode flag. */
473 return (void*)((sljit_uw
)code
| 0x1);
476 SLJIT_API_FUNC_ATTRIBUTE sljit_s32
sljit_has_cpu_feature(sljit_s32 feature_type
)
478 switch (feature_type
) {
480 #ifdef SLJIT_IS_FPU_AVAILABLE
481 return SLJIT_IS_FPU_AVAILABLE
;
483 /* Available by default. */
489 case SLJIT_HAS_PREFETCH
:
497 /* --------------------------------------------------------------------- */
498 /* Core code generator functions. */
499 /* --------------------------------------------------------------------- */
501 #define INVALID_IMM 0x80000000
502 static sljit_uw
get_imm(sljit_uw imm
)
504 /* Thumb immediate form. */
510 if ((imm
& 0xffff) == (imm
>> 16)) {
511 /* Some special cases. */
513 return (1 << 12) | (imm
& 0xff);
515 return (2 << 12) | ((imm
>> 8) & 0xff);
516 if ((imm
& 0xff00) == ((imm
& 0xff) << 8))
517 return (3 << 12) | (imm
& 0xff);
520 /* Assembly optimization: count leading zeroes? */
522 if (!(imm
& 0xffff0000)) {
526 if (!(imm
& 0xff000000)) {
530 if (!(imm
& 0xf0000000)) {
534 if (!(imm
& 0xc0000000)) {
538 if (!(imm
& 0x80000000)) {
542 /* Since imm >= 128, this must be true. */
543 SLJIT_ASSERT(counter
<= 31);
545 if (imm
& 0x00ffffff)
546 return INVALID_IMM
; /* Cannot be encoded. */
548 return ((imm
>> 24) & 0x7f) | COPY_BITS(counter
, 4, 26, 1) | COPY_BITS(counter
, 1, 12, 3) | COPY_BITS(counter
, 0, 7, 1);
551 static sljit_s32
load_immediate(struct sljit_compiler
*compiler
, sljit_s32 dst
, sljit_uw imm
)
555 /* MOVS cannot be used since it destroy flags. */
557 if (imm
>= 0x10000) {
559 if (tmp
!= INVALID_IMM
)
560 return push_inst32(compiler
, MOV_WI
| RD4(dst
) | tmp
);
562 if (tmp
!= INVALID_IMM
)
563 return push_inst32(compiler
, MVN_WI
| RD4(dst
) | tmp
);
566 /* set low 16 bits, set hi 16 bits to 0. */
567 FAIL_IF(push_inst32(compiler
, MOVW
| RD4(dst
)
568 | COPY_BITS(imm
, 12, 16, 4) | COPY_BITS(imm
, 11, 26, 1) | COPY_BITS(imm
, 8, 12, 3) | (imm
& 0xff)));
570 /* set hi 16 bit if needed. */
572 return push_inst32(compiler
, MOVT
| RD4(dst
)
573 | COPY_BITS(imm
, 12 + 16, 16, 4) | COPY_BITS(imm
, 11 + 16, 26, 1) | COPY_BITS(imm
, 8 + 16, 12, 3) | ((imm
& 0xff0000) >> 16));
574 return SLJIT_SUCCESS
;
/* Operand shape flags for emit_op_imm. */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
#define UNUSED_RETURN	0x0200000
583 static sljit_s32
emit_op_imm(struct sljit_compiler
*compiler
, sljit_s32 flags
, sljit_s32 dst
, sljit_uw arg1
, sljit_uw arg2
)
585 /* dst must be register, TMP_REG1
586 arg1 must be register, imm
587 arg2 must be register, imm */
591 if (SLJIT_UNLIKELY((flags
& (ARG1_IMM
| ARG2_IMM
)) == (ARG1_IMM
| ARG2_IMM
))) {
592 /* Both are immediates, no temporaries are used. */
594 FAIL_IF(load_immediate(compiler
, TMP_REG1
, arg1
));
598 if (flags
& (ARG1_IMM
| ARG2_IMM
)) {
599 reg
= (flags
& ARG2_IMM
) ? arg1
: arg2
;
600 imm
= (flags
& ARG2_IMM
) ? arg2
: arg1
;
602 switch (flags
& 0xffff) {
605 /* No form with immediate operand. */
608 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && (flags
& ARG2_IMM
) && arg1
== TMP_REG2
);
609 return load_immediate(compiler
, dst
, imm
);
611 if (!(flags
& SET_FLAGS
))
612 return load_immediate(compiler
, dst
, ~imm
);
613 /* Since the flags should be set, we just fallback to the register mode.
614 Although some clever things could be done here, "NOT IMM" does not worth the efforts. */
617 compiler
->status_flags_state
= SLJIT_CURRENT_FLAGS_ADD_SUB
;
618 nimm
= -(sljit_sw
)imm
;
619 if (IS_2_LO_REGS(reg
, dst
)) {
621 return push_inst16(compiler
, ADDSI3
| IMM3(imm
) | RD3(dst
) | RN3(reg
));
623 return push_inst16(compiler
, SUBSI3
| IMM3(nimm
) | RD3(dst
) | RN3(reg
));
626 return push_inst16(compiler
, ADDSI8
| IMM8(imm
) | RDN3(dst
));
628 return push_inst16(compiler
, SUBSI8
| IMM8(nimm
) | RDN3(dst
));
631 if (!(flags
& SET_FLAGS
)) {
633 return push_inst32(compiler
, ADDWI
| RD4(dst
) | RN4(reg
) | IMM12(imm
));
635 return push_inst32(compiler
, SUBWI
| RD4(dst
) | RN4(reg
) | IMM12(nimm
));
638 if (nimm
!= INVALID_IMM
)
639 return push_inst32(compiler
, ADD_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
640 nimm
= get_imm(-(sljit_sw
)imm
);
641 if (nimm
!= INVALID_IMM
)
642 return push_inst32(compiler
, SUB_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
646 if (imm
!= INVALID_IMM
)
647 return push_inst32(compiler
, ADCI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
650 /* SUB operation can be replaced by ADD because of the negative carry flag. */
651 compiler
->status_flags_state
= SLJIT_CURRENT_FLAGS_ADD_SUB
;
652 if (flags
& ARG1_IMM
) {
653 if (imm
== 0 && IS_2_LO_REGS(reg
, dst
))
654 return push_inst16(compiler
, RSBSI
| RD3(dst
) | RN3(reg
));
656 if (imm
!= INVALID_IMM
)
657 return push_inst32(compiler
, RSB_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
660 if (flags
& UNUSED_RETURN
) {
661 if (imm
<= 0xff && reg_map
[reg
] <= 7)
662 return push_inst16(compiler
, CMPI
| IMM8(imm
) | RDN3(reg
));
664 if (nimm
!= INVALID_IMM
)
665 return push_inst32(compiler
, CMPI_W
| RN4(reg
) | nimm
);
666 nimm
= get_imm(-(sljit_sw
)imm
);
667 if (nimm
!= INVALID_IMM
)
668 return push_inst32(compiler
, CMNI_W
| RN4(reg
) | nimm
);
670 nimm
= -(sljit_sw
)imm
;
671 if (IS_2_LO_REGS(reg
, dst
)) {
673 return push_inst16(compiler
, SUBSI3
| IMM3(imm
) | RD3(dst
) | RN3(reg
));
675 return push_inst16(compiler
, ADDSI3
| IMM3(nimm
) | RD3(dst
) | RN3(reg
));
678 return push_inst16(compiler
, SUBSI8
| IMM8(imm
) | RDN3(dst
));
680 return push_inst16(compiler
, ADDSI8
| IMM8(nimm
) | RDN3(dst
));
683 if (!(flags
& SET_FLAGS
)) {
685 return push_inst32(compiler
, SUBWI
| RD4(dst
) | RN4(reg
) | IMM12(imm
));
687 return push_inst32(compiler
, ADDWI
| RD4(dst
) | RN4(reg
) | IMM12(nimm
));
690 if (nimm
!= INVALID_IMM
)
691 return push_inst32(compiler
, SUB_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
692 nimm
= get_imm(-(sljit_sw
)imm
);
693 if (nimm
!= INVALID_IMM
)
694 return push_inst32(compiler
, ADD_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
697 if (flags
& ARG1_IMM
)
700 if (imm
!= INVALID_IMM
)
701 return push_inst32(compiler
, SBCI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
705 if (nimm
!= INVALID_IMM
)
706 return push_inst32(compiler
, ((flags
& UNUSED_RETURN
) ? TSTI
: ANDI
) | (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
708 if (imm
!= INVALID_IMM
)
709 return push_inst32(compiler
, BICI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
713 if (nimm
!= INVALID_IMM
)
714 return push_inst32(compiler
, ORRI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | nimm
);
716 if (imm
!= INVALID_IMM
)
717 return push_inst32(compiler
, ORNI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
721 if (imm
!= INVALID_IMM
)
722 return push_inst32(compiler
, EORI
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(reg
) | imm
);
727 if (flags
& ARG1_IMM
)
731 if (!(flags
& SET_FLAGS
))
732 return push_inst16(compiler
, MOV
| SET_REGS44(dst
, reg
));
733 if (IS_2_LO_REGS(dst
, reg
))
734 return push_inst16(compiler
, MOVS
| RD3(dst
) | RN3(reg
));
735 return push_inst32(compiler
, MOV_W
| SET_FLAGS
| RD4(dst
) | RM4(reg
));
737 switch (flags
& 0xffff) {
739 if (IS_2_LO_REGS(dst
, reg
))
740 return push_inst16(compiler
, LSLSI
| RD3(dst
) | RN3(reg
) | (imm
<< 6));
741 return push_inst32(compiler
, LSL_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RM4(reg
) | IMM5(imm
));
743 if (IS_2_LO_REGS(dst
, reg
))
744 return push_inst16(compiler
, LSRSI
| RD3(dst
) | RN3(reg
) | (imm
<< 6));
745 return push_inst32(compiler
, LSR_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RM4(reg
) | IMM5(imm
));
746 default: /* SLJIT_ASHR */
747 if (IS_2_LO_REGS(dst
, reg
))
748 return push_inst16(compiler
, ASRSI
| RD3(dst
) | RN3(reg
) | (imm
<< 6));
749 return push_inst32(compiler
, ASR_WI
| (flags
& SET_FLAGS
) | RD4(dst
) | RM4(reg
) | IMM5(imm
));
756 if (flags
& ARG2_IMM
) {
758 arg2
= (arg1
== TMP_REG1
) ? TMP_REG2
: TMP_REG1
;
759 FAIL_IF(load_immediate(compiler
, arg2
, imm
));
763 arg1
= (arg2
== TMP_REG1
) ? TMP_REG2
: TMP_REG1
;
764 FAIL_IF(load_immediate(compiler
, arg1
, imm
));
767 SLJIT_ASSERT(arg1
!= arg2
);
770 /* Both arguments are registers. */
771 switch (flags
& 0xffff) {
777 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && arg1
== TMP_REG2
);
779 return SLJIT_SUCCESS
;
780 return push_inst16(compiler
, MOV
| SET_REGS44(dst
, arg2
));
782 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && arg1
== TMP_REG2
);
783 if (IS_2_LO_REGS(dst
, arg2
))
784 return push_inst16(compiler
, UXTB
| RD3(dst
) | RN3(arg2
));
785 return push_inst32(compiler
, UXTB_W
| RD4(dst
) | RM4(arg2
));
787 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && arg1
== TMP_REG2
);
788 if (IS_2_LO_REGS(dst
, arg2
))
789 return push_inst16(compiler
, SXTB
| RD3(dst
) | RN3(arg2
));
790 return push_inst32(compiler
, SXTB_W
| RD4(dst
) | RM4(arg2
));
792 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && arg1
== TMP_REG2
);
793 if (IS_2_LO_REGS(dst
, arg2
))
794 return push_inst16(compiler
, UXTH
| RD3(dst
) | RN3(arg2
));
795 return push_inst32(compiler
, UXTH_W
| RD4(dst
) | RM4(arg2
));
797 SLJIT_ASSERT(!(flags
& SET_FLAGS
) && arg1
== TMP_REG2
);
798 if (IS_2_LO_REGS(dst
, arg2
))
799 return push_inst16(compiler
, SXTH
| RD3(dst
) | RN3(arg2
));
800 return push_inst32(compiler
, SXTH_W
| RD4(dst
) | RM4(arg2
));
802 SLJIT_ASSERT(arg1
== TMP_REG2
);
803 if (IS_2_LO_REGS(dst
, arg2
))
804 return push_inst16(compiler
, MVNS
| RD3(dst
) | RN3(arg2
));
805 return push_inst32(compiler
, MVN_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RM4(arg2
));
807 SLJIT_ASSERT(arg1
== TMP_REG2
);
808 FAIL_IF(push_inst32(compiler
, CLZ
| RN4(arg2
) | RD4(dst
) | RM4(arg2
)));
809 return SLJIT_SUCCESS
;
811 compiler
->status_flags_state
= SLJIT_CURRENT_FLAGS_ADD_SUB
;
812 if (IS_3_LO_REGS(dst
, arg1
, arg2
))
813 return push_inst16(compiler
, ADDS
| RD3(dst
) | RN3(arg1
) | RM3(arg2
));
814 if (dst
== arg1
&& !(flags
& SET_FLAGS
))
815 return push_inst16(compiler
, ADD
| SET_REGS44(dst
, arg2
));
816 return push_inst32(compiler
, ADD_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
818 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
819 return push_inst16(compiler
, ADCS
| RD3(dst
) | RN3(arg2
));
820 return push_inst32(compiler
, ADC_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
822 compiler
->status_flags_state
= SLJIT_CURRENT_FLAGS_ADD_SUB
;
823 if (flags
& UNUSED_RETURN
) {
824 if (IS_2_LO_REGS(arg1
, arg2
))
825 return push_inst16(compiler
, CMP
| RD3(arg1
) | RN3(arg2
));
826 return push_inst16(compiler
, CMP_X
| SET_REGS44(arg1
, arg2
));
828 if (IS_3_LO_REGS(dst
, arg1
, arg2
))
829 return push_inst16(compiler
, SUBS
| RD3(dst
) | RN3(arg1
) | RM3(arg2
));
830 return push_inst32(compiler
, SUB_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
832 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
833 return push_inst16(compiler
, SBCS
| RD3(dst
) | RN3(arg2
));
834 return push_inst32(compiler
, SBC_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
836 compiler
->status_flags_state
= 0;
837 if (!(flags
& SET_FLAGS
))
838 return push_inst32(compiler
, MUL
| RD4(dst
) | RN4(arg1
) | RM4(arg2
));
839 SLJIT_ASSERT(dst
!= TMP_REG2
);
840 FAIL_IF(push_inst32(compiler
, SMULL
| RT4(dst
) | RD4(TMP_REG2
) | RN4(arg1
) | RM4(arg2
)));
841 /* cmp TMP_REG2, dst asr #31. */
842 return push_inst32(compiler
, CMP_W
| RN4(TMP_REG2
) | 0x70e0 | RM4(dst
));
844 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
845 return push_inst16(compiler
, ANDS
| RD3(dst
) | RN3(arg2
));
846 if ((flags
& UNUSED_RETURN
) && IS_2_LO_REGS(arg1
, arg2
))
847 return push_inst16(compiler
, TST
| RD3(arg1
) | RN3(arg2
));
848 return push_inst32(compiler
, ((flags
& UNUSED_RETURN
) ? TST_W
: AND_W
) | (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
850 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
851 return push_inst16(compiler
, ORRS
| RD3(dst
) | RN3(arg2
));
852 return push_inst32(compiler
, ORR_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
854 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
855 return push_inst16(compiler
, EORS
| RD3(dst
) | RN3(arg2
));
856 return push_inst32(compiler
, EOR_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
858 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
859 return push_inst16(compiler
, LSLS
| RD3(dst
) | RN3(arg2
));
860 return push_inst32(compiler
, LSL_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
862 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
863 return push_inst16(compiler
, LSRS
| RD3(dst
) | RN3(arg2
));
864 return push_inst32(compiler
, LSR_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
866 if (dst
== arg1
&& IS_2_LO_REGS(dst
, arg2
))
867 return push_inst16(compiler
, ASRS
| RD3(dst
) | RN3(arg2
));
868 return push_inst32(compiler
, ASR_W
| (flags
& SET_FLAGS
) | RD4(dst
) | RN4(arg1
) | RM4(arg2
));
872 return SLJIT_SUCCESS
;
/* Memory access flags; the low bits index the sljit_mem16/32 tables.
   NOTE(review): STORE/SIGNED/PRELOAD were missing from the damaged
   source and restored from upstream SLJIT — verify their values. */
#define STORE		0x01
#define SIGNED		0x02

#define WORD_SIZE	0x00
#define BYTE_SIZE	0x04
#define HALF_SIZE	0x08
#define PRELOAD		0x0c

#define IS_WORD_SIZE(flags)		(!(flags & (BYTE_SIZE | HALF_SIZE)))
/* Uses 'argw' from the enclosing scope. */
#define OFFSET_CHECK(imm, shift)	(!(argw & ~(imm << shift)))
901 static const sljit_ins sljit_mem16
[12] = {
902 /* w u l */ 0x5800 /* ldr */,
903 /* w u s */ 0x5000 /* str */,
904 /* w s l */ 0x5800 /* ldr */,
905 /* w s s */ 0x5000 /* str */,
907 /* b u l */ 0x5c00 /* ldrb */,
908 /* b u s */ 0x5400 /* strb */,
909 /* b s l */ 0x5600 /* ldrsb */,
910 /* b s s */ 0x5400 /* strb */,
912 /* h u l */ 0x5a00 /* ldrh */,
913 /* h u s */ 0x5200 /* strh */,
914 /* h s l */ 0x5e00 /* ldrsh */,
915 /* h s s */ 0x5200 /* strh */,
918 static const sljit_ins sljit_mem16_imm5
[12] = {
919 /* w u l */ 0x6800 /* ldr imm5 */,
920 /* w u s */ 0x6000 /* str imm5 */,
921 /* w s l */ 0x6800 /* ldr imm5 */,
922 /* w s s */ 0x6000 /* str imm5 */,
924 /* b u l */ 0x7800 /* ldrb imm5 */,
925 /* b u s */ 0x7000 /* strb imm5 */,
926 /* b s l */ 0x0000 /* not allowed */,
927 /* b s s */ 0x7000 /* strb imm5 */,
929 /* h u l */ 0x8800 /* ldrh imm5 */,
930 /* h u s */ 0x8000 /* strh imm5 */,
931 /* h s l */ 0x0000 /* not allowed */,
932 /* h s s */ 0x8000 /* strh imm5 */,
935 #define MEM_IMM8 0xc00
936 #define MEM_IMM12 0x800000
937 static const sljit_ins sljit_mem32
[13] = {
938 /* w u l */ 0xf8500000 /* ldr.w */,
939 /* w u s */ 0xf8400000 /* str.w */,
940 /* w s l */ 0xf8500000 /* ldr.w */,
941 /* w s s */ 0xf8400000 /* str.w */,
943 /* b u l */ 0xf8100000 /* ldrb.w */,
944 /* b u s */ 0xf8000000 /* strb.w */,
945 /* b s l */ 0xf9100000 /* ldrsb.w */,
946 /* b s s */ 0xf8000000 /* strb.w */,
948 /* h u l */ 0xf8300000 /* ldrh.w */,
949 /* h u s */ 0xf8200000 /* strsh.w */,
950 /* h s l */ 0xf9300000 /* ldrsh.w */,
951 /* h s s */ 0xf8200000 /* strsh.w */,
953 /* p u l */ 0xf8100000 /* pld */,
956 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
957 static sljit_s32
emit_set_delta(struct sljit_compiler
*compiler
, sljit_s32 dst
, sljit_s32 reg
, sljit_sw value
)
961 return push_inst32(compiler
, ADDWI
| RD4(dst
) | RN4(reg
) | IMM12(value
));
962 value
= get_imm(value
);
963 if (value
!= INVALID_IMM
)
964 return push_inst32(compiler
, ADD_WI
| RD4(dst
) | RN4(reg
) | value
);
969 return push_inst32(compiler
, SUBWI
| RD4(dst
) | RN4(reg
) | IMM12(value
));
970 value
= get_imm(value
);
971 if (value
!= INVALID_IMM
)
972 return push_inst32(compiler
, SUB_WI
| RD4(dst
) | RN4(reg
) | value
);
974 return SLJIT_ERR_UNSUPPORTED
;
977 static SLJIT_INLINE sljit_s32
emit_op_mem(struct sljit_compiler
*compiler
, sljit_s32 flags
, sljit_s32 reg
,
978 sljit_s32 arg
, sljit_sw argw
, sljit_s32 tmp_reg
)
983 SLJIT_ASSERT(arg
& SLJIT_MEM
);
984 SLJIT_ASSERT((arg
& REG_MASK
) != tmp_reg
);
987 if (SLJIT_UNLIKELY(!(arg
& REG_MASK
))) {
988 tmp
= get_imm(argw
& ~0xfff);
989 if (tmp
!= INVALID_IMM
) {
990 FAIL_IF(push_inst32(compiler
, MOV_WI
| RD4(tmp_reg
) | tmp
));
991 return push_inst32(compiler
, sljit_mem32
[flags
] | MEM_IMM12
| RT4(reg
) | RN4(tmp_reg
) | (argw
& 0xfff));
994 FAIL_IF(load_immediate(compiler
, tmp_reg
, argw
));
995 if (IS_2_LO_REGS(reg
, tmp_reg
) && sljit_mem16_imm5
[flags
])
996 return push_inst16(compiler
, sljit_mem16_imm5
[flags
] | RD3(reg
) | RN3(tmp_reg
));
997 return push_inst32(compiler
, sljit_mem32
[flags
] | MEM_IMM12
| RT4(reg
) | RN4(tmp_reg
));
1000 if (SLJIT_UNLIKELY(arg
& OFFS_REG_MASK
)) {
1002 other_r
= OFFS_REG(arg
);
1005 if (!argw
&& IS_3_LO_REGS(reg
, arg
, other_r
))
1006 return push_inst16(compiler
, sljit_mem16
[flags
] | RD3(reg
) | RN3(arg
) | RM3(other_r
));
1007 return push_inst32(compiler
, sljit_mem32
[flags
] | RT4(reg
) | RN4(arg
) | RM4(other_r
) | (argw
<< 4));
1011 tmp
= get_imm(argw
& ~0xfff);
1012 if (tmp
!= INVALID_IMM
) {
1013 push_inst32(compiler
, ADD_WI
| RD4(tmp_reg
) | RN4(arg
) | tmp
);
1015 argw
= argw
& 0xfff;
1018 else if (argw
< -0xff) {
1019 tmp
= get_imm(-argw
& ~0xff);
1020 if (tmp
!= INVALID_IMM
) {
1021 push_inst32(compiler
, SUB_WI
| RD4(tmp_reg
) | RN4(arg
) | tmp
);
1023 argw
= -(-argw
& 0xff);
1027 if (IS_2_LO_REGS(reg
, arg
) && sljit_mem16_imm5
[flags
]) {
1029 if (IS_WORD_SIZE(flags
)) {
1030 if (OFFSET_CHECK(0x1f, 2))
1033 else if (flags
& BYTE_SIZE
)
1035 if (OFFSET_CHECK(0x1f, 0))
1039 SLJIT_ASSERT(flags
& HALF_SIZE
);
1040 if (OFFSET_CHECK(0x1f, 1))
1045 return push_inst16(compiler
, sljit_mem16_imm5
[flags
] | RD3(reg
) | RN3(arg
) | (argw
<< (6 - tmp
)));
1047 else if (SLJIT_UNLIKELY(arg
== SLJIT_SP
) && IS_WORD_SIZE(flags
) && OFFSET_CHECK(0xff, 2) && reg_map
[reg
] <= 7) {
1048 /* SP based immediate. */
1049 return push_inst16(compiler
, STR_SP
| ((flags
& STORE
) ? 0 : 0x800) | RDN3(reg
) | (argw
>> 2));
1052 if (argw
>= 0 && argw
<= 0xfff)
1053 return push_inst32(compiler
, sljit_mem32
[flags
] | MEM_IMM12
| RT4(reg
) | RN4(arg
) | argw
);
1054 else if (argw
< 0 && argw
>= -0xff)
1055 return push_inst32(compiler
, sljit_mem32
[flags
] | MEM_IMM8
| RT4(reg
) | RN4(arg
) | -argw
);
1057 SLJIT_ASSERT(arg
!= tmp_reg
);
1059 FAIL_IF(load_immediate(compiler
, tmp_reg
, argw
));
1060 if (IS_3_LO_REGS(reg
, arg
, tmp_reg
))
1061 return push_inst16(compiler
, sljit_mem16
[flags
] | RD3(reg
) | RN3(arg
) | RM3(tmp_reg
));
1062 return push_inst32(compiler
, sljit_mem32
[flags
] | RT4(reg
) | RN4(arg
) | RM4(tmp_reg
));
1065 /* --------------------------------------------------------------------- */
1067 /* --------------------------------------------------------------------- */
/* Emits the function prologue: pushes the required saved registers (plus LR),
   allocates the 8-byte aligned local area, and moves the incoming arguments
   into their SLJIT register/stack homes. Argument handling differs between the
   softfp ABI (arguments arrive in r0-r3 / on the stack) and the hardfp ABI
   (float arguments arrive in VFP registers and only need remapping).
   NOTE(review): layout and preprocessor structure reconstructed from a
   fragmentary extraction — verify against the upstream file. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp, offset, word_arg_count;
	sljit_uw push = 0; /* bit mask of core registers to push */
#ifdef __SOFTFP__
	sljit_s32 float_arg_count;
#else
	sljit_s32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap; /* deferred VMOVs, flushed in reverse order */
#endif
#ifdef _WIN32
	sljit_uw imm;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Collect the saved registers that must be preserved. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		push |= 1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		push |= 1 << reg_map[i];

	/* LR is added by the encoding itself: bit 14 (32-bit PUSH) or bit 8 (16-bit PUSH). */
	FAIL_IF((push & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | push)
		: push_inst16(compiler, PUSH | (1 << 8) | push));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	local_size = ((size + local_size + 7) & ~7) - size;
	compiler->local_size = local_size;

	arg_types >>= SLJIT_ARG_SHIFT; /* drop the return type */

	word_arg_count = 0;
	offset = 0;

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

#ifdef __SOFTFP__
	/* softfp: all arguments arrive in r0-r3, then on the caller's stack. */
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				/* Still in core registers: single VMOV into the VFP register. */
				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				/* On the stack: load relative to SP (saved regs already pushed). */
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset + size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset + size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64);
			break;
		default:
			word_arg_count++;
			SLJIT_ASSERT(reg_map[SLJIT_S0 - word_arg_count] <= 7);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst16(compiler, MOV | reg_map[SLJIT_S0 - word_arg_count] | (offset << 1)));
			else
				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(SLJIT_S0 - word_arg_count)
						| ((offset + size - 4 * sizeof(sljit_sw)) >> 2)));

			offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}
#else
	/* hardfp: float arguments already sit in VFP registers; compute the
	   remapping moves first, then emit them in reverse so no value is
	   overwritten before it is read. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the upper half of a previously split double slot. */
				*remap_ptr++ = VMOV_F32 | 0x20 | DD4(offset) | DM4(f32_offset);
				f32_offset = 0;
			}
			else {
				if (offset != old_offset)
					*remap_ptr++ = VMOV_F32 | DD4(offset) | DM4(old_offset);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = VMOV_F32 | SLJIT_32 | DD4(offset) | DM4(old_offset);
			old_offset++;
			offset++;
			break;
		default:
			word_arg_count++;
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - word_arg_count, SLJIT_R0 + word_arg_count)));
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(remap_ptr - remap <= sizeof(remap));

	while (remap_ptr > remap)
		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
#endif

#ifdef _WIN32
	/* Windows requires stack probing: compute the new stack top in TMP_REG1 first. */
	if (local_size >= 256) {
		if (local_size > 4096)
			imm = get_imm(4096);
		else
			imm = get_imm(local_size & ~0xff);

		SLJIT_ASSERT(imm != INVALID_IMM);
		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
	}
#else
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
	}
#endif

#ifdef _WIN32
	if (local_size >= 256) {
		if (local_size > 4096) {
			imm = get_imm(4096);
			SLJIT_ASSERT(imm != INVALID_IMM);

			if (local_size < 4 * 4096) {
				/* Touch each page explicitly (at most three probes). */
				if (local_size > 2 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
					local_size -= 4096;
				}

				if (local_size > 2 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
					local_size -= 4096;
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
				local_size -= 4096;

				SLJIT_ASSERT(local_size > 0);
			}
			else {
				/* Many pages: probe in a counted loop using R3. */
				FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
				SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
				FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
				FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));

				local_size &= 0xfff;

				if (local_size != 0)
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
			}

			if (local_size >= 256) {
				imm = get_imm(local_size & ~0xff);
				SLJIT_ASSERT(imm != INVALID_IMM);

				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
			}
		}

		local_size &= 0xff;
		/* Final probe; pre-index when a remainder is left. */
		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));

		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
	}
	else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
#endif

	return SLJIT_SUCCESS;
}
/* Records the register/stack context of an existing function without emitting
   any code; computes the same 8-byte aligned local size as sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Must mirror the local_size rounding done by sljit_emit_enter. */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((size + local_size + 7) & ~7) - size;
	return SLJIT_SUCCESS;
}
/* Emits the function epilogue: releases the local area and pops the saved
   registers; PC is popped in place of LR so the pop itself returns. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_s32 i, tmp;
	sljit_uw pop = 0; /* bit mask of core registers to pop */

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	if (compiler->local_size > 0) {
		if (compiler->local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size));
	}

	/* Same register set as the prologue push. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--)
		pop |= 1 << reg_map[i];

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		pop |= 1 << reg_map[i];

	/* PC is added by the encoding: bit 15 (32-bit POP) or bit 8 (16-bit POP). */
	return (pop & 0xff00)
		? push_inst32(compiler, POP_W | (1 << 15) | pop)
		: push_inst16(compiler, POP | (1 << 8) | pop);
}
1309 /* --------------------------------------------------------------------- */
1311 /* --------------------------------------------------------------------- */
/* Software division helpers, only needed when the core lacks the UDIV/SDIV
   instructions (no integer-divide extension). */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
/* MS runtime helpers: return { quotient, remainder } packed in 64 bits. */
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
/* ARM EABI helpers: quotient in r0, remainder in r1. */
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
/* Emits a zero-operand operation: breakpoint, nop, long multiply, and the
   divide/divmod family. Division uses UDIV/SDIV when the core has the
   integer-divide extension, otherwise it calls the platform's software
   divmod helper, preserving live scratch registers around the call.
   NOTE(review): control structure reconstructed from a fragmentary
   extraction — verify against the upstream file. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	sljit_sw saved_reg_list[3];
	sljit_sw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64-bit result: low word in R0, high word in R1. */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 8)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 16)
			| reg_map[SLJIT_R1]);
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		/* remainder = dividend - quotient * divisor */
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
	{
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Save scratch registers the helper call would clobber. R1 only
		   needs saving for plain DIV (DIVMOD returns the remainder in it). */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_udiv/__rt_sdiv take (denominator, numerator): swap R0/R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	}
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
/* Emits a single-operand operation. MOV variants select the access size and
   sign extension; immediates are pre-narrowed so load_immediate emits the
   right constant. SLJIT_NEG is rewritten as (0 - src) through sljit_emit_op2.
   NOTE(review): case labels reconstructed from a fragmentary extraction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u8)srcw; /* narrow the constant up front */
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src & SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src & SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src & SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw));
		else if (src & SLJIT_MEM) {
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		} else {
			/* Register to register move (with possible extension). */
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG2, src);
			dst_r = src;
		}

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	}

	if (op == SLJIT_NEG) {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		/* neg is sub from zero; keeps the flag semantics of op2. */
		return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw);
	}

	flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}
/* Emits a two-operand operation. Immediate operands are flagged (ARG1_IMM /
   ARG2_IMM) rather than materialized; memory operands are loaded into the
   temporaries first. When dst is TMP_REG1 the result is unused and only the
   flags matter (UNUSED_RETURN). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, flags, src2_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	/* After this point src1w/src2w hold either an immediate or a register. */
	if (src1 & SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	}
	else
		src1w = src1;

	if (src2 & SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Avoid clobbering src1 if it was loaded into TMP_REG1. */
		src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
		emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
		src2w = src2_reg;
	}
	else
		src2w = src2;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}
/* Flag-only variant of sljit_emit_op2: the numeric result is discarded by
   directing it to TMP_REG1, which op2 interprets as UNUSED_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1; /* op2 would re-run the argument checks otherwise */
#endif
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
/* Emits a source-only operation: fast (LR-based) return and prefetch hints.
   Prefetch is encoded as a PLD-style load whose target is TMP_PC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14); /* TMP_REG2 is LR */

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS; /* no-op on this target */
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All cache levels map to the same PLD encoding. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
/* Returns the hardware register number backing an abstract SLJIT register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
/* Returns the hardware VFP register number for an abstract float register.
   Shifted left by one because freg_map holds double (D) register indices
   while the API reports single (S) register numbering. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return (freg_map[reg] << 1);
}
/* Emits a raw machine instruction supplied by the caller: 16-bit Thumb when
   size == 2, otherwise a 32-bit Thumb-2 encoding. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	if (size == 2)
		return push_inst16(compiler, *(sljit_u16*)instruction);
	return push_inst32(compiler, *(sljit_ins*)instruction);
}
1631 /* --------------------------------------------------------------------- */
1632 /* Floating point operators */
1633 /* --------------------------------------------------------------------- */
1635 #define FPU_LOAD (1 << 20)
/* Emits a VFP load or store (VLDR/VSTR; FPU_LOAD selects load) for a memory
   operand. Tries, in order: indexed addressing materialized via ADD, the
   direct 10-bit scaled immediate (positive 0x800000 form or negative form),
   a delta/ADD_WI/SUB_WI adjusted base in TMP_REG1, and finally a fully
   materialized address. argw is expected to be 4-byte aligned for the
   immediate forms. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* base + (index << shift) has no VFP addressing mode: fold it into TMP_REG1. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		/* VLDR/VSTR take an 8-bit word offset; 0x800000 is the U (add) bit. */
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | DD4(reg) | (argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2));
	}

	if (arg & REG_MASK) {
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
		}
		/* Split the offset: Thumb immediate for the high part, VFP offset for the low. */
		imm = get_imm(argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | DD4(reg) | ((argw & 0x3fc) >> 2));
		}
	}

	/* Slow path: build the absolute address in TMP_REG1. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg));
}
/* Converts a float/double to a signed word: VCVT into TMP_FREG1, then either
   VMOV to a core register or a 32-bit store from the VFP register. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	op ^= SLJIT_32; /* SLJIT_32 here means f32 source; flip for the encoding */

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | DD4(TMP_FREG1) | DM4(src)));

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
/* Converts a signed word to a float/double: gets the integer into TMP_FREG1
   (VMOV from register, VFP load from memory, or via load_immediate for
   constants), then VCVT into the destination register. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	op ^= SLJIT_32; /* flip the single/double bit for the encoding */

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | DN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F32_S32 | (op & SLJIT_32) | DD4(dst_r) | DM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Compares two float/double operands: loads memory operands into the float
   temporaries, then VCMP followed by VMRS to transfer FPSCR flags to APSR. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32; /* flip the single/double bit for the encoding */

	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | DD4(src1) | DM4(src2)));
	/* Move FP status flags into the core condition flags. */
	return push_inst32(compiler, VMRS);
}
/* Emits a single-operand floating point operation (mov/neg/abs/convert).
   SLJIT_32 doubles as the size bit in the VFP encodings, so it is flipped
   for every opcode except the f32->f64 conversion, whose source size is f32.
   NOTE(review): case labels reconstructed from a fragmentary extraction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw);
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
			else
				dst_r = src; /* no copy needed; store directly from src */
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | DD4(dst_r) | DM4(src)));
		op ^= SLJIT_32; /* result size differs from source size */
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Emits a two-operand floating point operation (add/sub/mul/div). Memory
   operands are loaded into the float temporaries; the result goes to the
   destination register or is stored from TMP_FREG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	op ^= SLJIT_32; /* SLJIT_32 doubles as the VFP size bit */

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w);
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w);
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | DD4(dst_r) | DN4(src1) | DM4(src2)));
		break;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
1842 /* --------------------------------------------------------------------- */
1843 /* Other instructions */
1844 /* --------------------------------------------------------------------- */
/* Entry of a "fast called" block: saves the return address (LR, mapped as
   TMP_REG2) into dst so the block can be returned from with SLJIT_FAST_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14); /* TMP_REG2 is LR */

	if (FAST_IS_REG(dst))
		return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));

	/* Memory destination. */
	return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);
}
1861 /* --------------------------------------------------------------------- */
1862 /* Conditional instructions */
1863 /* --------------------------------------------------------------------- */
/* Maps an SLJIT comparison type to the 4-bit ARM condition code used by IT
   blocks and conditional branches. Overflow checks depend on how the flags
   were produced: ADD/SUB set V, while other ops (e.g. MUL) track overflow in
   Z, hence the fallthrough into the NE/EQ codes.
   NOTE(review): the concrete return values were reconstructed from the ARM
   condition-code table — verify against the upstream file. */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_EQUAL_F64:
		return 0x0; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_NOT_EQUAL_F64:
		return 0x1; /* NE */

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x3; /* LO (unsigned <) */

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x2; /* HS (unsigned >=) */

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x8; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x9; /* LS */

	case SLJIT_SIG_LESS:
		return 0xb; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xa; /* GE */

	case SLJIT_SIG_GREATER:
		return 0xc; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
		return 0xd; /* LE */

	case SLJIT_OVERFLOW:
		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
			return 0x1; /* overflow tracked via Z: NE */
		/* fallthrough */

	case SLJIT_UNORDERED_F64:
		return 0x6; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
			return 0x0; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED_F64:
		return 0x7; /* VC */

	default: /* SLJIT_JUMP */
		SLJIT_UNREACHABLE();
		return 0xe; /* AL */
	}
}
/* Creates (or reuses) a label at the current code position. Consecutive
   requests at the same offset return the same label object. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}
/* Emits a (possibly conditional) jump or call. A constant is reserved in
   TMP_REG1 (patched later by the linker pass), conditions are realized with
   an IT block, and the transfer is BX for jumps or BLX for calls. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Placeholder target; the real address is patched in later. */
	PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		jump->flags |= cc << 8; /* remember the condition for patching */
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	return jump;
}
/* Moves outgoing call arguments into their softfp ABI locations (r0-r3, then
   the stack). A first pass computes each argument's offset; a second pass
   processes arguments in reverse so no location is overwritten before being
   read. If the (indirect) call target register would be clobbered, it is
   moved to TMP_REG1 and *src is updated accordingly.
   NOTE(review): structure reconstructed from a fragmentary extraction. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src)
{
	sljit_s32 offset = 0;
	sljit_s32 word_arg_offset = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0; /* argument types, accumulated in reverse order */
	sljit_s32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT; /* drop the return type */

	/* First pass: assign an ABI offset to every argument. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F64:
			if (offset & 0x7)
				offset += sizeof(sljit_sw); /* 8-byte alignment */
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (offset > 4 * sizeof(sljit_sw))
		FAIL_IF(push_inst16(compiler, SUB_SP | (((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~0x7) >> 2));

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					/* Rescue the call target before overwriting it. */
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The target register moves; track its new home. */
						*src = 1 + (offset >> 2);
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
/* After a softfp call: moves a float/double return value from r0(/r1) into
   FR0, then releases any stack space the argument pass allocated. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 stack_size = 0;

	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32)
		FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12)));
	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
		FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));

	arg_types >>= SLJIT_ARG_SHIFT;

	/* Recompute the argument area size (same layout rules as the call pass). */
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F32:
			stack_size += sizeof(sljit_f32);
			break;
		case SLJIT_ARG_TYPE_F64:
			if (stack_size & 0x7)
				stack_size += sizeof(sljit_sw); /* 8-byte alignment */
			stack_size += sizeof(sljit_f64);
			break;
		default:
			stack_size += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (stack_size <= 4 * sizeof(sljit_sw))
		return SLJIT_SUCCESS; /* everything fitted in r0-r3, nothing to pop */

	return push_inst16(compiler, ADD_SP | ((((stack_size - 16) + 0x7) & ~0x7) >> 2));
}
/* Moves outgoing float arguments from SLJIT's virtual float registers into
   the hardfp ABI registers; f32 arguments can back-fill a half-used slot
   (f32_offset) left by alignment. Integer arguments are already in place. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;     /* current virtual register */
	sljit_u32 new_offset = SLJIT_FR0; /* next free ABI register */
	sljit_u32 f32_offset = 0;         /* pending single-precision half-slot */

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32) {
			if (f32_offset != 0) {
				/* Fill the upper half of the pending slot. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(f32_offset) | DM4(offset)));
				f32_offset = 0;
			}
			else {
				if (offset != new_offset)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | DD4(new_offset) | DM4(offset)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
		}
		else if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64) {
			if (offset != new_offset)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | DD4(new_offset) | DM4(offset)));
			new_offset++;
			offset++;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
/* Emits a call with argument marshalling: softfp moves arguments into core
   registers/stack around the jump, hardfp only remaps float registers. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1; /* the nested emit_jump re-checks otherwise */
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	/* Fetch the float return value / release argument stack space. */
	PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
	return jump;
#else
	PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
#endif
}
/* Emits an indirect jump or fast call to a register, memory operand,
   or immediate target. Immediate targets go through a patchable
   MOVW/MOVT constant so they can be rewritten later. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* BX/BLX to r14 (lr) is unpredictable territory on some cores;
	   the register maps must keep TMP_REG1 away from it. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* For a plain jump the loaded value can go straight into pc;
		   a call must load into a temporary first so lr can be set. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
		return SLJIT_SUCCESS;
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = srcw;

	/* Placeholder constant; the real target is patched in later. */
	FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
/* Emits an indirect call with argument marshalling for the active
   float ABI; the indirect-call counterpart of sljit_emit_call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

#ifdef __SOFTFP__
	/* Load the target before the argument registers are clobbered. */
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* src is passed by address: the helper may need to relocate it if
	   the target register is needed for an argument. */
	FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* The nested sljit_emit_ijump would re-run the argument checks. */
	compiler->skip_checks = 1;
#endif

	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	/* Move the return value into place and restore the stack. */
	return softfloat_post_call_with_args(compiler, arg_types);
#else /* !__SOFTFP__ */
	FAIL_IF(hardfloat_call_with_args(compiler, arg_types));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
#endif /* __SOFTFP__ */
}
/* Materializes the current condition flags as a 0/1 value in dst,
   or combines them into dst with AND/OR/XOR. Uses Thumb-2 IT blocks
   to conditionally execute the immediate moves. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type & 0xff);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain move: ITE cc, then "mov dst, 1" / "mov dst, 0". */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			/* High registers need the 32 bit immediate move form. */
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	/* Combining ops need the current dst value loaded first. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* ITE cc: "and dst, dst, 1" when true, "and dst, dst, 0" otherwise. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 is a no-op, so only the true case is emitted. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
/* Conditional move: dst_reg = src if the condition encoded in type
   holds. Immediates are tried in increasingly expensive encodings:
   MOVW, modified-immediate MOV, modified-immediate MVN, and finally
   a conditional MOVW+MOVT pair. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	/* The 32 bit variant flag has no effect on this 32 bit target. */
	dst_reg &= ~SLJIT_32;

	cc = get_cc(compiler, type & 0xff);

	if (!(src & SLJIT_IMM)) {
		/* IT cc; mov dst_reg, src */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src));
	}

	tmp = (sljit_uw) srcw;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	/* Try the Thumb-2 modified immediate encoding. */
	tmp = get_imm(srcw);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
	}

	/* Try the bitwise-inverted immediate with MVN. */
	tmp = get_imm(~srcw);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
	}

	/* Fallback: ITT cc so both MOVW and MOVT below are conditional. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw) srcw;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
/* Emits a pre/post-indexed memory access (load or store with base
   register update). Only simple base+small-immediate forms are
   supported; anything else returns SLJIT_ERR_UNSUPPORTED so the
   caller can fall back to a plain access. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	/* The T4 encoding only has an 8 bit offset and no index register. */
	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
		return SLJIT_ERR_UNSUPPORTED;

	/* The caller only asked whether the form is supported. */
	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (type & SLJIT_MEM_STORE)
		flags |= STORE;

	/* NOTE(review): bits 10..8 of this LDR/STR immediate form look like
	   the P/U/W bits — 0x900 presumably selects the writeback variant,
	   0x400 pre-indexing, 0x200 a positive (add) offset; confirm. */
	inst = sljit_mem32[flags] | 0x900;

	if (type & SLJIT_MEM_PRE)
		inst |= 0x400;

	if (memw >= 0)
		inst |= 0x200;
	else
		memw = -memw;

	return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | memw);
}
/* Emits a patchable 32 bit constant (MOVW/MOVT pair) into dst and
   returns its record so the value can be changed later with
   sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	/* Records the current code position for later patching. */
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return const_;
}
/* Emits a placeholder constant load into dst whose value is filled
   in with a label address at code generation time. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* Zero placeholder; the label address is patched in later. */
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return put_label;
}
/* Rewrites the MOVW/MOVT pair at addr (4 halfwords) to load
   new_target, toggling W^X protection around the write and flushing
   the instruction cache at the executable address. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	/* Make the region writable, patch, then restore executability. */
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
	modify_imm32_const(inst, new_target);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	/* The cache flush must target the executable mapping, which may
	   live at a different address than the writable one. */
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
/* Updates a constant emitted by sljit_emit_const. Constants use the
   same MOVW/MOVT sequence as jump targets, so this simply forwards
   to sljit_set_jump_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, new_constant, executable_offset);
}