Support unsaved registers on s390x
[sljit.git] / sljit_src / sljitNativeS390X.c
blobccaf2df1362042a0a7f986abd3e0e77a34a2f8e3
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/auxv.h>

/* Static facility detection is only possible when the compiler tells us
   which architecture level it targets (__ARCH__ is set by -march=). */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
/* Runtime detection via the STFLE instruction is always compiled in. */
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
38 return "s390x" SLJIT_CPUINFO;
41 /* Instructions. */
42 typedef sljit_uw sljit_ins;
44 /* Instruction tags (most significant halfword). */
45 static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
47 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
48 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
50 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
51 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
54 /* there are also a[2-15] available, but they are slower to access and
55 * their use is limited as mundaym explained:
56 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
59 /* General Purpose Registers [0-15]. */
60 typedef sljit_uw sljit_gpr;
63 * WARNING
64 * the following code is non standard and should be improved for
65 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
66 * registers because r0 and r1 are the ABI recommended volatiles.
67 * there is a gpr() function that maps sljit to physical register numbers
68 * that should be used instead of the usual index into reg_map[] and
69 * will be retired ASAP (TODO: carenas)
72 static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
73 static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
74 static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
75 static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
76 static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
77 static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
78 static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
79 static const sljit_gpr r7 = 7; /* reg_map[6] */
80 static const sljit_gpr r8 = 8; /* reg_map[7] */
81 static const sljit_gpr r9 = 9; /* reg_map[8] */
82 static const sljit_gpr r10 = 10; /* reg_map[9] */
83 static const sljit_gpr r11 = 11; /* reg_map[10] */
84 static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
85 static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
86 static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */
87 static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
89 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
90 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
91 /* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
92 * like we do know might be faster though, reserve?
95 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
96 #define tmp0 r0
97 #define tmp1 r1
99 /* TODO(carenas): flags should move to a different register so that
100 * link register doesn't need to change
103 /* When reg cannot be unused. */
104 #define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
106 /* Link registers. The normal link register is r14, but since
107 we use that for flags we need to use r0 instead to do fast
108 calls so that flags are preserved. */
109 static const sljit_gpr link_r = 14; /* r14 */
110 static const sljit_gpr fast_link_r = 0; /* r0 */
112 #define TMP_FREG1 (0)
114 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
115 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
118 #define R0A(r) (r)
119 #define R4A(r) ((r) << 4)
120 #define R8A(r) ((r) << 8)
121 #define R12A(r) ((r) << 12)
122 #define R16A(r) ((r) << 16)
123 #define R20A(r) ((r) << 20)
124 #define R28A(r) ((r) << 28)
125 #define R32A(r) ((r) << 32)
126 #define R36A(r) ((r) << 36)
128 #define R0(r) ((sljit_ins)reg_map[r])
130 #define F0(r) ((sljit_ins)freg_map[r])
131 #define F4(r) (R4A((sljit_ins)freg_map[r]))
132 #define F20(r) (R20A((sljit_ins)freg_map[r]))
133 #define F36(r) (R36A((sljit_ins)freg_map[r]))
135 struct sljit_s390x_const {
136 struct sljit_const const_; /* must be first */
137 sljit_sw init_value; /* required to build literal pool */
140 /* Convert SLJIT register to hardware register. */
141 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
143 SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
144 return reg_map[r];
147 static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
149 SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
150 return freg_map[r];
153 /* Size of instruction in bytes. Tags must already be cleared. */
154 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
156 /* keep faulting instructions */
157 if (ins == 0)
158 return 2;
160 if ((ins & 0x00000000ffffL) == ins)
161 return 2;
162 if ((ins & 0x0000ffffffffL) == ins)
163 return 4;
164 if ((ins & 0xffffffffffffL) == ins)
165 return 6;
167 SLJIT_UNREACHABLE();
168 return (sljit_uw)-1;
171 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
173 sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
174 FAIL_IF(!ibuf);
175 *ibuf = ins;
176 compiler->size++;
177 return SLJIT_SUCCESS;
180 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
182 sljit_u16 *ibuf = (sljit_u16 *)*ptr;
183 sljit_uw size = sizeof_ins(ins);
185 SLJIT_ASSERT((size & 6) == size);
186 switch (size) {
187 case 6:
188 *ibuf++ = (sljit_u16)(ins >> 32);
189 /* fallthrough */
190 case 4:
191 *ibuf++ = (sljit_u16)(ins >> 16);
192 /* fallthrough */
193 case 2:
194 *ibuf++ = (sljit_u16)(ins);
196 *ptr = (void*)ibuf;
197 return SLJIT_SUCCESS;
200 #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
201 (((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
202 && !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
204 /* Map the given type to a 4-bit condition code mask. */
205 static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
206 const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
207 const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
208 const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
209 const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
211 switch (type) {
212 case SLJIT_EQUAL:
213 if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
214 sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
215 if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
216 return cc0;
217 if (type == SLJIT_OVERFLOW)
218 return (cc0 | cc3);
219 return (cc0 | cc2);
221 /* fallthrough */
223 case SLJIT_F_EQUAL:
224 case SLJIT_ORDERED_EQUAL:
225 return cc0;
227 case SLJIT_NOT_EQUAL:
228 if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
229 sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
230 if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
231 return (cc1 | cc2 | cc3);
232 if (type == SLJIT_OVERFLOW)
233 return (cc1 | cc2);
234 return (cc1 | cc3);
236 /* fallthrough */
238 case SLJIT_UNORDERED_OR_NOT_EQUAL:
239 return (cc1 | cc2 | cc3);
241 case SLJIT_LESS:
242 return cc1;
244 case SLJIT_GREATER_EQUAL:
245 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
246 return (cc0 | cc2 | cc3);
248 case SLJIT_GREATER:
249 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
250 return cc2;
251 return cc3;
253 case SLJIT_LESS_EQUAL:
254 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
255 return (cc0 | cc1);
256 return (cc0 | cc1 | cc2);
258 case SLJIT_SIG_LESS:
259 case SLJIT_F_LESS:
260 case SLJIT_ORDERED_LESS:
261 return cc1;
263 case SLJIT_NOT_CARRY:
264 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
265 return (cc2 | cc3);
266 /* fallthrough */
268 case SLJIT_SIG_LESS_EQUAL:
269 case SLJIT_F_LESS_EQUAL:
270 case SLJIT_ORDERED_LESS_EQUAL:
271 return (cc0 | cc1);
273 case SLJIT_CARRY:
274 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
275 return (cc0 | cc1);
276 /* fallthrough */
278 case SLJIT_SIG_GREATER:
279 case SLJIT_UNORDERED_OR_GREATER:
280 /* Overflow is considered greater, see SLJIT_SUB. */
281 return cc2 | cc3;
283 case SLJIT_SIG_GREATER_EQUAL:
284 return (cc0 | cc2 | cc3);
286 case SLJIT_OVERFLOW:
287 if (compiler->status_flags_state & SLJIT_SET_Z)
288 return (cc2 | cc3);
289 /* fallthrough */
291 case SLJIT_UNORDERED:
292 return cc3;
294 case SLJIT_NOT_OVERFLOW:
295 if (compiler->status_flags_state & SLJIT_SET_Z)
296 return (cc0 | cc1);
297 /* fallthrough */
299 case SLJIT_ORDERED:
300 return (cc0 | cc1 | cc2);
302 case SLJIT_F_NOT_EQUAL:
303 case SLJIT_ORDERED_NOT_EQUAL:
304 return (cc1 | cc2);
306 case SLJIT_F_GREATER:
307 case SLJIT_ORDERED_GREATER:
308 return cc2;
310 case SLJIT_F_GREATER_EQUAL:
311 case SLJIT_ORDERED_GREATER_EQUAL:
312 return (cc0 | cc2);
314 case SLJIT_UNORDERED_OR_LESS_EQUAL:
315 return (cc0 | cc1 | cc3);
317 case SLJIT_UNORDERED_OR_EQUAL:
318 return (cc0 | cc3);
320 case SLJIT_UNORDERED_OR_LESS:
321 return (cc1 | cc3);
324 SLJIT_UNREACHABLE();
325 return (sljit_u8)-1;
328 /* Facility to bit index mappings.
329 Note: some facilities share the same bit index. */
330 typedef sljit_uw facility_bit;
331 #define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
332 #define FAST_LONG_DISPLACEMENT_FACILITY 19
333 #define EXTENDED_IMMEDIATE_FACILITY 21
334 #define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
335 #define DISTINCT_OPERAND_FACILITY 45
336 #define HIGH_WORD_FACILITY 45
337 #define POPULATION_COUNT_FACILITY 45
338 #define LOAD_STORE_ON_CONDITION_1_FACILITY 45
339 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
340 #define LOAD_STORE_ON_CONDITION_2_FACILITY 53
341 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
342 #define VECTOR_FACILITY 129
343 #define VECTOR_ENHANCEMENTS_1_FACILITY 135
345 /* Report whether a facility is known to be present due to the compiler
346 settings. This function should always be compiled to a constant
347 value given a constant argument. */
348 static SLJIT_INLINE int have_facility_static(facility_bit x)
350 #if ENABLE_STATIC_FACILITY_DETECTION
351 switch (x) {
352 case FAST_LONG_DISPLACEMENT_FACILITY:
353 return (__ARCH__ >= 6 /* z990 */);
354 case EXTENDED_IMMEDIATE_FACILITY:
355 case STORE_FACILITY_LIST_EXTENDED_FACILITY:
356 return (__ARCH__ >= 7 /* z9-109 */);
357 case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
358 return (__ARCH__ >= 8 /* z10 */);
359 case DISTINCT_OPERAND_FACILITY:
360 return (__ARCH__ >= 9 /* z196 */);
361 case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
362 return (__ARCH__ >= 10 /* zEC12 */);
363 case LOAD_STORE_ON_CONDITION_2_FACILITY:
364 case VECTOR_FACILITY:
365 return (__ARCH__ >= 11 /* z13 */);
366 case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
367 case VECTOR_ENHANCEMENTS_1_FACILITY:
368 return (__ARCH__ >= 12 /* z14 */);
369 default:
370 SLJIT_UNREACHABLE();
372 #endif
373 return 0;
376 static SLJIT_INLINE unsigned long get_hwcap()
378 static unsigned long hwcap = 0;
379 if (SLJIT_UNLIKELY(!hwcap)) {
380 hwcap = getauxval(AT_HWCAP);
381 SLJIT_ASSERT(hwcap != 0);
383 return hwcap;
386 static SLJIT_INLINE int have_stfle()
388 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
389 return 1;
391 return (get_hwcap() & HWCAP_S390_STFLE);
394 /* Report whether the given facility is available. This function always
395 performs a runtime check. */
396 static int have_facility_dynamic(facility_bit x)
398 #if ENABLE_DYNAMIC_FACILITY_DETECTION
399 static struct {
400 sljit_uw bits[4];
401 } cpu_features;
402 size_t size = sizeof(cpu_features);
403 const sljit_uw word_index = x >> 6;
404 const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
406 SLJIT_ASSERT(x < size * 8);
407 if (SLJIT_UNLIKELY(!have_stfle()))
408 return 0;
410 if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
411 __asm__ __volatile__ (
412 "lgr %%r0, %0;"
413 "stfle 0(%1);"
414 /* outputs */:
415 /* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
416 /* clobbers */: "r0", "cc", "memory"
418 SLJIT_ASSERT(cpu_features.bits[0] != 0);
420 return (cpu_features.bits[word_index] & bit_index) != 0;
421 #else
422 return 0;
423 #endif
426 #define HAVE_FACILITY(name, bit) \
427 static SLJIT_INLINE int name() \
429 static int have = -1; \
430 /* Static check first. May allow the function to be optimized away. */ \
431 if (have_facility_static(bit)) \
432 have = 1; \
433 else if (SLJIT_UNLIKELY(have < 0)) \
434 have = have_facility_dynamic(bit) ? 1 : 0; \
436 return have; \
439 HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
440 HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
441 HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
442 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
443 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
444 HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
445 #undef HAVE_FACILITY
/* Range checks for the various immediate/displacement encodings. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
#define is_s32(d)	((d) == (sljit_s32)(d))
458 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
460 SLJIT_ASSERT(is_s20(d));
462 sljit_uw dh = (d >> 12) & 0xff;
463 sljit_uw dl = (d << 8) & 0xfff00;
464 return (dh | dl) << 8;
467 /* TODO(carenas): variadic macro is not strictly needed */
468 #define SLJIT_S390X_INSTRUCTION(op, ...) \
469 static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
471 /* RR form instructions. */
472 #define SLJIT_S390X_RR(name, pattern) \
473 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
475 return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
478 /* AND */
479 SLJIT_S390X_RR(nr, 0x1400)
481 /* BRANCH AND SAVE */
482 SLJIT_S390X_RR(basr, 0x0d00)
484 /* BRANCH ON CONDITION */
485 SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */
487 /* DIVIDE */
488 SLJIT_S390X_RR(dr, 0x1d00)
490 /* EXCLUSIVE OR */
491 SLJIT_S390X_RR(xr, 0x1700)
493 /* LOAD */
494 SLJIT_S390X_RR(lr, 0x1800)
496 /* LOAD COMPLEMENT */
497 SLJIT_S390X_RR(lcr, 0x1300)
499 /* OR */
500 SLJIT_S390X_RR(or, 0x1600)
502 #undef SLJIT_S390X_RR
504 /* RRE form instructions */
505 #define SLJIT_S390X_RRE(name, pattern) \
506 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
508 return (pattern) | R4A(dst) | R0A(src); \
511 /* AND */
512 SLJIT_S390X_RRE(ngr, 0xb9800000)
514 /* DIVIDE LOGICAL */
515 SLJIT_S390X_RRE(dlr, 0xb9970000)
516 SLJIT_S390X_RRE(dlgr, 0xb9870000)
518 /* DIVIDE SINGLE */
519 SLJIT_S390X_RRE(dsgr, 0xb90d0000)
521 /* EXCLUSIVE OR */
522 SLJIT_S390X_RRE(xgr, 0xb9820000)
524 /* LOAD */
525 SLJIT_S390X_RRE(lgr, 0xb9040000)
526 SLJIT_S390X_RRE(lgfr, 0xb9140000)
528 /* LOAD BYTE */
529 SLJIT_S390X_RRE(lbr, 0xb9260000)
530 SLJIT_S390X_RRE(lgbr, 0xb9060000)
532 /* LOAD COMPLEMENT */
533 SLJIT_S390X_RRE(lcgr, 0xb9030000)
535 /* LOAD HALFWORD */
536 SLJIT_S390X_RRE(lhr, 0xb9270000)
537 SLJIT_S390X_RRE(lghr, 0xb9070000)
539 /* LOAD LOGICAL */
540 SLJIT_S390X_RRE(llgfr, 0xb9160000)
542 /* LOAD LOGICAL CHARACTER */
543 SLJIT_S390X_RRE(llcr, 0xb9940000)
544 SLJIT_S390X_RRE(llgcr, 0xb9840000)
546 /* LOAD LOGICAL HALFWORD */
547 SLJIT_S390X_RRE(llhr, 0xb9950000)
548 SLJIT_S390X_RRE(llghr, 0xb9850000)
550 /* MULTIPLY LOGICAL */
551 SLJIT_S390X_RRE(mlgr, 0xb9860000)
553 /* MULTIPLY SINGLE */
554 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
556 /* OR */
557 SLJIT_S390X_RRE(ogr, 0xb9810000)
559 /* SUBTRACT */
560 SLJIT_S390X_RRE(sgr, 0xb9090000)
562 #undef SLJIT_S390X_RRE
564 /* RI-a form instructions */
565 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
566 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
568 return (pattern) | R20A(reg) | (imm & 0xffff); \
571 /* ADD HALFWORD IMMEDIATE */
572 SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)
574 /* LOAD HALFWORD IMMEDIATE */
575 SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
576 SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)
578 /* LOAD LOGICAL IMMEDIATE */
579 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
580 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
581 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
582 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
584 /* MULTIPLY HALFWORD IMMEDIATE */
585 SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
586 SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)
588 /* OR IMMEDIATE */
589 SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)
591 #undef SLJIT_S390X_RIA
593 /* RIL-a form instructions (requires extended immediate facility) */
594 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
595 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
597 SLJIT_ASSERT(have_eimm()); \
598 return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
601 /* ADD IMMEDIATE */
602 SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)
604 /* ADD IMMEDIATE HIGH */
605 SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
607 /* AND IMMEDIATE */
608 SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)
610 /* EXCLUSIVE OR IMMEDIATE */
611 SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)
613 /* INSERT IMMEDIATE */
614 SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
615 SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)
617 /* LOAD IMMEDIATE */
618 SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)
620 /* LOAD LOGICAL IMMEDIATE */
621 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
622 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
624 /* SUBTRACT LOGICAL IMMEDIATE */
625 SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)
627 #undef SLJIT_S390X_RILA
629 /* RX-a form instructions */
630 #define SLJIT_S390X_RXA(name, pattern) \
631 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
633 SLJIT_ASSERT((d & 0xfff) == d); \
635 return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
638 /* LOAD */
639 SLJIT_S390X_RXA(l, 0x58000000)
641 /* LOAD ADDRESS */
642 SLJIT_S390X_RXA(la, 0x41000000)
644 /* LOAD HALFWORD */
645 SLJIT_S390X_RXA(lh, 0x48000000)
647 /* MULTIPLY SINGLE */
648 SLJIT_S390X_RXA(ms, 0x71000000)
650 /* STORE */
651 SLJIT_S390X_RXA(st, 0x50000000)
653 /* STORE CHARACTER */
654 SLJIT_S390X_RXA(stc, 0x42000000)
656 /* STORE HALFWORD */
657 SLJIT_S390X_RXA(sth, 0x40000000)
659 #undef SLJIT_S390X_RXA
661 /* RXY-a instructions */
662 #define SLJIT_S390X_RXYA(name, pattern, cond) \
663 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
665 SLJIT_ASSERT(cond); \
667 return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
670 /* LOAD */
671 SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
672 SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
673 SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)
675 /* LOAD BYTE */
676 SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
677 SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())
679 /* LOAD HALFWORD */
680 SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
681 SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)
683 /* LOAD LOGICAL */
684 SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)
686 /* LOAD LOGICAL CHARACTER */
687 SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
688 SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)
690 /* LOAD LOGICAL HALFWORD */
691 SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
692 SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)
694 /* MULTIPLY SINGLE */
695 SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
696 SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)
698 /* STORE */
699 SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
700 SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)
702 /* STORE CHARACTER */
703 SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())
705 /* STORE HALFWORD */
706 SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())
708 #undef SLJIT_S390X_RXYA
710 /* RSY-a instructions */
711 #define SLJIT_S390X_RSYA(name, pattern, cond) \
712 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
714 SLJIT_ASSERT(cond); \
716 return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
719 /* LOAD MULTIPLE */
720 SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)
722 /* SHIFT LEFT LOGICAL */
723 SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)
725 /* SHIFT RIGHT SINGLE */
726 SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)
728 /* STORE MULTIPLE */
729 SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)
731 #undef SLJIT_S390X_RSYA
733 /* RIE-f instructions (require general-instructions-extension facility) */
734 #define SLJIT_S390X_RIEF(name, pattern) \
735 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
737 sljit_ins i3, i4, i5; \
739 SLJIT_ASSERT(have_genext()); \
740 i3 = (sljit_ins)start << 24; \
741 i4 = (sljit_ins)end << 16; \
742 i5 = (sljit_ins)rot << 8; \
744 return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
747 /* ROTATE THEN AND SELECTED BITS */
748 /* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */
750 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
751 /* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */
753 /* ROTATE THEN OR SELECTED BITS */
754 SLJIT_S390X_RIEF(rosbg, 0xec0000000056)
756 /* ROTATE THEN INSERT SELECTED BITS */
757 /* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
758 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
760 /* ROTATE THEN INSERT SELECTED BITS HIGH */
761 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
763 /* ROTATE THEN INSERT SELECTED BITS LOW */
764 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
766 #undef SLJIT_S390X_RIEF
768 /* RRF-c instructions (require load/store-on-condition 1 facility) */
769 #define SLJIT_S390X_RRFC(name, pattern) \
770 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
772 sljit_ins m3; \
774 SLJIT_ASSERT(have_lscond1()); \
775 m3 = (sljit_ins)(mask & 0xf) << 12; \
777 return (pattern) | m3 | R4A(dst) | R0A(src); \
780 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
781 SLJIT_S390X_RRFC(locr, 0xb9f20000)
782 SLJIT_S390X_RRFC(locgr, 0xb9e20000)
784 #undef SLJIT_S390X_RRFC
786 /* RIE-g instructions (require load/store-on-condition 2 facility) */
787 #define SLJIT_S390X_RIEG(name, pattern) \
788 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
790 sljit_ins m3, i2; \
792 SLJIT_ASSERT(have_lscond2()); \
793 m3 = (sljit_ins)(mask & 0xf) << 32; \
794 i2 = (sljit_ins)(imm & 0xffffL) << 16; \
796 return (pattern) | R36A(reg) | m3 | i2; \
799 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
800 SLJIT_S390X_RIEG(lochi, 0xec0000000042)
801 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
803 #undef SLJIT_S390X_RIEG
805 #define SLJIT_S390X_RILB(name, pattern, cond) \
806 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
808 SLJIT_ASSERT(cond); \
810 return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
813 /* BRANCH RELATIVE AND SAVE LONG */
814 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
816 /* LOAD ADDRESS RELATIVE LONG */
817 SLJIT_S390X_RILB(larl, 0xc00000000000, 1)
819 /* LOAD RELATIVE LONG */
820 SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())
822 #undef SLJIT_S390X_RILB
824 SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
826 return 0x07f0 | target;
829 SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
831 sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
832 sljit_ins ri2 = (sljit_ins)target & 0xffff;
833 return 0xa7040000L | m1 | ri2;
836 SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
838 sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
839 sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
840 return 0xc00400000000L | m1 | ri2;
843 SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
845 SLJIT_ASSERT(have_eimm());
846 return 0xb9830000 | R8A(dst) | R0A(src);
849 /* INSERT PROGRAM MASK */
850 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
852 return 0xb2220000 | R4A(dst);
855 /* SET PROGRAM MASK */
856 SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
858 return 0x0400 | R4A(dst);
861 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
862 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
864 return risbhg(dst, src, start, 0x8 | end, rot);
867 #undef SLJIT_S390X_INSTRUCTION
869 static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
871 /* Condition codes: bits 18 and 19.
872 Transformation:
873 0 (zero and no overflow) : unchanged
874 1 (non-zero and no overflow) : unchanged
875 2 (zero and overflow) : decreased by 1
876 3 (non-zero and overflow) : decreased by 1 if non-zero */
877 FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
878 FAIL_IF(push_inst(compiler, ipm(tmp1)));
879 FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
880 FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
881 FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
882 FAIL_IF(push_inst(compiler, spm(tmp1)));
883 return SLJIT_SUCCESS;
886 /* load 64-bit immediate into register without clobbering flags */
887 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
889 /* 4 byte instructions */
890 if (is_s16(v))
891 return push_inst(compiler, lghi(target, (sljit_s16)v));
893 if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
894 return push_inst(compiler, llill(target, (sljit_u16)v));
896 if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
897 return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
899 if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
900 return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
902 if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
903 return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
905 /* 6 byte instructions (requires extended immediate facility) */
906 if (have_eimm()) {
907 if (is_s32(v))
908 return push_inst(compiler, lgfi(target, (sljit_s32)v));
910 if (((sljit_uw)v >> 32) == 0)
911 return push_inst(compiler, llilf(target, (sljit_u32)v));
913 if (((sljit_uw)v << 32) == 0)
914 return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
916 FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
917 return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
920 /* TODO(mundaym): instruction sequences that don't use extended immediates */
921 abort();
924 struct addr {
925 sljit_gpr base;
926 sljit_gpr index;
927 sljit_s32 offset;
930 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
931 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
932 struct addr *addr, sljit_s32 mem, sljit_sw off,
933 sljit_gpr tmp /* clobbered, must not be r0 */)
935 sljit_gpr base = r0;
936 sljit_gpr index = r0;
938 SLJIT_ASSERT(tmp != r0);
939 if (mem & REG_MASK)
940 base = gpr(mem & REG_MASK);
942 if (mem & OFFS_REG_MASK) {
943 index = gpr(OFFS_REG(mem));
944 if (off != 0) {
945 /* shift and put the result into tmp */
946 SLJIT_ASSERT(0 <= off && off < 64);
947 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
948 index = tmp;
949 off = 0; /* clear offset */
952 else if (!is_s20(off)) {
953 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
954 index = tmp;
955 off = 0; /* clear offset */
957 addr->base = base;
958 addr->index = index;
959 addr->offset = (sljit_s32)off;
960 return SLJIT_SUCCESS;
963 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
964 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
965 struct addr *addr, sljit_s32 mem, sljit_sw off,
966 sljit_gpr tmp /* clobbered, must not be r0 */)
968 sljit_gpr base = r0;
969 sljit_gpr index = r0;
971 SLJIT_ASSERT(tmp != r0);
972 if (mem & REG_MASK)
973 base = gpr(mem & REG_MASK);
975 if (mem & OFFS_REG_MASK) {
976 index = gpr(OFFS_REG(mem));
977 if (off != 0) {
978 /* shift and put the result into tmp */
979 SLJIT_ASSERT(0 <= off && off < 64);
980 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
981 index = tmp;
982 off = 0; /* clear offset */
985 else if (!is_u12(off)) {
986 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
987 index = tmp;
988 off = 0; /* clear offset */
990 addr->base = base;
991 addr->index = index;
992 addr->offset = (sljit_s32)off;
993 return SLJIT_SUCCESS;
996 #define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
997 #define WHEN(cond, r, i1, i2, addr) \
998 (cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
1000 /* May clobber tmp1. */
1001 static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
1002 sljit_s32 src, sljit_sw srcw,
1003 sljit_s32 is_32bit)
1005 struct addr addr;
1006 sljit_ins ins;
1008 SLJIT_ASSERT(src & SLJIT_MEM);
1009 if (have_ldisp() || !is_32bit)
1010 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1011 else
1012 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
1014 if (is_32bit)
1015 ins = WHEN(is_u12(addr.offset), dst, l, ly, addr);
1016 else
1017 ins = lg(dst, addr.offset, addr.index, addr.base);
1019 return push_inst(compiler, ins);
1022 /* May clobber tmp1. */
1023 static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
1024 sljit_s32 dst, sljit_sw dstw,
1025 sljit_s32 is_32bit)
1027 struct addr addr;
1028 sljit_ins ins;
1030 SLJIT_ASSERT(dst & SLJIT_MEM);
1031 if (have_ldisp() || !is_32bit)
1032 FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
1033 else
1034 FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
1036 if (is_32bit)
1037 ins = WHEN(is_u12(addr.offset), src, st, sty, addr);
1038 else
1039 ins = stg(src, addr.offset, addr.index, addr.base);
1041 return push_inst(compiler, ins);
1044 #undef WHEN
1046 static sljit_s32 emit_move(struct sljit_compiler *compiler,
1047 sljit_gpr dst_r,
1048 sljit_s32 src, sljit_sw srcw)
1050 SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
1052 if (src & SLJIT_IMM)
1053 return push_load_imm_inst(compiler, dst_r, srcw);
1055 if (src & SLJIT_MEM)
1056 return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1058 sljit_gpr src_r = gpr(src & REG_MASK);
1059 return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1062 static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1063 sljit_s32 dst,
1064 sljit_s32 src1, sljit_sw src1w,
1065 sljit_s32 src2, sljit_sw src2w)
1067 sljit_gpr dst_r = tmp0;
1068 sljit_gpr src_r = tmp1;
1069 sljit_s32 needs_move = 1;
1071 if (FAST_IS_REG(dst)) {
1072 dst_r = gpr(dst);
1074 if (dst == src1)
1075 needs_move = 0;
1076 else if (dst == src2) {
1077 dst_r = tmp0;
1078 needs_move = 2;
1082 if (needs_move)
1083 FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1085 if (FAST_IS_REG(src2))
1086 src_r = gpr(src2);
1087 else
1088 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1090 FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1092 if (needs_move != 2)
1093 return SLJIT_SUCCESS;
1095 dst_r = gpr(dst & REG_MASK);
1096 return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1099 static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1100 sljit_s32 dst,
1101 sljit_s32 src1, sljit_sw src1w)
1103 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1104 sljit_gpr src_r = tmp1;
1106 if (FAST_IS_REG(src1))
1107 src_r = gpr(src1);
1108 else
1109 FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1111 return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1114 static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1115 sljit_s32 dst,
1116 sljit_s32 src1, sljit_sw src1w,
1117 sljit_s32 src2, sljit_sw src2w)
1119 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1120 sljit_gpr src1_r = tmp0;
1121 sljit_gpr src2_r = tmp1;
1123 if (FAST_IS_REG(src1))
1124 src1_r = gpr(src1);
1125 else
1126 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1128 if (FAST_IS_REG(src2))
1129 src2_r = gpr(src2);
1130 else
1131 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1133 return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
/* Immediate instruction format selector: RI-a instructions carry a
   16-bit immediate, RIL-a instructions a 32-bit immediate. */
typedef enum {
	RI_A,
	RIL_A,
} emit_ril_type;
1141 static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1142 sljit_s32 dst,
1143 sljit_s32 src1, sljit_sw src1w,
1144 sljit_sw src2w,
1145 emit_ril_type type)
1147 sljit_gpr dst_r = tmp0;
1148 sljit_s32 needs_move = 1;
1150 if (FAST_IS_REG(dst)) {
1151 dst_r = gpr(dst);
1153 if (dst == src1)
1154 needs_move = 0;
1157 if (needs_move)
1158 FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1160 if (type == RIL_A)
1161 return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1162 return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1165 static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1166 sljit_s32 dst,
1167 sljit_s32 src1, sljit_sw src1w,
1168 sljit_sw src2w)
1170 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1171 sljit_gpr src_r = tmp0;
1173 if (!FAST_IS_REG(src1))
1174 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1175 else
1176 src_r = gpr(src1 & REG_MASK);
1178 return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
/* Register-memory instruction format selector: RX-a has a 12-bit
   unsigned displacement, RXY-a a 20-bit signed displacement. */
typedef enum {
	RX_A,
	RXY_A,
} emit_rx_type;
/* Emits a register-memory instruction: dst = src1 OP [src2 + src2w],
   fixing up displacements that do not fit the selected format.
   May clobber tmp0 and tmp1. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* dst is part of the address: compute into tmp0 and
			   copy back at the end (needs_move == 2). */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	/* NOTE(review): index defaults to tmp0, presumably r0 which reads
	   as zero ("no index") in address generation -- confirm. */
	base = gpr(src2 & REG_MASK);
	index = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* Apply the scaling shift to the index register. */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* Displacement does not fit the format: materialize it in
		   tmp1 and fold it into the index (or base) field. */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
	else
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* Copy the result out of tmp0 into the real destination. */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1247 static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1248 sljit_s32 dst, sljit_sw dstw,
1249 sljit_sw srcw)
1251 SLJIT_ASSERT(dst & SLJIT_MEM);
1253 sljit_gpr dst_r = tmp1;
1255 if (dst & OFFS_REG_MASK) {
1256 sljit_gpr index = tmp1;
1258 if ((dstw & 0x3) == 0)
1259 index = gpr(OFFS_REG(dst));
1260 else
1261 FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
1263 FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));
1264 dstw = 0;
1266 else if (!is_s20(dstw)) {
1267 FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1269 if (dst & REG_MASK)
1270 FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));
1272 dstw = 0;
1274 else
1275 dst_r = gpr(dst & REG_MASK);
1277 return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
/* Alternative encodings of one logical operation; a zero entry means
   that form is unavailable. The emitters below pick the cheapest form
   that fits the operands and the current 32/64-bit mode. */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit register-register form */
	sljit_ins op_gr;  /* 64-bit register-register form */
	sljit_ins op_rk;  /* 32-bit three-register form (distinct dst) */
	sljit_ins op_grk; /* 64-bit three-register form (distinct dst) */
	sljit_ins op;     /* 32-bit memory form, 12-bit displacement */
	sljit_ins op_y;   /* 32-bit memory form, 20-bit displacement */
	sljit_ins op_g;   /* 64-bit memory form, 20-bit displacement */
};
/* Emits a commutative binary operation (dst = src1 OP src2), free to
   swap the operands so a memory operand can feed the register-memory
   forms; falls back to register forms otherwise. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* Pick the memory forms available for the current mode. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				/* Prefer the 12-bit form when the displacement fits
				   (or only the 12-bit form can encode it). */
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			if (src1 & SLJIT_MEM) {
				/* Swap operands so the memory operand is second. */
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			/* Only one memory form exists: use it when an operand
			   can be encoded with it. */
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* Register forms: two-operand (ins) or three-operand (ins_k). */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	if (ins && FAST_IS_REG(dst)) {
		/* Two-operand form is cheapest when dst overlaps a source. */
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
/* Emits a non-commutative binary operation (dst = src1 OP src2);
   operand order must be preserved, so only src2 may use the
   register-memory forms. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins;

	if (src2 & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* Pick the memory forms available for the current mode. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Prefer the 12-bit displacement form when it fits (or
			   when only it can encode the displacement). */
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
		}
		else if (ins12)
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
		else if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;

	/* Use the two-operand form when no three-operand form exists or
	   dst already overlaps src1. */
	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
/* Assembles the compiler's instruction buffers into executable code.
   Pass 1 computes the code size, literal pool size and label offsets;
   pass 2 encodes the instructions, resolving constants, jumps and
   put_labels against a doubleword aligned literal pool placed after
   the code. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 *                using instruction tags for const is creative
			 *                but unlike all other architectures, and is not
			 *                done consistently for all other objects.
			 *                This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and assert()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}

	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1637 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1639 /* TODO(mundaym): implement all */
1640 switch (feature_type) {
1641 case SLJIT_HAS_CLZ:
1642 return have_eimm() ? 1 : 0; /* FLOGR instruction */
1643 case SLJIT_HAS_CMOV:
1644 return have_lscond1() ? 1 : 0;
1645 case SLJIT_HAS_FPU:
1646 return 1;
1648 return 0;
1651 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1653 return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
1656 /* --------------------------------------------------------------------- */
1657 /* Entry, exit */
1658 /* --------------------------------------------------------------------- */
/* Emits the function prologue: stores the used callee-saved general
   registers, r14 and the saved floating point registers into the
   callee allocated save area, allocates the stack frame and copies
   the incoming arguments into their saved registers. The first
   SLJIT_KEPT_SAVEDS_COUNT(options) saved registers keep their caller
   values and are excluded from both saving and argument shuffling. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* All registers between r6 and r13 are in use: store them
		   (and r14) with a single store-multiple. */
		if (saved_arg_count == 0) {
			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			/* The kept registers (top of the saved range) and r14
			   are not included here; r14 is stored below. */
			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
		}
	} else {
		/* Store the callee-saved registers used as scratches. */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* Store the saved registers (minus the kept ones) and r14. */
		if (saved_arg_count == 0) {
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > saved_arg_count) {
			if (saveds == saved_arg_count + 1) {
				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
			}
		}
	}

	/* r14 was excluded from the ranges above when registers are kept. */
	if (saved_arg_count > 0) {
		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Store the saved floating point registers. */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* Allocate the (16 byte aligned) stack frame. */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));

	/* Move word (and smaller) arguments into their saved registers.
	   saved_arg_count intentionally continues from the kept count, so
	   arguments land after the kept registers. */
	arg_types >>= SLJIT_ARG_SHIFT;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1749 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1750 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1751 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1753 CHECK_ERROR();
1754 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1755 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1757 compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1758 return SLJIT_SUCCESS;
/* Emits the function epilogue (without the branch to r14): releases
   the stack frame and reloads the callee-saved general and floating
   point registers stored by sljit_emit_enter; the load sequence
   mirrors the store sequence there. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* Deallocate the frame: r15 += local_size (0x41 is LA, not LY). */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		if (kept_saveds_count == 0) {
			FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			/* Kept registers and r14 were not stored in this range;
			   r14 is reloaded below. */
			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
		}
	} else {
		/* Reload the callee-saved registers used as scratches. */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* Reload the saved registers (minus the kept ones) and r14. */
		if (kept_saveds_count == 0) {
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > kept_saveds_count) {
			if (saveds == kept_saveds_count + 1) {
				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
			}
		}
	}

	/* r14 was excluded from the ranges above when registers are kept. */
	if (kept_saveds_count > 0) {
		FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Reload the saved floating point registers. */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1830 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1832 CHECK_ERROR();
1833 CHECK(check_sljit_emit_return_void(compiler));
1835 FAIL_IF(emit_stack_frame_release(compiler));
1836 return push_inst(compiler, br(r14)); /* return */
1839 /* --------------------------------------------------------------------- */
1840 /* Operators */
1841 /* --------------------------------------------------------------------- */
/* Emits a zero-operand operation; the multiply/divide opcodes work on
   the implicit SLJIT_R0/SLJIT_R1 register pair. May clobber tmp0/tmp1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_32);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* Correct the high half for the signed interpretation. */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* Zero-extend the dividend into the tmp0:tmp1 pair. */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		/* NOTE(review): tmp0 (high half of the even/odd dividend
		   pair) is zeroed rather than sign extended before dr;
		   this looks wrong for negative dividends -- confirm
		   against the DR instruction definition. */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers (reached only by the LMUL cases) */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1927 /* LEVAL will be defined later with different parameters as needed */
1928 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
1930 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1931 sljit_s32 dst, sljit_sw dstw,
1932 sljit_s32 src, sljit_sw srcw)
1934 sljit_ins ins;
1935 struct addr mem;
1936 sljit_gpr dst_r;
1937 sljit_gpr src_r;
1938 sljit_s32 opcode = GET_OPCODE(op);
1940 CHECK_ERROR();
1941 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1942 ADJUST_LOCAL_OFFSET(dst, dstw);
1943 ADJUST_LOCAL_OFFSET(src, srcw);
1945 if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
1946 /* LOAD REGISTER */
1947 if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
1948 dst_r = gpr(dst);
1949 src_r = gpr(src);
1950 switch (opcode | (op & SLJIT_32)) {
1951 /* 32-bit */
1952 case SLJIT_MOV32_U8:
1953 ins = llcr(dst_r, src_r);
1954 break;
1955 case SLJIT_MOV32_S8:
1956 ins = lbr(dst_r, src_r);
1957 break;
1958 case SLJIT_MOV32_U16:
1959 ins = llhr(dst_r, src_r);
1960 break;
1961 case SLJIT_MOV32_S16:
1962 ins = lhr(dst_r, src_r);
1963 break;
1964 case SLJIT_MOV32:
1965 if (dst_r == src_r)
1966 return SLJIT_SUCCESS;
1967 ins = lr(dst_r, src_r);
1968 break;
1969 /* 64-bit */
1970 case SLJIT_MOV_U8:
1971 ins = llgcr(dst_r, src_r);
1972 break;
1973 case SLJIT_MOV_S8:
1974 ins = lgbr(dst_r, src_r);
1975 break;
1976 case SLJIT_MOV_U16:
1977 ins = llghr(dst_r, src_r);
1978 break;
1979 case SLJIT_MOV_S16:
1980 ins = lghr(dst_r, src_r);
1981 break;
1982 case SLJIT_MOV_U32:
1983 ins = llgfr(dst_r, src_r);
1984 break;
1985 case SLJIT_MOV_S32:
1986 ins = lgfr(dst_r, src_r);
1987 break;
1988 case SLJIT_MOV:
1989 case SLJIT_MOV_P:
1990 if (dst_r == src_r)
1991 return SLJIT_SUCCESS;
1992 ins = lgr(dst_r, src_r);
1993 break;
1994 default:
1995 ins = 0;
1996 SLJIT_UNREACHABLE();
1997 break;
1999 FAIL_IF(push_inst(compiler, ins));
2000 return SLJIT_SUCCESS;
2002 /* LOAD IMMEDIATE */
2003 if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
2004 switch (opcode) {
2005 case SLJIT_MOV_U8:
2006 srcw = (sljit_sw)((sljit_u8)(srcw));
2007 break;
2008 case SLJIT_MOV_S8:
2009 srcw = (sljit_sw)((sljit_s8)(srcw));
2010 break;
2011 case SLJIT_MOV_U16:
2012 srcw = (sljit_sw)((sljit_u16)(srcw));
2013 break;
2014 case SLJIT_MOV_S16:
2015 srcw = (sljit_sw)((sljit_s16)(srcw));
2016 break;
2017 case SLJIT_MOV_U32:
2018 srcw = (sljit_sw)((sljit_u32)(srcw));
2019 break;
2020 case SLJIT_MOV_S32:
2021 case SLJIT_MOV32:
2022 srcw = (sljit_sw)((sljit_s32)(srcw));
2023 break;
2025 return push_load_imm_inst(compiler, gpr(dst), srcw);
2027 /* LOAD */
2028 /* TODO(carenas): avoid reg being defined later */
2029 #define LEVAL(i) EVAL(i, reg, mem)
2030 if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2031 sljit_gpr reg = gpr(dst);
2033 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2034 /* TODO(carenas): convert all calls below to LEVAL */
2035 switch (opcode | (op & SLJIT_32)) {
2036 case SLJIT_MOV32_U8:
2037 ins = llc(reg, mem.offset, mem.index, mem.base);
2038 break;
2039 case SLJIT_MOV32_S8:
2040 ins = lb(reg, mem.offset, mem.index, mem.base);
2041 break;
2042 case SLJIT_MOV32_U16:
2043 ins = llh(reg, mem.offset, mem.index, mem.base);
2044 break;
2045 case SLJIT_MOV32_S16:
2046 ins = WHEN2(is_u12(mem.offset), lh, lhy);
2047 break;
2048 case SLJIT_MOV32:
2049 ins = WHEN2(is_u12(mem.offset), l, ly);
2050 break;
2051 case SLJIT_MOV_U8:
2052 ins = LEVAL(llgc);
2053 break;
2054 case SLJIT_MOV_S8:
2055 ins = lgb(reg, mem.offset, mem.index, mem.base);
2056 break;
2057 case SLJIT_MOV_U16:
2058 ins = LEVAL(llgh);
2059 break;
2060 case SLJIT_MOV_S16:
2061 ins = lgh(reg, mem.offset, mem.index, mem.base);
2062 break;
2063 case SLJIT_MOV_U32:
2064 ins = LEVAL(llgf);
2065 break;
2066 case SLJIT_MOV_S32:
2067 ins = lgf(reg, mem.offset, mem.index, mem.base);
2068 break;
2069 case SLJIT_MOV_P:
2070 case SLJIT_MOV:
2071 ins = lg(reg, mem.offset, mem.index, mem.base);
2072 break;
2073 default:
2074 ins = 0;
2075 SLJIT_UNREACHABLE();
2076 break;
2078 FAIL_IF(push_inst(compiler, ins));
2079 return SLJIT_SUCCESS;
2081 /* STORE and STORE IMMEDIATE */
2082 if ((dst & SLJIT_MEM)
2083 && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
2084 sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2085 if (src & SLJIT_IMM) {
2086 /* TODO(mundaym): MOVE IMMEDIATE? */
2087 FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2089 struct addr mem;
2090 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2091 switch (opcode) {
2092 case SLJIT_MOV_U8:
2093 case SLJIT_MOV_S8:
2094 return push_inst(compiler,
2095 WHEN2(is_u12(mem.offset), stc, stcy));
2096 case SLJIT_MOV_U16:
2097 case SLJIT_MOV_S16:
2098 return push_inst(compiler,
2099 WHEN2(is_u12(mem.offset), sth, sthy));
2100 case SLJIT_MOV_U32:
2101 case SLJIT_MOV_S32:
2102 case SLJIT_MOV32:
2103 return push_inst(compiler,
2104 WHEN2(is_u12(mem.offset), st, sty));
2105 case SLJIT_MOV_P:
2106 case SLJIT_MOV:
2107 FAIL_IF(push_inst(compiler, LEVAL(stg)));
2108 return SLJIT_SUCCESS;
2109 default:
2110 SLJIT_UNREACHABLE();
2113 #undef LEVAL
2114 /* MOVE CHARACTERS */
2115 if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2116 struct addr mem;
2117 FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2118 switch (opcode) {
2119 case SLJIT_MOV_U8:
2120 case SLJIT_MOV_S8:
2121 FAIL_IF(push_inst(compiler,
2122 EVAL(llgc, tmp0, mem)));
2123 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2124 return push_inst(compiler,
2125 EVAL(stcy, tmp0, mem));
2126 case SLJIT_MOV_U16:
2127 case SLJIT_MOV_S16:
2128 FAIL_IF(push_inst(compiler,
2129 EVAL(llgh, tmp0, mem)));
2130 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2131 return push_inst(compiler,
2132 EVAL(sthy, tmp0, mem));
2133 case SLJIT_MOV_U32:
2134 case SLJIT_MOV_S32:
2135 case SLJIT_MOV32:
2136 FAIL_IF(push_inst(compiler,
2137 EVAL(ly, tmp0, mem)));
2138 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2139 return push_inst(compiler,
2140 EVAL(sty, tmp0, mem));
2141 case SLJIT_MOV_P:
2142 case SLJIT_MOV:
2143 FAIL_IF(push_inst(compiler,
2144 EVAL(lg, tmp0, mem)));
2145 FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2146 FAIL_IF(push_inst(compiler,
2147 EVAL(stg, tmp0, mem)));
2148 return SLJIT_SUCCESS;
2149 default:
2150 SLJIT_UNREACHABLE();
2153 SLJIT_UNREACHABLE();
2156 SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
2158 dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
2159 src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
2160 if (src & SLJIT_MEM)
2161 FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32));
2163 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2165 /* TODO(mundaym): optimize loads and stores */
2166 switch (opcode | (op & SLJIT_32)) {
2167 case SLJIT_NOT:
2168 /* emulate ~x with x^-1 */
2169 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2170 if (src_r != dst_r)
2171 FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
2173 FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
2174 break;
2175 case SLJIT_NOT32:
2176 /* emulate ~x with x^-1 */
2177 if (have_eimm())
2178 FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
2179 else {
2180 FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2181 if (src_r != dst_r)
2182 FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
2184 FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
2186 break;
2187 case SLJIT_CLZ:
2188 if (have_eimm()) {
2189 FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
2190 if (dst_r != tmp0)
2191 FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
2192 } else {
2193 abort(); /* TODO(mundaym): no eimm (?) */
2195 break;
2196 case SLJIT_CLZ32:
2197 if (have_eimm()) {
2198 FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
2199 FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
2200 FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
2201 if (dst_r != tmp0)
2202 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2203 } else {
2204 abort(); /* TODO(mundaym): no eimm (?) */
2206 break;
2207 default:
2208 SLJIT_UNREACHABLE();
2211 if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
2212 FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2214 /* TODO(carenas): doesn't need FAIL_IF */
2215 if (dst & SLJIT_MEM)
2216 FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
2218 return SLJIT_SUCCESS;
2221 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2223 switch (GET_OPCODE(op)) {
2224 case SLJIT_ADD:
2225 case SLJIT_ADDC:
2226 case SLJIT_MUL:
2227 case SLJIT_AND:
2228 case SLJIT_OR:
2229 case SLJIT_XOR:
2230 return 1;
2232 return 0;
2235 static SLJIT_INLINE int is_shift(sljit_s32 op) {
2236 sljit_s32 v = GET_OPCODE(op);
2237 return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
/* Signed add opcodes (ar/agr/ark/agrk/a/ay/ag), consumed by
   emit_commutative() in sljit_emit_add(). */
2240 static const struct ins_forms add_forms = {
2241 0x1a00, /* ar */
2242 0xb9080000, /* agr */
2243 0xb9f80000, /* ark */
2244 0xb9e80000, /* agrk */
2245 0x5a000000, /* a */
2246 0xe3000000005a, /* ay */
2247 0xe30000000008, /* ag */
/* Unsigned ("logical") add opcodes (alr/algr/alrk/algrk/al/aly/alg);
   chosen by sljit_emit_add() when overflow flags are not requested. */
2250 static const struct ins_forms logical_add_forms = {
2251 0x1e00, /* alr */
2252 0xb90a0000, /* algr */
2253 0xb9fa0000, /* alrk */
2254 0xb9ea0000, /* algrk */
2255 0x5e000000, /* al */
2256 0xe3000000005e, /* aly */
2257 0xe3000000000a, /* alg */
/* Emits an addition. Immediate operands are matched against the shortest
   suitable immediate form first (asi/agsi for add-to-memory-in-place,
   ahik/alhsik for 16-bit, alfi/afi families for 32-bit); everything else
   falls back to the add_forms / logical_add_forms opcode tables.
   The result (possibly in tmp0) is stored back when dst is memory. */
2260 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2261 sljit_s32 dst, sljit_sw dstw,
2262 sljit_s32 src1, sljit_sw src1w,
2263 sljit_s32 src2, sljit_sw src2w)
2265 int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2266 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2267 const struct ins_forms *forms;
2268 sljit_ins ins;
2270 if (src2 & SLJIT_IMM) {
/* dst == src1 in memory: add the small immediate directly in memory (SIY form). */
2271 if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2272 if (sets_overflow)
2273 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2274 else
2275 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2276 return emit_siy(compiler, ins, dst, dstw, src2w);
2279 if (is_s16(src2w)) {
2280 if (sets_overflow)
2281 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2282 else
2283 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2284 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2285 goto done;
2288 if (!sets_overflow) {
2289 if ((op & SLJIT_32) || is_u32(src2w)) {
2290 ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2291 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2292 goto done;
/* Large negative constant: add it by subtracting its magnitude. */
2294 if (is_u32(-src2w)) {
2295 FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2296 goto done;
2299 else if ((op & SLJIT_32) || is_s32(src2w)) {
2300 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2301 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2302 goto done;
2306 forms = sets_overflow ? &add_forms : &logical_add_forms;
2307 FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2309 done:
2310 if (sets_zero_overflow)
2311 FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2313 if (dst & SLJIT_MEM)
2314 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2316 return SLJIT_SUCCESS;
/* Signed subtract opcodes (sr/sgr/srk/sgrk/s/sy/sg), used by
   emit_non_commutative() in sljit_emit_sub(). */
2319 static const struct ins_forms sub_forms = {
2320 0x1b00, /* sr */
2321 0xb9090000, /* sgr */
2322 0xb9f90000, /* srk */
2323 0xb9e90000, /* sgrk */
2324 0x5b000000, /* s */
2325 0xe3000000005b, /* sy */
2326 0xe30000000009, /* sg */
/* Unsigned ("logical") subtract opcodes (slr/slgr/slrk/slgrk/sl/sly/slg);
   chosen by sljit_emit_sub() when signed flags are not requested. */
2329 static const struct ins_forms logical_sub_forms = {
2330 0x1f00, /* slr */
2331 0xb90b0000, /* slgr */
2332 0xb9fb0000, /* slrk */
2333 0xb9eb0000, /* slgrk */
2334 0x5f000000, /* sl */
2335 0xe3000000005f, /* sly */
2336 0xe3000000000b, /* slg */
/* Emits a subtraction. Special cases:
   - dst == tmp0 with a comparison flag type: emit a plain compare
     (c/cl/cfi/clfi/... families) instead of an actual subtract;
   - src1 == immediate 0: emit a negate (lcr/lcgr);
   - immediate src2: use the shortest immediate form by negating the
     constant and adding (asi/ahik/slfi/afi families).
   Otherwise falls back to the sub_forms / logical_sub_forms tables. */
2339 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2340 sljit_s32 dst, sljit_sw dstw,
2341 sljit_s32 src1, sljit_sw src1w,
2342 sljit_s32 src2, sljit_sw src2w)
2344 sljit_s32 flag_type = GET_FLAG_TYPE(op);
2345 int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2346 int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2347 const struct ins_forms *forms;
2348 sljit_ins ins;
/* Result is discarded (dst is the scratch tmp0): a compare suffices. */
2350 if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2351 int compare_signed = flag_type >= SLJIT_SIG_LESS;
2353 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2355 if (src2 & SLJIT_IMM) {
2356 if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
2358 if ((op & SLJIT_32) || is_s32(src2w)) {
2359 ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2360 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2363 else {
2364 if ((op & SLJIT_32) || is_u32(src2w)) {
2365 ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2366 return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2368 if (is_s16(src2w))
2369 return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2372 else if (src2 & SLJIT_MEM) {
2373 if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2374 ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2375 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2378 if (compare_signed)
2379 ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2380 else
2381 ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2382 return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2385 if (compare_signed)
2386 ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2387 else
2388 ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2389 return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
/* 0 - src2: a plain negate. */
2392 if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2393 ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2394 FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2395 goto done;
/* Subtracting an immediate is emitted as adding its negation. */
2398 if (src2 & SLJIT_IMM) {
2399 sljit_sw neg_src2w = -src2w;
2401 if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2402 if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2403 if (sets_signed)
2404 ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2405 else
2406 ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2407 return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2410 if (is_s16(neg_src2w)) {
2411 if (sets_signed)
2412 ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2413 else
2414 ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2415 FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2416 goto done;
2420 if (!sets_signed) {
2421 if ((op & SLJIT_32) || is_u32(src2w)) {
2422 ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2423 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2424 goto done;
2426 if (is_u32(neg_src2w)) {
2427 FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2428 goto done;
2431 else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2432 ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2433 FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2434 goto done;
2438 forms = sets_signed ? &sub_forms : &logical_sub_forms;
2439 FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2441 done:
2442 if (sets_signed) {
2443 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2445 if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2446 /* In case of overflow, the sign bit of the two source operands must be different, and
2447 - the first operand is greater if the sign bit of the result is set
2448 - the first operand is less if the sign bit of the result is not set
2449 The -result operation sets the corrent sign, because the result cannot be zero.
2450 The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2451 FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
2452 FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2454 else if (op & SLJIT_SET_Z)
2455 FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2458 if (dst & SLJIT_MEM)
2459 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2461 return SLJIT_SUCCESS;
/* Multiply opcodes (msr/msgr/msrkc/msgrkc/ms/msy/msg) for the
   flag-free multiply path. */
2464 static const struct ins_forms multiply_forms = {
2465 0xb2520000, /* msr */
2466 0xb90c0000, /* msgr */
2467 0xb9fd0000, /* msrkc */
2468 0xb9ed0000, /* msgrkc */
2469 0x71000000, /* ms */
2470 0xe30000000051, /* msy */
2471 0xe3000000000c, /* msg */
/* Overflow-setting multiply opcodes (msrkc/msgrkc/msc/msgc); only the
   forms that have an overflow-setting variant are filled in. */
2474 static const struct ins_forms multiply_overflow_forms = {
2477 0xb9fd0000, /* msrkc */
2478 0xb9ed0000, /* msgrkc */
2480 0xe30000000053, /* msc */
2481 0xe30000000083, /* msgc */
/* Emits a multiplication. Flag-setting multiplies use the
   multiply_overflow_forms table (msrkc/msgrkc family); immediate
   operands use mhi/mghi (16-bit) or msfi/msgfi (32-bit). */
2484 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2485 sljit_s32 dst,
2486 sljit_s32 src1, sljit_sw src1w,
2487 sljit_s32 src2, sljit_sw src2w)
2489 sljit_ins ins;
2491 if (HAS_FLAGS(op)) {
2492 /* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2493 FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2494 FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2495 if (dst_r != tmp0) {
2496 FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2498 FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2499 FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2500 FAIL_IF(push_inst(compiler, ipm(tmp1)));
2501 FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2503 return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2506 if (src2 & SLJIT_IMM) {
2507 if (is_s16(src2w)) {
2508 ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2509 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2512 if (is_s32(src2w)) {
2513 ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2514 return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2518 return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
/* Emits AND/OR/XOR with an immediate operand using the immediate
   instruction forms (nihf/nilf, oihf/oilf/oihh/..., xihf/xilf).
   count16 is the number of non-zero 16-bit halfwords of imm (computed
   by the caller) and selects between halfword and word forms for OR. */
2521 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2522 sljit_s32 dst,
2523 sljit_s32 src1, sljit_sw src1w,
2524 sljit_uw imm, sljit_s32 count16)
2526 sljit_s32 mode = compiler->mode;
2527 sljit_gpr dst_r = tmp0;
2528 sljit_s32 needs_move = 1;
2530 if (IS_GPR_REG(dst)) {
2531 dst_r = gpr(dst & REG_MASK);
2532 if (dst == src1)
2533 needs_move = 0;
2536 if (needs_move)
2537 FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2539 if (type == SLJIT_AND) {
2540 if (!(mode & SLJIT_32))
2541 FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2542 return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2544 else if (type == SLJIT_OR) {
2545 if (count16 >= 3) {
2546 FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2547 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2550 if (count16 >= 2) {
2551 if ((imm & 0x00000000ffffffffull) == 0)
2552 return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2553 if ((imm & 0xffffffff00000000ull) == 0)
2554 return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
/* OR in each non-zero halfword individually. */
2557 if ((imm & 0xffff000000000000ull) != 0)
2558 FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2559 if ((imm & 0x0000ffff00000000ull) != 0)
2560 FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2561 if ((imm & 0x00000000ffff0000ull) != 0)
2562 FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2563 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2564 return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2565 return SLJIT_SUCCESS;
/* SLJIT_XOR: xihf for the high word, xilf for the low word. */
2568 if ((imm & 0xffffffff00000000ull) != 0)
2569 FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2570 if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2571 return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2572 return SLJIT_SUCCESS;
/* Bitwise AND opcodes (nr/ngr/nrk/ngrk/n/ny/ng). */
2575 static const struct ins_forms bitwise_and_forms = {
2576 0x1400, /* nr */
2577 0xb9800000, /* ngr */
2578 0xb9f40000, /* nrk */
2579 0xb9e40000, /* ngrk */
2580 0x54000000, /* n */
2581 0xe30000000054, /* ny */
2582 0xe30000000080, /* ng */
/* Bitwise OR opcodes (or/ogr/ork/ogrk/o/oy/og). */
2585 static const struct ins_forms bitwise_or_forms = {
2586 0x1600, /* or */
2587 0xb9810000, /* ogr */
2588 0xb9f60000, /* ork */
2589 0xb9e60000, /* ogrk */
2590 0x56000000, /* o */
2591 0xe30000000056, /* oy */
2592 0xe30000000081, /* og */
/* Bitwise XOR opcodes (xr/xgr/xrk/xgrk/x/xy/xg). */
2595 static const struct ins_forms bitwise_xor_forms = {
2596 0x1700, /* xr */
2597 0xb9820000, /* xgr */
2598 0xb9f70000, /* xrk */
2599 0xb9e70000, /* xgrk */
2600 0x57000000, /* x */
2601 0xe30000000057, /* xy */
2602 0xe30000000082, /* xg */
/* Emits AND/OR/XOR. Immediate operands take the immediate-form path
   (sljit_emit_bitwise_imm) unless zero flags are needed; a flag-only
   AND (dst == tmp0) with a single non-zero halfword uses the
   test-under-mask instructions (tmll/tmlh/tmhl/tmhh). */
2605 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2606 sljit_s32 dst,
2607 sljit_s32 src1, sljit_sw src1w,
2608 sljit_s32 src2, sljit_sw src2w)
2610 sljit_s32 type = GET_OPCODE(op);
2611 const struct ins_forms *forms;
2613 if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
2614 sljit_s32 count16 = 0;
2615 sljit_uw imm = (sljit_uw)src2w;
2617 if (op & SLJIT_32)
2618 imm &= 0xffffffffull;
/* Count the non-zero 16-bit halfwords of the immediate. */
2620 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2621 count16++;
2622 if ((imm & 0x00000000ffff0000ull) != 0)
2623 count16++;
2624 if ((imm & 0x0000ffff00000000ull) != 0)
2625 count16++;
2626 if ((imm & 0xffff000000000000ull) != 0)
2627 count16++;
2629 if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
2630 sljit_gpr src_r = tmp0;
2632 if (FAST_IS_REG(src1))
2633 src_r = gpr(src1 & REG_MASK);
2634 else
2635 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
/* Flag-only AND against one halfword: test under mask (0xa70x). */
2637 if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2638 return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
2639 if ((imm & 0x00000000ffff0000ull) != 0)
2640 return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
2641 if ((imm & 0x0000ffff00000000ull) != 0)
2642 return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
2643 return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
2646 if (!(op & SLJIT_SET_Z))
2647 return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2650 if (type == SLJIT_AND)
2651 forms = &bitwise_and_forms;
2652 else if (type == SLJIT_OR)
2653 forms = &bitwise_or_forms;
2654 else
2655 forms = &bitwise_xor_forms;
2657 return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
/* Emits SHL/LSHR/ASHR. Uses the two-operand sll/srl/sra when the
   operation is 32-bit and dst == src1, otherwise the three-operand
   sllk/srlk/srak (32-bit) or sllg/srlg/srag (64-bit) forms.
   Shift amounts are masked to 5 (32-bit) or 6 (64-bit) bits. */
2660 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2661 sljit_s32 dst,
2662 sljit_s32 src1, sljit_sw src1w,
2663 sljit_s32 src2, sljit_sw src2w)
2665 sljit_s32 type = GET_OPCODE(op);
2666 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2667 sljit_gpr src_r = tmp0;
2668 sljit_gpr base_r = tmp0;
2669 sljit_ins imm = 0;
2670 sljit_ins ins;
2672 if (FAST_IS_REG(src1))
2673 src_r = gpr(src1 & REG_MASK);
2674 else
2675 FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2677 if (src2 & SLJIT_IMM)
2678 imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2679 else if (FAST_IS_REG(src2))
2680 base_r = gpr(src2 & REG_MASK);
2681 else {
2682 FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2683 base_r = tmp1;
2686 if ((op & SLJIT_32) && dst_r == src_r) {
2687 if (type == SLJIT_SHL)
2688 ins = 0x89000000 /* sll */;
2689 else if (type == SLJIT_LSHR)
2690 ins = 0x88000000 /* srl */;
2691 else
2692 ins = 0x8a000000 /* sra */;
2694 FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2696 else {
2697 if (type == SLJIT_SHL)
2698 ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2699 else if (type == SLJIT_LSHR)
2700 ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2701 else
2702 ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2704 FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
/* sll/srl (and their k/g forms) do not set the condition code;
   OR the result with itself to materialize the zero flag. */
2707 if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2708 return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2710 return SLJIT_SUCCESS;
/* Add-with-carry opcodes (alcr/alcgr/alc/alcg); register-register-k
   and 12-bit displacement forms do not exist for these. */
2713 static const struct ins_forms addc_forms = {
2714 0xb9980000, /* alcr */
2715 0xb9880000, /* alcgr */
2719 0xe30000000098, /* alc */
2720 0xe30000000088, /* alcg */
/* Subtract-with-borrow opcodes (slbr/slbgr/slb/slbg). */
2723 static const struct ins_forms subc_forms = {
2724 0xb9990000, /* slbr */
2725 0xb9890000, /* slbgr */
2729 0xe30000000099, /* slb */
2730 0xe30000000089, /* slbg */
/* Public two-operand entry point: validates arguments, canonicalizes
   commutative operations so an immediate ends up in src2, then
   dispatches to the per-opcode emitters. */
2733 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2734 sljit_s32 dst, sljit_sw dstw,
2735 sljit_s32 src1, sljit_sw src1w,
2736 sljit_s32 src2, sljit_sw src2w)
2738 CHECK_ERROR();
2739 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2740 ADJUST_LOCAL_OFFSET(dst, dstw);
2741 ADJUST_LOCAL_OFFSET(src1, src1w);
2742 ADJUST_LOCAL_OFFSET(src2, src2w);
2744 compiler->mode = op & SLJIT_32;
2745 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
/* Swap operands (XOR swap) so the immediate is always src2. */
2747 if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2748 src1 ^= src2;
2749 src2 ^= src1;
2750 src1 ^= src2;
2752 src1w ^= src2w;
2753 src2w ^= src1w;
2754 src1w ^= src2w;
2757 switch (GET_OPCODE(op)) {
2758 case SLJIT_ADD:
2759 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2760 return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2761 case SLJIT_ADDC:
2762 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2763 FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2764 if (dst & SLJIT_MEM)
2765 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2766 return SLJIT_SUCCESS;
2767 case SLJIT_SUB:
2768 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2769 return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2770 case SLJIT_SUBC:
2771 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2772 FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2773 if (dst & SLJIT_MEM)
2774 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2775 return SLJIT_SUCCESS;
2776 case SLJIT_MUL:
2777 FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2778 break;
2779 case SLJIT_AND:
2780 case SLJIT_OR:
2781 case SLJIT_XOR:
2782 FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2783 break;
2784 case SLJIT_SHL:
2785 case SLJIT_LSHR:
2786 case SLJIT_ASHR:
2787 FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2788 break;
2791 if (dst & SLJIT_MEM)
2792 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2793 return SLJIT_SUCCESS;
/* Flag-only variant of sljit_emit_op2: the result is discarded by
   targeting the scratch register tmp0. */
2796 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2797 sljit_s32 src1, sljit_sw src1w,
2798 sljit_s32 src2, sljit_sw src2w)
2800 CHECK_ERROR();
2801 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2803 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2804 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2805 compiler->skip_checks = 1;
2806 #endif
2807 return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
/* Single-source operations. Only SLJIT_FAST_RETURN emits code
   (branch to the saved return address); the frame-skip and prefetch
   operations are currently no-ops on this target. */
2810 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2811 struct sljit_compiler *compiler,
2812 sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2814 sljit_gpr src_r;
2816 CHECK_ERROR();
2817 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2818 ADJUST_LOCAL_OFFSET(src, srcw);
2820 switch (op) {
2821 case SLJIT_FAST_RETURN:
2822 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2823 if (src & SLJIT_MEM)
2824 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
2826 return push_inst(compiler, br(src_r));
2827 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2828 /* TODO(carenas): implement? */
2829 return SLJIT_SUCCESS;
2830 case SLJIT_PREFETCH_L1:
2831 case SLJIT_PREFETCH_L2:
2832 case SLJIT_PREFETCH_L3:
2833 case SLJIT_PREFETCH_ONCE:
2834 /* TODO(carenas): implement */
2835 return SLJIT_SUCCESS;
2836 default:
2837 /* TODO(carenas): probably should not success by default */
2838 return SLJIT_SUCCESS;
2841 return SLJIT_SUCCESS;
/* Maps an abstract sljit register to its hardware GPR number. */
2844 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2846 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2847 return (sljit_s32)gpr(reg);
/* Maps an abstract sljit float register to its hardware FPR number. */
2850 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2852 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2853 return (sljit_s32)fgpr(reg);
/* Emits a raw machine instruction supplied by the caller. The bytes
   are copied into the least-significant end of the sljit_ins so the
   variable-length (2/4/6 byte) encoding is preserved by push_inst. */
2856 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2857 void *instruction, sljit_u32 size)
2859 sljit_ins ins = 0;
2861 CHECK_ERROR();
2862 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2864 memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
2865 return push_inst(compiler, ins);
2868 /* --------------------------------------------------------------------- */
2869 /* Floating point operators */
2870 /* --------------------------------------------------------------------- */
/* Direction flag for float_mem(): load from or store to memory. */
2872 #define FLOAT_LOAD 0
2873 #define FLOAT_STORE 1
/* Loads/stores a float register from/to memory. Uses the short RX
   forms (ste/std/le/ld) when the displacement fits, otherwise the
   long-displacement RXY forms (stey/stdy/ley/ldy). op combines
   FLOAT_LOAD/FLOAT_STORE with the SLJIT_32 size bit. */
2875 static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
2876 sljit_s32 reg,
2877 sljit_s32 mem, sljit_sw memw)
2879 struct addr addr;
2880 sljit_ins ins;
2882 SLJIT_ASSERT(mem & SLJIT_MEM);
2884 if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
2885 FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
2887 if (op & FLOAT_STORE)
2888 ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
2889 else
2890 ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
2892 return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
2895 FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
2897 if (op & FLOAT_STORE)
2898 ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
2899 else
2900 ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
2902 return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
/* Emits a float operation whose second operand may be a register
   (ins_r, register-register form) or memory (ins, register-memory form). */
2905 static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
2906 sljit_s32 reg,
2907 sljit_s32 src, sljit_sw srcw)
2909 struct addr addr;
2911 if (!(src & SLJIT_MEM))
2912 return push_inst(compiler, ins_r | F4(reg) | F0(src));
2914 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
2915 return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
/* Converts a float/double to a signed integer (cgebr/cgdbr for 64-bit
   results, cfebr/cfdbr for 32-bit) and stores it to dst. */
2918 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2919 sljit_s32 dst, sljit_sw dstw,
2920 sljit_s32 src, sljit_sw srcw)
2922 sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
2923 sljit_ins ins;
2925 if (src & SLJIT_MEM) {
2926 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
2927 src = TMP_FREG1;
2930 /* M3 is set to 5 */
2931 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2932 ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
2933 else
2934 ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
2936 FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
2938 if (dst & SLJIT_MEM)
2939 return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
2941 return SLJIT_SUCCESS;
/* Converts a signed integer (immediate, memory, or register, staged
   through tmp0 as needed) to a float/double via cegbr/cdgbr (64-bit
   sources) or cefbr/cdfbr (32-bit sources). */
2944 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2945 sljit_s32 dst, sljit_sw dstw,
2946 sljit_s32 src, sljit_sw srcw)
2948 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2949 sljit_ins ins;
2951 if (src & SLJIT_IMM) {
2952 FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
2953 src = (sljit_s32)tmp0;
2955 else if (src & SLJIT_MEM) {
2956 FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
2957 src = (sljit_s32)tmp0;
2960 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2961 ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
2962 else
2963 ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
2965 FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
2967 if (dst & SLJIT_MEM)
2968 return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
2970 return SLJIT_SUCCESS;
/* Emits a float comparison (cebr/ceb for 32-bit, cdbr/cdb for 64-bit);
   a memory src1 is first loaded into TMP_FREG1. */
2973 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2974 sljit_s32 src1, sljit_sw src1w,
2975 sljit_s32 src2, sljit_sw src2w)
2977 sljit_ins ins_r, ins;
2979 if (src1 & SLJIT_MEM) {
2980 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
2981 src1 = TMP_FREG1;
2984 if (op & SLJIT_32) {
2985 ins_r = 0xb3090000 /* cebr */;
2986 ins = 0xed0000000009 /* ceb */;
2987 } else {
2988 ins_r = 0xb3190000 /* cdbr */;
2989 ins = 0xed0000000019 /* cdb */;
2992 return emit_float(compiler, ins_r, ins, src1, src2, src2w);
/* Single-operand float operations: move, float<->double conversion,
   negate and absolute value. A memory dst is staged through TMP_FREG1
   and stored at the end. */
2995 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2996 sljit_s32 dst, sljit_sw dstw,
2997 sljit_s32 src, sljit_sw srcw)
2999 sljit_s32 dst_r;
3000 sljit_ins ins;
3002 CHECK_ERROR();
3004 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3006 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3008 if (op == SLJIT_CONV_F64_FROM_F32)
3009 FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3010 else {
3011 if (src & SLJIT_MEM) {
3012 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3013 src = dst_r;
3016 switch (GET_OPCODE(op)) {
3017 case SLJIT_MOV_F64:
3018 if (FAST_IS_REG(dst)) {
3019 if (dst == src)
3020 return SLJIT_SUCCESS;
3022 ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3023 break;
3025 return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3026 case SLJIT_CONV_F64_FROM_F32:
3027 /* Only SLJIT_CONV_F32_FROM_F64. */
3028 ins = 0xb3440000 /* ledbr */;
3029 break;
3030 case SLJIT_NEG_F64:
3031 ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3032 break;
3033 default:
3034 SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3035 ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3036 break;
3039 FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3042 if (!(dst & SLJIT_MEM))
3043 return SLJIT_SUCCESS;
3045 SLJIT_ASSERT(dst_r == TMP_FREG1);
3047 return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
/* Register-to-register float move: ler (32-bit) or ldr (64-bit). */
3050 #define FLOAT_MOV(op, dst_r, src_r) \
3051 (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
/* Two-operand float arithmetic (add/sub/mul/div). The s390x float
   instructions are two-address (dst is also the first operand), so
   src1 is first placed in dst_r; when dst aliases src2, commutative
   operations swap the operands, others copy src2 to TMP_FREG1. */
3053 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3054 sljit_s32 dst, sljit_sw dstw,
3055 sljit_s32 src1, sljit_sw src1w,
3056 sljit_s32 src2, sljit_sw src2w)
3058 sljit_s32 dst_r = TMP_FREG1;
3059 sljit_ins ins_r, ins;
3061 CHECK_ERROR();
3062 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3063 ADJUST_LOCAL_OFFSET(dst, dstw);
3064 ADJUST_LOCAL_OFFSET(src1, src1w);
3065 ADJUST_LOCAL_OFFSET(src2, src2w);
3067 do {
3068 if (FAST_IS_REG(dst)) {
3069 dst_r = dst;
3071 if (dst == src1)
3072 break;
3074 if (dst == src2) {
3075 if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
3076 src2 = src1;
3077 src2w = src1w;
3078 src1 = dst;
3079 break;
3082 FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
3083 src2 = TMP_FREG1;
3087 if (src1 & SLJIT_MEM)
3088 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
3089 else
3090 FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
3091 } while (0);
3093 switch (GET_OPCODE(op)) {
3094 case SLJIT_ADD_F64:
3095 ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
3096 ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
3097 break;
3098 case SLJIT_SUB_F64:
3099 ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
3100 ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
3101 break;
3102 case SLJIT_MUL_F64:
3103 ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
3104 ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
3105 break;
3106 default:
3107 SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
3108 ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
3109 ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
3110 break;
3113 FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));
3115 if (dst & SLJIT_MEM)
3116 return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3118 SLJIT_ASSERT(dst_r != TMP_FREG1);
3119 return SLJIT_SUCCESS;
3122 /* --------------------------------------------------------------------- */
3123 /* Other instructions */
3124 /* --------------------------------------------------------------------- */
3126 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3128 CHECK_ERROR();
3129 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
3130 ADJUST_LOCAL_OFFSET(dst, dstw);
3132 if (FAST_IS_REG(dst))
3133 return push_inst(compiler, lgr(gpr(dst), fast_link_r));
3135 /* memory */
3136 return store_word(compiler, fast_link_r, dst, dstw, 0);
3139 /* --------------------------------------------------------------------- */
3140 /* Conditional instructions */
3141 /* --------------------------------------------------------------------- */
3143 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3145 struct sljit_label *label;
3147 CHECK_ERROR_PTR();
3148 CHECK_PTR(check_sljit_emit_label(compiler));
3150 if (compiler->last_label && compiler->last_label->size == compiler->size)
3151 return compiler->last_label;
3153 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3154 PTR_FAIL_IF(!label);
3155 set_label(label, compiler);
3156 return label;
3159 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3161 sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3163 CHECK_ERROR_PTR();
3164 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3166 /* record jump */
3167 struct sljit_jump *jump = (struct sljit_jump *)
3168 ensure_abuf(compiler, sizeof(struct sljit_jump));
3169 PTR_FAIL_IF(!jump);
3170 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3171 jump->addr = compiler->size;
3173 /* emit jump instruction */
3174 type &= 0xff;
3175 if (type >= SLJIT_FAST_CALL)
3176 PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
3177 else
3178 PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3180 return jump;
/* Emit a call to a function taking the given argument types. With
   SLJIT_CALL_RETURN the current frame is released first and the call
   degenerates into a tail jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: restore the caller's frame, then jump instead of call. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* The jump below was already validated as part of this call. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}
3202 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3204 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3206 CHECK_ERROR();
3207 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3209 if (src & SLJIT_IMM) {
3210 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3211 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3213 else if (src & SLJIT_MEM) {
3214 ADJUST_LOCAL_OFFSET(src, srcw);
3215 FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3218 /* emit jump instruction */
3219 if (type >= SLJIT_FAST_CALL)
3220 return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));
3222 return push_inst(compiler, br(src_r));
/* Indirect call; with SLJIT_CALL_RETURN it becomes a tail jump after
   the frame is released. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	/* TMP_REG2 is used below as the stand-in for a tmp1-resident target. */
	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* The frame release below restores the saved registers, so a
		   target held in one must be copied out first. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP;
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* The ijump below was already validated as part of this call. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
/* Materialize the condition encoded in `type` as a 0/1 value and
   either move it into dst (MOV/MOV32) or combine it with dst using
   AND/OR/XOR. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 mask = get_cc(compiler, type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1;
	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* Load 0 into loc_r, then conditionally overwrite it with 1 using
	   LOCHI/LOCGHI (load-halfword-immediate-on-condition). */
	/* TODO(mundaym): fold into cmov helper function? */
#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}
/* Conditional move: copy src into dst_reg when the condition encoded
   in `type` holds; otherwise dst_reg is left unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 mask = get_cc(compiler, type);
	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32);
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	if (src & SLJIT_IMM) {
		/* Immediates go through tmp0 (src_r) unconditionally. */
		/* TODO(mundaym): fast path with lscond2 */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}

	/* LOCR/LOCGR from the load/store-on-condition facility. */
#define LEVAL(i) i(dst_r, src_r, mask)
	if (have_lscond1())
		return push_inst(compiler,
			WHEN2(dst_reg & SLJIT_32, locr, locgr));

#undef LEVAL

	/* TODO(mundaym): implement */
	return SLJIT_ERR_UNSUPPORTED;
}
3355 /* --------------------------------------------------------------------- */
3356 /* Other instructions */
3357 /* --------------------------------------------------------------------- */
3359 /* On s390x we build a literal pool to hold constants. This has two main
3360 advantages:
3362 1. we only need one instruction in the instruction stream (LGRL)
3363 2. we can store 64 bit addresses and use 32 bit offsets
3365 To retrofit the extra information needed to build the literal pool we
3366 add a new sljit_s390x_const struct that contains the initial value but
3367 can still be cast to a sljit_const. */
3369 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3371 struct sljit_s390x_const *const_;
3372 sljit_gpr dst_r;
3374 CHECK_ERROR_PTR();
3375 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3377 const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
3378 sizeof(struct sljit_s390x_const));
3379 PTR_FAIL_IF(!const_);
3380 set_const((struct sljit_const*)const_, compiler);
3381 const_->init_value = init_value;
3383 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3384 if (have_genext())
3385 PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
3386 else {
3387 PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
3388 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3391 if (dst & SLJIT_MEM)
3392 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
3394 return (struct sljit_const*)const_;
3397 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3399 /* Update the constant pool. */
3400 sljit_uw *ptr = (sljit_uw *)addr;
3401 SLJIT_UNUSED_ARG(executable_offset);
3403 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
3404 *ptr = new_target;
3405 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
3406 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
3409 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3411 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3414 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
3415 struct sljit_compiler *compiler,
3416 sljit_s32 dst, sljit_sw dstw)
3418 struct sljit_put_label *put_label;
3419 sljit_gpr dst_r;
3421 CHECK_ERROR_PTR();
3422 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3423 ADJUST_LOCAL_OFFSET(dst, dstw);
3425 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3426 PTR_FAIL_IF(!put_label);
3427 set_put_label(put_label, compiler, 0);
3429 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3431 if (have_genext())
3432 PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
3433 else {
3434 PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
3435 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3438 if (dst & SLJIT_MEM)
3439 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
3441 return put_label;
3444 /* TODO(carenas): EVAL probably should move up or be refactored */
3445 #undef WHEN2
3446 #undef EVAL
3448 #undef tmp1
3449 #undef tmp0
3451 /* TODO(carenas): undef other macros that spill like is_u12? */