2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 #include "../tcg-ldst.c.inc"
28 #include "../tcg-pool.c.inc"
31 #define TCG_CT_CONST_S16 (1 << 8)
32 #define TCG_CT_CONST_S32 (1 << 9)
33 #define TCG_CT_CONST_U32 (1 << 10)
34 #define TCG_CT_CONST_ZERO (1 << 11)
35 #define TCG_CT_CONST_P32 (1 << 12)
36 #define TCG_CT_CONST_INV (1 << 13)
37 #define TCG_CT_CONST_INVRISBG (1 << 14)
38 #define TCG_CT_CONST_CMP (1 << 15)
39 #define TCG_CT_CONST_M1 (1 << 16)
41 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
42 #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
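/* Register constraint masks: TCG register numbers 0..15 are the general
   registers and 32..63 the vector registers; bits 16..31 are unused. */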
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0. */
46 #define TCG_REG_NONE 0
/* A scratch register that may be used throughout the backend. */
49 #define TCG_TMP0 TCG_REG_R1
50 #define TCG_VEC_TMP0 TCG_REG_V31
52 #define TCG_GUEST_BASE_REG TCG_REG_R13
54 /* All of the following instructions are prefixed with their instruction
55 format, and are defined as 8- or 16-bit quantities, even when the two
56 halves of the 16-bit quantity may appear 32 bits apart in the insn.
57 This makes it easy to copy the values from the tables in Appendix B. */
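/* E.g. RIEg_LOCGHI = 0xec46 holds opcode byte 0xec (insn bits 0-7) in its
   high byte and byte 0x46 (insn bits 40-47) in its low byte; the RIEg
   emitter below splits them apart again. */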
58 typedef enum S390Opcode {
133 RIEg_LOCGHI = 0xec46,
171 RRFa_MSGRKC = 0xb9ed,
193 RRFam_SELGR = 0xb9e3,
197 RRFc_POPCNT = 0xb9e1,
281 VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
282 VRRc_VCH = 0xe7fb, /* " */
283 VRRc_VCHL = 0xe7f9, /* " */
284 VRRc_VERLLV = 0xe773,
286 VRRc_VESRAV = 0xe77a,
287 VRRc_VESRLV = 0xe778,
300 VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
325 #ifdef CONFIG_DEBUG_TCG
326 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
327 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
328 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
331 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
332 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
333 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
/* Since R6 is a potential argument register, choose it last among the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order, to maximize the chance of avoiding the argument
   registers. */
340 static const int tcg_target_reg_alloc_order[] = {
341 /* Call saved registers. */
350 /* Call clobbered registers. */
354 /* Argument registers, in reverse order of allocation. */
360 /* V8-V15 are call saved, and omitted. */
387 static const int tcg_target_call_iarg_regs[] = {
395 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
397 tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
398 tcg_debug_assert(slot == 0);
406 #define S390_CC_NE (S390_CC_LT | S390_CC_GT)
407 #define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
408 #define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
409 #define S390_CC_NEVER 0
410 #define S390_CC_ALWAYS 15
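/* The S390_CC_* values are 4-bit branch masks, one bit per condition code:
   8, 4, 2 and 1 select CC 0..3 respectively, so compound conditions are
   simply the OR of their parts (e.g. S390_CC_NE = 4 | 2 = 6). */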
412 #define S390_TM_EQ 8 /* CC == 0 */
413 #define S390_TM_NE 7 /* CC in {1,2,3} */
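/* TEST UNDER MASK sets CC 0 when all selected bits are zero, CC 3 when all
   are one, and CC 1 or 2 for a mix, so mask 8 tests "all zero" and mask 7
   is its complement. */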
415 /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
416 static const uint8_t tcg_cond_to_s390_cond[16] = {
417 [TCG_COND_EQ] = S390_CC_EQ,
418 [TCG_COND_NE] = S390_CC_NE,
419 [TCG_COND_TSTEQ] = S390_CC_EQ,
420 [TCG_COND_TSTNE] = S390_CC_NE,
421 [TCG_COND_LT] = S390_CC_LT,
422 [TCG_COND_LE] = S390_CC_LE,
423 [TCG_COND_GT] = S390_CC_GT,
424 [TCG_COND_GE] = S390_CC_GE,
425 [TCG_COND_LTU] = S390_CC_LT,
426 [TCG_COND_LEU] = S390_CC_LE,
427 [TCG_COND_GTU] = S390_CC_GT,
428 [TCG_COND_GEU] = S390_CC_GE,
/* Condition codes that result from a LOAD AND TEST.  Here we have no
   unsigned instruction variant; however, since the test is against zero,
   we can re-map the outcomes appropriately: e.g. unsigned "x < 0" is never
   true and unsigned "x <= 0" is just "x == 0". */
434 static const uint8_t tcg_cond_to_ltr_cond[16] = {
435 [TCG_COND_EQ] = S390_CC_EQ,
436 [TCG_COND_NE] = S390_CC_NE,
437 [TCG_COND_TSTEQ] = S390_CC_ALWAYS,
438 [TCG_COND_TSTNE] = S390_CC_NEVER,
439 [TCG_COND_LT] = S390_CC_LT,
440 [TCG_COND_LE] = S390_CC_LE,
441 [TCG_COND_GT] = S390_CC_GT,
442 [TCG_COND_GE] = S390_CC_GE,
443 [TCG_COND_LTU] = S390_CC_NEVER,
444 [TCG_COND_LEU] = S390_CC_EQ,
445 [TCG_COND_GTU] = S390_CC_NE,
446 [TCG_COND_GEU] = S390_CC_ALWAYS,
449 static const tcg_insn_unit *tb_ret_addr;
450 uint64_t s390_facilities[3];
452 static inline bool is_general_reg(TCGReg r)
454 return r <= TCG_REG_R15;
457 static inline bool is_vector_reg(TCGReg r)
459 return r >= TCG_REG_V0 && r <= TCG_REG_V31;
462 static bool patch_reloc(tcg_insn_unit *src_rw, int type,
463 intptr_t value, intptr_t addend)
465 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
470 pcrel2 = (tcg_insn_unit *)value - src_rx;
474 if (pcrel2 == (int16_t)pcrel2) {
475 tcg_patch16(src_rw, pcrel2);
480 if (pcrel2 == (int32_t)pcrel2) {
481 tcg_patch32(src_rw, pcrel2);
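    /* What remains is a 20-bit displacement: the patched word covers the
       base nibble, DL (12 bits), DH (8 bits) and final opcode byte of an
       RXY/RSY-style insn, so preserve the base and opcode and insert the
       two displacement parts. */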
486 if (value == sextract64(value, 0, 20)) {
487 old = *(uint32_t *)src_rw & 0xf00000ff;
488 old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
489 tcg_patch32(src_rw, old);
494 g_assert_not_reached();
499 static int is_const_p16(uint64_t val)
501 for (int i = 0; i < 4; ++i) {
502 uint64_t mask = 0xffffull << (i * 16);
503 if ((val & ~mask) == 0) {
510 static int is_const_p32(uint64_t val)
512 if ((val & 0xffffffff00000000ull) == 0) {
515 if ((val & 0x00000000ffffffffull) == 0) {
 * Accept bit patterns like these:
 *   0....01....1
 *   1....10....0
 *   1..10..01..1
 *   0..01..10..0
 * i.e. a single contiguous run of ones, possibly wrapping around.
 * Copied from gcc sources.
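 * For instance, 0x0000ffffffff0000 (one run) and 0xffff00000000ffff (a run
 * wrapping around bit 0) are accepted, while 0x0f0f0f0f0f0f0f0f is not.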
529 static bool risbg_mask(uint64_t c)
532 /* We don't change the number of transitions by inverting,
533 so make sure we start with the LSB zero. */
537 /* Reject all zeros or all ones. */
541 /* Find the first transition. */
543 /* Invert to look for a second transition. */
545 /* Erase the first transition. */
547 /* Find the second transition, if any. */
549 /* Match if all the bits are 1's, or if c is zero. */
553 /* Test if a constant matches the constraint. */
554 static bool tcg_target_const_match(int64_t val, int ct,
555 TCGType type, TCGCond cond, int vece)
559 if (ct & TCG_CT_CONST) {
562 if (type == TCG_TYPE_I32) {
563 uval = (uint32_t)val;
567 if (ct & TCG_CT_CONST_CMP) {
571 ct |= TCG_CT_CONST_S32 | TCG_CT_CONST_U32; /* CGFI or CLGFI */
577 ct |= TCG_CT_CONST_S32; /* CGFI */
583 ct |= TCG_CT_CONST_U32; /* CLGFI */
587 if (is_const_p16(uval) >= 0) {
588 return true; /* TMxx */
590 if (risbg_mask(uval)) {
591 return true; /* RISBG */
595 g_assert_not_reached();
599 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
602 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
605 if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
608 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
611 if ((ct & TCG_CT_CONST_M1) && val == -1) {
615 if (ct & TCG_CT_CONST_INV) {
618 if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
621 if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
627 /* Emit instructions according to the given instruction format. */
629 static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
631 tcg_out16(s, (op << 8) | (r1 << 4) | r2);
634 static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
635 TCGReg r1, TCGReg r2)
637 tcg_out32(s, (op << 16) | (r1 << 4) | r2);
640 /* RRF-a without the m4 field */
641 static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
642 TCGReg r1, TCGReg r2, TCGReg r3)
644 tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
647 /* RRF-a with the m4 field */
648 static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
649 TCGReg r1, TCGReg r2, TCGReg r3, int m4)
651 tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
654 static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
655 TCGReg r1, TCGReg r2, int m3)
657 tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
660 static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
662 tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
665 static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
668 tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
669 tcg_out32(s, (i2 << 16) | (op & 0xff));
672 static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
674 tcg_out16(s, op | (r1 << 4));
678 static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
679 TCGReg b2, TCGReg r3, int disp)
681 tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
685 static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
686 TCGReg b2, TCGReg r3, int disp)
688 tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
689 tcg_out32(s, (op & 0xff) | (b2 << 28)
690 | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
693 #define tcg_out_insn_RX tcg_out_insn_RS
694 #define tcg_out_insn_RXY tcg_out_insn_RSY
696 static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself occupies bits 8..11 of the final emitted halfword, so
     * 8 - 4 = 4 is the left-shift for the 4th operand.
703 return ((v1 & 0x10) << (4 + 3))
704 | ((v2 & 0x10) << (4 + 2))
705 | ((v3 & 0x10) << (4 + 1))
706 | ((v4 & 0x10) << (4 + 0));
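    /* E.g. with %v20 (regno 0x14) as v1, bit 4 is shifted up by 7 to bit 11
       of the emitted halfword (0x0800), i.e. insn bit 36, the RXB extension
       bit for the first vector operand. */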
709 static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
710 TCGReg v1, uint16_t i2, int m3)
712 tcg_debug_assert(is_vector_reg(v1));
713 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
715 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
718 static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
719 TCGReg v1, uint8_t i2, uint8_t i3, int m4)
721 tcg_debug_assert(is_vector_reg(v1));
722 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
723 tcg_out16(s, (i2 << 8) | (i3 & 0xff));
724 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
727 static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
728 TCGReg v1, uint16_t i2, TCGReg v3, int m4)
730 tcg_debug_assert(is_vector_reg(v1));
731 tcg_debug_assert(is_vector_reg(v3));
732 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
734 tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
737 static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
738 TCGReg v1, TCGReg v2, int m3)
740 tcg_debug_assert(is_vector_reg(v1));
741 tcg_debug_assert(is_vector_reg(v2));
742 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
743 tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
746 static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
747 TCGReg v1, TCGReg v2, TCGReg v3, int m4)
749 tcg_debug_assert(is_vector_reg(v1));
750 tcg_debug_assert(is_vector_reg(v2));
751 tcg_debug_assert(is_vector_reg(v3));
752 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
753 tcg_out16(s, v3 << 12);
754 tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
757 static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
758 TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
760 tcg_debug_assert(is_vector_reg(v1));
761 tcg_debug_assert(is_vector_reg(v2));
762 tcg_debug_assert(is_vector_reg(v3));
763 tcg_debug_assert(is_vector_reg(v4));
764 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
765 tcg_out16(s, v3 << 12);
766 tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
769 static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
770 TCGReg v1, TCGReg r2, TCGReg r3)
772 tcg_debug_assert(is_vector_reg(v1));
773 tcg_debug_assert(is_general_reg(r2));
774 tcg_debug_assert(is_general_reg(r3));
775 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
776 tcg_out16(s, r3 << 12);
777 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
780 static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
781 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
783 tcg_debug_assert(is_vector_reg(v1));
784 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
785 tcg_debug_assert(is_general_reg(b2));
786 tcg_debug_assert(is_vector_reg(v3));
787 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
788 tcg_out16(s, b2 << 12 | d2);
789 tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
792 static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
793 intptr_t d2, TCGReg b2, TCGReg r3, int m4)
795 tcg_debug_assert(is_vector_reg(v1));
796 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
797 tcg_debug_assert(is_general_reg(b2));
798 tcg_debug_assert(is_general_reg(r3));
799 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
800 tcg_out16(s, b2 << 12 | d2);
801 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
804 static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
805 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
807 tcg_debug_assert(is_general_reg(r1));
808 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
809 tcg_debug_assert(is_general_reg(b2));
810 tcg_debug_assert(is_vector_reg(v3));
811 tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
812 tcg_out16(s, b2 << 12 | d2);
813 tcg_out16(s, (op & 0x00ff) | RXB(0, v3, 0, 0) | (m4 << 12));
816 static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
817 TCGReg b2, TCGReg x2, intptr_t d2, int m3)
819 tcg_debug_assert(is_vector_reg(v1));
820 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
821 tcg_debug_assert(is_general_reg(x2));
822 tcg_debug_assert(is_general_reg(b2));
823 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
824 tcg_out16(s, (b2 << 12) | d2);
825 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
828 /* Emit an opcode with "type-checking" of the format. */
829 #define tcg_out_insn(S, FMT, OP, ...) \
830 glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
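/* E.g. tcg_out_insn(s, RRE, LGR, dst, src) expands to
   tcg_out_insn_RRE(s, RRE_LGR, dst, src), so the opcode chosen is
   checked against the emitter for its format at compile time. */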
833 /* emit 64-bit shifts */
834 static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
835 TCGReg src, TCGReg sh_reg, int sh_imm)
837 tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
840 /* emit 32-bit shifts */
841 static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
842 TCGReg sh_reg, int sh_imm)
844 tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
847 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
854 if (likely(is_general_reg(dst) && is_general_reg(src))) {
855 tcg_out_insn(s, RR, LR, dst, src);
861 if (likely(is_general_reg(dst))) {
862 if (likely(is_general_reg(src))) {
863 tcg_out_insn(s, RRE, LGR, dst, src);
865 tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
868 } else if (is_general_reg(src)) {
869 tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
876 tcg_out_insn(s, VRRa, VLR, dst, src, 0);
880 g_assert_not_reached();
885 static const S390Opcode li_insns[4] = {
886 RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
888 static const S390Opcode oi_insns[4] = {
889 RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
891 static const S390Opcode lif_insns[2] = {
892 RIL_LLILF, RIL_LLIHF,
894 static const S390Opcode tm_insns[4] = {
895 RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
898 /* load a register with an immediate value */
899 static void tcg_out_movi(TCGContext *s, TCGType type,
900 TCGReg ret, tcg_target_long sval)
902 tcg_target_ulong uval = sval;
906 if (type == TCG_TYPE_I32) {
907 uval = (uint32_t)sval;
908 sval = (int32_t)sval;
911 /* Try all 32-bit insns that can load it in one go. */
912 if (sval >= -0x8000 && sval < 0x8000) {
913 tcg_out_insn(s, RI, LGHI, ret, sval);
917 i = is_const_p16(uval);
919 tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
923 /* Try all 48-bit insns that can load it in one go. */
924 if (sval == (int32_t)sval) {
925 tcg_out_insn(s, RIL, LGFI, ret, sval);
929 i = is_const_p32(uval);
931 tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
935 /* Try for PC-relative address load. For odd addresses, add one. */
936 pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
937 if (pc_off == (int32_t)pc_off) {
938 tcg_out_insn(s, RIL, LARL, ret, pc_off);
940 tcg_out_insn(s, RI, AGHI, ret, 1);
945 /* Otherwise, load it by parts. */
946 i = is_const_p16((uint32_t)uval);
948 tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
950 tcg_out_insn(s, RIL, LLILF, ret, uval);
953 i = is_const_p16(uval);
955 tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
957 tcg_out_insn(s, RIL, OIHF, ret, uval);
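    /* E.g. a constant such as 0x123456789abcdef0 (assuming the PC-relative
       form above is out of range) is built as LLILF 0x9abcdef0 followed by
       OIHF 0x12345678. */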
961 /* Emit a load/store type instruction. Inputs are:
962 DATA: The register to be loaded or stored.
963 BASE+OFS: The effective address.
   OPC_RX: The RX format opcode for the operation, if it has one (e.g. STC), otherwise 0.
965 OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
967 static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
968 TCGReg data, TCGReg base, TCGReg index,
971 if (ofs < -0x80000 || ofs >= 0x80000) {
972 /* Combine the low 20 bits of the offset with the actual load insn;
973 the high 44 bits must come from an immediate load. */
974 tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
975 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
978 /* If we were already given an index register, add it in. */
979 if (index != TCG_REG_NONE) {
980 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
985 if (opc_rx && ofs >= 0 && ofs < 0x1000) {
986 tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
988 tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
992 static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
993 TCGReg data, TCGReg base, TCGReg index,
994 tcg_target_long ofs, int m3)
996 if (ofs < 0 || ofs >= 0x1000) {
997 if (ofs >= -0x80000 && ofs < 0x80000) {
998 tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
1000 index = TCG_REG_NONE;
1003 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
1004 if (index != TCG_REG_NONE) {
1005 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
1011 tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1014 /* load data without address translation or endianness conversion */
1015 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1016 TCGReg base, intptr_t ofs)
1020 if (likely(is_general_reg(data))) {
1021 tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1024 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1028 if (likely(is_general_reg(data))) {
1029 tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1035 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1039 /* Hint quadword aligned. */
1040 tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1044 g_assert_not_reached();
1048 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1049 TCGReg base, intptr_t ofs)
1053 if (likely(is_general_reg(data))) {
1054 tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1056 tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1061 if (likely(is_general_reg(data))) {
1062 tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1068 tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1072 /* Hint quadword aligned. */
1073 tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1077 g_assert_not_reached();
1081 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1082 TCGReg base, intptr_t ofs)
1087 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
1092 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1093 tcg_target_long imm)
1095 /* This function is only used for passing structs by reference. */
1096 tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
1099 static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1100 int msb, int lsb, int ofs, int z)
1103 tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1104 tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1105 tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1108 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1110 tcg_out_insn(s, RRE, LGBR, dest, src);
1113 static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
1115 tcg_out_insn(s, RRE, LLGCR, dest, src);
1118 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1120 tcg_out_insn(s, RRE, LGHR, dest, src);
1123 static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
1125 tcg_out_insn(s, RRE, LLGHR, dest, src);
1128 static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1130 tcg_out_insn(s, RRE, LGFR, dest, src);
1133 static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1135 tcg_out_insn(s, RRE, LLGFR, dest, src);
1138 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
1140 tcg_out_ext32s(s, dest, src);
1143 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
1145 tcg_out_ext32u(s, dest, src);
1148 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
1150 tcg_out_mov(s, TCG_TYPE_I32, dest, src);
1153 static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1156 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1157 /* Achieve wraparound by swapping msb and lsb. */
1158 msb = 64 - ctz64(~val);
1159 lsb = clz64(~val) - 1;
1162 lsb = 63 - ctz64(val);
1164 tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
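    /* E.g. for val = 0x00ffff00 this computes msb = 40 and lsb = 55, so the
       RISBG above keeps bits 40..55 (IBM numbering) of the source and zeroes
       the rest, which is exactly the masked value. */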
1167 static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1169 static const S390Opcode ni_insns[4] = {
1170 RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1172 static const S390Opcode nif_insns[2] = {
1175 uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1178 /* Look for the zero-extensions. */
1179 if ((val & valid) == 0xffffffff) {
1180 tcg_out_ext32u(s, dest, dest);
1183 if ((val & valid) == 0xff) {
1184 tcg_out_ext8u(s, dest, dest);
1187 if ((val & valid) == 0xffff) {
1188 tcg_out_ext16u(s, dest, dest);
1192 i = is_const_p16(~val & valid);
1194 tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1198 i = is_const_p32(~val & valid);
1199 tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1201 tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1205 if (risbg_mask(val)) {
1206 tgen_andi_risbg(s, dest, dest, val);
1210 g_assert_not_reached();
1213 static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1215 static const S390Opcode oif_insns[2] = {
1221 i = is_const_p16(val);
1223 tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1227 i = is_const_p32(val);
1229 tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1233 g_assert_not_reached();
1236 static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1238 switch (is_const_p32(val)) {
1240 tcg_out_insn(s, RIL, XILF, dest, val);
1243 tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1246 g_assert_not_reached();
1250 static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1251 TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
1253 bool is_unsigned = is_unsigned_cond(c);
1254 TCGCond inv_c = tcg_invert_cond(c);
1257 if (is_tst_cond(c)) {
1258 tcg_debug_assert(!need_carry);
1261 if (type == TCG_TYPE_I32) {
1262 tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
1264 tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
1269 if (type == TCG_TYPE_I32) {
1273 int i = is_const_p16(c2);
1275 tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
1276 *inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
1277 return *inv_cc ^ 15;
1280 if (risbg_mask(c2)) {
1281 tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
1284 g_assert_not_reached();
1289 if (!(is_unsigned && need_carry)) {
1290 if (type == TCG_TYPE_I32) {
1291 tcg_out_insn(s, RR, LTR, r1, r1);
1293 tcg_out_insn(s, RRE, LTGR, r1, r1);
1295 *inv_cc = tcg_cond_to_ltr_cond[inv_c];
1296 return tcg_cond_to_ltr_cond[c];
1300 if (!is_unsigned && c2 == (int16_t)c2) {
1301 op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1302 tcg_out_insn_RI(s, op, r1, c2);
1306 if (type == TCG_TYPE_I32) {
1307 op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1308 tcg_out_insn_RIL(s, op, r1, c2);
1312 /* Should match TCG_CT_CONST_CMP. */
1318 tcg_debug_assert(c2 == (int32_t)c2);
1323 if (c2 == (int32_t)c2) {
1332 tcg_debug_assert(c2 == (uint32_t)c2);
1336 g_assert_not_reached();
1338 tcg_out_insn_RIL(s, op, r1, c2);
1339 } else if (type == TCG_TYPE_I32) {
1340 op = (is_unsigned ? RR_CLR : RR_CR);
1341 tcg_out_insn_RR(s, op, r1, c2);
1343 op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1344 tcg_out_insn_RRE(s, op, r1, c2);
1348 *inv_cc = tcg_cond_to_s390_cond[inv_c];
1349 return tcg_cond_to_s390_cond[c];
1352 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1353 TCGArg c2, bool c2const, bool need_carry)
1356 return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1359 static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1360 TCGReg dest, TCGReg c1, TCGArg c2,
1361 bool c2const, bool neg)
1365 /* With LOC2, we can always emit the minimum 3 insns. */
1366 if (HAVE_FACILITY(LOAD_ON_COND2)) {
1367 /* Emit: d = 0, d = (cc ? 1 : d). */
1368 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1369 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1370 tcg_out_insn(s, RIEg, LOCGHI, dest, neg ? -1 : 1, cc);
1379 /* Swap operands so that we can use LEU/GTU/GT/LE. */
1384 cond = tcg_swap_cond(cond);
1393 /* X != 0 is X > 0. */
1394 if (c2const && c2 == 0) {
1395 cond = TCG_COND_GTU;
1404 * The result of a compare has CC=2 for GT and CC=3 unused.
1405 * ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.
1407 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1408 tcg_out_movi(s, type, dest, 0);
1409 tcg_out_insn(s, RRE, ALCGR, dest, dest);
1411 if (type == TCG_TYPE_I32) {
1412 tcg_out_insn(s, RR, LCR, dest, dest);
1414 tcg_out_insn(s, RRE, LCGR, dest, dest);
1420 /* X == 0 is X <= 0. */
1421 if (c2const && c2 == 0) {
1422 cond = TCG_COND_LEU;
1431 * As above, but we're looking for borrow, or !carry.
1432 * The second insn computes d - d - borrow, or -1 for true
1433 * and 0 for false. So we must mask to 1 bit afterward.
1435 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1436 tcg_out_insn(s, RRE, SLBGR, dest, dest);
1438 tgen_andi(s, type, dest, 1);
1443 g_assert_not_reached();
1446 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1447 /* Emit: d = 0, t = 1, d = (cc ? t : d). */
1448 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1449 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, neg ? -1 : 1);
1450 tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1453 static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1454 TCGArg v3, int v3const, TCGReg v4,
1461 if (HAVE_FACILITY(LOAD_ON_COND2)) {
1462 /* Emit: if (cc) dest = v3. */
1463 tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1466 tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1469 /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1470 tcg_out_insn(s, RI, LGHI, dest, v3);
1475 if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1476 /* Emit: dest = cc ? v3 : v4. */
1477 tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1483 tcg_out_mov(s, type, dest, v3);
1489 /* Emit: if (cc) dest = src. */
1490 tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1493 static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1494 TCGReg c1, TCGArg c2, int c2const,
1495 TCGArg v3, int v3const, TCGReg v4)
1499 cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1500 tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1503 static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1504 TCGArg a2, int a2const)
1506 /* Since this sets both R and R+1, we have no choice but to store the
1507 result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */
1508 QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1509 tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1511 if (a2const && a2 == 64) {
1512 tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1517 * Conditions from FLOGR are:
1518 * 2 -> one bit found
1519 * 8 -> no one bit found
1521 tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1524 static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1526 /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1527 if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1528 if (type == TCG_TYPE_I32) {
1529 tcg_out_ext32u(s, dest, src);
1532 tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1536 /* Without MIE3, each byte gets the count of bits for the byte. */
1537 tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1539 /* Multiply to sum each byte at the top of the word. */
1540 if (type == TCG_TYPE_I32) {
1541 tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1542 tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1544 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1545 tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1546 tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
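    /* Each per-byte count is at most 8, so the multiply sums all byte lanes
       into the most significant byte without inter-lane carries; the final
       shift extracts that byte. */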
1550 static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1551 int ofs, int len, int z)
1553 int lsb = (63 - ofs);
1554 int msb = lsb - (len - 1);
1555 tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
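    /* E.g. ofs = 8, len = 16 gives msb = 40 and lsb = 55: the source is
       rotated left by 8 and its low 16 bits are inserted into bits 8..23
       (IBM bits 40..55) of the destination. */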
1558 static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1561 tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1564 static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1566 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1567 if (off == (int16_t)off) {
1568 tcg_out_insn(s, RI, BRC, cc, off);
1569 } else if (off == (int32_t)off) {
1570 tcg_out_insn(s, RIL, BRCL, cc, off);
1572 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1573 tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1577 static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1580 tgen_gotoi(s, cc, l->u.value_ptr);
1582 tcg_out16(s, RI_BRC | (cc << 4));
1583 tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1588 static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1589 TCGReg r1, TCGReg r2, TCGLabel *l)
1591 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1593 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1595 tcg_out16(s, cc << 12 | (opc & 0xff));
1598 static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1599 TCGReg r1, int i2, TCGLabel *l)
1601 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1603 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1605 tcg_out16(s, (i2 << 8) | (opc & 0xff));
1608 static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1609 TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1613 if (!is_tst_cond(c)) {
1614 bool is_unsigned = is_unsigned_cond(c);
1618 cc = tcg_cond_to_s390_cond[c];
1621 opc = (type == TCG_TYPE_I32
1622 ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1623 : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1624 tgen_compare_branch(s, opc, cc, r1, c2, l);
1629 * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1630 * If the immediate we've been given does not fit that range, we'll
1631 * fall back to separate compare and branch instructions using the
1632 * larger comparison range afforded by COMPARE IMMEDIATE.
1634 if (type == TCG_TYPE_I32) {
1637 in_range = (uint32_t)c2 == (uint8_t)c2;
1640 in_range = (int32_t)c2 == (int8_t)c2;
1645 in_range = (uint64_t)c2 == (uint8_t)c2;
1648 in_range = (int64_t)c2 == (int8_t)c2;
1652 tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1657 cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1658 tgen_branch(s, cc, l);
1661 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1663 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1664 if (off == (int32_t)off) {
1665 tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1667 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1668 tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1672 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1673 const TCGHelperInfo *info)
1675 tcg_out_call_int(s, dest);
1685 bool tcg_target_has_memory_bswap(MemOp memop)
1689 if ((memop & MO_SIZE) <= MO_64) {
1694 * Reject 16-byte memop with 16-byte atomicity,
1695 * but do allow a pair of 64-bit operations.
1697 aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
1698 return aa.atom <= MO_64;
1701 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1704 switch (opc & (MO_SSIZE | MO_BSWAP)) {
1706 tcg_out_insn(s, RXY, LLGC, data, h.base, h.index, h.disp);
1709 tcg_out_insn(s, RXY, LGB, data, h.base, h.index, h.disp);
1712 case MO_UW | MO_BSWAP:
1713 /* swapped unsigned halfword load with upper bits zeroed */
1714 tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1715 tcg_out_ext16u(s, data, data);
1718 tcg_out_insn(s, RXY, LLGH, data, h.base, h.index, h.disp);
1721 case MO_SW | MO_BSWAP:
1722 /* swapped sign-extended halfword load */
1723 tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1724 tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
1727 tcg_out_insn(s, RXY, LGH, data, h.base, h.index, h.disp);
1730 case MO_UL | MO_BSWAP:
1731 /* swapped unsigned int load with upper bits zeroed */
1732 tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1733 tcg_out_ext32u(s, data, data);
1736 tcg_out_insn(s, RXY, LLGF, data, h.base, h.index, h.disp);
1739 case MO_SL | MO_BSWAP:
1740 /* swapped sign-extended int load */
1741 tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1742 tcg_out_ext32s(s, data, data);
1745 tcg_out_insn(s, RXY, LGF, data, h.base, h.index, h.disp);
1748 case MO_UQ | MO_BSWAP:
1749 tcg_out_insn(s, RXY, LRVG, data, h.base, h.index, h.disp);
1752 tcg_out_insn(s, RXY, LG, data, h.base, h.index, h.disp);
1756 g_assert_not_reached();
1760 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1763 switch (opc & (MO_SIZE | MO_BSWAP)) {
1765 if (h.disp >= 0 && h.disp < 0x1000) {
1766 tcg_out_insn(s, RX, STC, data, h.base, h.index, h.disp);
1768 tcg_out_insn(s, RXY, STCY, data, h.base, h.index, h.disp);
1772 case MO_UW | MO_BSWAP:
1773 tcg_out_insn(s, RXY, STRVH, data, h.base, h.index, h.disp);
1776 if (h.disp >= 0 && h.disp < 0x1000) {
1777 tcg_out_insn(s, RX, STH, data, h.base, h.index, h.disp);
1779 tcg_out_insn(s, RXY, STHY, data, h.base, h.index, h.disp);
1783 case MO_UL | MO_BSWAP:
1784 tcg_out_insn(s, RXY, STRV, data, h.base, h.index, h.disp);
1787 if (h.disp >= 0 && h.disp < 0x1000) {
1788 tcg_out_insn(s, RX, ST, data, h.base, h.index, h.disp);
1790 tcg_out_insn(s, RXY, STY, data, h.base, h.index, h.disp);
1794 case MO_UQ | MO_BSWAP:
1795 tcg_out_insn(s, RXY, STRVG, data, h.base, h.index, h.disp);
1798 tcg_out_insn(s, RXY, STG, data, h.base, h.index, h.disp);
1802 g_assert_not_reached();
1806 static const TCGLdstHelperParam ldst_helper_param = {
1807 .ntmp = 1, .tmp = { TCG_TMP0 }
1810 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1812 MemOp opc = get_memop(lb->oi);
1814 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1815 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1819 tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1820 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1821 tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1823 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1827 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1829 MemOp opc = get_memop(lb->oi);
1831 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1832 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1836 tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1837 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1839 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1843 /* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
1844 #define MIN_TLB_MASK_TABLE_OFS -(1 << 19)
1847 * For system-mode, perform the TLB load and compare.
1848 * For user-mode, perform any required alignment tests.
1849 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1850 * is required and fill in @h with the host address for the fast path.
1852 static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1853 TCGReg addr_reg, MemOpIdx oi,
1856 TCGType addr_type = s->addr_type;
1857 TCGLabelQemuLdst *ldst = NULL;
1858 MemOp opc = get_memop(oi);
1859 MemOp s_bits = opc & MO_SIZE;
1862 h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
1863 a_mask = (1 << h->aa.align) - 1;
1865 if (tcg_use_softmmu) {
1866 unsigned s_mask = (1 << s_bits) - 1;
1867 int mem_index = get_mmuidx(oi);
1868 int fast_off = tlb_mask_table_ofs(s, mem_index);
1869 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1870 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1874 ldst = new_ldst_label(s);
1875 ldst->is_ld = is_ld;
1877 ldst->addrlo_reg = addr_reg;
1879 tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
1880 s->page_bits - CPU_TLB_ENTRY_BITS);
1882 tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
1883 tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
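        /* Standard softmmu TLB lookup: shift the page number into place,
           AND with CPUTLBDescFast.mask to form a byte offset into the table,
           and add CPUTLBDescFast.table; TCG_TMP0 now points at the
           CPUTLBEntry for this access. */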
1886 * For aligned accesses, we check the first byte and include the
1887 * alignment bits within the address. For unaligned access, we
1888 * check that we don't cross pages using the address of the last
1889 * byte of the access.
1891 a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
1892 tlb_mask = (uint64_t)s->page_mask | a_mask;
1894 tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
1896 tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
1897 tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
1901 ofs = offsetof(CPUTLBEntry, addr_read);
1903 ofs = offsetof(CPUTLBEntry, addr_write);
1905 if (addr_type == TCG_TYPE_I32) {
1906 ofs += HOST_BIG_ENDIAN * 4;
1907 tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
1909 tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
1912 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1913 ldst->label_ptr[0] = s->code_ptr++;
1915 h->index = TCG_TMP0;
1916 tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
1917 offsetof(CPUTLBEntry, addend));
1919 if (addr_type == TCG_TYPE_I32) {
1920 tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
1921 h->base = TCG_REG_NONE;
1928 ldst = new_ldst_label(s);
1929 ldst->is_ld = is_ld;
1931 ldst->addrlo_reg = addr_reg;
1933 tcg_debug_assert(a_mask <= 0xffff);
1934 tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
1936 tcg_out16(s, RI_BRC | (S390_TM_NE << 4));
1937 ldst->label_ptr[0] = s->code_ptr++;
1941 if (addr_type == TCG_TYPE_I32) {
1942 tcg_out_ext32u(s, TCG_TMP0, addr_reg);
1945 if (guest_base < 0x80000) {
1946 h->index = TCG_REG_NONE;
1947 h->disp = guest_base;
1949 h->index = TCG_GUEST_BASE_REG;
1957 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1958 MemOpIdx oi, TCGType data_type)
1960 TCGLabelQemuLdst *ldst;
1963 ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1964 tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
1967 ldst->type = data_type;
1968 ldst->datalo_reg = data_reg;
1969 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1973 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1974 MemOpIdx oi, TCGType data_type)
1976 TCGLabelQemuLdst *ldst;
1979 ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1980 tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1983 ldst->type = data_type;
1984 ldst->datalo_reg = data_reg;
1985 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1989 static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1990 TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1992 TCGLabel *l1 = NULL, *l2 = NULL;
1993 TCGLabelQemuLdst *ldst;
1999 ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
2001 use_pair = h.aa.atom < MO_128;
2002 need_bswap = get_memop(oi) & MO_BSWAP;
2006 * Atomicity requires we use LPQ. If we've already checked for
2007 * 16-byte alignment, that's all we need. If we arrive with
2008 * lesser alignment, we have determined that less than 16-byte
2009 * alignment can be satisfied with two 8-byte loads.
2011 if (h.aa.align < MO_128) {
2013 l1 = gen_new_label();
2014 l2 = gen_new_label();
2016 tcg_out_insn(s, RI, TMLL, addr_reg, 15);
2017 tgen_branch(s, S390_TM_NE, l1);
2020 tcg_debug_assert(!need_bswap);
2021 tcg_debug_assert(datalo & 1);
2022 tcg_debug_assert(datahi == datalo - 1);
2023 insn = is_ld ? RXY_LPQ : RXY_STPQ;
2024 tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
2027 tgen_branch(s, S390_CC_ALWAYS, l2);
2028 tcg_out_label(s, l1);
2035 d1 = datalo, d2 = datahi;
2036 insn = is_ld ? RXY_LRVG : RXY_STRVG;
2038 d1 = datahi, d2 = datalo;
2039 insn = is_ld ? RXY_LG : RXY_STG;
2042 if (h.base == d1 || h.index == d1) {
2043 tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
2045 h.index = TCG_REG_NONE;
2048 tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
2049 tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
2052 tcg_out_label(s, l2);
2056 ldst->type = TCG_TYPE_I128;
2057 ldst->datalo_reg = datalo;
2058 ldst->datahi_reg = datahi;
2059 ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2063 static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2065 /* Reuse the zeroing that exists for goto_ptr. */
2067 tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2069 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2070 tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2074 static void tcg_out_goto_tb(TCGContext *s, int which)
     * Branch displacement must be aligned for atomic patching;
     * see if we need to add an extra nop before the branch.
2080 if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2083 tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2084 set_jmp_insn_offset(s, which);
2086 set_jmp_reset_offset(s, which);
2089 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2090 uintptr_t jmp_rx, uintptr_t jmp_rw)
2092 if (!HAVE_FACILITY(GEN_INST_EXT)) {
2095 /* patch the branch destination */
2096 uintptr_t addr = tb->jmp_target_addr[n];
2097 intptr_t disp = addr - (jmp_rx - 2);
2098 qatomic_set((int32_t *)jmp_rw, disp / 2);
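    /* jmp_rx points at the 32-bit relative field of the BRCL, which starts
       2 bytes into the 6-byte insn; the displacement is counted in halfwords
       from the start of the insn, hence the -2 adjustment and division by 2. */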
2099 /* no need to flush icache explicitly */
2102 # define OP_32_64(x) \
2103 case glue(glue(INDEX_op_,x),_i32): \
2104 case glue(glue(INDEX_op_,x),_i64)
2106 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2107 const TCGArg args[TCG_MAX_OP_ARGS],
2108 const int const_args[TCG_MAX_OP_ARGS])
2114 case INDEX_op_goto_ptr:
2116 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2120 /* ??? LLC (RXY format) is only present with the extended-immediate
2121 facility, whereas LLGC is always present. */
2122 tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
        /* ??? LB is no smaller than LGB, so there is no point in using it. */
2127 tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2131 /* ??? LLH (RXY format) is only present with the extended-immediate
2132 facility, whereas LLGH is always present. */
2133 tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2136 case INDEX_op_ld16s_i32:
2137 tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2140 case INDEX_op_ld_i32:
2141 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2145 tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2146 TCG_REG_NONE, args[2]);
2150 tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2151 TCG_REG_NONE, args[2]);
2154 case INDEX_op_st_i32:
2155 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2158 case INDEX_op_add_i32:
2159 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2160 if (const_args[2]) {
2163 if (a2 == (int16_t)a2) {
2164 tcg_out_insn(s, RI, AHI, a0, a2);
2167 tcg_out_insn(s, RIL, AFI, a0, a2);
2170 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2171 } else if (a0 == a1) {
2172 tcg_out_insn(s, RR, AR, a0, a2);
2174 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2177 case INDEX_op_sub_i32:
2178 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2179 if (const_args[2]) {
2182 } else if (a0 == a1) {
2183 tcg_out_insn(s, RR, SR, a0, a2);
2185 tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2189 case INDEX_op_and_i32:
2190 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2191 if (const_args[2]) {
2192 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2193 tgen_andi(s, TCG_TYPE_I32, a0, a2);
2194 } else if (a0 == a1) {
2195 tcg_out_insn(s, RR, NR, a0, a2);
2197 tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2200 case INDEX_op_or_i32:
2201 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2202 if (const_args[2]) {
2203 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2204 tgen_ori(s, a0, a2);
2205 } else if (a0 == a1) {
2206 tcg_out_insn(s, RR, OR, a0, a2);
2208 tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2211 case INDEX_op_xor_i32:
2212 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2213 if (const_args[2]) {
2214 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2215 tcg_out_insn(s, RIL, XILF, a0, a2);
2216 } else if (a0 == a1) {
2217 tcg_out_insn(s, RR, XR, args[0], args[2]);
2219 tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2223 case INDEX_op_andc_i32:
2224 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2225 if (const_args[2]) {
2226 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2227 tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2229 tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2232 case INDEX_op_orc_i32:
2233 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2234 if (const_args[2]) {
2235 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2236 tgen_ori(s, a0, (uint32_t)~a2);
2238 tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2241 case INDEX_op_eqv_i32:
2242 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2243 if (const_args[2]) {
2244 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2245 tcg_out_insn(s, RIL, XILF, a0, ~a2);
2247 tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2250 case INDEX_op_nand_i32:
2251 tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2253 case INDEX_op_nor_i32:
2254 tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2257 case INDEX_op_neg_i32:
2258 tcg_out_insn(s, RR, LCR, args[0], args[1]);
2260 case INDEX_op_not_i32:
2261 tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2264 case INDEX_op_mul_i32:
2265 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2266 if (const_args[2]) {
2267 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2268 if (a2 == (int16_t)a2) {
2269 tcg_out_insn(s, RI, MHI, a0, a2);
2271 tcg_out_insn(s, RIL, MSFI, a0, a2);
2273 } else if (a0 == a1) {
2274 tcg_out_insn(s, RRE, MSR, a0, a2);
2276 tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2280 case INDEX_op_div2_i32:
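        /* DR divides the 64-bit dividend held in the even/odd pair
           R(n):R(n+1), leaving the remainder in the even register and the
           quotient in the odd one; the asserts below check that TCG handed
           us exactly such a pair. */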
2281 tcg_debug_assert(args[0] == args[2]);
2282 tcg_debug_assert(args[1] == args[3]);
2283 tcg_debug_assert((args[1] & 1) == 0);
2284 tcg_debug_assert(args[0] == args[1] + 1);
2285 tcg_out_insn(s, RR, DR, args[1], args[4]);
2287 case INDEX_op_divu2_i32:
2288 tcg_debug_assert(args[0] == args[2]);
2289 tcg_debug_assert(args[1] == args[3]);
2290 tcg_debug_assert((args[1] & 1) == 0);
2291 tcg_debug_assert(args[0] == args[1] + 1);
2292 tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2295 case INDEX_op_shl_i32:
2299 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2301 if (const_args[2]) {
2302 tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2304 tcg_out_sh32(s, op, a0, a2, 0);
2307 /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */
2308 if (const_args[2]) {
2309 tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2311 tcg_out_sh64(s, op2, a0, a1, a2, 0);
2315 case INDEX_op_shr_i32:
2319 case INDEX_op_sar_i32:
2324 case INDEX_op_rotl_i32:
2325 /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
2326 if (const_args[2]) {
2327 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2329 tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2332 case INDEX_op_rotr_i32:
2333 if (const_args[2]) {
2334 tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2335 TCG_REG_NONE, (32 - args[2]) & 31);
2337 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2338 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2342 case INDEX_op_bswap16_i32:
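        /* LRVR byte-reverses the whole 32-bit register, leaving the swapped
           halfword in bits 16..31; the shift below brings it back down with
           sign or zero extension as requested. */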
2343 a0 = args[0], a1 = args[1], a2 = args[2];
2344 tcg_out_insn(s, RRE, LRVR, a0, a1);
2345 if (a2 & TCG_BSWAP_OS) {
2346 tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2348 tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2351 case INDEX_op_bswap16_i64:
2352 a0 = args[0], a1 = args[1], a2 = args[2];
2353 tcg_out_insn(s, RRE, LRVGR, a0, a1);
2354 if (a2 & TCG_BSWAP_OS) {
2355 tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2357 tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2361 case INDEX_op_bswap32_i32:
2362 tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2364 case INDEX_op_bswap32_i64:
2365 a0 = args[0], a1 = args[1], a2 = args[2];
2366 tcg_out_insn(s, RRE, LRVR, a0, a1);
2367 if (a2 & TCG_BSWAP_OS) {
2368 tcg_out_ext32s(s, a0, a0);
2369 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2370 tcg_out_ext32u(s, a0, a0);
2374 case INDEX_op_add2_i32:
2375 if (const_args[4]) {
2376 tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2378 tcg_out_insn(s, RR, ALR, args[0], args[4]);
2380 tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2382 case INDEX_op_sub2_i32:
2383 if (const_args[4]) {
2384 tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2386 tcg_out_insn(s, RR, SLR, args[0], args[4]);
2388 tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2392 tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2395 case INDEX_op_brcond_i32:
2396 tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2397 args[1], const_args[1], arg_label(args[3]));
2399 case INDEX_op_setcond_i32:
2400 tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2401 args[2], const_args[2], false);
2403 case INDEX_op_negsetcond_i32:
2404 tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2405 args[2], const_args[2], true);
2407 case INDEX_op_movcond_i32:
2408 tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2409 args[2], const_args[2], args[3], const_args[3], args[4]);
2412 case INDEX_op_qemu_ld_a32_i32:
2413 case INDEX_op_qemu_ld_a64_i32:
2414 tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
2416 case INDEX_op_qemu_ld_a32_i64:
2417 case INDEX_op_qemu_ld_a64_i64:
2418 tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
2420 case INDEX_op_qemu_st_a32_i32:
2421 case INDEX_op_qemu_st_a64_i32:
2422 tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
2424 case INDEX_op_qemu_st_a32_i64:
2425 case INDEX_op_qemu_st_a64_i64:
2426 tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
2428 case INDEX_op_qemu_ld_a32_i128:
2429 case INDEX_op_qemu_ld_a64_i128:
2430 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
2432 case INDEX_op_qemu_st_a32_i128:
2433 case INDEX_op_qemu_st_a64_i128:
2434 tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
2437 case INDEX_op_ld16s_i64:
2438 tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2440 case INDEX_op_ld32u_i64:
2441 tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2443 case INDEX_op_ld32s_i64:
2444 tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2446 case INDEX_op_ld_i64:
2447 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2450 case INDEX_op_st32_i64:
2451 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2453 case INDEX_op_st_i64:
2454 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2457 case INDEX_op_add_i64:
2458 a0 = args[0], a1 = args[1], a2 = args[2];
2459 if (const_args[2]) {
2462 if (a2 == (int16_t)a2) {
2463 tcg_out_insn(s, RI, AGHI, a0, a2);
2466 if (a2 == (int32_t)a2) {
2467 tcg_out_insn(s, RIL, AGFI, a0, a2);
2470 if (a2 == (uint32_t)a2) {
2471 tcg_out_insn(s, RIL, ALGFI, a0, a2);
2474 if (-a2 == (uint32_t)-a2) {
2475 tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2479 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2480 } else if (a0 == a1) {
2481 tcg_out_insn(s, RRE, AGR, a0, a2);
2483 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2486 case INDEX_op_sub_i64:
2487 a0 = args[0], a1 = args[1], a2 = args[2];
2488 if (const_args[2]) {
2492 tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2496 case INDEX_op_and_i64:
2497 a0 = args[0], a1 = args[1], a2 = args[2];
2498 if (const_args[2]) {
2499 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2500 tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2502 tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2505 case INDEX_op_or_i64:
2506 a0 = args[0], a1 = args[1], a2 = args[2];
2507 if (const_args[2]) {
2508 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2509 tgen_ori(s, a0, a2);
2511 tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2514 case INDEX_op_xor_i64:
2515 a0 = args[0], a1 = args[1], a2 = args[2];
2516 if (const_args[2]) {
2517 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2518 tgen_xori(s, a0, a2);
2520 tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2524 case INDEX_op_andc_i64:
2525 a0 = args[0], a1 = args[1], a2 = args[2];
2526 if (const_args[2]) {
2527 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2528 tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2530 tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2533 case INDEX_op_orc_i64:
2534 a0 = args[0], a1 = args[1], a2 = args[2];
2535 if (const_args[2]) {
2536 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2537 tgen_ori(s, a0, ~a2);
2539 tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2542 case INDEX_op_eqv_i64:
2543 a0 = args[0], a1 = args[1], a2 = args[2];
2544 if (const_args[2]) {
2545 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2546 tgen_xori(s, a0, ~a2);
2548 tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2551 case INDEX_op_nand_i64:
2552 tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2554 case INDEX_op_nor_i64:
2555 tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2558 case INDEX_op_neg_i64:
2559 tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2561 case INDEX_op_not_i64:
2562 tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2564 case INDEX_op_bswap64_i64:
2565 tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2568 case INDEX_op_mul_i64:
2569 a0 = args[0], a1 = args[1], a2 = args[2];
2570 if (const_args[2]) {
2571 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2572 if (a2 == (int16_t)a2) {
2573 tcg_out_insn(s, RI, MGHI, a0, a2);
2575 tcg_out_insn(s, RIL, MSGFI, a0, a2);
2577 } else if (a0 == a1) {
2578 tcg_out_insn(s, RRE, MSGR, a0, a2);
2580 tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2584 case INDEX_op_div2_i64:
         * ??? We get an unnecessary sign-extension of the dividend
         * into op0 with this definition, but since we always produce
         * both quotient and remainder, using INDEX_op_div_i64 instead
         * would require jumping through even more hoops.
2591 tcg_debug_assert(args[0] == args[2]);
2592 tcg_debug_assert(args[1] == args[3]);
2593 tcg_debug_assert((args[1] & 1) == 0);
2594 tcg_debug_assert(args[0] == args[1] + 1);
2595 tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2597 case INDEX_op_divu2_i64:
2598 tcg_debug_assert(args[0] == args[2]);
2599 tcg_debug_assert(args[1] == args[3]);
2600 tcg_debug_assert((args[1] & 1) == 0);
2601 tcg_debug_assert(args[0] == args[1] + 1);
2602 tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2604 case INDEX_op_mulu2_i64:
2605 tcg_debug_assert(args[0] == args[2]);
2606 tcg_debug_assert((args[1] & 1) == 0);
2607 tcg_debug_assert(args[0] == args[1] + 1);
2608 tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2610 case INDEX_op_muls2_i64:
2611 tcg_debug_assert((args[1] & 1) == 0);
2612 tcg_debug_assert(args[0] == args[1] + 1);
2613 tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2616 case INDEX_op_shl_i64:
2619 if (const_args[2]) {
2620 tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2622 tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2625 case INDEX_op_shr_i64:
2628 case INDEX_op_sar_i64:
2632 case INDEX_op_rotl_i64:
2633 if (const_args[2]) {
2634 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2635 TCG_REG_NONE, args[2]);
2637 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2640 case INDEX_op_rotr_i64:
2641 if (const_args[2]) {
2642 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2643 TCG_REG_NONE, (64 - args[2]) & 63);
2645 /* We can use the smaller 32-bit negate because only the
2646 low 6 bits are examined for the rotate. */
2647 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2648 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2652 case INDEX_op_add2_i64:
2653 if (const_args[4]) {
2654 if ((int64_t)args[4] >= 0) {
2655 tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2657 tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2660 tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2662 tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2664 case INDEX_op_sub2_i64:
2665 if (const_args[4]) {
2666 if ((int64_t)args[4] >= 0) {
2667 tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2669 tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2672 tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2674 tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2677 case INDEX_op_brcond_i64:
2678 tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2679 args[1], const_args[1], arg_label(args[3]));
2681 case INDEX_op_setcond_i64:
2682 tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2683 args[2], const_args[2], false);
2685 case INDEX_op_negsetcond_i64:
2686 tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2687 args[2], const_args[2], true);
2689 case INDEX_op_movcond_i64:
2690 tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2691 args[2], const_args[2], args[3], const_args[3], args[4]);
2695 a0 = args[0], a1 = args[1], a2 = args[2];
2696 if (const_args[1]) {
2697 tgen_deposit(s, a0, a2, args[3], args[4], 1);
2699             /* Since we can't support "0Z" as a constraint, we allow a1 in
2700                any register.  Fix things up as if it were a matching constraint. */
2702 TCGType type = (opc == INDEX_op_deposit_i64);
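                /* TCG_TYPE_I32 is 0 and TCG_TYPE_I64 is 1 in the TCGType
                   enum, so the comparison above yields the proper move type. */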
2704 tcg_out_mov(s, type, TCG_TMP0, a2);
2707 tcg_out_mov(s, type, a0, a1);
2709 tgen_deposit(s, a0, a2, args[3], args[4], 0);
2714 tgen_extract(s, args[0], args[1], args[2], args[3]);
2717 case INDEX_op_clz_i64:
2718 tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2721 case INDEX_op_ctpop_i32:
2722 tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2724 case INDEX_op_ctpop_i64:
2725 tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2729         /* The host memory model is quite strong; we simply need to
2730            serialize the instruction stream. */
2731 if (args[0] & TCG_MO_ST_LD) {
2732 /* fast-bcr-serialization facility (45) is present */
2733 tcg_out_insn(s, RR, BCR, 14, 0);
2737 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2738 case INDEX_op_mov_i64:
2739 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2740 case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */
2741 case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */
2742 case INDEX_op_ext8s_i32: /* Always emitted via tcg_reg_alloc_op. */
2743 case INDEX_op_ext8s_i64:
2744 case INDEX_op_ext8u_i32:
2745 case INDEX_op_ext8u_i64:
2746 case INDEX_op_ext16s_i32:
2747 case INDEX_op_ext16s_i64:
2748 case INDEX_op_ext16u_i32:
2749 case INDEX_op_ext16u_i64:
2750 case INDEX_op_ext32s_i64:
2751 case INDEX_op_ext32u_i64:
2752 case INDEX_op_ext_i32_i64:
2753 case INDEX_op_extu_i32_i64:
2754 case INDEX_op_extrl_i64_i32:
2756 g_assert_not_reached();
2760 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2761 TCGReg dst, TCGReg src)
2763 if (is_general_reg(src)) {
2764 /* Replicate general register into two MO_64. */
2765 tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2766 if (vece == MO_64) {
2773 * Recall that the "standard" integer, within a vector, is the
2774 * rightmost element of the leftmost doubleword, a-la VLLEZ.
2776 tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
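    /*
     * The element index (8 >> vece) - 1 used above selects exactly that
     * element: 7 for MO_8, 3 for MO_16, 1 for MO_32 and 0 for MO_64.
     */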
2780 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2781 TCGReg dst, TCGReg base, intptr_t offset)
2783 tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2787 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2788 TCGReg dst, int64_t val)
2790 int i, mask, msb, lsb;
2792 /* Look for int16_t elements. */
2793 if (vece <= MO_16 ||
2794 (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2795 tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2799 /* Look for bit masks. */
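    /*
     * VGM sets the bits from position msb through lsb of each element,
     * counting from the most significant bit, and wraps around when
     * msb > lsb.  For MO_32, val = 0x00ffff00 yields msb = 8, lsb = 23;
     * the wraparound value 0xc000003f yields msb = 26, lsb = 1.
     */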
2800 if (vece == MO_32) {
2801 if (risbg_mask((int32_t)val)) {
2802 /* Handle wraparound by swapping msb and lsb. */
2803 if ((val & 0x80000001u) == 0x80000001u) {
2804 msb = 32 - ctz32(~val);
2805 lsb = clz32(~val) - 1;
2808 lsb = 31 - ctz32(val);
2810 tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2814 if (risbg_mask(val)) {
2815 /* Handle wraparound by swapping msb and lsb. */
2816 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2818 msb = 64 - ctz64(~val);
2819 lsb = clz64(~val) - 1;
2822 lsb = 63 - ctz64(val);
2824 tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2829 /* Look for all bytes 0x00 or 0xff. */
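    /* Each 0xff byte sets one bit of an 8-bit mask; multiplying by 0x0101
       replicates that mask into the 16-bit immediate that VGBM applies to
       all 16 bytes of the vector. */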
2830 for (i = mask = 0; i < 8; i++) {
2831 uint8_t byte = val >> (i * 8);
2834 } else if (byte != 0) {
2839 tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2843 /* Otherwise, stuff it in the constant pool. */
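    /*
     * The LARL below is emitted with a zero displacement; new_pool_label
     * records a relocation so that it is later patched to address the
     * pooled 64-bit value, which VLREP then replicates into each element.
     */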
2844 tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2845 new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2846 tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2849 static bool tcg_out_cmp_vec_noinv(TCGContext *s, unsigned vece, TCGReg a0,
2850 TCGReg a1, TCGReg a2, TCGCond cond)
2852 bool need_swap = false, need_inv = false;
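    /*
     * Only equal, signed greater and unsigned greater compares exist
     * (VCEQ, VCH, VCHL); the remaining conditions are synthesized by
     * swapping the operands and/or asking the caller to invert the
     * result, which is what the return value reports.
     */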
2870 need_swap = need_inv = true;
2873 g_assert_not_reached();
2877 cond = tcg_invert_cond(cond);
2883 cond = tcg_swap_cond(cond);
2888 tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2891 tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2894 tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2897 g_assert_not_reached();
2902 static void tcg_out_cmp_vec(TCGContext *s, unsigned vece, TCGReg a0,
2903 TCGReg a1, TCGReg a2, TCGCond cond)
2905 if (tcg_out_cmp_vec_noinv(s, vece, a0, a1, a2, cond)) {
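        /* The compare was emitted for the inverted condition; VNO of the
           result with itself computes the bitwise NOT to fix it up. */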
2906 tcg_out_insn(s, VRRc, VNO, a0, a0, a0, 0);
2910 static void tcg_out_cmpsel_vec(TCGContext *s, unsigned vece, TCGReg a0,
2911 TCGReg c1, TCGReg c2, TCGArg v3,
2912 int const_v3, TCGReg v4, TCGCond cond)
2914 bool inv = tcg_out_cmp_vec_noinv(s, vece, TCG_VEC_TMP0, c1, c2, cond);
2918 tcg_out_insn(s, VRRe, VSEL, a0, v4, v3, TCG_VEC_TMP0);
2920 tcg_out_insn(s, VRRe, VSEL, a0, v3, v4, TCG_VEC_TMP0);
2924 tcg_out_insn(s, VRRc, VOC, a0, v4, TCG_VEC_TMP0, 0);
2926 tcg_out_insn(s, VRRc, VO, a0, v4, TCG_VEC_TMP0, 0);
2930 tcg_out_insn(s, VRRc, VN, a0, v4, TCG_VEC_TMP0, 0);
2932 tcg_out_insn(s, VRRc, VNC, a0, v4, TCG_VEC_TMP0, 0);
2937 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2938 unsigned vecl, unsigned vece,
2939 const TCGArg args[TCG_MAX_OP_ARGS],
2940 const int const_args[TCG_MAX_OP_ARGS])
2942 TCGType type = vecl + TCG_TYPE_V64;
2943 TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2946 case INDEX_op_ld_vec:
2947 tcg_out_ld(s, type, a0, a1, a2);
2949 case INDEX_op_st_vec:
2950 tcg_out_st(s, type, a0, a1, a2);
2952 case INDEX_op_dupm_vec:
2953 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2956 case INDEX_op_abs_vec:
2957 tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2959 case INDEX_op_neg_vec:
2960 tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2962 case INDEX_op_not_vec:
2963 tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2966 case INDEX_op_add_vec:
2967 tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2969 case INDEX_op_sub_vec:
2970 tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2972 case INDEX_op_and_vec:
2973 tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2975 case INDEX_op_andc_vec:
2976 tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2978 case INDEX_op_mul_vec:
2979 tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2981 case INDEX_op_or_vec:
2982 tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2984 case INDEX_op_orc_vec:
2985 tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2987 case INDEX_op_xor_vec:
2988 tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2990 case INDEX_op_nand_vec:
2991 tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2993 case INDEX_op_nor_vec:
2994 tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2996 case INDEX_op_eqv_vec:
2997 tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
3000 case INDEX_op_shli_vec:
3001 tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
3003 case INDEX_op_shri_vec:
3004 tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
3006 case INDEX_op_sari_vec:
3007 tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
3009 case INDEX_op_rotli_vec:
3010 tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
3012 case INDEX_op_shls_vec:
3013 tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
3015 case INDEX_op_shrs_vec:
3016 tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
3018 case INDEX_op_sars_vec:
3019 tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
3021 case INDEX_op_rotls_vec:
3022 tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
3024 case INDEX_op_shlv_vec:
3025 tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
3027 case INDEX_op_shrv_vec:
3028 tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
3030 case INDEX_op_sarv_vec:
3031 tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
3033 case INDEX_op_rotlv_vec:
3034 tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
3037 case INDEX_op_smin_vec:
3038 tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
3040 case INDEX_op_smax_vec:
3041 tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
3043 case INDEX_op_umin_vec:
3044 tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
3046 case INDEX_op_umax_vec:
3047 tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
3050 case INDEX_op_bitsel_vec:
3051 tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
3054 case INDEX_op_cmp_vec:
3055 tcg_out_cmp_vec(s, vece, a0, a1, a2, args[3]);
3057 case INDEX_op_cmpsel_vec:
3058 tcg_out_cmpsel_vec(s, vece, a0, a1, a2, args[3], const_args[3],
3062 case INDEX_op_s390_vuph_vec:
3063 tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
3065 case INDEX_op_s390_vupl_vec:
3066 tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
3068 case INDEX_op_s390_vpks_vec:
3069 tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
3072 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
3073 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
3075 g_assert_not_reached();
3079 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3082 case INDEX_op_abs_vec:
3083 case INDEX_op_add_vec:
3084 case INDEX_op_and_vec:
3085 case INDEX_op_andc_vec:
3086 case INDEX_op_bitsel_vec:
3087 case INDEX_op_eqv_vec:
3088 case INDEX_op_nand_vec:
3089 case INDEX_op_neg_vec:
3090 case INDEX_op_nor_vec:
3091 case INDEX_op_not_vec:
3092 case INDEX_op_or_vec:
3093 case INDEX_op_orc_vec:
3094 case INDEX_op_rotli_vec:
3095 case INDEX_op_rotls_vec:
3096 case INDEX_op_rotlv_vec:
3097 case INDEX_op_sari_vec:
3098 case INDEX_op_sars_vec:
3099 case INDEX_op_sarv_vec:
3100 case INDEX_op_shli_vec:
3101 case INDEX_op_shls_vec:
3102 case INDEX_op_shlv_vec:
3103 case INDEX_op_shri_vec:
3104 case INDEX_op_shrs_vec:
3105 case INDEX_op_shrv_vec:
3106 case INDEX_op_smax_vec:
3107 case INDEX_op_smin_vec:
3108 case INDEX_op_sub_vec:
3109 case INDEX_op_umax_vec:
3110 case INDEX_op_umin_vec:
3111 case INDEX_op_xor_vec:
3112 case INDEX_op_cmp_vec:
3113 case INDEX_op_cmpsel_vec:
3115 case INDEX_op_rotrv_vec:
3117 case INDEX_op_mul_vec:
3118 return vece < MO_64;
3119 case INDEX_op_ssadd_vec:
3120 case INDEX_op_sssub_vec:
3121 return vece < MO_64 ? -1 : 0;
3127 static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3128 TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3130 TCGv_vec h1 = tcg_temp_new_vec(type);
3131 TCGv_vec h2 = tcg_temp_new_vec(type);
3132 TCGv_vec l1 = tcg_temp_new_vec(type);
3133 TCGv_vec l2 = tcg_temp_new_vec(type);
3135 tcg_debug_assert (vece < MO_64);
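    /*
     * Expand the saturating operation by unpacking both inputs to the
     * next wider element size (VUPH/VUPL, sign-extending), doing the
     * add or subtract there, and re-packing with signed saturation (VPKS).
     */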
3137 /* Unpack with sign-extension. */
3138 vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3139 tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3140 vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3141 tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3143 vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3144 tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3145 vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3146 tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3148 /* Arithmetic on a wider element size. */
3149 vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3150 tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3151 vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3152 tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3154 /* Pack with saturation. */
3155 vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3156 tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3158 tcg_temp_free_vec(h1);
3159 tcg_temp_free_vec(h2);
3160 tcg_temp_free_vec(l1);
3161 tcg_temp_free_vec(l2);
3164 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3168 TCGv_vec v0, v1, v2, t0;
3171 v0 = temp_tcgv_vec(arg_temp(a0));
3172 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3173 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3176 case INDEX_op_rotrv_vec:
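        /*
         * Rotate right is expanded as a rotate left by the negated count;
         * only the low bits of the count are used by the rotate, so the
         * negation is safe.
         */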
3177 t0 = tcg_temp_new_vec(type);
3178 tcg_gen_neg_vec(vece, t0, v2);
3179 tcg_gen_rotlv_vec(vece, v0, v1, t0);
3180 tcg_temp_free_vec(t0);
3183 case INDEX_op_ssadd_vec:
3184 expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3186 case INDEX_op_sssub_vec:
3187 expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3191 g_assert_not_reached();
3196 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3199 case INDEX_op_goto_ptr:
3202 case INDEX_op_ld8u_i32:
3203 case INDEX_op_ld8u_i64:
3204 case INDEX_op_ld8s_i32:
3205 case INDEX_op_ld8s_i64:
3206 case INDEX_op_ld16u_i32:
3207 case INDEX_op_ld16u_i64:
3208 case INDEX_op_ld16s_i32:
3209 case INDEX_op_ld16s_i64:
3210 case INDEX_op_ld_i32:
3211 case INDEX_op_ld32u_i64:
3212 case INDEX_op_ld32s_i64:
3213 case INDEX_op_ld_i64:
3214 return C_O1_I1(r, r);
3216 case INDEX_op_st8_i32:
3217 case INDEX_op_st8_i64:
3218 case INDEX_op_st16_i32:
3219 case INDEX_op_st16_i64:
3220 case INDEX_op_st_i32:
3221 case INDEX_op_st32_i64:
3222 case INDEX_op_st_i64:
3223 return C_O0_I2(r, r);
3225 case INDEX_op_add_i32:
3226 case INDEX_op_add_i64:
3227 case INDEX_op_shl_i64:
3228 case INDEX_op_shr_i64:
3229 case INDEX_op_sar_i64:
3230 case INDEX_op_rotl_i32:
3231 case INDEX_op_rotl_i64:
3232 case INDEX_op_rotr_i32:
3233 case INDEX_op_rotr_i64:
3234 case INDEX_op_setcond_i32:
3235 case INDEX_op_negsetcond_i32:
3236 return C_O1_I2(r, r, ri);
3237 case INDEX_op_setcond_i64:
3238 case INDEX_op_negsetcond_i64:
3239 return C_O1_I2(r, r, rC);
3241 case INDEX_op_clz_i64:
3242 return C_O1_I2(r, r, rI);
3244 case INDEX_op_sub_i32:
3245 case INDEX_op_sub_i64:
3246 case INDEX_op_and_i32:
3247 case INDEX_op_or_i32:
3248 case INDEX_op_xor_i32:
3249 return C_O1_I2(r, r, ri);
3250 case INDEX_op_and_i64:
3251 return C_O1_I2(r, r, rNKR);
3252 case INDEX_op_or_i64:
3253 case INDEX_op_xor_i64:
3254 return C_O1_I2(r, r, rK);
3256 case INDEX_op_andc_i32:
3257 case INDEX_op_orc_i32:
3258 case INDEX_op_eqv_i32:
3259 return C_O1_I2(r, r, ri);
3260 case INDEX_op_andc_i64:
3261 return C_O1_I2(r, r, rKR);
3262 case INDEX_op_orc_i64:
3263 case INDEX_op_eqv_i64:
3264 return C_O1_I2(r, r, rNK);
3266 case INDEX_op_nand_i32:
3267 case INDEX_op_nand_i64:
3268 case INDEX_op_nor_i32:
3269 case INDEX_op_nor_i64:
3270 return C_O1_I2(r, r, r);
3272 case INDEX_op_mul_i32:
3273 return (HAVE_FACILITY(MISC_INSN_EXT2)
3275 : C_O1_I2(r, 0, ri));
3276 case INDEX_op_mul_i64:
3277 return (HAVE_FACILITY(MISC_INSN_EXT2)
3279 : C_O1_I2(r, 0, rJ));
3281 case INDEX_op_shl_i32:
3282 case INDEX_op_shr_i32:
3283 case INDEX_op_sar_i32:
3284 return C_O1_I2(r, r, ri);
3286 case INDEX_op_brcond_i32:
3287 return C_O0_I2(r, ri);
3288 case INDEX_op_brcond_i64:
3289 return C_O0_I2(r, rC);
3291 case INDEX_op_bswap16_i32:
3292 case INDEX_op_bswap16_i64:
3293 case INDEX_op_bswap32_i32:
3294 case INDEX_op_bswap32_i64:
3295 case INDEX_op_bswap64_i64:
3296 case INDEX_op_neg_i32:
3297 case INDEX_op_neg_i64:
3298 case INDEX_op_not_i32:
3299 case INDEX_op_not_i64:
3300 case INDEX_op_ext8s_i32:
3301 case INDEX_op_ext8s_i64:
3302 case INDEX_op_ext8u_i32:
3303 case INDEX_op_ext8u_i64:
3304 case INDEX_op_ext16s_i32:
3305 case INDEX_op_ext16s_i64:
3306 case INDEX_op_ext16u_i32:
3307 case INDEX_op_ext16u_i64:
3308 case INDEX_op_ext32s_i64:
3309 case INDEX_op_ext32u_i64:
3310 case INDEX_op_ext_i32_i64:
3311 case INDEX_op_extu_i32_i64:
3312 case INDEX_op_extract_i32:
3313 case INDEX_op_extract_i64:
3314 case INDEX_op_ctpop_i32:
3315 case INDEX_op_ctpop_i64:
3316 return C_O1_I1(r, r);
3318 case INDEX_op_qemu_ld_a32_i32:
3319 case INDEX_op_qemu_ld_a64_i32:
3320 case INDEX_op_qemu_ld_a32_i64:
3321 case INDEX_op_qemu_ld_a64_i64:
3322 return C_O1_I1(r, r);
3323 case INDEX_op_qemu_st_a32_i64:
3324 case INDEX_op_qemu_st_a64_i64:
3325 case INDEX_op_qemu_st_a32_i32:
3326 case INDEX_op_qemu_st_a64_i32:
3327 return C_O0_I2(r, r);
3328 case INDEX_op_qemu_ld_a32_i128:
3329 case INDEX_op_qemu_ld_a64_i128:
3330 return C_O2_I1(o, m, r);
3331 case INDEX_op_qemu_st_a32_i128:
3332 case INDEX_op_qemu_st_a64_i128:
3333 return C_O0_I3(o, m, r);
3335 case INDEX_op_deposit_i32:
3336 case INDEX_op_deposit_i64:
3337 return C_O1_I2(r, rZ, r);
3339 case INDEX_op_movcond_i32:
3340 return C_O1_I4(r, r, ri, rI, r);
3341 case INDEX_op_movcond_i64:
3342 return C_O1_I4(r, r, rC, rI, r);
3344 case INDEX_op_div2_i32:
3345 case INDEX_op_div2_i64:
3346 case INDEX_op_divu2_i32:
3347 case INDEX_op_divu2_i64:
3348 return C_O2_I3(o, m, 0, 1, r);
3350 case INDEX_op_mulu2_i64:
3351 return C_O2_I2(o, m, 0, r);
3352 case INDEX_op_muls2_i64:
3353 return C_O2_I2(o, m, r, r);
3355 case INDEX_op_add2_i32:
3356 case INDEX_op_sub2_i32:
3357 return C_N1_O1_I4(r, r, 0, 1, ri, r);
3359 case INDEX_op_add2_i64:
3360 case INDEX_op_sub2_i64:
3361 return C_N1_O1_I4(r, r, 0, 1, rJU, r);
3363 case INDEX_op_st_vec:
3364 return C_O0_I2(v, r);
3365 case INDEX_op_ld_vec:
3366 case INDEX_op_dupm_vec:
3367 return C_O1_I1(v, r);
3368 case INDEX_op_dup_vec:
3369 return C_O1_I1(v, vr);
3370 case INDEX_op_abs_vec:
3371 case INDEX_op_neg_vec:
3372 case INDEX_op_not_vec:
3373 case INDEX_op_rotli_vec:
3374 case INDEX_op_sari_vec:
3375 case INDEX_op_shli_vec:
3376 case INDEX_op_shri_vec:
3377 case INDEX_op_s390_vuph_vec:
3378 case INDEX_op_s390_vupl_vec:
3379 return C_O1_I1(v, v);
3380 case INDEX_op_add_vec:
3381 case INDEX_op_sub_vec:
3382 case INDEX_op_and_vec:
3383 case INDEX_op_andc_vec:
3384 case INDEX_op_or_vec:
3385 case INDEX_op_orc_vec:
3386 case INDEX_op_xor_vec:
3387 case INDEX_op_nand_vec:
3388 case INDEX_op_nor_vec:
3389 case INDEX_op_eqv_vec:
3390 case INDEX_op_cmp_vec:
3391 case INDEX_op_mul_vec:
3392 case INDEX_op_rotlv_vec:
3393 case INDEX_op_rotrv_vec:
3394 case INDEX_op_shlv_vec:
3395 case INDEX_op_shrv_vec:
3396 case INDEX_op_sarv_vec:
3397 case INDEX_op_smax_vec:
3398 case INDEX_op_smin_vec:
3399 case INDEX_op_umax_vec:
3400 case INDEX_op_umin_vec:
3401 case INDEX_op_s390_vpks_vec:
3402 return C_O1_I2(v, v, v);
3403 case INDEX_op_rotls_vec:
3404 case INDEX_op_shls_vec:
3405 case INDEX_op_shrs_vec:
3406 case INDEX_op_sars_vec:
3407 return C_O1_I2(v, v, r);
3408 case INDEX_op_bitsel_vec:
3409 return C_O1_I3(v, v, v, v);
3410 case INDEX_op_cmpsel_vec:
3411 return (TCG_TARGET_HAS_orc_vec
3412 ? C_O1_I4(v, v, v, vZM, v)
3413 : C_O1_I4(v, v, v, vZ, v));
3416 g_assert_not_reached();
3421  * Mainline glibc added HWCAP_S390_VX before it was a kernel ABI.
3422 * Some distros have fixed this up locally, others have not.
3424 #ifndef HWCAP_S390_VXRS
3425 #define HWCAP_S390_VXRS 2048
3428 static void query_s390_facilities(void)
3430 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3433 /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
3434 is present on all 64-bit systems, but let's check for it anyway. */
3435 if (hwcap & HWCAP_S390_STFLE) {
3436 register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3437 register void *r1 __asm__("1") = s390_facilities;
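        /* stfle 0(%r1); on input r0 holds the number of doublewords
           provided at 0(%r1), minus 1. */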
3440 asm volatile(".word 0xb2b0,0x1000"
3441 : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3445      * Use of vector registers requires OS support beyond the facility bit.
3446 * If the kernel does not advertise support, disable the facility bits.
3447 * There is nothing else we currently care about in the 3rd word, so
3448 * disable VECTOR with one store.
3450 if (!(hwcap & HWCAP_S390_VXRS)) {
3451 s390_facilities[2] = 0;
3455      * Minimum supported CPU revision is z196.
3456 * Check for all required facilities.
3457 * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3459 if (!HAVE_FACILITY(LONG_DISP)) {
3460 which = "long-displacement";
3463 if (!HAVE_FACILITY(EXT_IMM)) {
3464 which = "extended-immediate";
3467 if (!HAVE_FACILITY(GEN_INST_EXT)) {
3468 which = "general-instructions-extension";
3472 * Facility 45 is a big bin that contains: distinct-operands,
3473 * fast-BCR-serialization, high-word, population-count,
3474 * interlocked-access-1, and load/store-on-condition-1
3476 if (!HAVE_FACILITY(45)) {
3483 error_report("%s: missing required facility %s", __func__, which);
3487 static void tcg_target_init(TCGContext *s)
3489 query_s390_facilities();
3491 tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3492 tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3493 if (HAVE_FACILITY(VECTOR)) {
3494 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3495 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3498 tcg_target_call_clobber_regs = 0;
3499 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3500 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3501 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3502 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3503 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3504 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3505 /* The r6 register is technically call-saved, but it's also a parameter
3506 register, so it can get killed by setup for the qemu_st helper. */
3507 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3508 /* The return register can be considered call-clobbered. */
3509 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3511 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3512 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3513 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3514 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3515 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3516 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3517 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3518 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3519 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3520 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3521 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3522 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3523 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3524 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3525 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3526 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3527 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3528 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3529 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3530 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3531 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3532 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3533 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3534 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3536 s->reserved_regs = 0;
3537 tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3538 tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
3539     /* XXX many insns can't be used with R0, so we'd better avoid it for now */
3540 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3541 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3544 #define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
3545 + TCG_STATIC_CALL_ARGS_SIZE \
3546 + CPU_TEMP_BUF_NLONGS * sizeof(long)))
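/*
 * Assuming the usual definitions (a 160-byte register save area, 128 bytes
 * of static call arguments and 128 temp-buffer longs), FRAME_SIZE works out
 * to 1312 bytes, well under the 16K limit checked by the QEMU_BUILD_BUG_ON
 * further down.
 */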
3548 static void tcg_target_qemu_prologue(TCGContext *s)
3550 /* stmg %r6,%r15,48(%r15) (save registers) */
3551 tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3553 /* aghi %r15,-frame_size */
3554 tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3556 tcg_set_frame(s, TCG_REG_CALL_STACK,
3557 TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3558 CPU_TEMP_BUF_NLONGS * sizeof(long));
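    /*
     * A guest_base below 0x80000 fits in the signed 20-bit displacement
     * of the long-displacement memory instructions, so only larger values
     * need a dedicated base register.
     */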
3560 if (!tcg_use_softmmu && guest_base >= 0x80000) {
3561 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3562 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3565 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3567 /* br %r3 (go to TB) */
3568 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3571 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3572 * and fall through to the rest of the epilogue.
3574 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3575 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3578 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3580 /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3581 tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3584 /* br %r14 (return) */
3585 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3588 static void tcg_out_tb_start(TCGContext *s)
3593 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
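    /* 0x07 is the BCR opcode; with a zero mask the branch is never taken,
       so the 0x0707 halfwords written below act as two-byte nops. */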
3595 memset(p, 0x07, count * sizeof(tcg_insn_unit));
3600 uint8_t fde_def_cfa[4];
3601 uint8_t fde_reg_ofs[18];
3604 /* We're expecting a 2-byte uleb128-encoded value.  */
3605 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3607 #define ELF_HOST_MACHINE EM_S390
3609 static const DebugFrame debug_frame = {
3610 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3613 .h.cie.code_align = 1,
3614 .h.cie.data_align = 8, /* sleb128 8 */
3615 .h.cie.return_column = TCG_REG_R14,
3617 /* Total FDE size does not include the "len" member. */
3618 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3621 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */
3622 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3626 0x86, 6, /* DW_CFA_offset, %r6, 48 */
3627 0x87, 7, /* DW_CFA_offset, %r7, 56 */
3628 0x88, 8, /* DW_CFA_offset, %r8, 64 */
3629     0x89, 9,                      /* DW_CFA_offset, %r9, 72 */
3630 0x8a, 10, /* DW_CFA_offset, %r10, 80 */
3631 0x8b, 11, /* DW_CFA_offset, %r11, 88 */
3632 0x8c, 12, /* DW_CFA_offset, %r12, 96 */
3633 0x8d, 13, /* DW_CFA_offset, %r13, 104 */
3634 0x8e, 14, /* DW_CFA_offset, %r14, 112 */
3638 void tcg_register_jit(const void *buf, size_t buf_size)
3640 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));