From 761d48d2b89a048badbd9c5cf45da75336891cfe Mon Sep 17 00:00:00 2001 From: Zoltan Herczeg Date: Wed, 11 May 2022 04:00:02 +0000 Subject: [PATCH] Improve constant generation Also implement CLZ. --- sljit_src/sljitLir.c | 3 +- sljit_src/sljitNativeMIPS_common.c | 4 --- sljit_src/sljitNativeRISCV_64.c | 39 ++++++++++++++------- sljit_src/sljitNativeRISCV_common.c | 70 +++++++++++++++++++++++++++++++++---- test_src/sljitTest.c | 50 ++++++++++++++++++-------- 5 files changed, 127 insertions(+), 39 deletions(-) diff --git a/sljit_src/sljitLir.c b/sljit_src/sljitLir.c index eeb9ee9..a434443 100644 --- a/sljit_src/sljitLir.c +++ b/sljit_src/sljitLir.c @@ -236,7 +236,8 @@ #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) # define PATCH_REL32 0x040 # define PATCH_ABS32 0x080 -# define PATCH_ABS52 0x100 +# define PATCH_ABS44 0x100 +# define PATCH_ABS52 0x200 #else /* !SLJIT_CONFIG_RISCV_64 */ # define PATCH_REL32 0x0 #endif /* SLJIT_CONFIG_RISCV_64 */ diff --git a/sljit_src/sljitNativeMIPS_common.c b/sljit_src/sljitNativeMIPS_common.c index 51a30bf..f2e8328 100644 --- a/sljit_src/sljitNativeMIPS_common.c +++ b/sljit_src/sljitNativeMIPS_common.c @@ -1487,10 +1487,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl if (!(flags & UNUSED_DEST)) FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); #else /* SLJIT_MIPS_REV < 1 */ - if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { - FAIL_IF(push_inst(compiler, SELECT_OP(DSRL32, SRL) | T(src2) | DA(EQUAL_FLAG) | SH_IMM(31), EQUAL_FLAG)); - return push_inst(compiler, XORI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG); - } /* Nearly all instructions are unmovable in the following sequence. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); /* Check zero. */ diff --git a/sljit_src/sljitNativeRISCV_64.c b/sljit_src/sljitNativeRISCV_64.c index 68b1662..16a5f5f 100644 --- a/sljit_src/sljitNativeRISCV_64.c +++ b/sljit_src/sljitNativeRISCV_64.c @@ -35,7 +35,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r if (imm > S32_MAX) { SLJIT_ASSERT((imm & 0x800) != 0); FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); - return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm & 0xfff)); + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); } if ((imm & 0x800) != 0) @@ -51,25 +51,40 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r /* Trailing zeroes could be used to produce shifted immediates. */ - high = imm >> 32; - if ((imm & 0x80000000l) != 0) { - if (high == 0) { - if ((imm & 0x800) != 0) - imm += 0x1000; + if (imm <= 0x7ffffffffffl && imm >= -0x80000000000l) { + high = imm >> 12; + + if (imm & 0x800) + high = ~high; - FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(imm & ~0xfff))); + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)0x80000000u)); + FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); + } else { + if ((high & 0x800) != 0) + high += 0x1000; - if ((imm & 0xfff) != 0) - FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(imm))); + FAIL_IF(push_inst(compiler, LUI | RD(dst_r) | (sljit_ins)(high & ~0xfff))); - FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(32))); - return push_inst(compiler, SRLI | RD(dst_r) | RS1(dst_r) | IMM_I(32)); + if ((high & 0xfff) != 0) + FAIL_IF(push_inst(compiler, ADDI | RD(dst_r) | RS1(dst_r) | IMM_I(high))); } - high = ~high; + + FAIL_IF(push_inst(compiler, SLLI | RD(dst_r) | RS1(dst_r) | IMM_I(12))); + + if ((imm & 0xfff) != 0) + return push_inst(compiler, XORI | RD(dst_r) | RS1(dst_r) | IMM_I(imm)); + + return SLJIT_SUCCESS; } + high = imm >> 32; imm = (sljit_s32)imm; + if ((imm & 0x80000000l) != 0) + high = ~high; + if (high <= 0x7ffff && high >= -0x80000) { FAIL_IF(push_inst(compiler, LUI | RD(tmp_r) | (sljit_ins)(high << 12))); high = 0x1000; diff --git a/sljit_src/sljitNativeRISCV_common.c b/sljit_src/sljitNativeRISCV_common.c index a16b393..7423109 100644 --- a/sljit_src/sljitNativeRISCV_common.c +++ b/sljit_src/sljitNativeRISCV_common.c @@ -145,6 +145,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { #if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) #define S32_MAX (0x7ffff7ffl) #define S32_MIN (-0x80000000l) +#define S44_MAX (0x7fffffff7ffl) #define S52_MAX (0x7ffffffffffffl) #endif @@ -229,6 +230,15 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i return inst + 1; } + if (target_addr <= S44_MAX) { + if (jump->flags & IS_COND) + inst[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7; + + jump->flags |= PATCH_ABS44; + inst[3] = inst[0]; + return inst + 4; + } + if (target_addr <= S52_MAX) { if (jump->flags & IS_COND) inst[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7; @@ -258,6 +268,11 @@ static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_la return 1; } + if (max_label <= S44_MAX) { + put_label->flags = PATCH_ABS44; + return 3; + } + if (max_label <= S52_MAX) { put_label->flags = PATCH_ABS52; return 4; @@ -301,18 +316,37 @@ static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg) #if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); #else /* !SLJIT_CONFIG_RISCV_32 */ - high = (sljit_sw)addr >> 32; - - if ((addr & 0x80000000l) != 0) - high = ~high; - - if ((high & 0x800) != 0) - high += 0x1000; if (flags & PATCH_ABS32) { SLJIT_ASSERT(addr <= S32_MAX); inst[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff); + } else if (flags & PATCH_ABS44) { + high = (sljit_sw)addr >> 12; + SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff); + + if (high > S32_MAX) { + SLJIT_ASSERT((high & 0x800) != 0); + inst[0] = LUI | RD(reg) | (sljit_ins)0x80000000u; + inst[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high); + } else { + if ((high & 0x800) != 0) + high += 0x1000; + + inst[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff); + inst[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high); + } + + inst[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12); + inst += 2; } else { + high = (sljit_sw)addr >> 32; + + if ((addr & 0x80000000l) != 0) + high = ~high; + + if ((high & 0x800) != 0) + high += 0x1000; + if (flags & PATCH_ABS52) { SLJIT_ASSERT(addr <= S52_MAX); inst[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12); @@ -1072,6 +1106,28 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #endif /* SLJIT_CONFIG_RISCV_64 */ case SLJIT_CLZ: + SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); + /* Nearly all instructions are unmovable in the following sequence. */ +#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(32))); +#else /* !SLJIT_CONFIG_RISCV_32 */ + if (op & SLJIT_32) { + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(src2) | IMM_I(32))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(32))); + } else { + FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src2) | IMM_I(0))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(64))); + } +#endif /* SLJIT_CONFIG_RISCV_32 */ + /* Check zero. */ + FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(6 * SSIZE_OF(ins)) << 7))); + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(TMP_ZERO) | IMM_I(0))); + FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(4 * SSIZE_OF(ins)) << 7))); + /* Loop for searching the highest bit. */ + FAIL_IF(push_inst(compiler, ADDI | RD(dst) | RS1(dst) | IMM_I(1))); + FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1))); + FAIL_IF(push_inst(compiler, BGE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(0x1fc001d - 1 * SSIZE_OF(ins)) << 7))); return SLJIT_SUCCESS; case SLJIT_ADD: diff --git a/test_src/sljitTest.c b/test_src/sljitTest.c index cc6162d..20782d1 100644 --- a/test_src/sljitTest.c +++ b/test_src/sljitTest.c @@ -6316,16 +6316,22 @@ static void test64(void) /* Test put label with absolute label addresses */ executable_code code; sljit_uw malloc_addr; - struct sljit_label label[4]; + struct sljit_label label[6]; struct sljit_put_label *put_label[2]; struct sljit_compiler* compiler; - sljit_uw buf[5]; + sljit_uw buf[7]; + sljit_s32 i; #if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) - sljit_sw offs1 = SLJIT_W(0x123456781122); - sljit_sw offs2 = SLJIT_W(0x1234567811223344); + /* Must be even because it is also used for addressing. */ + sljit_sw offs1 = SLJIT_W(0x1234567812l); + sljit_sw offs2 = SLJIT_W(0x123456781122l); + sljit_sw offs3 = SLJIT_W(0x7fffffff7ffl); + sljit_sw offs4 = SLJIT_W(0x1234567811223344l); #else /* !SLJIT_64BIT_ARCHITECTURE */ - sljit_sw offs1 = 0x12345678; - sljit_sw offs2 = (sljit_sw)0x80000000; + sljit_sw offs1 = (sljit_sw)0x80000000; + sljit_sw offs2 = (sljit_sw)0xe0000000; + sljit_sw offs3 = (sljit_sw)0x87654321; + sljit_sw offs4 = (sljit_sw)0xffffffff; #endif /* SLJIT_64BIT_ARCHITECTURE */ if (verbose) @@ -6359,12 +6365,16 @@ static void test64(void) label[3].addr = (sljit_uw)offs2; label[3].size = (sljit_uw)offs2 - malloc_addr; + label[4].addr = (sljit_uw)offs3; + label[4].size = (sljit_uw)offs3 - malloc_addr; + + label[5].addr = (sljit_uw)offs4; + label[5].size = (sljit_uw)offs4 - malloc_addr; + FAILED(!compiler, "cannot create compiler\n"); - buf[0] = 0; - buf[1] = 0; - buf[2] = 0; - buf[3] = 0; - buf[4] = 0; + + for (i = 0; i < 6; i++) + buf[i] = 0; sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, P), 3, 1, 0, 0, 2 * sizeof(sljit_sw)); @@ -6386,12 +6396,20 @@ static void test64(void) sljit_set_put_label(put_label[0], &label[1]); sljit_set_put_label(put_label[1], &label[1]); - put_label[0] = sljit_emit_put_label(compiler, SLJIT_R2, 0); + put_label[0] = sljit_emit_put_label(compiler, SLJIT_R1, 0); sljit_set_put_label(put_label[0], &label[2]); - sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_uw), SLJIT_R2, 0); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 4 * sizeof(sljit_uw), SLJIT_R1, 0); - put_label[0] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); + put_label[0] = sljit_emit_put_label(compiler, SLJIT_R2, 0); sljit_set_put_label(put_label[0], &label[3]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 5 * sizeof(sljit_uw), SLJIT_R2, 0); + + put_label[0] = sljit_emit_put_label(compiler, SLJIT_R1, 0); + sljit_set_put_label(put_label[0], &label[4]); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S0), 6 * sizeof(sljit_uw), SLJIT_R1, 0); + + put_label[0] = sljit_emit_put_label(compiler, SLJIT_RETURN_REG, 0); + sljit_set_put_label(put_label[0], &label[5]); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); code.code = sljit_generate_code(compiler); @@ -6399,12 +6417,14 @@ static void test64(void) sljit_free_compiler(compiler); SLJIT_ASSERT(SLJIT_FUNC_UADDR(code.code) >= malloc_addr && SLJIT_FUNC_UADDR(code.code) <= malloc_addr + 8); - FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)label[3].addr, "test64 case 1 failed\n"); + FAILED(code.func1((sljit_sw)&buf) != (sljit_sw)label[5].addr, "test64 case 1 failed\n"); FAILED(buf[0] != label[0].addr, "test64 case 2 failed\n"); FAILED(buf[1] != label[0].addr, "test64 case 3 failed\n"); FAILED(buf[2] != label[1].addr, "test64 case 4 failed\n"); FAILED(buf[3] != label[1].addr, "test64 case 5 failed\n"); FAILED(buf[4] != label[2].addr, "test64 case 6 failed\n"); + FAILED(buf[5] != label[3].addr, "test64 case 7 failed\n"); + FAILED(buf[6] != label[4].addr, "test64 case 8 failed\n"); sljit_free_code(code.code, NULL); -- 2.11.4.GIT