From b051ccf4bc410722c9a5cca9f2365cd8cd96c772 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 29 Oct 2024 19:42:17 +0100 Subject: [PATCH] codegen: keep the registers holding boolean values extended, so that we don't have to extend them when performing the logical operations --- c1-alpha.inc | 1 + c1-arm.inc | 1 + c1-arm64.inc | 1 + c1-hppa.inc | 1 + c1-ia64.inc | 1 + c1-loong.inc | 1 + c1-mips.inc | 1 + c1-power.inc | 1 + c1-riscv.inc | 1 + c1-s390.inc | 1 + c1-sparc.inc | 1 + c1-x86.inc | 1 + cg-frame.inc | 54 +++++++++++++++++++++++++++++------------------------- 13 files changed, 41 insertions(+), 25 deletions(-) diff --git a/c1-alpha.inc b/c1-alpha.inc index a1b4459..d44a4cc 100644 --- a/c1-alpha.inc +++ b/c1-alpha.inc @@ -47,6 +47,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) == 0 || (bits) == 2 || (bits) == 3) #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_8 +#define ARCH_BOOL_SIZE OP_SIZE_8 #define ARCH_HAS_FP_GP_MOV cpu_test_feature(CPU_FEATURE_fix) #define ARCH_NEEDS_BARRIER thread_needs_barriers diff --git a/c1-arm.inc b/c1-arm.inc index cd4064a..69d0abf 100644 --- a/c1-arm.inc +++ b/c1-arm.inc @@ -49,6 +49,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 1 #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE 32 +#define ARCH_BOOL_SIZE OP_SIZE_4 #define ARCH_HAS_FP_GP_MOV 1 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-arm64.inc b/c1-arm64.inc index ea5ca14..2d2f5aa 100644 --- a/c1-arm64.inc +++ b/c1-arm64.inc @@ -45,6 +45,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 1 #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_4 #define ARCH_HAS_FP_GP_MOV 1 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-hppa.inc b/c1-hppa.inc index 403287e..a060be8 100644 --- a/c1-hppa.inc +++ b/c1-hppa.inc @@ -60,6 +60,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3) #define ARCH_HAS_BTX(btx, size, cnst) (((btx) == BTX_BTS || (btx) == BTX_BTR || (btx) == BTX_BTEXT) && (((size) >= OP_SIZE_4))) #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 0 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-ia64.inc b/c1-ia64.inc index 18305b4..90a40ea 100644 --- a/c1-ia64.inc +++ b/c1-ia64.inc @@ -45,6 +45,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 4) #define ARCH_HAS_BTX(btx, size, cnst) (((btx) == BTX_BTS || (btx) == BTX_BTR) && (cnst)) #define ARCH_SHIFT_SIZE 32 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 1 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-loong.inc b/c1-loong.inc index 0be44b2..bf671d5 100644 --- a/c1-loong.inc +++ b/c1-loong.inc @@ -45,6 +45,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 0 #define ARCH_HAS_BTX(btx, size, cnst) (((btx) == BTX_BTR || (btx) == BTX_BTEXT) && (cnst)) #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 1 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-mips.inc b/c1-mips.inc index 22c9b0d..e91a169 100644 --- a/c1-mips.inc +++ b/c1-mips.inc @@ -56,6 +56,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) (MIPS_R6 && (bits) >= 1 && (bits) <= 4) #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 1 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-power.inc b/c1-power.inc index c8fe469..526d094 100644 --- a/c1-power.inc +++ b/c1-power.inc @@ -80,6 +80,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 0 #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_16 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 0 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-riscv.inc b/c1-riscv.inc index 9db3775..aa86054 100644 --- a/c1-riscv.inc +++ b/c1-riscv.inc @@ -45,6 +45,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3 && cpu_test_feature(CPU_FEATURE_zba)) #define ARCH_HAS_BTX(btx, size, cnst) (((size) == OP_SIZE_8 || (cnst)) && cpu_test_feature(CPU_FEATURE_zbs)) #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 0 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-s390.inc b/c1-s390.inc index ce51660..d1e1e86 100644 --- a/c1-s390.inc +++ b/c1-s390.inc @@ -56,6 +56,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 0 #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_8 +#define ARCH_BOOL_SIZE OP_SIZE_4 #define ARCH_HAS_FP_GP_MOV 0 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-sparc.inc b/c1-sparc.inc index 90c7348..de3bafc 100644 --- a/c1-sparc.inc +++ b/c1-sparc.inc @@ -54,6 +54,7 @@ #define ARCH_HAS_SHIFTED_ADD(bits) 0 #define ARCH_HAS_BTX(btx, size, cnst) 0 #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE OP_SIZE_NATIVE #define ARCH_HAS_FP_GP_MOV 0 #define ARCH_NEEDS_BARRIER 0 diff --git a/c1-x86.inc b/c1-x86.inc index 4d5d959..e2c013b 100644 --- a/c1-x86.inc +++ b/c1-x86.inc @@ -54,6 +54,7 @@ #define ARCH_HAS_BTX(btx, size, cnst) ((btx) != BTX_BTEXT && (size) >= OP_SIZE_2) #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3) #define ARCH_SHIFT_SIZE OP_SIZE_4 +#define ARCH_BOOL_SIZE log_2(sizeof(ajla_flat_option_t)) #define ARCH_HAS_FP_GP_MOV cpu_test_feature(CPU_FEATURE_sse2) #define ARCH_NEEDS_BARRIER 0 diff --git a/cg-frame.inc b/cg-frame.inc index baa7f20..f009772 100644 --- a/cg-frame.inc +++ b/cg-frame.inc @@ -29,6 +29,30 @@ endian not defined #endif +static const struct type *get_type_of_local(struct codegen_context *ctx, frame_t pos) +{ + const struct type *t; + const struct data *function = ctx->fn; + t = da(function,function)->local_variables[pos].type; + if (t) + TYPE_TAG_VALIDATE(t->tag); + return t; +} + +static unsigned real_type_to_op_size(unsigned real_type) +{ + switch (real_type) { + case 0: return OP_SIZE_2; + case 1: return OP_SIZE_4; + case 2: return OP_SIZE_8; + case 3: return OP_SIZE_10; + case 4: return OP_SIZE_16; + default: + internal(file_line, "real_type_to_op_size: invalid type %u", real_type); + return 0; + } +} + static bool attr_w gen_frame_address(struct codegen_context *ctx, frame_t slot, int64_t offset, unsigned reg) { @@ -173,12 +197,16 @@ ret: static bool attr_w gen_frame_get(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg, unsigned *dest) { + const struct type *t = get_type_of_local(ctx, slot); ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_get: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn))); if (ctx->registers[slot] >= 0) { unsigned reg = ctx->registers[slot]; if (ex != garbage && size < OP_SIZE_NATIVE && !reg_is_fp(reg)) { + if (t->tag == TYPE_TAG_flat_option && size <= ARCH_BOOL_SIZE) + goto skip_extend; g(gen_extend(ctx, size, ex, reg, reg)); } +skip_extend: *dest = reg; goto ret; } @@ -186,7 +214,7 @@ static bool attr_w gen_frame_get(struct codegen_context *ctx, unsigned size, enu g(gen_frame_load(ctx, size, ex, slot, offset, reg)); ret: #ifdef DEBUG_GARBAGE - if (size < OP_SIZE_NATIVE && ex == garbage) { + if (size < OP_SIZE_NATIVE && ex == garbage && t->tag != TYPE_TAG_flat_option) { uint64_t mask; g(gen_extend(ctx, size, zero_x, *dest, *dest)); mask = (rand()) | ((uint64_t)rand() << 31) | ((uint64_t)rand() << 62); @@ -700,30 +728,6 @@ static bool attr_w gen_frame_load_cmp_imm_set_cond(struct codegen_context *ctx, return true; } -static const struct type *get_type_of_local(struct codegen_context *ctx, frame_t pos) -{ - const struct type *t; - const struct data *function = ctx->fn; - t = da(function,function)->local_variables[pos].type; - if (t) - TYPE_TAG_VALIDATE(t->tag); - return t; -} - -static unsigned real_type_to_op_size(unsigned real_type) -{ - switch (real_type) { - case 0: return OP_SIZE_2; - case 1: return OP_SIZE_4; - case 2: return OP_SIZE_8; - case 3: return OP_SIZE_10; - case 4: return OP_SIZE_16; - default: - internal(file_line, "real_type_to_op_size: invalid type %u", real_type); - return 0; - } -} - static unsigned spill_size(const struct type *t) { if (TYPE_TAG_IS_REAL(t->tag)) { -- 2.11.4.GIT