/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
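
/*
 * Convert a member of struct frame_struct into an offset relative to
 * R_FRAME; the generated code apparently keeps R_FRAME pointing
 * frame_offset bytes into the current frame structure.
 */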
#define frame_offs(x)	((ssize_t)offsetof(struct frame_struct, x) - (ssize_t)frame_offset)
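
/*
 * Byte offsets of the low and high words of a pair of (1 << size)-byte
 * values in memory; used by gen_frame_load_2/gen_frame_store_2 below.
 */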
#if defined(C_LITTLE_ENDIAN)
#define lo_word(size)	(0)
#define hi_word(size)	((size_t)1 << (size))
#elif defined(C_BIG_ENDIAN)
#define lo_word(size)	((size_t)1 << (size))
#define hi_word(size)	(0)
#else
#error "unknown endianness"
#endif
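
/* Compute the address of frame slot 'slot' plus 'offset' into 'reg'. */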
static bool attr_w gen_frame_address(struct codegen_context *ctx, frame_t slot, int64_t offset, unsigned reg)
{
	offset += (size_t)slot * slot_size;
	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, reg, R_FRAME, offset, 0));
	return true;
}
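
/*
 * Load a frame slot into 'reg' directly from memory, bypassing the
 * register allocator. The per-architecture cases below work around
 * missing sub-word loads (Alpha without BWX), missing 64-bit FP loads
 * (MIPS), and loads that do not extend the way the caller requested.
 */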
static bool attr_w gen_frame_load_raw(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg)
{
	int64_t x_offset;
	if (ex == garbage || ex == native) {
		if (size < OP_SIZE_NATIVE)
			ex = ARCH_PREFERS_SX(size) ? sign_x : zero_x;
		else
			ex = zero_x;
	}
	x_offset = offset + (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX && size < OP_SIZE_4) {
		g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
		gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#if defined(ARCH_ALPHA)
	if (size < OP_SIZE_4) {
		g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		if (ex == sign_x)
			g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
	if (size == OP_SIZE_4 && !reg_is_fp(reg) && ex == zero_x) {
		g(gen_frame_load_raw(ctx, size, sign_x, slot, offset, reg));
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_MIPS)
	if (reg_is_fp(reg) && size == OP_SIZE_8 && !MIPS_HAS_LS_DOUBLE) {
#if defined(C_LITTLE_ENDIAN)
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset, reg));
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset + 4, reg + 1));
#else
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset, reg + 1));
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset + 4, reg));
#endif
		return true;
	}
#endif
#if defined(ARCH_IA64) || defined(ARCH_PARISC)
	if (ex == sign_x) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_POWER)
	if (size == OP_SIZE_1 && ex == sign_x) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_S390)
	if (size == OP_SIZE_1 && !cpu_test_feature(CPU_FEATURE_long_displacement)) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_0_8, 0);
		gen_one(reg);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
	if (size == OP_SIZE_16 && reg_is_fp(reg)) {
		g(gen_frame_load_raw(ctx, OP_SIZE_8, zero_x, 0, x_offset, reg));
		g(gen_frame_load_raw(ctx, OP_SIZE_8, zero_x, 0, x_offset + 8, reg + 2));
		return true;
	}
#endif
	g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : ex ? IMM_PURPOSE_LDR_SX_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(unlikely(ex == sign_x) ? INSN_MOVSX : INSN_MOV, size, 0, 0);
	gen_one(reg);
	gen_address_offset();
	return true;
}
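
/*
 * Load a frame slot, using the allocated register directly when the slot
 * lives in one. When garbage-filling diagnostics are enabled, a load with
 * 'garbage' extension is followed by ORing a random constant into the
 * unused upper bits, so code that depends on them fails fast.
 */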
static bool attr_w gen_frame_load(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_load: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		if (ex != garbage && size < OP_SIZE_NATIVE && !reg_is_fp(reg)) {
			g(gen_extend(ctx, size, ex, reg, ctx->registers[slot]));
			return true;
		}
		g(gen_mov(ctx, !reg_is_fp(reg) ? OP_SIZE_NATIVE : size, reg, ctx->registers[slot]));
		return true;
	}
	g(gen_frame_load_raw(ctx, size, ex, slot, offset, reg));
#ifdef DEBUG_GARBAGE	/* hypothetical guard: the original condition was lost */
	if (size < OP_SIZE_NATIVE && ex == garbage) {
		uint64_t mask;
		g(gen_extend(ctx, size, zero_x, reg, reg));
		mask = (rand()) | ((uint64_t)rand() << 31) | ((uint64_t)rand() << 62);
		mask <<= 8ULL << size;
		g(gen_imm(ctx, mask, IMM_PURPOSE_OR, OP_SIZE_NATIVE));
		gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
		gen_one(reg);
		gen_one(reg);
		gen_imm_offset();
	}
#endif
	return true;
}
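
/*
 * Like gen_frame_load, but returns in *dest the register that actually
 * holds the value: the allocated register if the slot lives in one
 * (avoiding a copy), otherwise 'reg' after loading into it.
 */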
static bool attr_w gen_frame_get(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg, unsigned *dest)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_get: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		unsigned reg = ctx->registers[slot];
		if (ex != garbage && size < OP_SIZE_NATIVE && !reg_is_fp(reg)) {
			g(gen_extend(ctx, size, ex, reg, reg));
		}
		*dest = reg;
		return true;
	}
	*dest = reg;
	g(gen_frame_load(ctx, size, ex, slot, offset, reg));
#ifdef DEBUG_GARBAGE	/* hypothetical guard: the original condition was lost */
	if (size < OP_SIZE_NATIVE && ex == garbage) {
		uint64_t mask;
		g(gen_extend(ctx, size, zero_x, *dest, *dest));
		mask = (rand()) | ((uint64_t)rand() << 31) | ((uint64_t)rand() << 62);
		mask <<= 8ULL << size;
		g(gen_imm(ctx, mask, IMM_PURPOSE_OR, OP_SIZE_NATIVE));
		gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
		gen_one(*dest);
		gen_one(*dest);
		gen_imm_offset();
	}
#endif
	return true;
}

#if defined(ARCH_X86)
static bool attr_w gen_frame_load_x87(struct codegen_context *ctx, unsigned insn, unsigned size, unsigned alu, frame_t slot)
{
	g(gen_address(ctx, R_FRAME, (size_t)slot * slot_size, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(insn, size, alu, 0);
	gen_address_offset();
	return true;
}

static bool attr_w gen_frame_store_x87(struct codegen_context *ctx, unsigned insn, unsigned size, frame_t slot)
{
	g(gen_address(ctx, R_FRAME, (size_t)slot * slot_size, IMM_PURPOSE_STR_OFFSET, size));
	gen_insn(insn, size, 0, 0);
	gen_address_offset();
	return true;
}
#endif
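
/*
 * Apply a two-operand ALU operation with a frame slot as the second
 * operand. x86 and s390 can use the memory operand directly; other
 * architectures load it into a scratch register first.
 */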
static bool attr_w gen_frame_load_op(struct codegen_context *ctx, unsigned size, enum extend ex, unsigned alu, unsigned writes_flags, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load_op: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (size != i_size(size) + (unsigned)zero && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		g(gen_3address_alu(ctx, i_size(size), alu, reg, reg, ctx->registers[slot], writes_flags));
		return true;
	}
#if defined(ARCH_X86) || defined(ARCH_S390)
#if defined(ARCH_S390)
	if (size >= OP_SIZE_4)
#endif
	{
		offset += (size_t)slot * slot_size;
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(size), size, alu, (alu == ALU_MUL ? ALU_WRITES_FLAGS(alu, false) : 1) | writes_flags);
		gen_one(reg);
		gen_one(reg);
		gen_address_offset();
		return true;
	}
#endif
#if defined(R_SCRATCH_NA_1)
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	g(gen_3address_alu(ctx, i_size(size), alu, reg, reg, R_SCRATCH_NA_1, writes_flags));
	return true;
#endif
}

static bool attr_w attr_unused gen_frame_load_op1(struct codegen_context *ctx, unsigned size, unsigned alu, unsigned writes_flags, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load_op1: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		g(gen_2address_alu1(ctx, size, alu, reg, ctx->registers[slot], writes_flags));
		return true;
	}
#if defined(ARCH_X86)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(INSN_ALU1 + ARCH_PARTIAL_ALU(size), size, alu, ALU1_WRITES_FLAGS(alu) | writes_flags);
	gen_one(reg);
	gen_address_offset();
	return true;
#endif
#if !defined(ARCH_X86)
	g(gen_frame_load(ctx, size, garbage, slot, offset, reg));
	g(gen_2address_alu1(ctx, size, alu, reg, reg, writes_flags));
	return true;
#endif
}
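
/*
 * Compare 'reg' against the value of a frame slot, leaving the result in
 * the flags. 'swap' exchanges the operand order of the comparison.
 */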
static bool attr_w gen_frame_load_cmp(struct codegen_context *ctx, unsigned size, bool logical, enum extend attr_unused ex, bool swap, frame_t slot, int64_t offset, unsigned reg)
{
	if (ctx->registers[slot] >= 0) {
		if (size != i_size_cmp(size) + (unsigned)zero && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
		if (!swap) {
			gen_one(reg);
			gen_one(ctx->registers[slot]);
		} else {
			gen_one(ctx->registers[slot]);
			gen_one(reg);
		}
		return true;
	}
#if defined(ARCH_S390) || defined(ARCH_X86)
#if defined(ARCH_S390)
	if (size < OP_SIZE_4)
		goto scratch_cmp;	/* hypothetical label; the original name was lost */
#endif
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	if (!swap) {
		gen_one(reg);
		gen_address_offset();
	} else {
		gen_address_offset();
		gen_one(reg);
	}
	return true;
#endif
#if defined(R_SCRATCH_NA_1)
#if defined(ARCH_S390)
scratch_cmp:
#endif
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
	if (!swap) {
		gen_one(reg);
		gen_one(R_SCRATCH_NA_1);
	} else {
		gen_one(R_SCRATCH_NA_1);
		gen_one(reg);
	}
	return true;
#endif
}
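
/*
 * Compare a frame slot against a constant. On x86 the comparison can use
 * a memory operand; on s390 a one-byte logical compare can use the
 * MVI/CLI-style addressing.
 */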
static bool attr_w gen_frame_load_cmp_imm(struct codegen_context *ctx, unsigned size, bool logical, enum extend attr_unused ex, frame_t slot, int64_t offset, int64_t value)
{
	if (ctx->registers[slot] >= 0) {
#if defined(ARCH_X86)
		g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
		gen_insn(INSN_CMP, size, 0, 1 + logical);
		gen_one(ctx->registers[slot]);
		gen_imm_offset();
#else
		if (size != i_size(size) + (unsigned)zero && size < OP_SIZE_4 && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
		gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
		gen_one(ctx->registers[slot]);
		gen_imm_offset();
#endif
		return true;
	}
#if defined(ARCH_X86)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_MVI_CLI_OFFSET, size));
	g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	gen_address_offset();
	gen_imm_offset();
	return true;
#endif
#if defined(ARCH_S390)
	if (size != OP_SIZE_1 || !logical)
		goto scratch_cmp_imm;	/* hypothetical label; the original name was lost */
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_MVI_CLI_OFFSET, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	gen_address_offset();
	gen_one(ARG_IMM);
	gen_eight((int8_t)value);
	return true;
#endif
#if defined(R_SCRATCH_NA_1)
#if defined(ARCH_S390)
scratch_cmp_imm:
#endif
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
	gen_insn(INSN_CMP, i_size(size), 0, 1 + logical);
	gen_one(R_SCRATCH_NA_1);
	gen_imm_offset();
	return true;
#endif
}
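
/*
 * Load two consecutive words of a frame slot into reg1 (low word) and
 * reg2 (high word), using a paired load where the architecture has one.
 */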
static bool attr_w gen_frame_load_2(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg1, unsigned reg2)
{
#if defined(ARCH_ARM64)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
	gen_insn(INSN_LDP, size, 0, 0);
	gen_one(reg1);
	gen_one(reg2);
	gen_address_offset();
	return true;
#endif
#if defined(ARCH_ARM32)
	if (likely(!(reg1 & 1)) && likely(reg2 == reg1 + 1) && likely(cpu_test_feature(CPU_FEATURE_armv6)))
#elif defined(ARCH_SPARC32)
	if (likely(!(reg2 & 1)) && likely(reg1 == reg2 + 1))
#elif defined(ARCH_S390)
	if (likely(reg1 == reg2 + 1))
#else
	if (0)
#endif
	{
		offset += (size_t)slot * slot_size;
		if (UNALIGNED_TRAP) {
			if (unlikely((offset & ((2U << size) - 1)) != 0)) {
				offset -= (size_t)slot * slot_size;
				goto load_separately;	/* hypothetical label; the original name was lost */
			}
		}
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
		gen_insn(INSN_LDP, size, 0, 0);
		gen_one(reg1);
		gen_one(reg2);
		gen_address_offset();
		return true;
	}
load_separately:
	g(gen_frame_load(ctx, size, garbage, slot, offset + lo_word(size), reg1));
	g(gen_frame_load(ctx, size, garbage, slot, offset + hi_word(size), reg2));
	return true;
}
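
/*
 * Store 'reg' into a frame slot directly, without consulting the register
 * allocator. Sub-word stores are widened on targets without byte/word
 * stores (!ARCH_HAS_BWX).
 */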
static bool attr_w gen_frame_store_raw(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg)
{
	offset += (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX)
		size = maximum(OP_SIZE_4, size);
#if defined(ARCH_MIPS)
	if (reg_is_fp(reg) && size == OP_SIZE_8 && !MIPS_HAS_LS_DOUBLE) {
#if defined(C_LITTLE_ENDIAN)
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset, reg));
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset + 4, reg + 1));
#else
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset, reg + 1));
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset + 4, reg));
#endif
		return true;
	}
#endif
#if defined(ARCH_S390)
	if (size == OP_SIZE_16 && reg_is_fp(reg)) {
		g(gen_frame_store_raw(ctx, OP_SIZE_8, 0, offset, reg));
		g(gen_frame_store_raw(ctx, OP_SIZE_8, 0, offset + 8, reg + 2));
		return true;
	}
#endif
	g(gen_address(ctx, R_FRAME, offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
	gen_insn(INSN_MOV, size, 0, 0);
	gen_address_offset();
	gen_one(reg);
	return true;
}

static bool attr_w gen_frame_store(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_store: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_store: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		g(gen_mov(ctx, !reg_is_fp(reg) ? OP_SIZE_NATIVE : size, ctx->registers[slot], reg));
		return true;
	}
	return gen_frame_store_raw(ctx, size, slot, offset, reg);
}
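
/*
 * Pick the destination register for an operation that writes slot_r:
 * use the register allocated to slot_r unless that register also backs
 * one of the not-yet-consumed source slots (slot_na_1/slot_na_2), in
 * which case fall back to the scratch register 'reg'.
 */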
static unsigned gen_frame_target(struct codegen_context *ctx, frame_t slot_r, frame_t slot_na_1, frame_t slot_na_2, unsigned reg)
{
	short d = ctx->registers[slot_r];
	if (d >= 0) {
		if (slot_na_1 != NO_FRAME_T && ctx->registers[slot_na_1] == d)
			return reg;
		if (slot_na_2 != NO_FRAME_T && ctx->registers[slot_na_2] == d)
			return reg;
		return (unsigned)d;
	}
	return reg;
}
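
/*
 * Store reg1/reg2 as the low and high words of a frame slot, mirroring
 * gen_frame_load_2.
 */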
static bool attr_w gen_frame_store_2(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg1, unsigned reg2)
{
#if defined(ARCH_ARM64)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
	gen_insn(INSN_STP, size, 0, 0);
	gen_address_offset();
	gen_one(reg1);
	gen_one(reg2);
	return true;
#endif
#if defined(ARCH_ARM32)
	if (likely(!(reg1 & 1)) && likely(reg2 == reg1 + 1) && likely(cpu_test_feature(CPU_FEATURE_armv6)))
#elif defined(ARCH_SPARC32)
	if (likely(!(reg2 & 1)) && likely(reg1 == reg2 + 1))
#elif defined(ARCH_S390)
	if (likely(reg1 == reg2 + 1))
#else
	if (0)
#endif
	{
		offset += (size_t)slot * slot_size;
		if (UNALIGNED_TRAP) {
			if (unlikely((offset & ((2U << size) - 1)) != 0)) {
				offset -= (size_t)slot * slot_size;
				goto store_separately;	/* hypothetical label; the original name was lost */
			}
		}
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
		gen_insn(INSN_STP, size, 0, 0);
		gen_address_offset();
		gen_one(reg1);
		gen_one(reg2);
		return true;
	}
store_separately:
	g(gen_frame_store(ctx, size, slot, offset + lo_word(size), reg1));
	g(gen_frame_store(ctx, size, slot, offset + hi_word(size), reg2));
	return true;
}

static bool attr_w gen_frame_store_imm_raw(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, int64_t imm)
{
	offset += (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX)
		size = maximum(OP_SIZE_4, size);
	g(gen_address(ctx, R_FRAME, offset, size == OP_SIZE_1 ? IMM_PURPOSE_MVI_CLI_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
	g(gen_imm(ctx, imm, IMM_PURPOSE_STORE_VALUE, size));
	gen_insn(INSN_MOV, size, 0, 0);
	gen_address_offset();
	gen_imm_offset();
	return true;
}

static bool attr_w gen_frame_store_imm(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, int64_t imm)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_store_imm: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_store_imm: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		if (size == OP_SIZE_1)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int8_t)imm : (int64_t)(uint8_t)imm;
		if (size == OP_SIZE_2)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int16_t)imm : (int64_t)(uint16_t)imm;
		if (size == OP_SIZE_4)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int32_t)imm : (int64_t)(uint32_t)imm;
		g(gen_load_constant(ctx, ctx->registers[slot], imm));
		return true;
	}
	return gen_frame_store_imm_raw(ctx, size, slot, offset, imm);
}

static bool attr_w gen_frame_clear_raw(struct codegen_context *ctx, unsigned size, frame_t slot)
{
	g(gen_frame_store_imm_raw(ctx, size, slot, 0, 0));
	return true;
}

static bool attr_w gen_frame_clear(struct codegen_context *ctx, unsigned size, frame_t slot)
{
	g(gen_frame_store_imm(ctx, size, slot, 0, 0));
	return true;
}
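
/*
 * Store the current condition-code result into slot_r as an
 * ajla_flat_option_t (0 or 1). x86 uses SETcc, ARM64 uses a native
 * set-condition instruction, and the generic version either branches
 * around a constant load or uses a conditional move where available.
 */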
#if defined(ARCH_X86)
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned attr_unused size, bool attr_unused logical, unsigned cond, frame_t slot)
{
	size_t offset;
	if (ctx->registers[slot] >= 0) {
		unsigned reg = ctx->registers[slot];
#if defined(ARCH_X86_32)
		if (reg >= 4) {	/* hypothetical test: only regs 0-3 are byte-addressable on x86-32 */
			gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
			gen_one(R_SCRATCH_1);
			gen_one(R_SCRATCH_1);
			g(gen_mov(ctx, OP_SIZE_1, reg, R_SCRATCH_1));
			return true;
		}
#endif
		gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
		gen_one(reg);
		gen_one(reg);
		if (sizeof(ajla_flat_option_t) > 1) {
			g(gen_mov(ctx, OP_SIZE_1, reg, reg));
		}
		return true;
	}
	offset = (size_t)slot * slot_size;
	if (sizeof(ajla_flat_option_t) > 1) {
		gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
		gen_one(R_SCRATCH_1);
		gen_one(R_SCRATCH_1);
		g(gen_mov(ctx, OP_SIZE_1, R_SCRATCH_1, R_SCRATCH_1));
		g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, R_SCRATCH_1));
	} else {
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_STR_OFFSET, OP_SIZE_1));
		gen_insn(INSN_SET_COND, OP_SIZE_1, cond, 0);
		gen_address_offset();
	}
	return true;
}
#elif defined(ARCH_ARM64)
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned attr_unused size, bool attr_unused logical, unsigned cond, frame_t slot)
{
	if (ctx->registers[slot] >= 0) {
		gen_insn(INSN_SET_COND, OP_SIZE_4, cond, 0);
		gen_one(ctx->registers[slot]);
	} else {
		gen_insn(INSN_SET_COND, OP_SIZE_4, cond, 0);
		gen_one(R_SCRATCH_1);
		g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, R_SCRATCH_1));
	}
	return true;
}
#else
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned size, bool logical, unsigned cond, frame_t slot)
{
	unsigned target = gen_frame_target(ctx, slot, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
#if defined(ARCH_POWER)
	if (!cpu_test_feature(CPU_FEATURE_v203))
#elif defined(ARCH_S390)
	if (!cpu_test_feature(CPU_FEATURE_misc_45))
#elif defined(ARCH_SPARC32)
	if (1)	/* hypothetical condition; the original test was lost */
#else
	if (0)
#endif
	{
		uint32_t label;
		g(gen_load_constant(ctx, target, 1));
		label = alloc_label(ctx);
		if (unlikely(!label))
			return false;
		gen_insn(!logical ? INSN_JMP_COND : INSN_JMP_COND_LOGICAL, i_size_cmp(size), cond, 0);
		gen_four(label);
		g(gen_load_constant(ctx, target, 0));
		gen_label(label);
	} else {
		g(gen_load_constant(ctx, target, 1));
		g(gen_imm(ctx, 0, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
		if (cond & COND_FP) {
			gen_insn(INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
		} else {
#if defined(ARCH_S390)
			gen_insn(logical ? INSN_CMOV_XCC : INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
#else
			gen_insn(size == OP_SIZE_8 ? INSN_CMOV_XCC : INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
#endif
		}
		gen_one(target);
		gen_one(target);
		gen_imm_offset();
	}
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, target));
	return true;
}
#endif

static bool attr_w gen_frame_cmp_imm_set_cond_reg(struct codegen_context *ctx, unsigned size, unsigned reg, int64_t imm, unsigned cond, frame_t slot_r)
{
	unsigned dest_reg;
	dest_reg = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_CMP_RESULT);
	g(gen_cmp_dest_reg(ctx, size, reg, (unsigned)-1, dest_reg, imm, cond));
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, dest_reg));
	return true;
}

static bool attr_w gen_frame_load_cmp_set_cond(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg, unsigned cond, frame_t slot_r)
{
#if ARCH_HAS_FLAGS
	bool logical = COND_IS_LOGICAL(cond);
	g(gen_frame_load_cmp(ctx, size, logical, ex, false, slot, offset, reg));
	g(gen_frame_set_cond(ctx, size, logical, cond, slot_r));
#else
	unsigned src_reg, dest_reg;
	g(gen_frame_get(ctx, size, ex, slot, offset, R_SCRATCH_NA_1, &src_reg));
	dest_reg = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_NA_1);
	g(gen_cmp_dest_reg(ctx, size, reg, src_reg, dest_reg, 0, cond));
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, dest_reg));
#endif
	return true;
}

static bool attr_w gen_frame_load_cmp_imm_set_cond(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, int64_t value, unsigned cond, frame_t slot_r)
{
#if ARCH_HAS_FLAGS
	bool logical = COND_IS_LOGICAL(cond);
#if defined(ARCH_S390)
	if (size == OP_SIZE_1)	/* hypothetical condition; the original test was lost */
		logical = true;
#endif
	g(gen_frame_load_cmp_imm(ctx, size, logical, ex, slot, offset, value));
	g(gen_frame_set_cond(ctx, size, false, cond, slot_r));
#else
	unsigned src_reg;
	g(gen_frame_get(ctx, size, ex, slot, offset, R_SCRATCH_NA_1, &src_reg));
	g(gen_frame_cmp_imm_set_cond_reg(ctx, size, src_reg, value, cond, slot_r));
#endif
	return true;
}

static const struct type *get_type_of_local(struct codegen_context *ctx, frame_t pos)
{
	const struct type *t;
	const struct data *function = ctx->fn;
	t = da(function,function)->local_variables[pos].type;
	if (t)
		TYPE_TAG_VALIDATE(t->tag);
	return t;
}

static unsigned real_type_to_op_size(unsigned real_type)
{
	switch (real_type) {
		case 0:	return OP_SIZE_2;
		case 1:	return OP_SIZE_4;
		case 2:	return OP_SIZE_8;
		case 3:	return OP_SIZE_10;
		case 4:	return OP_SIZE_16;
		default:
			internal(file_line, "real_type_to_op_size: invalid type %u", real_type);
			return 0;
	}
}

static unsigned spill_size(const struct type *t)
{
	if (TYPE_TAG_IS_REAL(t->tag)) {
		return real_type_to_op_size(TYPE_TAG_IDX_REAL(t->tag));
	} else {
		return log_2(t->size);
	}
}
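
/*
 * Spill a register-allocated variable to its frame slot, or reload it;
 * unspill reloads with 'garbage' extension, leaving the upper bits of
 * sub-word values unspecified.
 */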
static bool attr_w spill(struct codegen_context *ctx, frame_t v)
{
	const struct type *t = get_type_of_local(ctx, v);
	g(gen_frame_store_raw(ctx, spill_size(t), v, 0, ctx->registers[v]));
	return true;
}

static bool attr_w unspill(struct codegen_context *ctx, frame_t v)
{
	const struct type *t = get_type_of_local(ctx, v);
	g(gen_frame_load_raw(ctx, spill_size(t), garbage, v, 0, ctx->registers[v]));
	return true;
}
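
/*
 * Bracket a call out of generated code into the Ajla runtime. On the
 * x86-64 SysV ABI, floating-point variables are spilled and integer
 * variables are saved with pushes (padded to an even count, presumably
 * to keep the stack 16-byte aligned); other ABIs simply spill everything
 * that currently lives in a register.
 */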
static bool attr_w gen_upcall_start(struct codegen_context *ctx, unsigned args)
{
	size_t i;
	size_t attr_unused n_pushes;
	ajla_assert_lo(ctx->upcall_args == -1, (file_line, "gen_upcall_start: gen_upcall_end not called"));
	ctx->upcall_args = (int)args;

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (reg_is_fp(reg))
			g(spill(ctx, ctx->need_spill[i]));
	}
	n_pushes = 0;
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg)) {
			gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
			gen_one(reg);
			n_pushes++;
		}
	}
	if (n_pushes & 1) {
		gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
		gen_one(R_AX);	/* hypothetical padding register; the original choice was lost */
	}
#else
	for (i = 0; i < ctx->need_spill_l; i++)
		g(spill(ctx, ctx->need_spill[i]));
#endif
	return true;
}

static bool attr_w gen_upcall_end(struct codegen_context *ctx, unsigned args)
{
	size_t i;
	size_t attr_unused n_pushes;
	ajla_assert_lo(ctx->upcall_args == (int)args, (file_line, "gen_upcall_end: gen_upcall_start mismatch: %d", ctx->upcall_args));
	ctx->upcall_args = -1;

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	n_pushes = 0;
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg))
			n_pushes++;
	}
	if (n_pushes & 1) {
		gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
		gen_one(R_AX);	/* hypothetical padding register; must match gen_upcall_start */
	}
	for (i = ctx->need_spill_l; i;) {
		unsigned reg;
		i--;
		reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg)) {
			gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
			gen_one(reg);
		}
	}
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (reg_is_fp(reg))
			g(unspill(ctx, ctx->need_spill[i]));
	}
#else
	for (i = 0; i < ctx->need_spill_l; i++)
		g(unspill(ctx, ctx->need_spill[i]));
#endif
	return true;
}
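
/*
 * Emit a copy of 'size' bytes between two memory regions. Small copies
 * are unrolled into loads and stores (honoring alignment on targets that
 * trap on unaligned access); s390 has an inline INSN_MEMCPY form; x86-64
 * with ERMS uses an inline string copy; everything else falls back to
 * the mem_copy upcall.
 */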
static bool attr_w gen_memcpy_raw(struct codegen_context *ctx, unsigned dest_base, int64_t dest_offset, unsigned src_base, int64_t src_offset, size_t size, size_t attr_unused align)
{
	if (!size)
		return true;
	if (!ARCH_HAS_BWX) {
		if (align < 4 || (size & 3))
			goto upcall_copy;	/* hypothetical label; the original name was lost */
	}
#if defined(ARCH_S390)
	if (size <= INLINE_COPY_SIZE) {
		if (!(size & 3) || cpu_test_feature(CPU_FEATURE_extended_imm))
			goto do_explicit_copy;
	}
	if (size <= 0x100 && dest_offset >= 0 && dest_offset < 0x1000 && src_offset >= 0 && src_offset < 0x1000) {
		gen_insn(INSN_MEMCPY, 0, 0, 0);
		gen_one(ARG_ADDRESS_1);
		gen_one(dest_base);
		gen_eight(dest_offset);
		gen_one(ARG_ADDRESS_1);
		gen_one(src_base);
		gen_eight(src_offset);
		gen_one(ARG_IMM);	/* size operand; form reconstructed */
		gen_eight(size);
		return true;
	}
do_explicit_copy:
#endif

	if (size <= INLINE_COPY_SIZE) {
		while (size) {
			unsigned this_step;
			unsigned this_op_size;
#if defined(ARCH_ARM)
			if (size >= 2U << OP_SIZE_NATIVE
#if defined(ARCH_ARM32)
			    && align >= 1U << OP_SIZE_NATIVE
#endif
			    ) {
				g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_LDP, OP_SIZE_NATIVE, 0, 0);
				gen_one(R_SCRATCH_NA_1);
				gen_one(R_SCRATCH_NA_2);
				gen_address_offset();

				g(gen_address(ctx, dest_base, dest_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_one(R_SCRATCH_NA_1);
				gen_one(R_SCRATCH_NA_2);

				size -= 2U << OP_SIZE_NATIVE;
				src_offset += 2U << OP_SIZE_NATIVE;
				dest_offset += 2U << OP_SIZE_NATIVE;
				continue;
			}
#endif
			if (size >= 8 && OP_SIZE_NATIVE >= OP_SIZE_8)
				this_step = 8;
			else if (size >= 4)
				this_step = 4;
			else if (size >= 2)
				this_step = 2;
			else
				this_step = 1;
			if (UNALIGNED_TRAP)
				this_step = minimum(this_step, align);
			this_op_size = log_2(this_step);

			g(gen_address(ctx, src_base, src_offset, ARCH_PREFERS_SX(this_op_size) ? IMM_PURPOSE_LDR_SX_OFFSET : IMM_PURPOSE_LDR_OFFSET, this_op_size));
			gen_insn(ARCH_PREFERS_SX(this_op_size) ? INSN_MOVSX : INSN_MOV, this_op_size, 0, 0);
			gen_one(R_SCRATCH_1);
			gen_address_offset();

			g(gen_address(ctx, dest_base, dest_offset, IMM_PURPOSE_STR_OFFSET, this_op_size));
			gen_insn(INSN_MOV, this_op_size, 0, 0);
			gen_address_offset();
			gen_one(R_SCRATCH_1);

			size -= this_step;
			src_offset += this_step;
			dest_offset += this_step;
		}
		return true;
	}

upcall_copy:
	g(gen_upcall_start(ctx, 3));
	if (unlikely(R_ARG0 == src_base)) {
		if (unlikely(R_ARG1 == dest_base))
			internal(file_line, "gen_memcpy_raw: swapped registers: %u, %u", src_base, dest_base);
		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG1, src_base, src_offset, 0));
		g(gen_upcall_argument(ctx, 1));
	}

	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG0, dest_base, dest_offset, 0));
	g(gen_upcall_argument(ctx, 0));

	if (R_ARG0 != src_base) {
		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG1, src_base, src_offset, 0));
		g(gen_upcall_argument(ctx, 1));
	}

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	if (cpu_test_feature(CPU_FEATURE_erms)) {
		g(gen_load_constant(ctx, R_CX, size));

		gen_insn(INSN_MEMCPY, 0, 0, 0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_DI);
		gen_eight(0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_SI);
		gen_eight(0);
		gen_one(R_CX);	/* count operand; form reconstructed */
		g(gen_upcall_end(ctx, 3));
		return true;
	}
#endif

	g(gen_load_constant(ctx, R_ARG2, size));
	g(gen_upcall_argument(ctx, 2));

	g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, mem_copy), 3));
	return true;
}
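
/*
 * Copy a value from memory into a frame slot, honoring the slot's
 * register assignment if it has one.
 */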
static bool attr_w gen_memcpy_to_slot(struct codegen_context *ctx, frame_t dest_slot, unsigned src_base, int64_t src_offset)
{
	const struct type *t = get_type_of_local(ctx, dest_slot);
	unsigned size = spill_size(t);
	short dest_reg = ctx->registers[dest_slot];
	if (dest_reg >= 0) {
		if (ARCH_PREFERS_SX(size) && !reg_is_fp(dest_reg)) {
#if defined(ARCH_S390)
			if (size == OP_SIZE_1 && !cpu_test_feature(CPU_FEATURE_long_displacement)) {
				g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDR_OFFSET, size));
				gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_0_8, 0);
				gen_one(dest_reg);
				gen_one(dest_reg);
				gen_address_offset();
				g(gen_extend(ctx, size, sign_x, dest_reg, dest_reg));
				return true;
			}
#endif
			g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDR_SX_OFFSET, size));
			gen_insn(INSN_MOVSX, size, 0, 0);
		} else {
			g(gen_address(ctx, src_base, src_offset, reg_is_fp(dest_reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
			gen_insn(INSN_MOV, size, 0, 0);
		}
		gen_one(dest_reg);
		gen_address_offset();
		return true;
	}
	g(gen_memcpy_raw(ctx, R_FRAME, (size_t)dest_slot * slot_size, src_base, src_offset, t->size, t->align));
	return true;
}

static bool attr_w gen_memcpy_from_slot(struct codegen_context *ctx, unsigned dest_base, int64_t dest_offset, frame_t src_slot)
{
	const struct type *t = get_type_of_local(ctx, src_slot);
	unsigned size = spill_size(t);
	short src_reg = ctx->registers[src_slot];
	if (src_reg >= 0) {
		g(gen_address(ctx, dest_base, dest_offset, reg_is_fp(src_reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_address_offset();
		gen_one(src_reg);
		return true;
	}
	g(gen_memcpy_raw(ctx, dest_base, dest_offset, R_FRAME, (size_t)src_slot * slot_size, t->size, t->align));
	return true;
}

static bool attr_w gen_memcpy_slots(struct codegen_context *ctx, frame_t dest_slot, frame_t src_slot)
{
	const struct type *t = get_type_of_local(ctx, src_slot);
	unsigned size = spill_size(t);
	short dest_reg = ctx->registers[dest_slot];
	short src_reg = ctx->registers[src_slot];
	if (dest_reg >= 0 && src_reg >= 0) {
		g(gen_mov(ctx, reg_is_fp(src_reg) ? size : OP_SIZE_NATIVE, dest_reg, src_reg));
		return true;
	}
	if (dest_reg >= 0) {
		g(gen_frame_load(ctx, size, garbage, src_slot, 0, dest_reg));
		return true;
	}
	if (src_reg >= 0) {
		g(gen_frame_store(ctx, size, dest_slot, 0, src_reg));
		return true;
	}
	g(gen_memcpy_raw(ctx, R_FRAME, (size_t)dest_slot * slot_size, R_FRAME, (size_t)src_slot * slot_size, t->size, maximum(slot_size, t->align)));
	return true;
}
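
/*
 * Zero a bitmap of 'bitmap_slots' frame slots. Small bitmaps are cleared
 * with inline stores (using paired or SSE stores where possible, and
 * shrinking the store width to whatever the current offset is aligned
 * to); larger ones use an inline string store or the mem_clear upcall.
 */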
static bool attr_w gen_clear_bitmap(struct codegen_context *ctx, unsigned additional_offset, unsigned dest_base, int64_t dest_offset, frame_t bitmap_slots)
{
	if (bitmap_slots <= INLINE_BITMAP_SLOTS) {
		bool attr_unused scratch_2_zeroed = false;
		size_t bitmap_length = (size_t)bitmap_slots * slot_size;
		size_t clear_offset = 0;
		additional_offset += (unsigned)dest_offset;
#if defined(ARCH_X86)
		g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_1, 0));
#endif
#if defined(ARCH_ARM32) || defined(ARCH_S390)
		g(gen_load_constant(ctx, R_SCRATCH_1, 0));
#endif
		while (clear_offset < bitmap_length) {
			size_t len = bitmap_length - clear_offset;
			if (len > frame_align)
				len = frame_align;
			if (additional_offset)
				len = minimum(len, additional_offset & -additional_offset);
#if defined(ARCH_ARM32) || defined(ARCH_S390)
			len = minimum(len, 2U << OP_SIZE_NATIVE);
			if (len == 2U << OP_SIZE_NATIVE) {
				if (!scratch_2_zeroed) {
					g(gen_load_constant(ctx, R_SCRATCH_2, 0));
					scratch_2_zeroed = true;
				}
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_one(R_SCRATCH_1);
				gen_one(R_SCRATCH_2);
				goto next_chunk;	/* hypothetical label; the original name was lost */
			}
#elif defined(ARCH_ARM64)
			len = minimum(len, 1U << OP_SIZE_16);
			if (len == 1U << OP_SIZE_16) {
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_8));
				g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_8));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_imm_offset();
				gen_imm_offset();
				goto next_chunk;
			}
#elif defined(ARCH_X86)
			len = minimum(len, 1U << OP_SIZE_16);
			if (len == 1U << OP_SIZE_16 && cpu_test_feature(CPU_FEATURE_sse)) {
				if (!scratch_2_zeroed) {
					g(gen_3address_alu(ctx, OP_SIZE_16, ALU_XOR, R_XMM0, R_XMM0, R_XMM0, 0));
					scratch_2_zeroed = true;
				}
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_VLDR_VSTR_OFFSET, OP_SIZE_16));
				gen_insn(INSN_MOV, OP_SIZE_16, 0, 0);
				gen_address_offset();
				gen_one(R_XMM0);
				goto next_chunk;
			}
#endif
			len = minimum(len, 1U << OP_SIZE_NATIVE);
			len = (size_t)1 << high_bit(len);
#if defined(ARCH_X86) || defined(ARCH_ARM32) || defined(ARCH_S390)
			g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_STR_OFFSET, log_2(len)));
			gen_insn(INSN_MOV, log_2(len), 0, 0);
			gen_address_offset();
			gen_one(R_SCRATCH_1);
#else
			g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_STR_OFFSET, log_2(len)));
			g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, log_2(len)));
			gen_insn(INSN_MOV, log_2(len), 0, 0);
			gen_address_offset();
			gen_imm_offset();
#endif
#if defined(ARCH_X86) || defined(ARCH_ARM) || defined(ARCH_S390)
next_chunk:
#endif
			clear_offset += len;
			additional_offset += len;
		}
		return true;
	}
#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	if (cpu_test_feature(CPU_FEATURE_erms)) {
		gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
		gen_one(R_DI);

		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_DI, dest_base, dest_offset, 0));

		g(gen_load_constant(ctx, R_CX, (size_t)bitmap_slots * slot_size));

		g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_AX, R_AX, R_AX, 0));

		gen_insn(INSN_MEMSET, 0, 0, 0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_DI);
		gen_eight(0);
		gen_one(R_CX);	/* count and fill-value operands; form reconstructed */
		gen_one(R_AX);

		gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
		gen_one(R_DI);

		return true;
	}
#endif
	g(gen_upcall_start(ctx, 2));

	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG0, dest_base, dest_offset, 0));
	g(gen_upcall_argument(ctx, 0));

	g(gen_load_constant(ctx, R_ARG1, (size_t)bitmap_slots * slot_size));
	g(gen_upcall_argument(ctx, 1));

	g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, mem_clear), 2));
	return true;
}
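
/*
 * Load a field of the currently executing function: fetch the function
 * pointer from the frame header, then load from fn_offset within it.
 */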
static bool attr_w load_function_offset(struct codegen_context *ctx, unsigned dest, size_t fn_offset)
{
	g(gen_frame_load_raw(ctx, OP_SIZE_ADDRESS, zero_x, 0, frame_offs(function), dest));

	g(gen_address(ctx, dest, fn_offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_one(dest);
	gen_address_offset();

	return true;
}