/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */

#define frame_offs(x)	((ssize_t)offsetof(struct frame_struct, x) - (ssize_t)frame_offset)

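/*
 * When a value occupies two machine words, lo_word/hi_word give the byte
 * offset of the low and high half for the configured endianness.
 */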
#if defined(C_LITTLE_ENDIAN)
#define lo_word(size)		(0)
#define hi_word(size)		((size_t)1 << (size))
#elif defined(C_BIG_ENDIAN)
#define lo_word(size)		((size_t)1 << (size))
#define hi_word(size)		(0)
#else
#error "unknown endianness"
#endif

static bool attr_w gen_frame_load_raw(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg);
static bool attr_w gen_frame_store_raw(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg);

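/* Return the type of the local variable stored in frame slot "pos". */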
static const struct type *get_type_of_local(struct codegen_context *ctx, frame_t pos)
{
	const struct type *t;
	const struct data *function = ctx->fn;
	t = da(function,function)->local_variables[pos].type;
	if (t)
		TYPE_TAG_VALIDATE(t->tag);
	return t;
}

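/* Map a real (floating-point) type index to the corresponding operand size. */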
static unsigned real_type_to_op_size(unsigned real_type)
{
	switch (real_type) {
		case 0:	return OP_SIZE_2;
		case 1:	return OP_SIZE_4;
		case 2:	return OP_SIZE_8;
		case 3:	return OP_SIZE_10;
		case 4:	return OP_SIZE_16;
		default:
			internal(file_line, "real_type_to_op_size: invalid type %u", real_type);
	}
}

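/* Operand size used when spilling a flat value of type "t" to its frame slot. */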
static unsigned spill_size(const struct type *t)
{
	if (!TYPE_TAG_IS_BUILTIN(t->tag)) {
		return OP_SIZE_SLOT;
	} else if (TYPE_TAG_IS_REAL(t->tag)) {
		return real_type_to_op_size(TYPE_TAG_IDX_REAL(t->tag));
	} else {
		return log_2(t->size);
	}
}

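/*
 * Spill/unspill a variable between its allocated machine register and its
 * slot in the frame; unspill chooses the extension mode from the type.
 */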
static bool attr_w spill(struct codegen_context *ctx, frame_t v)
{
	const struct type *t = get_type_of_local(ctx, v);
	g(gen_frame_store_raw(ctx, spill_size(t), v, 0, ctx->registers[v]));
	return true;
}

static bool attr_w unspill(struct codegen_context *ctx, frame_t v)
{
	const struct type *t = get_type_of_local(ctx, v);
	enum extend ex = garbage;
	if (t->tag == TYPE_TAG_flat_option)
		ex = zero_x;
	else if (!TYPE_IS_FLAT(t))
		ex = native;
	g(gen_frame_load_raw(ctx, spill_size(t), ex, v, 0, ctx->registers[v]));
	return true;
}

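/* Compute the address of a frame slot (R_FRAME + slot offset) into "reg". */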
static bool attr_w gen_frame_address(struct codegen_context *ctx, frame_t slot, int64_t offset, unsigned reg)
{
	offset += (size_t)slot * slot_size;
	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, reg, R_FRAME, offset, 0));
	return true;
}

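/*
 * Load a value from a frame slot into a register, ignoring any register the
 * variable may be allocated to. "ex" selects how sub-word values are
 * extended; "garbage" and "native" are first resolved to the architecture's
 * preferred extension. The #if blocks handle machines that lack some load
 * forms (byte/halfword loads, sign extension, paired FP words) and
 * synthesize them from simpler loads.
 */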
static bool attr_w gen_frame_load_raw(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg)
{
	int64_t x_offset;
	if (ex == garbage || ex == native) {
		if (size < OP_SIZE_NATIVE)
			ex = ARCH_PREFERS_SX(size) ? sign_x : zero_x;
		else
			ex = native;
	}
	x_offset = offset + (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX && size < OP_SIZE_4) {
		g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
		gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#if defined(ARCH_ALPHA)
	if (size < OP_SIZE_4) {
		g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		if (ex == sign_x)
			g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
	if (size == OP_SIZE_4 && !reg_is_fp(reg) && ex == zero_x) {
		g(gen_frame_load_raw(ctx, size, sign_x, slot, offset, reg));
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_MIPS)
	if (reg_is_fp(reg) && size == OP_SIZE_8 && !MIPS_HAS_LS_DOUBLE) {
#if defined(C_LITTLE_ENDIAN)
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset, reg));
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset + 4, reg + 1));
#else
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset, reg + 1));
		g(gen_frame_load_raw(ctx, OP_SIZE_4, zero_x, slot, offset + 4, reg));
#endif
		return true;
	}
#endif
#if defined(ARCH_IA64) || defined(ARCH_PARISC)
	if (ex == sign_x) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_POWER)
	if (size == OP_SIZE_1 && ex == sign_x) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
#endif
#if defined(ARCH_S390)
	if (size == OP_SIZE_1 && !cpu_test_feature(CPU_FEATURE_long_displacement)) {
		g(gen_address(ctx, R_FRAME, x_offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_0_8, 0);
		gen_one(reg);
		gen_one(reg);
		gen_address_offset();
		g(gen_extend(ctx, size, ex, reg, reg));
		return true;
	}
	if (size == OP_SIZE_16 && reg_is_fp(reg)) {
		g(gen_frame_load_raw(ctx, OP_SIZE_8, zero_x, 0, x_offset, reg));
		g(gen_frame_load_raw(ctx, OP_SIZE_8, zero_x, 0, x_offset + 8, reg + 2));
		return true;
	}
#endif
	g(gen_address(ctx, R_FRAME, x_offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : ex ? IMM_PURPOSE_LDR_SX_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(unlikely(ex == sign_x) ? INSN_MOVSX : INSN_MOV, size, 0, 0);
	gen_one(reg);
	gen_address_offset();
	return true;
}

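/*
 * Load a frame slot, honoring the register allocator: if the variable lives
 * in a register, extend or move from that register instead of touching
 * memory.
 */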
static bool attr_w gen_frame_load(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_load: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		if (ex != garbage && size < OP_SIZE_NATIVE && !reg_is_fp(reg)) {
			g(gen_extend(ctx, size, ex, reg, ctx->registers[slot]));
			return true;
		}
		g(gen_mov(ctx, !reg_is_fp(reg) ? OP_SIZE_NATIVE : size, reg, ctx->registers[slot]));
		return true;
	}
	g(gen_frame_load_raw(ctx, size, ex, slot, offset, reg));
	if (size < OP_SIZE_NATIVE && ex == garbage) {
		/* poison the undefined upper bits with a random mask so that
		   code wrongly relying on them is caught early */
		uint64_t mask;
		g(gen_extend(ctx, size, zero_x, reg, reg));
		mask = (rand()) | ((uint64_t)rand() << 31) | ((uint64_t)rand() << 62);
		mask <<= 8ULL << size;
		g(gen_imm(ctx, mask, IMM_PURPOSE_OR, OP_SIZE_NATIVE));
		gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
		gen_one(reg);
		gen_one(reg);
		gen_imm_offset();
	}
	return true;
}

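/*
 * Like gen_frame_load, but if the slot lives in a register, return that
 * register in *dest instead of copying; flat options within the machine's
 * boolean size skip the re-extension.
 */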
static bool attr_w gen_frame_get(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg, unsigned *dest)
{
	const struct type *t = get_type_of_local(ctx, slot);
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_get: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		unsigned reg = ctx->registers[slot];
		if (ex != garbage && size < OP_SIZE_NATIVE && !reg_is_fp(reg)) {
			if (t->tag == TYPE_TAG_flat_option && size <= ARCH_BOOL_SIZE)
				goto skip_extend;
			g(gen_extend(ctx, size, ex, reg, reg));
skip_extend:;
		}
		*dest = reg;
		return true;
	}
	g(gen_frame_load(ctx, size, ex, slot, offset, reg));
	*dest = reg;
	if (size < OP_SIZE_NATIVE && ex == garbage && t->tag != TYPE_TAG_flat_option) {
		uint64_t mask;
		g(gen_extend(ctx, size, zero_x, *dest, *dest));
		mask = (rand()) | ((uint64_t)rand() << 31) | ((uint64_t)rand() << 62);
		mask <<= 8ULL << size;
		g(gen_imm(ctx, mask, IMM_PURPOSE_OR, OP_SIZE_NATIVE));
		gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
		gen_one(*dest);
		gen_one(*dest);
		gen_imm_offset();
	}
	return true;
}

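/*
 * The x87 FPU works through its register stack, so operands go through the
 * frame slot in memory: spill before loading and unspill after storing
 * keeps register-allocated slots coherent.
 */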
#if defined(ARCH_X86)
static bool attr_w gen_frame_load_x87(struct codegen_context *ctx, unsigned insn, unsigned size, unsigned alu, frame_t slot)
{
	if (ctx->registers[slot] >= 0)
		g(spill(ctx, slot));
	g(gen_address(ctx, R_FRAME, (size_t)slot * slot_size, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(insn, size, alu, 0);
	gen_address_offset();
	return true;
}

static bool attr_w gen_frame_store_x87(struct codegen_context *ctx, unsigned insn, unsigned size, frame_t slot)
{
	g(gen_address(ctx, R_FRAME, (size_t)slot * slot_size, IMM_PURPOSE_STR_OFFSET, size));
	gen_insn(insn, size, 0, 0);
	gen_address_offset();
	if (ctx->registers[slot] >= 0)
		g(unspill(ctx,slot));
	return true;
}
#endif

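/*
 * Fused "load operand and apply ALU operation": on CISC targets the second
 * operand can come straight from memory; elsewhere it is loaded into a
 * scratch register first.
 */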
static bool attr_w gen_frame_load_op(struct codegen_context *ctx, unsigned size, enum extend ex, unsigned alu, unsigned writes_flags, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load_op: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (size != i_size(size) + (unsigned)zero && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		g(gen_3address_alu(ctx, i_size(size), alu, reg, reg, ctx->registers[slot], writes_flags));
		return true;
	}
#if defined(ARCH_X86) || defined(ARCH_S390)
#if defined(ARCH_S390)
	if (size >= OP_SIZE_4)
#endif
	{
		offset += (size_t)slot * slot_size;
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
		gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(size), size, alu, (alu == ALU_MUL ? ALU_WRITES_FLAGS(alu, false) : 1) | writes_flags);
		gen_one(reg);
		gen_one(reg);
		gen_address_offset();
		return true;
	}
#endif
#if defined(R_SCRATCH_NA_1)
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	g(gen_3address_alu(ctx, i_size(size), alu, reg, reg, R_SCRATCH_NA_1, writes_flags));
	return true;
#endif
}

static bool attr_w attr_unused gen_frame_load_op1(struct codegen_context *ctx, unsigned size, unsigned alu, unsigned writes_flags, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_load_op1: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		g(gen_2address_alu1(ctx, size, alu, reg, ctx->registers[slot], writes_flags));
		return true;
	}
#if defined(ARCH_X86)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(INSN_ALU1 + ARCH_PARTIAL_ALU(size), size, alu, ALU1_WRITES_FLAGS(alu) | writes_flags);
	gen_one(reg);
	gen_address_offset();
	return true;
#endif
#if !defined(ARCH_X86)
	g(gen_frame_load(ctx, size, garbage, slot, offset, reg));
	g(gen_2address_alu1(ctx, size, alu, reg, reg, writes_flags));
	return true;
#endif
}

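/*
 * Compare a register with a value in a frame slot; "swap" exchanges the
 * operand order of the comparison.
 */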
static bool attr_w gen_frame_load_cmp(struct codegen_context *ctx, unsigned size, bool logical, enum extend attr_unused ex, bool swap, frame_t slot, int64_t offset, unsigned reg)
{
	if (ctx->registers[slot] >= 0) {
		if (size != i_size_cmp(size) + (unsigned)zero && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
		gen_one(!swap ? reg : ctx->registers[slot]);
		gen_one(!swap ? ctx->registers[slot] : reg);
		return true;
	}
#if defined(ARCH_S390) || defined(ARCH_X86)
#if defined(ARCH_S390)
	if (size < OP_SIZE_4)
		goto use_scratch;
#endif
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	if (!swap) {
		gen_one(reg);
		gen_address_offset();
	} else {
		gen_address_offset();
		gen_one(reg);
	}
	return true;
#endif
#if defined(R_SCRATCH_NA_1)
#if defined(ARCH_S390)
use_scratch:
#endif
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
	gen_one(!swap ? reg : R_SCRATCH_NA_1);
	gen_one(!swap ? R_SCRATCH_NA_1 : reg);
	return true;
#endif
}

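/*
 * Compare a frame slot with an immediate; memory operands are used where
 * the architecture allows it (x86 CMP mem,imm; s390 CLI).
 */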
static bool attr_w gen_frame_load_cmp_imm(struct codegen_context *ctx, unsigned size, bool logical, enum extend attr_unused ex, frame_t slot, int64_t offset, int64_t value)
{
	if (ctx->registers[slot] >= 0) {
#if defined(ARCH_X86)
		g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
		gen_insn(INSN_CMP, size, 0, 1 + logical);
		gen_one(ctx->registers[slot]);
		gen_imm_offset();
#else
		if (size != i_size(size) + (unsigned)zero && size < OP_SIZE_4 && ex != garbage)
			g(gen_extend(ctx, size, ex, ctx->registers[slot], ctx->registers[slot]));
		g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
		gen_insn(INSN_CMP, i_size_cmp(size), 0, 1 + logical);
		gen_one(ctx->registers[slot]);
		gen_imm_offset();
#endif
		return true;
	}
#if defined(ARCH_X86)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_MVI_CLI_OFFSET, size));
	g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	gen_address_offset();
	gen_imm_offset();
	return true;
#endif
#if defined(ARCH_S390)
	if (size != OP_SIZE_1 || !logical)
		goto use_scratch;
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_MVI_CLI_OFFSET, size));
	gen_insn(INSN_CMP, size, 0, 1 + logical);
	gen_address_offset();
	gen_one(ARG_IMM);
	gen_eight((int8_t)value);
	return true;
#endif
#if defined(R_SCRATCH_NA_1)
#if defined(ARCH_S390)
use_scratch:
#endif
	g(gen_frame_load(ctx, size, ex, slot, offset, R_SCRATCH_NA_1));
	g(gen_imm(ctx, value, logical ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, size));
	gen_insn(INSN_CMP, i_size(size), 0, 1 + logical);
	gen_one(R_SCRATCH_NA_1);
	gen_imm_offset();
	return true;
#endif
}

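/*
 * Load a two-word value into a register pair. ARM64 always has LDP; ARM32,
 * SPARC32 and s390 can pair only suitably numbered registers, and fall back
 * to two single loads otherwise (also when the slot is insufficiently
 * aligned and unaligned accesses trap).
 */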
static bool attr_w gen_frame_load_2(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg1, unsigned reg2)
{
#if defined(ARCH_ARM64)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
	gen_insn(INSN_LDP, size, 0, 0);
	gen_one(reg1);
	gen_one(reg2);
	gen_address_offset();
	return true;
#endif
#if defined(ARCH_ARM32)
	if (likely(!(reg1 & 1)) && likely(reg2 == reg1 + 1) && likely(cpu_test_feature(CPU_FEATURE_armv6)))
#elif defined(ARCH_SPARC32)
	if (likely(!(reg2 & 1)) && likely(reg1 == reg2 + 1))
#elif defined(ARCH_S390)
	if (likely(reg1 == reg2 + 1))
#else
	if (0)
#endif
	{
		offset += (size_t)slot * slot_size;
		if (UNALIGNED_TRAP) {
			if (unlikely((offset & ((2U << size) - 1)) != 0)) {
				offset -= (size_t)slot * slot_size;
				goto skip_ldp;
			}
		}
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
		gen_insn(INSN_LDP, size, 0, 0);
		gen_one(reg1);
		gen_one(reg2);
		gen_address_offset();
		return true;
	}
skip_ldp:
	g(gen_frame_load(ctx, size, garbage, slot, offset + lo_word(size), reg1));
	g(gen_frame_load(ctx, size, garbage, slot, offset + hi_word(size), reg2));
	return true;
}

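/*
 * Store a register to a frame slot. Machines without byte/halfword stores
 * widen the access to 32 bits; MIPS without double-precision load/store and
 * s390 128-bit FP values are split into two stores.
 */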
static bool attr_w gen_frame_store_raw(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg)
{
	offset += (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX)
		size = maximum(OP_SIZE_4, size);
#if defined(ARCH_MIPS)
	if (reg_is_fp(reg) && size == OP_SIZE_8 && !MIPS_HAS_LS_DOUBLE) {
#if defined(C_LITTLE_ENDIAN)
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset, reg));
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset + 4, reg + 1));
#else
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset, reg + 1));
		g(gen_frame_store_raw(ctx, OP_SIZE_4, 0, offset + 4, reg));
#endif
		return true;
	}
#endif
#if defined(ARCH_S390)
	if (size == OP_SIZE_16 && reg_is_fp(reg)) {
		g(gen_frame_store_raw(ctx, OP_SIZE_8, 0, offset, reg));
		g(gen_frame_store_raw(ctx, OP_SIZE_8, 0, offset + 8, reg + 2));
		return true;
	}
#endif
	g(gen_address(ctx, R_FRAME, offset, reg_is_fp(reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
	gen_insn(INSN_MOV, size, 0, 0);
	gen_address_offset();
	gen_one(reg);
	return true;
}

static bool attr_w gen_frame_store(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_store: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_store: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		g(gen_mov(ctx, !reg_is_fp(reg) ? OP_SIZE_NATIVE : size, ctx->registers[slot], reg));
		return true;
	}
	return gen_frame_store_raw(ctx, size, slot, offset, reg);
}

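/*
 * Pick a destination register for a result that ends up in slot_r: use the
 * slot's own register unless it aliases one of the not-yet-read source
 * slots, in which case fall back to the provided scratch register.
 */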
static unsigned gen_frame_target(struct codegen_context *ctx, frame_t slot_r, frame_t slot_na_1, frame_t slot_na_2, unsigned reg)
{
	short d = ctx->registers[slot_r];
	if (d < 0)
		return reg;
	if (slot_na_1 != NO_FRAME_T && ctx->registers[slot_na_1] == d)
		return reg;
	if (slot_na_2 != NO_FRAME_T && ctx->registers[slot_na_2] == d)
		return reg;
	return (unsigned)d;
}

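/* Two-word counterpart of gen_frame_store; mirrors gen_frame_load_2. */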
static bool attr_w gen_frame_store_2(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, unsigned reg1, unsigned reg2)
{
#if defined(ARCH_ARM64)
	offset += (size_t)slot * slot_size;
	g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
	gen_insn(INSN_STP, size, 0, 0);
	gen_address_offset();
	gen_one(reg1);
	gen_one(reg2);
	return true;
#endif
#if defined(ARCH_ARM32)
	if (likely(!(reg1 & 1)) && likely(reg2 == reg1 + 1) && likely(cpu_test_feature(CPU_FEATURE_armv6)))
#elif defined(ARCH_SPARC32)
	if (likely(!(reg2 & 1)) && likely(reg1 == reg2 + 1))
#elif defined(ARCH_S390)
	if (likely(reg1 == reg2 + 1))
#else
	if (0)
#endif
	{
		offset += (size_t)slot * slot_size;
		if (UNALIGNED_TRAP) {
			if (unlikely((offset & ((2U << size) - 1)) != 0)) {
				offset -= (size_t)slot * slot_size;
				goto skip_stp;
			}
		}
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDP_STP_OFFSET, size));
		gen_insn(INSN_STP, size, 0, 0);
		gen_address_offset();
		gen_one(reg1);
		gen_one(reg2);
		return true;
	}
skip_stp:
	g(gen_frame_store(ctx, size, slot, offset + lo_word(size), reg1));
	g(gen_frame_store(ctx, size, slot, offset + hi_word(size), reg2));
	return true;
}

static bool attr_w gen_frame_store_imm_raw(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, int64_t imm)
{
	offset += (size_t)slot * slot_size;
	if (!ARCH_HAS_BWX)
		size = maximum(OP_SIZE_4, size);
	g(gen_address(ctx, R_FRAME, offset, size == OP_SIZE_1 ? IMM_PURPOSE_MVI_CLI_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
	g(gen_imm(ctx, imm, IMM_PURPOSE_STORE_VALUE, size));
	gen_insn(INSN_MOV, size, 0, 0);
	gen_address_offset();
	gen_imm_offset();
	return true;
}

static bool attr_w gen_frame_store_imm(struct codegen_context *ctx, unsigned size, frame_t slot, int64_t offset, int64_t imm)
{
	ajla_assert_lo(slot >= MIN_USEABLE_SLOT && slot < function_n_variables(ctx->fn), (file_line, "gen_frame_store_imm: invalid slot: %lu >= %lu", (unsigned long)slot, (unsigned long)function_n_variables(ctx->fn)));
	if (ctx->registers[slot] >= 0) {
		if (unlikely(offset != 0))
			internal(file_line, "gen_frame_store_imm: offset is non-zero: %"PRIdMAX"", (intmax_t)offset);
		if (size == OP_SIZE_1)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int8_t)imm : (int64_t)(uint8_t)imm;
		if (size == OP_SIZE_2)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int16_t)imm : (int64_t)(uint16_t)imm;
		if (size == OP_SIZE_4)
			imm = ARCH_PREFERS_SX(size) ? (int64_t)(int32_t)imm : (int64_t)(uint32_t)imm;
		g(gen_load_constant(ctx, ctx->registers[slot], imm));
		return true;
	}
	return gen_frame_store_imm_raw(ctx, size, slot, offset, imm);
}

static bool attr_w gen_frame_clear_raw(struct codegen_context *ctx, unsigned size, frame_t slot)
{
	g(gen_frame_store_imm_raw(ctx, size, slot, 0, 0));
	return true;
}

static bool attr_w gen_frame_clear(struct codegen_context *ctx, unsigned size, frame_t slot)
{
	g(gen_frame_store_imm(ctx, size, slot, 0, 0));
	if (ctx->registers[slot] >= 0)
		g(gen_frame_clear_raw(ctx, size, slot));	/* clear the in-memory copy as well */
	return true;
}

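/*
 * Materialize a flags condition as a 0/1 value in a frame slot. x86 uses
 * SETcc (with partial-register care), ARM64 uses CSET; other targets use a
 * conditional move, or a load-constant plus conditional branch when no
 * conditional move is available.
 */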
#if defined(ARCH_X86)
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned attr_unused size, bool attr_unused logical, unsigned cond, frame_t slot)
{
	size_t offset;
	if (ctx->registers[slot] >= 0) {
		unsigned reg = ctx->registers[slot];
#if defined(ARCH_X86_32)
		if (reg >= 4) {
			/* this register has no byte encoding on i386 */
			gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
			gen_one(R_SCRATCH_1);
			gen_one(R_SCRATCH_1);
			g(gen_mov(ctx, OP_SIZE_1, reg, R_SCRATCH_1));
			return true;
		}
#endif
		gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
		gen_one(reg);
		gen_one(reg);
		if (sizeof(ajla_flat_option_t) > 1) {
			g(gen_mov(ctx, OP_SIZE_1, reg, reg));
		}
		return true;
	}
	offset = (size_t)slot * slot_size;
	if (sizeof(ajla_flat_option_t) > 1) {
		gen_insn(INSN_SET_COND_PARTIAL, OP_SIZE_1, cond, 0);
		gen_one(R_SCRATCH_1);
		gen_one(R_SCRATCH_1);
		g(gen_mov(ctx, OP_SIZE_1, R_SCRATCH_1, R_SCRATCH_1));
		g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, R_SCRATCH_1));
	} else {
		g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_STR_OFFSET, OP_SIZE_1));
		gen_insn(INSN_SET_COND, OP_SIZE_1, cond, 0);
		gen_address_offset();
	}
	return true;
}
#elif defined(ARCH_ARM64)
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned attr_unused size, bool attr_unused logical, unsigned cond, frame_t slot)
{
	if (ctx->registers[slot] >= 0) {
		gen_insn(INSN_SET_COND, OP_SIZE_4, cond, 0);
		gen_one(ctx->registers[slot]);
	} else {
		gen_insn(INSN_SET_COND, OP_SIZE_4, cond, 0);
		gen_one(R_SCRATCH_1);
		g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, R_SCRATCH_1));
	}
	return true;
}
#else
static bool attr_w gen_frame_set_cond(struct codegen_context *ctx, unsigned size, bool logical, unsigned cond, frame_t slot)
{
	unsigned target = gen_frame_target(ctx, slot, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
#if defined(ARCH_POWER)
	if (!cpu_test_feature(CPU_FEATURE_v203))
#elif defined(ARCH_S390)
	if (!cpu_test_feature(CPU_FEATURE_misc_45))
#elif defined(ARCH_SPARC32)
	if (1)
#else
	if (0)
#endif
	{
		/* no conditional move: load 1, conditionally skip loading 0 */
		uint32_t label;
		g(gen_load_constant(ctx, target, 1));
		label = alloc_label(ctx);
		if (unlikely(!label))
			return false;
		gen_insn(!logical ? INSN_JMP_COND : INSN_JMP_COND_LOGICAL, i_size_cmp(size), cond, 0);
		gen_four(label);
		g(gen_load_constant(ctx, target, 0));
		gen_label(label);
	} else {
		g(gen_load_constant(ctx, target, 1));
		g(gen_imm(ctx, 0, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
		if (cond & COND_FP) {
			gen_insn(INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
		} else {
#if defined(ARCH_S390)
			gen_insn(logical ? INSN_CMOV_XCC : INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
#else
			gen_insn(size == OP_SIZE_8 ? INSN_CMOV_XCC : INSN_CMOV, OP_SIZE_NATIVE, cond ^ 1, 0);
#endif
		}
		gen_one(target);
		gen_one(target);
		gen_imm_offset();
	}
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot, 0, target));
	return true;
}
#endif

static bool attr_w gen_frame_cmp_imm_set_cond_reg(struct codegen_context *ctx, unsigned size, unsigned reg, int64_t imm, unsigned cond, frame_t slot_r)
{
	unsigned dest_reg;
	dest_reg = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_CMP_RESULT);
	g(gen_cmp_dest_reg(ctx, size, reg, (unsigned)-1, dest_reg, imm, cond));
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, dest_reg));
	return true;
}

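/*
 * Compare a register against a frame slot (or an immediate, below) and
 * store the boolean result into slot_r. Flag-based architectures compare
 * and then use gen_frame_set_cond; the others compute the result directly
 * into a register with gen_cmp_dest_reg.
 */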
static bool attr_w gen_frame_load_cmp_set_cond(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, unsigned reg, unsigned cond, frame_t slot_r)
{
#if ARCH_HAS_FLAGS
	bool logical = COND_IS_LOGICAL(cond);
	g(gen_frame_load_cmp(ctx, size, logical, ex, false, slot, offset, reg));
	g(gen_frame_set_cond(ctx, size, logical, cond, slot_r));
#else
	unsigned src_reg, dest_reg;
	g(gen_frame_get(ctx, size, ex, slot, offset, R_SCRATCH_NA_1, &src_reg));
	dest_reg = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_NA_1);
	g(gen_cmp_dest_reg(ctx, size, reg, src_reg, dest_reg, 0, cond));
	g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, dest_reg));
#endif
	return true;
}

static bool attr_w gen_frame_load_cmp_imm_set_cond(struct codegen_context *ctx, unsigned size, enum extend ex, frame_t slot, int64_t offset, int64_t value, unsigned cond, frame_t slot_r)
{
#if ARCH_HAS_FLAGS
	bool logical = COND_IS_LOGICAL(cond);
#if defined(ARCH_S390)
	/* byte-sized compares against memory only exist as the logical CLI */
	if (size == OP_SIZE_1)
		logical = true;
#endif
	g(gen_frame_load_cmp_imm(ctx, size, logical, ex, slot, offset, value));
	g(gen_frame_set_cond(ctx, size, false, cond, slot_r));
#else
	unsigned src_reg;
	g(gen_frame_get(ctx, size, ex, slot, offset, R_SCRATCH_NA_1, &src_reg));
	g(gen_frame_cmp_imm_set_cond_reg(ctx, size, src_reg, value, cond, slot_r));
#endif
	return true;
}

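/*
 * Bracket an upcall into the C runtime: register-allocated variables that
 * the callee may observe are saved. On the x86-64 SysV ABI, integer
 * registers are preserved cheaply with pushes (FP registers are spilled to
 * their slots); other targets spill everything.
 */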
static bool attr_w gen_upcall_start(struct codegen_context *ctx, unsigned args)
{
	size_t i;
	size_t attr_unused n_pushes;
	ajla_assert_lo(ctx->upcall_args == -1, (file_line, "gen_upcall_start: gen_upcall_end not called"));
	ctx->upcall_args = (int)args;

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (reg_is_fp(reg))
			g(spill(ctx, ctx->need_spill[i]));
	}
	n_pushes = 0;
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg)) {
			gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
			gen_one(reg);
			n_pushes++;
		}
	}
	if (n_pushes & 1) {
		/* keep the stack 16-byte aligned for the call */
		gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
		gen_one(R_AX);
	}
#else
	for (i = 0; i < ctx->need_spill_l; i++)
		g(spill(ctx, ctx->need_spill[i]));
#endif
	return true;
}

static bool attr_w gen_upcall_end(struct codegen_context *ctx, unsigned args)
{
	size_t i;
	size_t attr_unused n_pushes;
	ajla_assert_lo(ctx->upcall_args == (int)args, (file_line, "gen_upcall_end: gen_upcall_start mismatch: %d", ctx->upcall_args));
	ctx->upcall_args = -1;

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	n_pushes = 0;
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg))
			n_pushes++;
	}
	if (n_pushes & 1) {
		/* drop the alignment slot; must not clobber the return value in R_AX */
		gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
		gen_one(R_CX);
	}
	for (i = ctx->need_spill_l; i;) {
		unsigned reg;
		i--;
		reg = ctx->registers[ctx->need_spill[i]];
		if (!reg_is_fp(reg)) {
			gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
			gen_one(reg);
		}
	}
	for (i = 0; i < ctx->need_spill_l; i++) {
		unsigned reg = ctx->registers[ctx->need_spill[i]];
		if (reg_is_fp(reg))
			g(unspill(ctx, ctx->need_spill[i]));
	}
#else
	for (i = 0; i < ctx->need_spill_l; i++)
		g(unspill(ctx, ctx->need_spill[i]));
#endif
	return true;
}

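/*
 * Copy a block of memory. Strategy, in order: s390 MVC for small copies
 * with in-range displacements, an unrolled load/store loop up to
 * INLINE_COPY_SIZE (using LDP/STP pairs on ARM), x86 "rep movsb" when the
 * CPU has ERMS, and finally an upcall to the runtime's mem_copy.
 */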
static bool attr_w gen_memcpy_raw(struct codegen_context *ctx, unsigned dest_base, int64_t dest_offset, unsigned src_base, int64_t src_offset, size_t size, size_t attr_unused align)
{
	if (!ARCH_HAS_BWX) {
		if (align < 4 || (size & 3))
			goto call_mem_copy;
	}
#if defined(ARCH_S390)
	if (size <= 0x10) {
		if (!(size & 3) || cpu_test_feature(CPU_FEATURE_extended_imm))
			goto do_explicit_copy;
	}
	if (size <= 0x100 && dest_offset >= 0 && dest_offset < 0x1000 && src_offset >= 0 && src_offset < 0x1000) {
		gen_insn(INSN_MEMCPY, 0, 0, 0);
		gen_one(ARG_ADDRESS_1);
		gen_one(dest_base);
		gen_eight(dest_offset);
		gen_one(ARG_ADDRESS_1);
		gen_one(src_base);
		gen_eight(src_offset);
		gen_one(ARG_IMM);
		gen_eight(size);
		return true;
	}
do_explicit_copy:
#endif
	if (size <= INLINE_COPY_SIZE) {
		while (size) {
			unsigned this_step;
			unsigned this_op_size;
#if defined(ARCH_ARM)
			if (size >= 2U << OP_SIZE_NATIVE
#if defined(ARCH_ARM32)
			    && align >= 1U << OP_SIZE_NATIVE
#endif
			    ) {
				g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_LDP, OP_SIZE_NATIVE, 0, 0);
				gen_one(R_SCRATCH_NA_1);
				gen_one(R_SCRATCH_NA_2);
				gen_address_offset();

				g(gen_address(ctx, dest_base, dest_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_one(R_SCRATCH_NA_1);
				gen_one(R_SCRATCH_NA_2);

				size -= 2U << OP_SIZE_NATIVE;
				src_offset += 2U << OP_SIZE_NATIVE;
				dest_offset += 2U << OP_SIZE_NATIVE;
				continue;
			}
#endif
			if (size >= 8 && OP_SIZE_NATIVE >= OP_SIZE_8)
				this_step = 8;
			else if (size >= 4)
				this_step = 4;
			else if (size >= 2)
				this_step = 2;
			else
				this_step = 1;
			if (UNALIGNED_TRAP)
				this_step = minimum(this_step, align);
			this_op_size = log_2(this_step);

			g(gen_address(ctx, src_base, src_offset, ARCH_PREFERS_SX(this_op_size) ? IMM_PURPOSE_LDR_SX_OFFSET : IMM_PURPOSE_LDR_OFFSET, this_op_size));
			gen_insn(ARCH_PREFERS_SX(this_op_size) ? INSN_MOVSX : INSN_MOV, this_op_size, 0, 0);
			gen_one(R_SCRATCH_1);
			gen_address_offset();

			g(gen_address(ctx, dest_base, dest_offset, IMM_PURPOSE_STR_OFFSET, this_op_size));
			gen_insn(INSN_MOV, this_op_size, 0, 0);
			gen_address_offset();
			gen_one(R_SCRATCH_1);

			size -= this_step;
			src_offset += this_step;
			dest_offset += this_step;
		}
		return true;
	}

call_mem_copy:
	g(gen_upcall_start(ctx, 3));
	if (unlikely(R_ARG0 == src_base)) {
		if (unlikely(R_ARG1 == dest_base))
			internal(file_line, "gen_memcpy_raw: swapped registers: %u, %u", src_base, dest_base);
		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG1, src_base, src_offset, 0));
		g(gen_upcall_argument(ctx, 1));
	}

	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG0, dest_base, dest_offset, 0));
	g(gen_upcall_argument(ctx, 0));

	if (R_ARG0 != src_base) {
		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG1, src_base, src_offset, 0));
		g(gen_upcall_argument(ctx, 1));
	}

#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	if (cpu_test_feature(CPU_FEATURE_erms)) {
		g(gen_load_constant(ctx, R_CX, size));

		gen_insn(INSN_MEMCPY, 0, 0, 0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_DI);
		gen_eight(0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_SI);
		gen_eight(0);
		gen_one(R_CX);

		g(gen_upcall_end(ctx, 3));
		return true;
	}
#endif

	g(gen_load_constant(ctx, R_ARG2, size));
	g(gen_upcall_argument(ctx, 2));

	g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, mem_copy), 3));
	return true;
}

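/*
 * Slot-aware copies: when the source or destination slot lives in a
 * register, a single load/store (or register move) replaces the memcpy.
 */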
static bool attr_w gen_memcpy_to_slot(struct codegen_context *ctx, frame_t dest_slot, unsigned src_base, int64_t src_offset)
{
	const struct type *t = get_type_of_local(ctx, dest_slot);
	unsigned size = spill_size(t);
	short dest_reg = ctx->registers[dest_slot];
	if (dest_reg >= 0) {
		if (ARCH_PREFERS_SX(size) && !reg_is_fp(dest_reg)) {
#if defined(ARCH_S390)
			if (size == OP_SIZE_1 && !cpu_test_feature(CPU_FEATURE_long_displacement)) {
				g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDR_OFFSET, size));
				gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_0_8, 0);
				gen_one(dest_reg);
				gen_one(dest_reg);
				gen_address_offset();
				g(gen_extend(ctx, size, sign_x, dest_reg, dest_reg));
				return true;
			}
#endif
			g(gen_address(ctx, src_base, src_offset, IMM_PURPOSE_LDR_SX_OFFSET, size));
			gen_insn(INSN_MOVSX, size, 0, 0);
		} else {
			g(gen_address(ctx, src_base, src_offset, reg_is_fp(dest_reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_LDR_OFFSET, size));
			gen_insn(INSN_MOV, size, 0, 0);
		}
		gen_one(dest_reg);
		gen_address_offset();
		return true;
	}
	g(gen_memcpy_raw(ctx, R_FRAME, (size_t)dest_slot * slot_size, src_base, src_offset, t->size, t->align));
	return true;
}

static bool attr_w gen_memcpy_from_slot(struct codegen_context *ctx, unsigned dest_base, int64_t dest_offset, frame_t src_slot)
{
	const struct type *t = get_type_of_local(ctx, src_slot);
	short src_reg = ctx->registers[src_slot];
	if (src_reg >= 0) {
		unsigned size = spill_size(t);
		g(gen_address(ctx, dest_base, dest_offset, reg_is_fp(src_reg) ? IMM_PURPOSE_VLDR_VSTR_OFFSET : IMM_PURPOSE_STR_OFFSET, size));
		gen_insn(INSN_MOV, size, 0, 0);
		gen_address_offset();
		gen_one(src_reg);
		return true;
	}
	g(gen_memcpy_raw(ctx, dest_base, dest_offset, R_FRAME, (size_t)src_slot * slot_size, t->size, t->align));
	return true;
}

static bool attr_w gen_memcpy_slots(struct codegen_context *ctx, frame_t dest_slot, frame_t src_slot)
{
	const struct type *t = get_type_of_local(ctx, src_slot);
	short dest_reg = ctx->registers[dest_slot];
	short src_reg = ctx->registers[src_slot];
	if (dest_reg >= 0 && src_reg >= 0) {
		unsigned size = spill_size(t);
		g(gen_mov(ctx, reg_is_fp(src_reg) ? size : OP_SIZE_NATIVE, dest_reg, src_reg));
		return true;
	}
	if (dest_reg >= 0) {
		unsigned size = spill_size(t);
		g(gen_frame_load(ctx, size, garbage, src_slot, 0, dest_reg));
		return true;
	}
	if (src_reg >= 0) {
		unsigned size = spill_size(t);
		g(gen_frame_store(ctx, size, dest_slot, 0, src_reg));
		return true;
	}
	g(gen_memcpy_raw(ctx, R_FRAME, (size_t)dest_slot * slot_size, R_FRAME, (size_t)src_slot * slot_size, t->size, maximum(slot_size, t->align)));
	return true;
}

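/*
 * Zero the flag bitmap that precedes the frame's slots. Small bitmaps are
 * cleared with inline stores (using paired or SSE stores where available,
 * sizing each store by the known alignment); larger ones use x86
 * "rep stosb" under ERMS or the runtime's mem_clear upcall.
 */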
static bool attr_w gen_clear_bitmap(struct codegen_context *ctx, unsigned additional_offset, unsigned dest_base, int64_t dest_offset, frame_t bitmap_slots)
{
	if (bitmap_slots <= INLINE_BITMAP_SLOTS) {
		bool attr_unused scratch_2_zeroed = false;
		size_t bitmap_length = (size_t)bitmap_slots * slot_size;
		size_t clear_offset = 0;
		additional_offset += (unsigned)dest_offset;
#if defined(ARCH_X86)
		g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_1, 0));
#endif
#if defined(ARCH_ARM32) || defined(ARCH_S390)
		g(gen_load_constant(ctx, R_SCRATCH_1, 0));
#endif
		while (clear_offset < bitmap_length) {
			size_t len = bitmap_length - clear_offset;
			if (len > frame_align)
				len = frame_align;
			if (additional_offset)
				len = minimum(len, additional_offset & -additional_offset);
#if defined(ARCH_ARM32) || defined(ARCH_S390)
			len = minimum(len, 2U << OP_SIZE_NATIVE);
			if (len == 2U << OP_SIZE_NATIVE) {
				if (!scratch_2_zeroed) {
					g(gen_load_constant(ctx, R_SCRATCH_2, 0));
					scratch_2_zeroed = true;
				}
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_NATIVE));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_one(R_SCRATCH_1);
				gen_one(R_SCRATCH_2);
				goto next_loop;
			}
#elif defined(ARCH_ARM64)
			len = minimum(len, 1U << OP_SIZE_16);
			if (len == 1U << OP_SIZE_16) {
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_LDP_STP_OFFSET, OP_SIZE_8));
				g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_8));
				gen_insn(INSN_STP, OP_SIZE_NATIVE, 0, 0);
				gen_address_offset();
				gen_imm_offset();
				gen_imm_offset();
				goto next_loop;
			}
#elif defined(ARCH_X86)
			len = minimum(len, 1U << OP_SIZE_16);
			if (len == 1U << OP_SIZE_16 && cpu_test_feature(CPU_FEATURE_sse)) {
				if (!scratch_2_zeroed) {
					g(gen_3address_alu(ctx, OP_SIZE_16, ALU_XOR, R_XMM0, R_XMM0, R_XMM0, 0));
					scratch_2_zeroed = true;
				}
				g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_VLDR_VSTR_OFFSET, OP_SIZE_16));
				gen_insn(INSN_MOV, OP_SIZE_16, 0, 0);
				gen_address_offset();
				gen_one(R_XMM0);
				goto next_loop;
			}
#endif
			len = minimum(len, 1U << OP_SIZE_NATIVE);
			len = (size_t)1 << high_bit(len);
#if defined(ARCH_X86) || defined(ARCH_ARM32) || defined(ARCH_S390)
			g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_STR_OFFSET, log_2(len)));
			gen_insn(INSN_MOV, log_2(len), 0, 0);
			gen_address_offset();
			gen_one(R_SCRATCH_1);
#else
			g(gen_address(ctx, dest_base, dest_offset + clear_offset, IMM_PURPOSE_STR_OFFSET, log_2(len)));
			g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, log_2(len)));
			gen_insn(INSN_MOV, log_2(len), 0, 0);
			gen_address_offset();
			gen_imm_offset();
#endif
next_loop:
			clear_offset += len;
			additional_offset += len;
		}
		return true;
	}
#if (defined(ARCH_X86_64) || defined(ARCH_X86_X32)) && !defined(ARCH_X86_WIN_ABI)
	if (cpu_test_feature(CPU_FEATURE_erms)) {
		gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
		gen_one(R_DI);

		g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_DI, dest_base, dest_offset, 0));

		g(gen_load_constant(ctx, R_CX, (size_t)bitmap_slots * slot_size));

		g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_AX, R_AX, R_AX, 0));

		gen_insn(INSN_MEMSET, 0, 0, 0);
		gen_one(ARG_ADDRESS_1_POST_I);
		gen_one(R_DI);
		gen_eight(0);
		gen_one(R_CX);
		gen_one(R_AX);

		gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
		gen_one(R_DI);

		return true;
	}
#endif
	g(gen_upcall_start(ctx, 2));

	g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_ADDRESS), ALU_ADD, R_ARG0, dest_base, dest_offset, 0));
	g(gen_upcall_argument(ctx, 0));

	g(gen_load_constant(ctx, R_ARG1, (size_t)bitmap_slots * slot_size));
	g(gen_upcall_argument(ctx, 1));

	g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, mem_clear), 2));
	return true;
}

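/*
 * Load a field of the currently executed function: fetch the function
 * pointer from the frame header and then load from the given offset.
 */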
static bool attr_w load_function_offset(struct codegen_context *ctx, unsigned dest, size_t fn_offset)
{
	g(gen_frame_load_raw(ctx, OP_SIZE_ADDRESS, zero_x, 0, frame_offs(function), dest));

	g(gen_address(ctx, dest, fn_offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_one(dest);
	gen_address_offset();

	return true;
}