/*
 * parisc: implement floating point register allocation
 * [ajla.git] / c1-riscv.inc
 * blob fe4057f0f9bfc98e0245e9fe322058b8b82c50be
 */
/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
/*
 * Architecture parameters for this back-end.
 */

/* Native operand size; addresses use the same size. */
#define OP_SIZE_NATIVE			OP_SIZE_8
#define OP_SIZE_ADDRESS			OP_SIZE_NATIVE

#define JMP_LIMIT			JMP_EXTRA_LONG

/* True unless the CPU reports the "unaligned" feature. */
#define UNALIGNED_TRAP			(!cpu_test_feature(CPU_FEATURE_unaligned))

/* Every flag-related query answers 0 on this architecture. */
#define ALU_WRITES_FLAGS(alu, im)	0
#define ALU1_WRITES_FLAGS(alu)		0
#define ROT_WRITES_FLAGS(alu)		0
#define COND_IS_LOGICAL(cond)		0

/* Fixed capabilities. */
#define ARCH_PARTIAL_ALU(size)		0
#define ARCH_IS_3ADDRESS		1
#define ARCH_HAS_FLAGS			0
#define ARCH_PREFERS_SX(size)		0
#define ARCH_HAS_BWX			1
#define ARCH_HAS_MUL			1
#define ARCH_HAS_DIV			1

/* Capabilities that depend on CPU features detected at run time (zbb, zba, zbs). */
#define ARCH_HAS_ANDN			cpu_test_feature(CPU_FEATURE_zbb)
/* Requires zba and a shift amount of at most 3. */
#define ARCH_HAS_SHIFTED_ADD(bits)	((bits) <= 3 && cpu_test_feature(CPU_FEATURE_zba))
/* Requires zbs and either an OP_SIZE_8 operand or a non-zero `cnst`. */
#define ARCH_HAS_BTX(btx, size, cnst)	(((size) == OP_SIZE_8 || (cnst)) && cpu_test_feature(CPU_FEATURE_zbs))

#define ARCH_SHIFT_SIZE			OP_SIZE_4
#define ARCH_NEEDS_BARRIER		0

/* i_size ignores its argument and always yields the native size. */
#define i_size(size)			OP_SIZE_NATIVE
/* i_size_rot widens anything smaller than OP_SIZE_4 to OP_SIZE_4. */
#define i_size_rot(size)		maximum(size, OP_SIZE_4)
/*
 * Integer register numbers 0x00 - 0x1f. The names follow the RISC-V ABI
 * mnemonics (zero, ra, sp, gp, tp, t0-t6, s0-s11, a0-a7).
 */

/* Fixed-purpose registers. */
#define R_ZERO		0x00
#define R_RA		0x01
#define R_SP		0x02
#define R_GP		0x03
#define R_TP		0x04

/* t0 - t2 */
#define R_T0		0x05
#define R_T1		0x06
#define R_T2		0x07

/* s0 - s1 */
#define R_S0		0x08
#define R_S1		0x09

/* a0 - a7 */
#define R_A0		0x0a
#define R_A1		0x0b
#define R_A2		0x0c
#define R_A3		0x0d
#define R_A4		0x0e
#define R_A5		0x0f
#define R_A6		0x10
#define R_A7		0x11

/* s2 - s11 */
#define R_S2		0x12
#define R_S3		0x13
#define R_S4		0x14
#define R_S5		0x15
#define R_S6		0x16
#define R_S7		0x17
#define R_S8		0x18
#define R_S9		0x19
#define R_S10		0x1a
#define R_S11		0x1b

/* t3 - t6 */
#define R_T3		0x1c
#define R_T4		0x1d
#define R_T5		0x1e
#define R_T6		0x1f
/*
 * Floating point register numbers 0x20 - 0x3f, i.e. the range that
 * reg_is_fp() accepts. The names follow the RISC-V ABI mnemonics
 * (ft0-ft11, fs0-fs11, fa0-fa7).
 */

/* ft0 - ft7 */
#define R_FT0		0x20
#define R_FT1		0x21
#define R_FT2		0x22
#define R_FT3		0x23
#define R_FT4		0x24
#define R_FT5		0x25
#define R_FT6		0x26
#define R_FT7		0x27

/* fs0 - fs1 */
#define R_FS0		0x28
#define R_FS1		0x29

/* fa0 - fa7 */
#define R_FA0		0x2a
#define R_FA1		0x2b
#define R_FA2		0x2c
#define R_FA3		0x2d
#define R_FA4		0x2e
#define R_FA5		0x2f
#define R_FA6		0x30
#define R_FA7		0x31

/* fs2 - fs11 */
#define R_FS2		0x32
#define R_FS3		0x33
#define R_FS4		0x34
#define R_FS5		0x35
#define R_FS6		0x36
#define R_FS7		0x37
#define R_FS8		0x38
#define R_FS9		0x39
#define R_FS10		0x3a
#define R_FS11		0x3b

/* ft8 - ft11 */
#define R_FT8		0x3c
#define R_FT9		0x3d
#define R_FT10		0x3e
#define R_FT11		0x3f
/*
 * Register roles used by the code generator.
 */

/* gen_entry() fills these three from R_ARG0, R_ARG1 and R_ARG2. */
#define R_FRAME		R_S0
#define R_UPCALL	R_S1
#define R_TIMESTAMP	R_S2

/* Scratch registers, taken from a0 - a5. */
#define R_SCRATCH_1	R_A0
#define R_SCRATCH_2	R_A1
#define R_SCRATCH_3	R_A2
#define R_SCRATCH_4	R_A3
#define R_SCRATCH_NA_1	R_A4
#define R_SCRATCH_NA_2	R_A5
/*
 * a6 is a third "NA" scratch register only with HAVE_BITWISE_FRAME;
 * without it, a6 is listed in regs_volatile[] instead.
 */
#ifdef HAVE_BITWISE_FRAME
#define R_SCRATCH_NA_3	R_A6
#endif

#define R_SAVED_1	R_S3
#define R_SAVED_2	R_S4

/* Argument and return value registers. */
#define R_ARG0		R_A0
#define R_ARG1		R_A1
#define R_ARG2		R_A2
#define R_ARG3		R_A3
#define R_RET0		R_A0
#define R_RET1		R_A1

/* Temporary that gen_address() uses for offsets outside the direct range. */
#define R_OFFSET_IMM	R_T0
/* Temporary that gen_imm() uses for constants that are not direct. */
#define R_CONST_IMM	R_T1
/* Extra temporary clobbered by gen_load_constant(). */
#define R_CONST_HELPER	R_T2
#define R_CMP_RESULT	R_T3

/* Floating point scratch registers. */
#define FR_SCRATCH_1	R_FA0
#define FR_SCRATCH_2	R_FA1

/* NOTE(review): looks like a bit mask of supported FP types - confirm against the generic code. */
#define SUPPORTED_FP	0x6

/* Bytes of stack allocated by gen_entry() and released by gen_escape(). */
#define FRAME_SIZE	0x70
/*
 * Tell whether `reg` is a floating point register number.
 *
 * Returns true for 0x20 through 0x3f inclusive and false for everything else.
 */
static bool reg_is_fp(unsigned reg)
{
	/* 0x20..0x3f are exactly the values whose bits above the low five equal 1. */
	return (reg >> 5) == 1;
}
154 static const uint8_t regs_saved[] = { R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11 };
155 static const uint8_t regs_volatile[] = { R_RA,
156 #ifndef HAVE_BITWISE_FRAME
157         R_A6,
158 #endif
159         R_A7, R_T4, R_T5, R_T6 };
160 static const uint8_t fp_saved[] = { 0 };
161 #define n_fp_saved 0U
162 static const uint8_t fp_volatile[] = { 0 };
163 #define n_fp_volatile 0U
164 #define reg_is_saved(r) (((r) >= R_S0 && (r) <= R_S1) || ((r) >= R_S2 && (r) <= R_S11))
166 static const struct {
167         uint32_t l;
168         uint16_t s;
169 } riscv_compress[] = {
170 #include "riscv-c.inc"
173 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
175         unsigned r = R_ZERO;
176         int32_t c1, c2, c3, c4;
178         c1 = c & 0xfffUL;
179         if (c1 & 0x800)
180                 c1 |= -0x800;
181         if (c1 < 0)
182                 c += 0x1000UL;
184         c2 = (c >> 12) & 0xfffffUL;
185         if (c2 & 0x80000)
186                 c2 |= -0x80000;
187         if (c2 < 0)
188                 c += 0x100000000UL;
190         c3 = (c >> 32) & 0xfffUL;
191         if (c3 & 0x800)
192                 c3 |= -0x800;
193         if (c3 < 0)
194                 c += 0x100000000000UL;
196         c4 = c >> 44;
197         if (c4 & 0x80000)
198                 c4 |= -0x80000;
200         if (c4) {
201                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
202                 gen_one(reg);
203                 gen_one(ARG_IMM);
204                 gen_eight((uint64_t)c4 << 12);
205                 r = reg;
206         }
207         if (c3) {
208                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
209                 gen_one(reg);
210                 gen_one(r);
211                 gen_one(ARG_IMM);
212                 gen_eight(c3);
213                 r = reg;
214         }
215         if (r != R_ZERO) {
216                 gen_insn(INSN_ROT, OP_SIZE_NATIVE, ROT_SHL, 0);
217                 gen_one(r);
218                 gen_one(r);
219                 gen_one(ARG_IMM);
220                 gen_eight(32);
221         }
222         if (c2) {
223                 if (r != R_ZERO) {
224                         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
225                         gen_one(R_CONST_HELPER);
226                         gen_one(ARG_IMM);
227                         gen_eight((uint64_t)c2 << 12);
229                         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
230                         gen_one(r);
231                         gen_one(r);
232                         gen_one(R_CONST_HELPER);
233                 } else {
234                         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
235                         gen_one(reg);
236                         gen_one(ARG_IMM);
237                         gen_eight((uint64_t)c2 << 12);
238                         r = reg;
239                 }
240         }
241         if (c1 || r == R_ZERO) {
242                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
243                 gen_one(reg);
244                 gen_one(r);
245                 gen_one(ARG_IMM);
246                 gen_eight(c1);
247         }
248         return true;
251 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
253         ctx->base_reg = base;
254         ctx->offset_imm = imm;
255         ctx->offset_reg = false;
256         switch (purpose) {
257                 case IMM_PURPOSE_LDR_OFFSET:
258                 case IMM_PURPOSE_LDR_SX_OFFSET:
259                 case IMM_PURPOSE_STR_OFFSET:
260                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
261                 case IMM_PURPOSE_MVI_CLI_OFFSET:
262                         if (likely(imm >= -0x800) && likely(imm < 0x800))
263                                 return true;
264                         break;
265                 default:
266                         internal(file_line, "gen_address: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
267         }
268         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
269         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
270         gen_one(R_OFFSET_IMM);
271         gen_one(R_OFFSET_IMM);
272         gen_one(base);
273         ctx->base_reg = R_OFFSET_IMM;
274         ctx->offset_imm = 0;
275         return true;
278 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
280         switch (purpose) {
281                 case IMM_PURPOSE_STORE_VALUE:
282                         if (!imm)
283                                 return true;
284                         break;
285                 case IMM_PURPOSE_ADD:
286                 case IMM_PURPOSE_AND:
287                 case IMM_PURPOSE_OR:
288                 case IMM_PURPOSE_XOR:
289                 case IMM_PURPOSE_TEST:
290                 case IMM_PURPOSE_CMP:
291                 case IMM_PURPOSE_CMP_LOGICAL:
292                         if (likely(imm >= -0x800) && likely(imm < 0x800))
293                                 return true;
294                         break;
295                 case IMM_PURPOSE_SUB:
296                         if (likely(imm > -0x800) && likely(imm <= 0x800))
297                                 return true;
298                         break;
299                 case IMM_PURPOSE_ANDN:
300                         break;
301                 case IMM_PURPOSE_JMP_2REGS:
302                         break;
303                 case IMM_PURPOSE_MUL:
304                         break;
305                 case IMM_PURPOSE_BITWISE:
306                         return true;
307                 default:
308                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
309         }
310         return false;
313 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
315         if (is_direct_const(imm, purpose, size)) {
316                 ctx->const_imm = imm;
317                 ctx->const_reg = false;
318         } else {
319                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
320                 ctx->const_reg = true;
321         }
322         return true;
325 static bool attr_w gen_entry(struct codegen_context *ctx)
327         g(gen_imm(ctx, -FRAME_SIZE, IMM_PURPOSE_ADD, OP_SIZE_NATIVE));
328         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
329         gen_one(R_SP);
330         gen_one(R_SP);
331         gen_imm_offset();
333         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x08, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
334         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
335         gen_address_offset();
336         gen_one(R_RA);
338         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x10, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
339         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
340         gen_address_offset();
341         gen_one(R_S0);
343         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x18, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
344         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
345         gen_address_offset();
346         gen_one(R_S1);
348         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x20, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
349         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
350         gen_address_offset();
351         gen_one(R_S2);
353         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x28, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
354         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
355         gen_address_offset();
356         gen_one(R_S3);
358         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x30, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
359         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
360         gen_address_offset();
361         gen_one(R_S4);
363         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x38, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
364         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
365         gen_address_offset();
366         gen_one(R_S5);
368         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x40, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
369         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
370         gen_address_offset();
371         gen_one(R_S6);
373         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x48, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
374         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
375         gen_address_offset();
376         gen_one(R_S7);
378         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x50, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
379         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
380         gen_address_offset();
381         gen_one(R_S8);
383         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x58, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
384         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
385         gen_address_offset();
386         gen_one(R_S9);
388         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x60, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
389         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
390         gen_address_offset();
391         gen_one(R_S10);
393         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x68, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
394         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
395         gen_address_offset();
396         gen_one(R_S11);
398         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
399         gen_one(R_FRAME);
400         gen_one(R_ARG0);
402         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
403         gen_one(R_UPCALL);
404         gen_one(R_ARG1);
406         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
407         gen_one(R_TIMESTAMP);
408         gen_one(R_ARG2);
410         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
411         gen_one(R_ARG3);
413         return true;
416 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
418         g(gen_load_constant(ctx, R_RET1, (int32_t)ip));
420         gen_insn(INSN_JMP, 0, 0, 0);
421         gen_four(escape_label);
423         return true;
426 static bool attr_w gen_escape(struct codegen_context *ctx)
428         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
429         gen_one(R_RET0);
430         gen_one(R_FRAME);
432         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x08, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
433         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
434         gen_one(R_RA);
435         gen_address_offset();
437         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x10, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
438         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
439         gen_one(R_S0);
440         gen_address_offset();
442         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x18, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
443         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
444         gen_one(R_S1);
445         gen_address_offset();
447         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x20, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
448         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
449         gen_one(R_S2);
450         gen_address_offset();
452         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x28, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
453         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
454         gen_one(R_S3);
455         gen_address_offset();
457         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x30, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
458         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
459         gen_one(R_S4);
460         gen_address_offset();
462         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x38, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
463         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
464         gen_one(R_S5);
465         gen_address_offset();
467         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x40, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
468         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
469         gen_one(R_S6);
470         gen_address_offset();
472         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x48, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
473         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
474         gen_one(R_S7);
475         gen_address_offset();
477         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x50, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
478         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
479         gen_one(R_S8);
480         gen_address_offset();
482         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x58, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
483         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
484         gen_one(R_S9);
485         gen_address_offset();
487         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x60, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
488         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
489         gen_one(R_S10);
490         gen_address_offset();
492         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x68, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
493         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
494         gen_one(R_S11);
495         gen_address_offset();
497         g(gen_imm(ctx, FRAME_SIZE, IMM_PURPOSE_ADD, OP_SIZE_NATIVE));
498         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
499         gen_one(R_SP);
500         gen_one(R_SP);
501         gen_imm_offset();
503         gen_insn(INSN_RET, 0, 0, 0);
505         return true;
508 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
510         return true;
513 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
515         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
516         gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
517         gen_one(R_SCRATCH_NA_1);
518         gen_address_offset();
520         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_ADDRESS, 0, 0);
521         gen_one(R_SCRATCH_NA_1);
523         g(gen_upcall_end(ctx, n_args));
525         return true;
528 static bool attr_w gen_cmp_test_jmp(struct codegen_context *ctx, unsigned insn, unsigned op_size, unsigned reg1, unsigned reg2, unsigned cond, uint32_t label);
530 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
532         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
533         gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
534         gen_one(R_SCRATCH_1);
535         gen_address_offset();
537         g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_1, R_TIMESTAMP, COND_NE, escape_label));
539         return true;