arm: implement floating point register allocation
[ajla.git] / c1-arm.inc
blobd44c7332cbd5c0c10cbaac500d0e8406a6bcd91b
1 /*
2  * Copyright (C) 2024 Mikulas Patocka
3  *
4  * This file is part of Ajla.
5  *
6  * Ajla is free software: you can redistribute it and/or modify it under the
7  * terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * Ajla. If not, see <https://www.gnu.org/licenses/>.
17  */
/*
 * Target configuration for the ARM backend.
 * Both the native operand size and the address size are OP_SIZE_4.
 */
#define OP_SIZE_NATIVE			OP_SIZE_4
#define OP_SIZE_ADDRESS			OP_SIZE_4

#define JMP_LIMIT			JMP_SHORTEST

/* UNALIGNED_TRAP is 0 only when the compiler defines __ARM_FEATURE_UNALIGNED. */
#ifdef __ARM_FEATURE_UNALIGNED
#define UNALIGNED_TRAP			0
#else
#define UNALIGNED_TRAP			1
#endif

/* Flag-related queries; every one of them evaluates to 0 on this target. */
#define ALU_WRITES_FLAGS(alu, im)	0
#define ALU1_WRITES_FLAGS(alu)		0
#define ROT_WRITES_FLAGS(alu)		0
#define COND_IS_LOGICAL(cond)		0

/*
 * Architecture capability switches.  All are compile-time constants except
 * ARCH_HAS_DIV, which queries the idiv CPU feature when it is evaluated.
 */
#define ARCH_PARTIAL_ALU(size)		0
#define ARCH_IS_3ADDRESS		1
#define ARCH_HAS_FLAGS			1
#define ARCH_PREFERS_SX(size)		0
#define ARCH_HAS_BWX			1
#define ARCH_HAS_MUL			1
#define ARCH_HAS_DIV			cpu_test_feature(CPU_FEATURE_idiv)
#define ARCH_HAS_ANDN			1
#define ARCH_HAS_SHIFTED_ADD(bits)	1
#define ARCH_HAS_BTX(btx, size, cnst)	0
#define ARCH_SHIFT_SIZE			32
#define ARCH_NEEDS_BARRIER		0

/* Every requested operand size maps to OP_SIZE_4. */
#define i_size(size)			OP_SIZE_4
#define i_size_rot(size)		OP_SIZE_4
/* Integer register numbers (0x00 - 0x0f). */
#define R_0		0x00
#define R_1		0x01
#define R_2		0x02
#define R_3		0x03
#define R_4		0x04
#define R_5		0x05
#define R_6		0x06
#define R_7		0x07
#define R_8		0x08
#define R_9		0x09
#define R_10		0x0a
/* Registers 0x0b - 0x0f are referred to by name rather than by number. */
#define R_FP		0x0b
#define R_IP		0x0c
#define R_SP		0x0d
#define R_LR		0x0e
#define R_PC		0x0f
/*
 * FSR_n register numbers: 32 consecutive values, 0x20 - 0x3f
 * (presumably the single-precision floating point registers).
 */
#define FSR_0		0x20
#define FSR_1		0x21
#define FSR_2		0x22
#define FSR_3		0x23
#define FSR_4		0x24
#define FSR_5		0x25
#define FSR_6		0x26
#define FSR_7		0x27
#define FSR_8		0x28
#define FSR_9		0x29
#define FSR_10		0x2a
#define FSR_11		0x2b
#define FSR_12		0x2c
#define FSR_13		0x2d
#define FSR_14		0x2e
#define FSR_15		0x2f
#define FSR_16		0x30
#define FSR_17		0x31
#define FSR_18		0x32
#define FSR_19		0x33
#define FSR_20		0x34
#define FSR_21		0x35
#define FSR_22		0x36
#define FSR_23		0x37
#define FSR_24		0x38
#define FSR_25		0x39
#define FSR_26		0x3a
#define FSR_27		0x3b
#define FSR_28		0x3c
#define FSR_29		0x3d
#define FSR_30		0x3e
#define FSR_31		0x3f
/*
 * FDR_n register numbers.  Only even indices exist and each FDR_n has the
 * same number as FSR_n (presumably the double-precision view of a register
 * pair).
 */
#define FDR_0		0x20
#define FDR_2		0x22
#define FDR_4		0x24
#define FDR_6		0x26
#define FDR_8		0x28
#define FDR_10		0x2a
#define FDR_12		0x2c
#define FDR_14		0x2e
#define FDR_16		0x30
#define FDR_18		0x32
#define FDR_20		0x34
#define FDR_22		0x36
#define FDR_24		0x38
#define FDR_26		0x3a
#define FDR_28		0x3c
#define FDR_30		0x3e
/*
 * FQR_n register numbers.  They use the same even-index numbering as the
 * FDR_n set (presumably the quad-precision view - TODO confirm).
 */
#define FQR_0		0x20
#define FQR_2		0x22
#define FQR_4		0x24
#define FQR_6		0x26
#define FQR_8		0x28
#define FQR_10		0x2a
#define FQR_12		0x2c
#define FQR_14		0x2e
#define FQR_16		0x30
#define FQR_18		0x32
#define FQR_20		0x34
#define FQR_22		0x36
#define FQR_24		0x38
#define FQR_26		0x3a
#define FQR_28		0x3c
#define FQR_30		0x3e
/* Registers with a fixed role in generated code. */
#define R_FRAME		R_4
#define R_UPCALL	R_5

#define R_SAVED_1	R_6
#define R_SAVED_2	R_7

/* Scratch registers; note that R_SCRATCH_4 is the same register as R_SAVED_2. */
#define R_SCRATCH_1	R_0
#define R_SCRATCH_2	R_1
#define R_SCRATCH_3	R_2
#define R_SCRATCH_4	R_SAVED_2

/* Registers used to materialise address offsets and constants. */
#define R_OFFSET_IMM	R_LR
#define R_CONST_IMM	R_IP

/* R_SCRATCH_NA_3 exists only when HAVE_BITWISE_FRAME is defined. */
#define R_SCRATCH_NA_1	R_10
#define R_SCRATCH_NA_2	R_FP
#ifdef HAVE_BITWISE_FRAME
#define R_SCRATCH_NA_3	R_8
#endif

/* Argument and return value registers. */
#define R_ARG0		R_0
#define R_ARG1		R_1
#define R_ARG2		R_2
#define R_ARG3		R_3
#define R_RET0		R_0

/* Floating point scratch registers. */
#define FR_SCRATCH_1	FDR_0
#define FR_SCRATCH_2	FDR_2

/*
 * Bit masks of supported floating point features: 0x6 when the vfp CPU
 * feature is present, 0x1 when the half CPU feature is present, else 0.
 */
#define SUPPORTED_FP		(cpu_test_feature(CPU_FEATURE_vfp) * 0x6)
#define SUPPORTED_FP_HALF_CVT	(cpu_test_feature(CPU_FEATURE_half) * 0x1)
/*
 * Tell whether a register number lies in the floating point range
 * 0x20 - 0x3f (the range covered by the FSR/FDR/FQR definitions).
 */
static bool reg_is_fp(unsigned reg)
{
	if (reg < 0x20)
		return false;
	return reg < 0x40;
}
172 static const uint8_t regs_saved[] = {
173 #ifndef HAVE_BITWISE_FRAME
174         R_8,
175 #endif
176         R_9 };
177 static const uint8_t regs_volatile[] = { R_3 };
178 static const uint8_t fp_saved[] = { 0 };
179 #define n_fp_saved 0U
180 static const uint8_t fp_volatile[] = { FDR_4, FDR_6, FDR_8, FDR_10, FDR_12, FDR_14 };
181 #define reg_is_saved(r) ((r) == R_8 || (r) == R_9)
/*
 * Try to express c as an 8-bit value rotated by an even amount.
 *
 * For each even rotation 0, 2, ..., 30 the constant is rotated left by that
 * amount; the first rotation that leaves a value below 0x100 wins.
 *
 * Returns the 8-bit value in bits 0-7 combined with (rotation / 2) in
 * bits 8-11, or -1 when no even rotation fits.
 */
static int gen_imm12(uint32_t c)
{
	int shift = 0;
	while (shift < 32) {
		/* rotate left by "shift"; (-shift & 31) keeps the right shift below 32 */
		uint32_t rotated = c << shift | c >> (-shift & 31);
		if (rotated < 0x100)
			return rotated | (shift << 7);
		shift += 2;
	}
	return -1;
}
194 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint32_t c)
196         if (gen_imm12(c) >= 0 || gen_imm12(~c) >= 0) {
197                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
198                 gen_one(reg);
199                 gen_one(ARG_IMM);
200                 gen_eight(c);
201                 return true;
202         }
203         if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
204                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
205                 gen_one(reg);
206                 gen_one(ARG_IMM);
207                 gen_eight(c & 0xffff);
208                 if (c >> 16) {
209                         gen_insn(INSN_MOV_MASK, OP_SIZE_4, MOV_MASK_16_32, 0);
210                         gen_one(reg);
211                         gen_one(reg);
212                         gen_one(ARG_IMM);
213                         gen_eight(c >> 16);
214                 }
215         } else {
216                 bool need_init = true;
217                 int p;
218                 for (p = 0; p < 32; p += 8) {
219                         if ((c >> p) & 0xff) {
220                                 if (need_init) {
221                                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
222                                         gen_one(reg);
223                                         gen_one(ARG_IMM);
224                                         gen_eight(c & (0xff << p));
225                                         need_init = false;
226                                 } else {
227                                         gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, 0);
228                                         gen_one(reg);
229                                         gen_one(reg);
230                                         gen_one(ARG_IMM);
231                                         gen_eight(c & (0xff << p));
232                                 }
233                         }
234                 }
235                 if (need_init) {
236                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
237                         gen_one(reg);
238                         gen_one(ARG_IMM);
239                         gen_eight(0);
240                 }
241         }
242         return true;
245 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
247         ctx->base_reg = base;
248         ctx->offset_imm = imm;
249         ctx->offset_reg = false;
250         switch (purpose) {
251                 case IMM_PURPOSE_LDR_OFFSET:
252                 case IMM_PURPOSE_STR_OFFSET:
253                 case IMM_PURPOSE_MVI_CLI_OFFSET:
254                         if (size == OP_SIZE_2) {
255                                 if (imm >= -255 && imm <= 255)
256                                         return true;
257                         } else {
258                                 if (imm >= -4095 && imm <= 4095)
259                                         return true;
260                         }
261                         break;
262                 case IMM_PURPOSE_LDR_SX_OFFSET:
263                 case IMM_PURPOSE_LDP_STP_OFFSET:
264                         if (imm >= -255 && imm <= 255)
265                                 return true;
266                         break;
267                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
268                         if (size < OP_SIZE_4 && imm != 0)
269                                 break;
270                         if (unlikely((imm & 3) != 0))
271                                 break;
272                         if (imm >= -1023 && imm <= 1023)
273                                 return true;
274                         break;
275                 default:
276                         internal(file_line, "gen_address: invalid purpose %d", purpose);
277         }
278         if (purpose == IMM_PURPOSE_VLDR_VSTR_OFFSET) {
279                 if (gen_imm12(imm) >= 0) {
280                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
281                         gen_one(R_OFFSET_IMM);
282                         gen_one(base);
283                         gen_one(ARG_IMM);
284                         gen_eight(imm);
285                 } else {
286                         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
287                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
288                         gen_one(R_OFFSET_IMM);
289                         gen_one(R_OFFSET_IMM);
290                         gen_one(base);
291                 }
292                 ctx->base_reg = R_OFFSET_IMM;
293                 ctx->offset_imm = 0;
294                 return true;
295         }
296         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
297         ctx->offset_reg = true;
298         return true;
301 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
303         int imm12;
304         switch (purpose) {
305                 case IMM_PURPOSE_STORE_VALUE:
306                         break;
307                 case IMM_PURPOSE_ADD:
308                 case IMM_PURPOSE_SUB:
309                 case IMM_PURPOSE_CMP:
310                 case IMM_PURPOSE_CMP_LOGICAL:
311                 case IMM_PURPOSE_AND:
312                 case IMM_PURPOSE_OR:
313                 case IMM_PURPOSE_XOR:
314                 case IMM_PURPOSE_ANDN:
315                 case IMM_PURPOSE_TEST:
316                         imm12 = gen_imm12(imm);
317                         if (unlikely(imm12 == -1))
318                                 break;
319                         return true;
320                 case IMM_PURPOSE_CMOV:
321                         if (gen_imm12(imm) >= 0 || gen_imm12(~imm) >= 0)
322                                 return true;
323                         if ((uint32_t)imm < 0x10000 && likely(cpu_test_feature(CPU_FEATURE_armv6t2)))
324                                 return true;
325                         break;
326                 case IMM_PURPOSE_MUL:
327                         break;
328                 default:
329                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
330         }
331         return false;
334 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
336         if (is_direct_const(imm, purpose, size)) {
337                 ctx->const_imm = imm;
338                 ctx->const_reg = false;
339         } else {
340                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
341                 ctx->const_reg = true;
342         }
343         return true;
346 static bool attr_w gen_entry(struct codegen_context *ctx)
348         gen_insn(INSN_ARM_PUSH, OP_SIZE_NATIVE, 0, 0);
350         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
351         gen_one(R_FRAME);
352         gen_one(R_ARG0);
354         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
355         gen_one(R_UPCALL);
356         gen_one(R_ARG1);
358         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
359         gen_one(R_ARG3);
361         return true;
364 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
366         g(gen_load_constant(ctx, R_ARG1, ip));
368         gen_insn(INSN_JMP, 0, 0, 0);
369         gen_four(escape_label);
371         return true;
374 static bool attr_w gen_escape(struct codegen_context *ctx)
376         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
377         gen_one(R_ARG0);
378         gen_one(R_FRAME);
380         gen_insn(INSN_ARM_POP, OP_SIZE_NATIVE, 0, 0);
382         return true;
385 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
387         if (unlikely(arg >= 4))
388                 internal(file_line, "gen_upcall_argument: only 4 arguments supported");
389         return true;
392 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
394         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
395         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
396         gen_one(R_SCRATCH_NA_1);
397         gen_address_offset();
399         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
400         gen_one(R_SCRATCH_NA_1);
402         g(gen_upcall_end(ctx, n_args));
404         return true;
407 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
409         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
410         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
411         gen_one(R_SCRATCH_1);
412         gen_address_offset();
414         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
415         gen_one(R_SCRATCH_2);
416         gen_one(ARG_ADDRESS_1);
417         gen_one(R_SP);
418         gen_eight(0);
420         gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
421         gen_one(R_SCRATCH_1);
422         gen_one(R_SCRATCH_2);
424         gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
425         gen_four(escape_label);
427         return true;