ssa: move optimizations from P_BinaryOp to P_BinaryConstOp
[ajla.git] / c1-arm.inc
blob84c9e557dcdbb9f6533a4399a793ecefaf020ceb
1 /*
2  * Copyright (C) 2024 Mikulas Patocka
3  *
4  * This file is part of Ajla.
5  *
6  * Ajla is free software: you can redistribute it and/or modify it under the
7  * terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * Ajla. If not, see <https://www.gnu.org/licenses/>.
17  */
/* Native operand size and address size; both are OP_SIZE_4 on this target. */
19 #define OP_SIZE_NATIVE                  OP_SIZE_4
20 #define OP_SIZE_ADDRESS                 OP_SIZE_4
/* Jump range limit (JMP_SHORTEST is defined elsewhere; presumably the shortest jump form always suffices here -- confirm). */
22 #define JMP_LIMIT                       JMP_SHORTEST
/* UNALIGNED_TRAP is 1 unless the compiler defines __ARM_FEATURE_UNALIGNED. */
24 #ifndef __ARM_FEATURE_UNALIGNED
25 #define UNALIGNED_TRAP                  1
26 #else
27 #define UNALIGNED_TRAP                  0
28 #endif
/* All of these evaluate to 0 regardless of their arguments. */
30 #define ALU_WRITES_FLAGS(alu, im)       0
31 #define ALU1_WRITES_FLAGS(alu)          0
32 #define ROT_WRITES_FLAGS(alu, size, im) 0
33 #define COND_IS_LOGICAL(cond)           0
/*
 * Architecture capability switches. All are compile-time constants except
 * ARCH_HAS_DIV, which is decided by cpu_test_feature(CPU_FEATURE_idiv).
 */
35 #define ARCH_PARTIAL_ALU(size)          0
36 #define ARCH_IS_3ADDRESS(alu, f)        1
37 #define ARCH_IS_3ADDRESS_IMM(alu, f)    1
38 #define ARCH_IS_3ADDRESS_ROT(alu, size) 1
39 #define ARCH_IS_3ADDRESS_ROT_IMM(alu)   1
40 #define ARCH_IS_2ADDRESS(alu)           1
41 #define ARCH_IS_3ADDRESS_FP             1
42 #define ARCH_HAS_JMP_2REGS(cond)        0
43 #define ARCH_HAS_FLAGS                  1
44 #define ARCH_PREFERS_SX(size)           0
45 #define ARCH_HAS_BWX                    1
46 #define ARCH_HAS_MUL                    1
47 #define ARCH_HAS_DIV                    cpu_test_feature(CPU_FEATURE_idiv)
48 #define ARCH_HAS_ANDN                   1
49 #define ARCH_HAS_SHIFTED_ADD(bits)      1
50 #define ARCH_HAS_BTX(btx, size, cnst)   0
51 #define ARCH_SHIFT_SIZE                 32
52 #define ARCH_BOOL_SIZE                  OP_SIZE_4
53 #define ARCH_HAS_FP_GP_MOV              1
54 #define ARCH_NEEDS_BARRIER              0
/* Instruction size selectors: the size argument is ignored and OP_SIZE_4 is always used. */
56 #define i_size(size)                    OP_SIZE_4
57 #define i_size_rot(size)                OP_SIZE_4
58 #define i_size_cmp(size)                OP_SIZE_4
/*
 * General-purpose register numbers 0x00-0x0f. Registers 11-15 are only
 * available under the names R_FP, R_IP, R_SP, R_LR and R_PC.
 */
60 #define R_0             0x00
61 #define R_1             0x01
62 #define R_2             0x02
63 #define R_3             0x03
64 #define R_4             0x04
65 #define R_5             0x05
66 #define R_6             0x06
67 #define R_7             0x07
68 #define R_8             0x08
69 #define R_9             0x09
70 #define R_10            0x0a
71 #define R_FP            0x0b
72 #define R_IP            0x0c
73 #define R_SP            0x0d
74 #define R_LR            0x0e
75 #define R_PC            0x0f
/*
 * FSR_n register numbers 0x20-0x3f; this is exactly the range reg_is_fp()
 * accepts. (Presumably the single-precision VFP registers -- confirm.)
 */
77 #define FSR_0           0x20
78 #define FSR_1           0x21
79 #define FSR_2           0x22
80 #define FSR_3           0x23
81 #define FSR_4           0x24
82 #define FSR_5           0x25
83 #define FSR_6           0x26
84 #define FSR_7           0x27
85 #define FSR_8           0x28
86 #define FSR_9           0x29
87 #define FSR_10          0x2a
88 #define FSR_11          0x2b
89 #define FSR_12          0x2c
90 #define FSR_13          0x2d
91 #define FSR_14          0x2e
92 #define FSR_15          0x2f
93 #define FSR_16          0x30
94 #define FSR_17          0x31
95 #define FSR_18          0x32
96 #define FSR_19          0x33
97 #define FSR_20          0x34
98 #define FSR_21          0x35
99 #define FSR_22          0x36
100 #define FSR_23          0x37
101 #define FSR_24          0x38
102 #define FSR_25          0x39
103 #define FSR_26          0x3a
104 #define FSR_27          0x3b
105 #define FSR_28          0x3c
106 #define FSR_29          0x3d
107 #define FSR_30          0x3e
108 #define FSR_31          0x3f
/*
 * FDR_n has the same number as FSR_n; only even n are defined.
 * (Presumably double-precision registers overlapping FSR pairs -- confirm.)
 */
110 #define FDR_0           0x20
111 #define FDR_2           0x22
112 #define FDR_4           0x24
113 #define FDR_6           0x26
114 #define FDR_8           0x28
115 #define FDR_10          0x2a
116 #define FDR_12          0x2c
117 #define FDR_14          0x2e
118 #define FDR_16          0x30
119 #define FDR_18          0x32
120 #define FDR_20          0x34
121 #define FDR_22          0x36
122 #define FDR_24          0x38
123 #define FDR_26          0x3a
124 #define FDR_28          0x3c
125 #define FDR_30          0x3e
/* FQR_n uses the same numbering as FDR_n (even n only). */
127 #define FQR_0           0x20
128 #define FQR_2           0x22
129 #define FQR_4           0x24
130 #define FQR_6           0x26
131 #define FQR_8           0x28
132 #define FQR_10          0x2a
133 #define FQR_12          0x2c
134 #define FQR_14          0x2e
135 #define FQR_16          0x30
136 #define FQR_18          0x32
137 #define FQR_20          0x34
138 #define FQR_22          0x36
139 #define FQR_24          0x38
140 #define FQR_26          0x3a
141 #define FQR_28          0x3c
142 #define FQR_30          0x3e
/* gen_entry() copies R_ARG0 into R_FRAME and R_ARG1 into R_UPCALL. */
144 #define R_FRAME         R_4
145 #define R_UPCALL        R_5
/* Scratch registers. R_SCRATCH_4 is an alias of R_SAVED_2 (R_7), not a separate register. */
147 #define R_SCRATCH_1     R_0
148 #define R_SCRATCH_2     R_1
149 #define R_SCRATCH_3     R_2
150 #define R_SCRATCH_4     R_SAVED_2
152 #define R_SAVED_1       R_6
153 #define R_SAVED_2       R_7
/*
 * R_OFFSET_IMM is the register gen_address() loads an out-of-range offset
 * into; R_CONST_IMM is the register gen_imm() loads a constant into when it
 * cannot be used as a direct immediate.
 */
155 #define R_OFFSET_IMM    R_LR
156 #define R_CONST_IMM     R_IP
/* R_SCRATCH_NA_3 exists only when HAVE_BITWISE_FRAME is defined. */
158 #define R_SCRATCH_NA_1  R_10
159 #define R_SCRATCH_NA_2  R_FP
160 #ifdef HAVE_BITWISE_FRAME
161 #define R_SCRATCH_NA_3  R_8
162 #endif
/* Argument registers are R_0-R_3; the return value register is R_0. */
164 #define R_ARG0          R_0
165 #define R_ARG1          R_1
166 #define R_ARG2          R_2
167 #define R_ARG3          R_3
168 #define R_RET0          R_0
170 #define FR_SCRATCH_1    FDR_0
171 #define FR_SCRATCH_2    FDR_2
/*
 * Bit masks scaled by a CPU feature test: 0x6 with CPU_FEATURE_vfp and 0x1
 * with CPU_FEATURE_half (assuming cpu_test_feature() yields 0 or 1 -- confirm).
 */
173 #define SUPPORTED_FP            (cpu_test_feature(CPU_FEATURE_vfp) * 0x6)
174 #define SUPPORTED_FP_HALF_CVT   (cpu_test_feature(CPU_FEATURE_half) * 0x1)
/*
 * Tell whether a register number denotes a floating-point register.
 * Floating-point registers occupy the numbers 0x20..0x3f, i.e. exactly the
 * values whose bits above the low five are equal to 1.
 */
static bool reg_is_fp(unsigned reg)
{
	return (reg >> 5) == 1;
}
/* Saved general-purpose registers; R_8 is listed only when HAVE_BITWISE_FRAME is not defined. */
181 static const uint8_t regs_saved[] = {
182 #ifndef HAVE_BITWISE_FRAME
183         R_8,
184 #endif
185         R_9 };
/* Volatile general-purpose registers. */
186 static const uint8_t regs_volatile[] = { R_3 };
/* n_fp_saved is 0, so the single 0 entry of fp_saved[] is a placeholder (presumably to avoid an empty array). */
187 static const uint8_t fp_saved[] = { 0 };
188 #define n_fp_saved 0U
/* Volatile floating-point registers. */
189 static const uint8_t fp_volatile[] = { FDR_4, FDR_6, FDR_8, FDR_10, FDR_12, FDR_14 };
/* NOTE(review): this reports R_8 as saved unconditionally, even when HAVE_BITWISE_FRAME removes it from regs_saved[]. */
190 #define reg_is_saved(r) ((r) == R_8 || (r) == R_9)
/*
 * Try to express c as an 8-bit value rotated by an even number of bits.
 *
 * Each even left-rotation of c (0, 2, ..., 30) is tried in ascending order;
 * the first one that leaves a value below 0x100 wins. The result packs the
 * 8-bit value into bits 0-7 and half of the rotation count into bits 8-11
 * (rotation << 7). Returns -1 when no such rotation exists.
 */
static int gen_imm12(uint32_t c)
{
	unsigned shift = 0;

	while (shift < 32) {
		/* Rotate left by shift; shift == 0 is handled apart to avoid a 32-bit shift. */
		uint32_t rotated = shift ? (c << shift) | (c >> (32 - shift)) : c;

		if (rotated <= 0xff)
			return (int)(rotated | (shift << 7));
		shift += 2;
	}
	return -1;
}
203 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint32_t c)
205         if (gen_imm12(c) >= 0 || gen_imm12(~c) >= 0) {
206                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
207                 gen_one(reg);
208                 gen_one(ARG_IMM);
209                 gen_eight(c);
210                 return true;
211         }
212         if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
213                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
214                 gen_one(reg);
215                 gen_one(ARG_IMM);
216                 gen_eight(c & 0xffff);
217                 if (c >> 16) {
218                         gen_insn(INSN_MOV_MASK, OP_SIZE_4, MOV_MASK_16_32, 0);
219                         gen_one(reg);
220                         gen_one(reg);
221                         gen_one(ARG_IMM);
222                         gen_eight(c >> 16);
223                 }
224         } else {
225                 bool need_init = true;
226                 int p;
227                 for (p = 0; p < 32; p += 8) {
228                         if ((c >> p) & 0xff) {
229                                 if (need_init) {
230                                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
231                                         gen_one(reg);
232                                         gen_one(ARG_IMM);
233                                         gen_eight(c & (0xff << p));
234                                         need_init = false;
235                                 } else {
236                                         gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, 0);
237                                         gen_one(reg);
238                                         gen_one(reg);
239                                         gen_one(ARG_IMM);
240                                         gen_eight(c & (0xff << p));
241                                 }
242                         }
243                 }
244                 if (need_init) {
245                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
246                         gen_one(reg);
247                         gen_one(ARG_IMM);
248                         gen_eight(0);
249                 }
250         }
251         return true;
254 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
256         ctx->base_reg = base;
257         ctx->offset_imm = imm;
258         ctx->offset_reg = false;
259         switch (purpose) {
260                 case IMM_PURPOSE_LDR_OFFSET:
261                 case IMM_PURPOSE_STR_OFFSET:
262                 case IMM_PURPOSE_MVI_CLI_OFFSET:
263                         if (size == OP_SIZE_2) {
264                                 if (imm >= -255 && imm <= 255)
265                                         return true;
266                         } else {
267                                 if (imm >= -4095 && imm <= 4095)
268                                         return true;
269                         }
270                         break;
271                 case IMM_PURPOSE_LDR_SX_OFFSET:
272                 case IMM_PURPOSE_LDP_STP_OFFSET:
273                         if (imm >= -255 && imm <= 255)
274                                 return true;
275                         break;
276                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
277                         if (size < OP_SIZE_4 && imm != 0)
278                                 break;
279                         if (unlikely((imm & 3) != 0))
280                                 break;
281                         if (imm >= -1023 && imm <= 1023)
282                                 return true;
283                         break;
284                 default:
285                         internal(file_line, "gen_address: invalid purpose %d", purpose);
286         }
287         if (purpose == IMM_PURPOSE_VLDR_VSTR_OFFSET) {
288                 if (gen_imm12(imm) >= 0) {
289                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
290                         gen_one(R_OFFSET_IMM);
291                         gen_one(base);
292                         gen_one(ARG_IMM);
293                         gen_eight(imm);
294                 } else {
295                         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
296                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
297                         gen_one(R_OFFSET_IMM);
298                         gen_one(R_OFFSET_IMM);
299                         gen_one(base);
300                 }
301                 ctx->base_reg = R_OFFSET_IMM;
302                 ctx->offset_imm = 0;
303                 return true;
304         }
305         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
306         ctx->offset_reg = true;
307         return true;
310 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
312         int imm12;
313         switch (purpose) {
314                 case IMM_PURPOSE_STORE_VALUE:
315                         break;
316                 case IMM_PURPOSE_ADD:
317                 case IMM_PURPOSE_SUB:
318                 case IMM_PURPOSE_CMP:
319                 case IMM_PURPOSE_CMP_LOGICAL:
320                 case IMM_PURPOSE_AND:
321                 case IMM_PURPOSE_OR:
322                 case IMM_PURPOSE_XOR:
323                 case IMM_PURPOSE_ANDN:
324                 case IMM_PURPOSE_TEST:
325                         imm12 = gen_imm12(imm);
326                         if (unlikely(imm12 == -1))
327                                 break;
328                         return true;
329                 case IMM_PURPOSE_CMOV:
330                         if (gen_imm12(imm) >= 0 || gen_imm12(~imm) >= 0)
331                                 return true;
332                         if ((uint32_t)imm < 0x10000 && likely(cpu_test_feature(CPU_FEATURE_armv6t2)))
333                                 return true;
334                         break;
335                 case IMM_PURPOSE_MUL:
336                         break;
337                 default:
338                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
339         }
340         return false;
343 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
345         if (is_direct_const(imm, purpose, size)) {
346                 ctx->const_imm = imm;
347                 ctx->const_reg = false;
348         } else {
349                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
350                 ctx->const_reg = true;
351         }
352         return true;
/*
 * Emit the entry sequence: INSN_ARM_PUSH, a move of R_ARG0 into R_FRAME, a
 * move of R_ARG1 into R_UPCALL, then an indirect jump through R_ARG3.
 * NOTE(review): which registers INSN_ARM_PUSH saves is not visible here.
 */
355 static bool attr_w gen_entry(struct codegen_context *ctx)
357         gen_insn(INSN_ARM_PUSH, OP_SIZE_NATIVE, 0, 0);
/* R_FRAME = R_ARG0 */
359         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
360         gen_one(R_FRAME);
361         gen_one(R_ARG0);
/* R_UPCALL = R_ARG1 */
363         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
364         gen_one(R_UPCALL);
365         gen_one(R_ARG1);
/* Continue at the address held in R_ARG3. */
367         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
368         gen_one(R_ARG3);
370         return true;
/*
 * Load ip into R_ARG1 and emit an unconditional jump to escape_label.
 * g() wraps the call to gen_load_constant (presumably propagating failure -- confirm).
 */
373 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
375         g(gen_load_constant(ctx, R_ARG1, ip));
377         gen_insn(INSN_JMP, 0, 0, 0);
378         gen_four(escape_label);
380         return true;
/*
 * Emit the escape sequence: move R_FRAME into R_ARG0 (which is also R_RET0),
 * then INSN_ARM_POP, the counterpart of the INSN_ARM_PUSH in gen_entry().
 * NOTE(review): presumably the pop also returns to the caller -- confirm.
 */
383 static bool attr_w gen_escape(struct codegen_context *ctx)
385         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
386         gen_one(R_ARG0);
387         gen_one(R_FRAME);
389         gen_insn(INSN_ARM_POP, OP_SIZE_NATIVE, 0, 0);
391         return true;
394 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
396         if (unlikely(arg >= 4))
397                 internal(file_line, "gen_upcall_argument: only 4 arguments supported");
398         return true;
/*
 * Emit a 4-byte load into reg from the address R_UPCALL + offset.
 * gen_address() prepares the address; gen_address_offset() is defined
 * elsewhere (presumably it emits the operand gen_address() set up -- confirm).
 */
401 static bool attr_w gen_get_upcall_pointer(struct codegen_context *ctx, unsigned offset, unsigned reg)
403         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
404         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
405         gen_one(reg);
406         gen_address_offset();
408         return true;
/*
 * Emit an upcall: fetch the pointer stored at R_UPCALL + offset into
 * R_SCRATCH_NA_1, call it indirectly, then run gen_upcall_end() with n_args
 * (gen_upcall_end is defined elsewhere).
 */
411 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
413         g(gen_get_upcall_pointer(ctx, offset, R_SCRATCH_NA_1));
415         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
416         gen_one(R_SCRATCH_NA_1);
418         g(gen_upcall_end(ctx, n_args));
420         return true;
/*
 * Emit a timestamp check: load the ts field of the upcall vector (via
 * R_UPCALL) into R_SCRATCH_1, load the word at R_SP + 0 into R_SCRATCH_2,
 * compare them and jump to escape_label when they differ.
 * NOTE(review): presumably the word at the top of the stack holds the
 * timestamp saved at entry -- confirm against the caller.
 */
423 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
/* R_SCRATCH_1 = upcall vector ts field */
425         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
426         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
427         gen_one(R_SCRATCH_1);
428         gen_address_offset();
/* R_SCRATCH_2 = word at R_SP + 0 */
430         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
431         gen_one(R_SCRATCH_2);
432         gen_one(ARG_ADDRESS_1);
433         gen_one(R_SP);
434         gen_eight(0);
436         gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
437         gen_one(R_SCRATCH_1);
438         gen_one(R_SCRATCH_2);
/* Escape when the two values are not equal. */
440         gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
441         gen_four(escape_label);
443         return true;