x86: use push and pop also on x32
[ajla.git] / c1-arm.inc
blobc27da1342ff80368e01690e692583de05a8c509f
/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */

/* The native operand size and the address size both map to OP_SIZE_4. */
#define OP_SIZE_NATIVE			OP_SIZE_4
#define OP_SIZE_ADDRESS			OP_SIZE_4

#define JMP_LIMIT			JMP_SHORTEST

/*
 * UNALIGNED_TRAP is 1 unless the compiler predefines
 * __ARM_FEATURE_UNALIGNED, in which case it is 0.
 */
#ifndef __ARM_FEATURE_UNALIGNED
#define UNALIGNED_TRAP			1
#else
#define UNALIGNED_TRAP			0
#endif

/* These predicates ignore their arguments and always evaluate to 0. */
#define ALU_WRITES_FLAGS(alu, im)	0
#define ALU1_WRITES_FLAGS(alu)		0
#define ROT_WRITES_FLAGS(alu)		0
#define COND_IS_LOGICAL(cond)		0

/*
 * Architecture capability switches.  All of them are constants except
 * ARCH_HAS_DIV, which expands to a cpu_test_feature(CPU_FEATURE_idiv) call
 * and is therefore evaluated wherever the macro is used.
 */
#define ARCH_PARTIAL_ALU(size)		0
#define ARCH_IS_3ADDRESS		1
#define ARCH_HAS_FLAGS			1
#define ARCH_PREFERS_SX(size)		0
#define ARCH_HAS_BWX			1
#define ARCH_HAS_MUL			1
#define ARCH_HAS_DIV			cpu_test_feature(CPU_FEATURE_idiv)
#define ARCH_HAS_ANDN			1
#define ARCH_HAS_SHIFTED_ADD(bits)	1
#define ARCH_HAS_BTX(btx, size, cnst)	0
#define ARCH_SHIFT_SIZE			32
#define ARCH_NEEDS_BARRIER		0

/* Both size mappers ignore the requested size and yield OP_SIZE_4. */
#define i_size(size)			OP_SIZE_4
#define i_size_rot(size)		OP_SIZE_4

/*
 * Integer register numbers 0x00..0x0f.  Registers 11..15 are only given
 * the symbolic names R_FP, R_IP, R_SP, R_LR and R_PC.
 */
#define R_0		0x00
#define R_1		0x01
#define R_2		0x02
#define R_3		0x03
#define R_4		0x04
#define R_5		0x05
#define R_6		0x06
#define R_7		0x07
#define R_8		0x08
#define R_9		0x09
#define R_10		0x0a
#define R_FP		0x0b
#define R_IP		0x0c
#define R_SP		0x0d
#define R_LR		0x0e
#define R_PC		0x0f

/*
 * Floating-point register numbers.  They occupy 0x20..0x3f, the range that
 * reg_is_fp() tests for.
 *
 * FSR_n is defined for n = 0..31 with the value 0x20 + n.
 */
#define FSR_0		0x20
#define FSR_1		0x21
#define FSR_2		0x22
#define FSR_3		0x23
#define FSR_4		0x24
#define FSR_5		0x25
#define FSR_6		0x26
#define FSR_7		0x27
#define FSR_8		0x28
#define FSR_9		0x29
#define FSR_10		0x2a
#define FSR_11		0x2b
#define FSR_12		0x2c
#define FSR_13		0x2d
#define FSR_14		0x2e
#define FSR_15		0x2f
#define FSR_16		0x30
#define FSR_17		0x31
#define FSR_18		0x32
#define FSR_19		0x33
#define FSR_20		0x34
#define FSR_21		0x35
#define FSR_22		0x36
#define FSR_23		0x37
#define FSR_24		0x38
#define FSR_25		0x39
#define FSR_26		0x3a
#define FSR_27		0x3b
#define FSR_28		0x3c
#define FSR_29		0x3d
#define FSR_30		0x3e
#define FSR_31		0x3f

/* FDR_n exists only for even n and has the same value as FSR_n. */
#define FDR_0		0x20
#define FDR_2		0x22
#define FDR_4		0x24
#define FDR_6		0x26
#define FDR_8		0x28
#define FDR_10		0x2a
#define FDR_12		0x2c
#define FDR_14		0x2e
#define FDR_16		0x30
#define FDR_18		0x32
#define FDR_20		0x34
#define FDR_22		0x36
#define FDR_24		0x38
#define FDR_26		0x3a
#define FDR_28		0x3c
#define FDR_30		0x3e

/* FQR_n exists only for even n and has the same value as FSR_n. */
#define FQR_0		0x20
#define FQR_2		0x22
#define FQR_4		0x24
#define FQR_6		0x26
#define FQR_8		0x28
#define FQR_10		0x2a
#define FQR_12		0x2c
#define FQR_14		0x2e
#define FQR_16		0x30
#define FQR_18		0x32
#define FQR_20		0x34
#define FQR_22		0x36
#define FQR_24		0x38
#define FQR_26		0x3a
#define FQR_28		0x3c
#define FQR_30		0x3e

/*
 * Fixed register roles.  gen_entry() copies R_ARG0 into R_FRAME and R_ARG1
 * into R_UPCALL.
 */
#define R_FRAME		R_4
#define R_UPCALL	R_5

/* Scratch registers; these are the same registers as R_ARG0..R_ARG3. */
#define R_SCRATCH_1	R_0
#define R_SCRATCH_2	R_1
#define R_SCRATCH_3	R_2
#define R_SCRATCH_4	R_3

#define R_SAVED_1	R_6
#define R_SAVED_2	R_7

/*
 * R_OFFSET_IMM receives address offsets materialized by gen_address();
 * R_CONST_IMM receives constants materialized by gen_imm().
 */
#define R_OFFSET_IMM	R_8
#define R_CONST_IMM	R_IP

#define R_SCRATCH_NA_1	R_10
#define R_SCRATCH_NA_2	R_FP
#define R_SCRATCH_NA_3	R_LR

/* Argument registers and the return-value register. */
#define R_ARG0		R_0
#define R_ARG1		R_1
#define R_ARG2		R_2
#define R_ARG3		R_3
#define R_RET0		R_0

#define FR_SCRATCH_1	FQR_0
#define FR_SCRATCH_2	FQR_4

/*
 * Feature-dependent masks: the result of cpu_test_feature() is multiplied
 * by the mask, so a zero result yields 0.
 * NOTE(review): the meaning of the individual mask bits is defined outside
 * this file section - confirm against the users of SUPPORTED_FP.
 */
#define SUPPORTED_FP		(cpu_test_feature(CPU_FEATURE_vfp) * 0x6)
#define SUPPORTED_FP_HALF_CVT	(cpu_test_feature(CPU_FEATURE_half) * 0x1)

/*
 * Tell whether a register number denotes a floating-point register,
 * i.e. lies in the range 0x20..0x3f used by the FSR_, FDR_ and FQR_
 * definitions.
 */
static bool reg_is_fp(unsigned reg)
{
	return reg >= 0x20 && reg < 0x40;
}

/* One-element table holding a single zero entry. */
static const uint8_t reg_available[] = { 0 };
/* Every register is reported as saved, regardless of the argument. */
#define reg_is_saved(r)	1

/*
 * Try to express the 32-bit constant c as an 8-bit value combined with an
 * even rotation.
 *
 * For each even rot in 0..30, c is rotated left by rot bits; the first
 * rotation whose result fits in 8 bits wins.  The return value holds that
 * 8-bit result in bits 0..7 and rot << 7 above it (that is, rot / 2 starting
 * at bit 8).  Returns -1 when no even rotation yields a value below 0x100.
 *
 * NOTE(review): the layout looks like the ARM data-processing "modified
 * immediate" field - confirm against the instruction encoder.
 */
static int gen_imm12(uint32_t c)
{
	int rot;

	for (rot = 0; rot < 32; rot += 2) {
		/*
		 * Rotate left by rot.  Masking the right-shift count with 31
		 * keeps it in 0..31, so rot == 0 does not shift by 32.
		 */
		uint32_t val = c << rot | c >> (-rot & 31);

		if (val < 0x100)
			return (int)(val | ((uint32_t)rot << 7));
	}
	return -1;
}

184 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint32_t c)
186         if (gen_imm12(c) >= 0 || gen_imm12(~c) >= 0) {
187                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
188                 gen_one(reg);
189                 gen_one(ARG_IMM);
190                 gen_eight(c);
191                 return true;
192         }
193         if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
194                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
195                 gen_one(reg);
196                 gen_one(ARG_IMM);
197                 gen_eight(c & 0xffff);
198                 if (c >> 16) {
199                         gen_insn(INSN_MOV_MASK, OP_SIZE_4, MOV_MASK_16_32, 0);
200                         gen_one(reg);
201                         gen_one(reg);
202                         gen_one(ARG_IMM);
203                         gen_eight(c >> 16);
204                 }
205         } else {
206                 bool need_init = true;
207                 int p;
208                 for (p = 0; p < 32; p += 8) {
209                         if ((c >> p) & 0xff) {
210                                 if (need_init) {
211                                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
212                                         gen_one(reg);
213                                         gen_one(ARG_IMM);
214                                         gen_eight(c & (0xff << p));
215                                         need_init = false;
216                                 } else {
217                                         gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, 0);
218                                         gen_one(reg);
219                                         gen_one(reg);
220                                         gen_one(ARG_IMM);
221                                         gen_eight(c & (0xff << p));
222                                 }
223                         }
224                 }
225                 if (need_init) {
226                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
227                         gen_one(reg);
228                         gen_one(ARG_IMM);
229                         gen_eight(0);
230                 }
231         }
232         return true;
235 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
237         ctx->base_reg = base;
238         ctx->offset_imm = imm;
239         ctx->offset_reg = false;
240         switch (purpose) {
241                 case IMM_PURPOSE_LDR_OFFSET:
242                 case IMM_PURPOSE_STR_OFFSET:
243                 case IMM_PURPOSE_MVI_CLI_OFFSET:
244                         if (size == OP_SIZE_2) {
245                                 if (imm >= -255 && imm <= 255)
246                                         return true;
247                         } else {
248                                 if (imm >= -4095 && imm <= 4095)
249                                         return true;
250                         }
251                         break;
252                 case IMM_PURPOSE_LDR_SX_OFFSET:
253                 case IMM_PURPOSE_LDP_STP_OFFSET:
254                         if (imm >= -255 && imm <= 255)
255                                 return true;
256                         break;
257                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
258                         if (size < OP_SIZE_4 && imm != 0)
259                                 break;
260                         if (unlikely((imm & 3) != 0))
261                                 break;
262                         if (imm >= -1023 && imm <= 1023)
263                                 return true;
264                         break;
265                 default:
266                         internal(file_line, "gen_address: invalid purpose %d", purpose);
267         }
268         if (purpose == IMM_PURPOSE_VLDR_VSTR_OFFSET) {
269                 if (gen_imm12(imm) >= 0) {
270                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
271                         gen_one(R_OFFSET_IMM);
272                         gen_one(base);
273                         gen_one(ARG_IMM);
274                         gen_eight(imm);
275                 } else {
276                         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
277                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
278                         gen_one(R_OFFSET_IMM);
279                         gen_one(R_OFFSET_IMM);
280                         gen_one(base);
281                 }
282                 ctx->base_reg = R_OFFSET_IMM;
283                 ctx->offset_imm = 0;
284                 return true;
285         }
286         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
287         ctx->offset_reg = true;
288         return true;
291 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
293         int imm12;
294         switch (purpose) {
295                 case IMM_PURPOSE_STORE_VALUE:
296                         break;
297                 case IMM_PURPOSE_ADD:
298                 case IMM_PURPOSE_SUB:
299                 case IMM_PURPOSE_CMP:
300                 case IMM_PURPOSE_CMP_LOGICAL:
301                 case IMM_PURPOSE_AND:
302                 case IMM_PURPOSE_OR:
303                 case IMM_PURPOSE_XOR:
304                 case IMM_PURPOSE_ANDN:
305                 case IMM_PURPOSE_TEST:
306                         imm12 = gen_imm12(imm);
307                         if (unlikely(imm12 == -1))
308                                 break;
309                         return true;
310                 case IMM_PURPOSE_CMOV:
311                         if (gen_imm12(imm) >= 0 || gen_imm12(~imm) >= 0)
312                                 return true;
313                         if ((uint32_t)imm < 0x10000 && likely(cpu_test_feature(CPU_FEATURE_armv6t2)))
314                                 return true;
315                         break;
316                 case IMM_PURPOSE_MUL:
317                         break;
318                 default:
319                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
320         }
321         return false;
324 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
326         if (is_direct_const(imm, purpose, size)) {
327                 ctx->const_imm = imm;
328                 ctx->const_reg = false;
329         } else {
330                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
331                 ctx->const_reg = true;
332         }
333         return true;
336 static bool attr_w gen_entry(struct codegen_context *ctx)
338         gen_insn(INSN_ARM_PUSH, OP_SIZE_NATIVE, 0, 0);
340         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
341         gen_one(R_FRAME);
342         gen_one(R_ARG0);
344         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
345         gen_one(R_UPCALL);
346         gen_one(R_ARG1);
348         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
349         gen_one(R_ARG3);
351         return true;
354 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
356         g(gen_load_constant(ctx, R_ARG1, ip));
358         gen_insn(INSN_JMP, 0, 0, 0);
359         gen_four(escape_label);
361         return true;
364 static bool attr_w gen_escape(struct codegen_context *ctx)
366         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
367         gen_one(R_ARG0);
368         gen_one(R_FRAME);
370         gen_insn(INSN_ARM_POP, OP_SIZE_NATIVE, 0, 0);
372         return true;
375 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
377         if (unlikely(arg >= 4))
378                 internal(file_line, "gen_upcall_argument: only 4 arguments supported");
379         return true;
382 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
384         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
385         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
386         gen_one(R_SCRATCH_NA_1);
387         gen_address_offset();
389         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
390         gen_one(R_SCRATCH_NA_1);
392         g(gen_upcall_end(ctx, n_args));
394         return true;
397 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
399         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
400         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
401         gen_one(R_SCRATCH_1);
402         gen_address_offset();
404         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
405         gen_one(R_SCRATCH_2);
406         gen_one(ARG_ADDRESS_1);
407         gen_one(R_SP);
408         gen_eight(0);
410         gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
411         gen_one(R_SCRATCH_1);
412         gen_one(R_SCRATCH_2);
414         gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
415         gen_four(escape_label);
417         return true;