s390: use signed tag comparisons, so that we could use
[ajla.git] / c1-arm.inc
blobcd4064a575fc73c5f956f61a874ea86d05f1c97c
1 /*
2  * Copyright (C) 2024 Mikulas Patocka
3  *
4  * This file is part of Ajla.
5  *
6  * Ajla is free software: you can redistribute it and/or modify it under the
7  * terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * Ajla. If not, see <https://www.gnu.org/licenses/>.
17  */
/*
 * Target description macros for this code generator backend.
 * Both the native operand size and the address size are OP_SIZE_4.
 */
19 #define OP_SIZE_NATIVE                  OP_SIZE_4
20 #define OP_SIZE_ADDRESS                 OP_SIZE_4
/* Jump range setting; JMP_SHORTEST is defined outside this file section. */
22 #define JMP_LIMIT                       JMP_SHORTEST
/* UNALIGNED_TRAP is 1 unless the compiler defines __ARM_FEATURE_UNALIGNED. */
24 #ifndef __ARM_FEATURE_UNALIGNED
25 #define UNALIGNED_TRAP                  1
26 #else
27 #define UNALIGNED_TRAP                  0
28 #endif
/*
 * Flag-behaviour queries: every one of them evaluates to 0 regardless of
 * its arguments.
 */
30 #define ALU_WRITES_FLAGS(alu, im)       0
31 #define ALU1_WRITES_FLAGS(alu)          0
32 #define ROT_WRITES_FLAGS(alu, size, im) 0
33 #define COND_IS_LOGICAL(cond)           0
/*
 * Architecture capability switches. All are compile-time constants except
 * ARCH_HAS_DIV, which expands to a cpu_test_feature(CPU_FEATURE_idiv) call.
 * NOTE(review): the exact meaning of each switch is defined by the generic
 * code generator that consumes them, which is not visible here.
 */
35 #define ARCH_PARTIAL_ALU(size)          0
36 #define ARCH_IS_3ADDRESS(alu, f)        1
37 #define ARCH_IS_3ADDRESS_IMM(alu, f)    1
38 #define ARCH_IS_3ADDRESS_ROT(alu, size) 1
39 #define ARCH_IS_3ADDRESS_ROT_IMM(alu)   1
40 #define ARCH_IS_2ADDRESS(alu)           1
41 #define ARCH_IS_3ADDRESS_FP             1
42 #define ARCH_HAS_JMP_2REGS(cond)        0
43 #define ARCH_HAS_FLAGS                  1
44 #define ARCH_PREFERS_SX(size)           0
45 #define ARCH_HAS_BWX                    1
46 #define ARCH_HAS_MUL                    1
47 #define ARCH_HAS_DIV                    cpu_test_feature(CPU_FEATURE_idiv)
48 #define ARCH_HAS_ANDN                   1
49 #define ARCH_HAS_SHIFTED_ADD(bits)      1
50 #define ARCH_HAS_BTX(btx, size, cnst)   0
51 #define ARCH_SHIFT_SIZE                 32
52 #define ARCH_HAS_FP_GP_MOV              1
53 #define ARCH_NEEDS_BARRIER              0
/* The size argument is ignored: these always yield OP_SIZE_4. */
55 #define i_size(size)                    OP_SIZE_4
56 #define i_size_rot(size)                OP_SIZE_4
57 #define i_size_cmp(size)                OP_SIZE_4
/*
 * Register codes used by the code generator.
 * General-purpose registers occupy codes 0x00-0x0f; R_FP, R_IP, R_SP, R_LR
 * and R_PC name codes 0x0b-0x0f.
 */
59 #define R_0             0x00
60 #define R_1             0x01
61 #define R_2             0x02
62 #define R_3             0x03
63 #define R_4             0x04
64 #define R_5             0x05
65 #define R_6             0x06
66 #define R_7             0x07
67 #define R_8             0x08
68 #define R_9             0x09
69 #define R_10            0x0a
70 #define R_FP            0x0b
71 #define R_IP            0x0c
72 #define R_SP            0x0d
73 #define R_LR            0x0e
74 #define R_PC            0x0f
/*
 * Floating-point register codes: FSR_n is 0x20 + n, covering 0x20-0x3f
 * (the range accepted by reg_is_fp()).
 */
76 #define FSR_0           0x20
77 #define FSR_1           0x21
78 #define FSR_2           0x22
79 #define FSR_3           0x23
80 #define FSR_4           0x24
81 #define FSR_5           0x25
82 #define FSR_6           0x26
83 #define FSR_7           0x27
84 #define FSR_8           0x28
85 #define FSR_9           0x29
86 #define FSR_10          0x2a
87 #define FSR_11          0x2b
88 #define FSR_12          0x2c
89 #define FSR_13          0x2d
90 #define FSR_14          0x2e
91 #define FSR_15          0x2f
92 #define FSR_16          0x30
93 #define FSR_17          0x31
94 #define FSR_18          0x32
95 #define FSR_19          0x33
96 #define FSR_20          0x34
97 #define FSR_21          0x35
98 #define FSR_22          0x36
99 #define FSR_23          0x37
100 #define FSR_24          0x38
101 #define FSR_25          0x39
102 #define FSR_26          0x3a
103 #define FSR_27          0x3b
104 #define FSR_28          0x3c
105 #define FSR_29          0x3d
106 #define FSR_30          0x3e
107 #define FSR_31          0x3f
/*
 * FDR_n exists only for even n and has the same code as FSR_n.
 * NOTE(review): presumably the double-precision view of the same register
 * file - confirm against the instruction encoder.
 */
109 #define FDR_0           0x20
110 #define FDR_2           0x22
111 #define FDR_4           0x24
112 #define FDR_6           0x26
113 #define FDR_8           0x28
114 #define FDR_10          0x2a
115 #define FDR_12          0x2c
116 #define FDR_14          0x2e
117 #define FDR_16          0x30
118 #define FDR_18          0x32
119 #define FDR_20          0x34
120 #define FDR_22          0x36
121 #define FDR_24          0x38
122 #define FDR_26          0x3a
123 #define FDR_28          0x3c
124 #define FDR_30          0x3e
/*
 * FQR_n likewise exists only for even n and shares the code of FSR_n/FDR_n.
 * NOTE(review): presumably a quad-width view - confirm against the encoder.
 */
126 #define FQR_0           0x20
127 #define FQR_2           0x22
128 #define FQR_4           0x24
129 #define FQR_6           0x26
130 #define FQR_8           0x28
131 #define FQR_10          0x2a
132 #define FQR_12          0x2c
133 #define FQR_14          0x2e
134 #define FQR_16          0x30
135 #define FQR_18          0x32
136 #define FQR_20          0x34
137 #define FQR_22          0x36
138 #define FQR_24          0x38
139 #define FQR_26          0x3a
140 #define FQR_28          0x3c
141 #define FQR_30          0x3e
/*
 * Fixed register roles.
 * gen_entry() copies R_ARG0 into R_FRAME and R_ARG1 into R_UPCALL.
 */
143 #define R_FRAME         R_4
144 #define R_UPCALL        R_5
/*
 * Scratch registers. R_SCRATCH_4 aliases R_SAVED_2; R_SAVED_2 is defined
 * a few lines below, which is fine because macros expand at the point of use.
 */
146 #define R_SCRATCH_1     R_0
147 #define R_SCRATCH_2     R_1
148 #define R_SCRATCH_3     R_2
149 #define R_SCRATCH_4     R_SAVED_2
151 #define R_SAVED_1       R_6
152 #define R_SAVED_2       R_7
/*
 * R_OFFSET_IMM receives address offsets that gen_address() cannot encode
 * directly; R_CONST_IMM receives constants that gen_imm() cannot encode.
 */
154 #define R_OFFSET_IMM    R_LR
155 #define R_CONST_IMM     R_IP
/* R_SCRATCH_NA_3 exists only when HAVE_BITWISE_FRAME is defined. */
157 #define R_SCRATCH_NA_1  R_10
158 #define R_SCRATCH_NA_2  R_FP
159 #ifdef HAVE_BITWISE_FRAME
160 #define R_SCRATCH_NA_3  R_8
161 #endif
/* Argument registers R_0-R_3; the return value register is R_0. */
163 #define R_ARG0          R_0
164 #define R_ARG1          R_1
165 #define R_ARG2          R_2
166 #define R_ARG3          R_3
167 #define R_RET0          R_0
169 #define FR_SCRATCH_1    FDR_0
170 #define FR_SCRATCH_2    FDR_2
/*
 * Floating-point support masks: the result of cpu_test_feature() multiplied
 * by a constant mask (0x6 for vfp, 0x1 for half).
 * NOTE(review): this assumes cpu_test_feature() yields 0 or 1, and the
 * meaning of the individual mask bits is defined elsewhere - confirm.
 */
172 #define SUPPORTED_FP            (cpu_test_feature(CPU_FEATURE_vfp) * 0x6)
173 #define SUPPORTED_FP_HALF_CVT   (cpu_test_feature(CPU_FEATURE_half) * 0x1)
/*
 * Report whether a register code belongs to the floating-point range,
 * i.e. whether it lies in 0x20..0x3f inclusive.
 */
static bool reg_is_fp(unsigned reg)
{
	if (reg < 0x20)
		return false;
	return reg <= 0x3f;
}
/*
 * Register tables for this backend.
 * regs_saved lists R_9, preceded by R_8 unless HAVE_BITWISE_FRAME is
 * defined (in that configuration R_8 is defined as R_SCRATCH_NA_3).
 */
180 static const uint8_t regs_saved[] = {
181 #ifndef HAVE_BITWISE_FRAME
182         R_8,
183 #endif
184         R_9 };
185 static const uint8_t regs_volatile[] = { R_3 };
/*
 * fp_saved holds a single 0 element while n_fp_saved is 0U, so the element
 * is only a placeholder that keeps the array non-empty.
 */
186 static const uint8_t fp_saved[] = { 0 };
187 #define n_fp_saved 0U
188 static const uint8_t fp_volatile[] = { FDR_4, FDR_6, FDR_8, FDR_10, FDR_12, FDR_14 };
/* True for R_8 and R_9, independent of HAVE_BITWISE_FRAME. */
189 #define reg_is_saved(r) ((r) == R_8 || (r) == R_9)
/*
 * Try to express a 32-bit constant as an 8-bit value plus an even rotation.
 *
 * Even left-rotation amounts 0, 2, ..., 30 are tried in increasing order;
 * the first one that brings the constant below 0x100 is used.
 *
 * Returns the rotated 8-bit value in bits 0-7 combined with half of the
 * rotation amount in bits 8-11 (equivalently: value | rotation << 7),
 * or -1 if no rotation makes the constant fit.
 */
static int gen_imm12(uint32_t c)
{
	unsigned half;

	for (half = 0; half < 16; half++) {
		unsigned amount = half * 2;
		uint32_t rotated = c;

		/* A zero amount is handled separately to avoid a 32-bit shift. */
		if (amount)
			rotated = (c << amount) | (c >> (32 - amount));
		if (rotated <= 0xff)
			return (int)(rotated | (half << 8));
	}
	return -1;
}
202 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint32_t c)
204         if (gen_imm12(c) >= 0 || gen_imm12(~c) >= 0) {
205                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
206                 gen_one(reg);
207                 gen_one(ARG_IMM);
208                 gen_eight(c);
209                 return true;
210         }
211         if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
212                 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
213                 gen_one(reg);
214                 gen_one(ARG_IMM);
215                 gen_eight(c & 0xffff);
216                 if (c >> 16) {
217                         gen_insn(INSN_MOV_MASK, OP_SIZE_4, MOV_MASK_16_32, 0);
218                         gen_one(reg);
219                         gen_one(reg);
220                         gen_one(ARG_IMM);
221                         gen_eight(c >> 16);
222                 }
223         } else {
224                 bool need_init = true;
225                 int p;
226                 for (p = 0; p < 32; p += 8) {
227                         if ((c >> p) & 0xff) {
228                                 if (need_init) {
229                                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
230                                         gen_one(reg);
231                                         gen_one(ARG_IMM);
232                                         gen_eight(c & (0xff << p));
233                                         need_init = false;
234                                 } else {
235                                         gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, 0);
236                                         gen_one(reg);
237                                         gen_one(reg);
238                                         gen_one(ARG_IMM);
239                                         gen_eight(c & (0xff << p));
240                                 }
241                         }
242                 }
243                 if (need_init) {
244                         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
245                         gen_one(reg);
246                         gen_one(ARG_IMM);
247                         gen_eight(0);
248                 }
249         }
250         return true;
253 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
255         ctx->base_reg = base;
256         ctx->offset_imm = imm;
257         ctx->offset_reg = false;
258         switch (purpose) {
259                 case IMM_PURPOSE_LDR_OFFSET:
260                 case IMM_PURPOSE_STR_OFFSET:
261                 case IMM_PURPOSE_MVI_CLI_OFFSET:
262                         if (size == OP_SIZE_2) {
263                                 if (imm >= -255 && imm <= 255)
264                                         return true;
265                         } else {
266                                 if (imm >= -4095 && imm <= 4095)
267                                         return true;
268                         }
269                         break;
270                 case IMM_PURPOSE_LDR_SX_OFFSET:
271                 case IMM_PURPOSE_LDP_STP_OFFSET:
272                         if (imm >= -255 && imm <= 255)
273                                 return true;
274                         break;
275                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
276                         if (size < OP_SIZE_4 && imm != 0)
277                                 break;
278                         if (unlikely((imm & 3) != 0))
279                                 break;
280                         if (imm >= -1023 && imm <= 1023)
281                                 return true;
282                         break;
283                 default:
284                         internal(file_line, "gen_address: invalid purpose %d", purpose);
285         }
286         if (purpose == IMM_PURPOSE_VLDR_VSTR_OFFSET) {
287                 if (gen_imm12(imm) >= 0) {
288                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
289                         gen_one(R_OFFSET_IMM);
290                         gen_one(base);
291                         gen_one(ARG_IMM);
292                         gen_eight(imm);
293                 } else {
294                         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
295                         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
296                         gen_one(R_OFFSET_IMM);
297                         gen_one(R_OFFSET_IMM);
298                         gen_one(base);
299                 }
300                 ctx->base_reg = R_OFFSET_IMM;
301                 ctx->offset_imm = 0;
302                 return true;
303         }
304         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
305         ctx->offset_reg = true;
306         return true;
309 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
311         int imm12;
312         switch (purpose) {
313                 case IMM_PURPOSE_STORE_VALUE:
314                         break;
315                 case IMM_PURPOSE_ADD:
316                 case IMM_PURPOSE_SUB:
317                 case IMM_PURPOSE_CMP:
318                 case IMM_PURPOSE_CMP_LOGICAL:
319                 case IMM_PURPOSE_AND:
320                 case IMM_PURPOSE_OR:
321                 case IMM_PURPOSE_XOR:
322                 case IMM_PURPOSE_ANDN:
323                 case IMM_PURPOSE_TEST:
324                         imm12 = gen_imm12(imm);
325                         if (unlikely(imm12 == -1))
326                                 break;
327                         return true;
328                 case IMM_PURPOSE_CMOV:
329                         if (gen_imm12(imm) >= 0 || gen_imm12(~imm) >= 0)
330                                 return true;
331                         if ((uint32_t)imm < 0x10000 && likely(cpu_test_feature(CPU_FEATURE_armv6t2)))
332                                 return true;
333                         break;
334                 case IMM_PURPOSE_MUL:
335                         break;
336                 default:
337                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
338         }
339         return false;
342 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
344         if (is_direct_const(imm, purpose, size)) {
345                 ctx->const_imm = imm;
346                 ctx->const_reg = false;
347         } else {
348                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
349                 ctx->const_reg = true;
350         }
351         return true;
/*
 * Emit the entry sequence of the generated code: an INSN_ARM_PUSH, a copy
 * of R_ARG0 into R_FRAME, a copy of R_ARG1 into R_UPCALL, and finally an
 * indirect jump through R_ARG3.
 * NOTE(review): which registers INSN_ARM_PUSH saves is decided by the
 * instruction encoder, which is not visible here.
 */
354 static bool attr_w gen_entry(struct codegen_context *ctx)
356         gen_insn(INSN_ARM_PUSH, OP_SIZE_NATIVE, 0, 0);
/* R_FRAME = R_ARG0 (destination operand is emitted first) */
358         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
359         gen_one(R_FRAME);
360         gen_one(R_ARG0);
/* R_UPCALL = R_ARG1 */
362         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
363         gen_one(R_UPCALL);
364         gen_one(R_ARG1);
/* continue at the address held in R_ARG3 */
366         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
367         gen_one(R_ARG3);
369         return true;
/*
 * Emit code that loads the value ip into R_ARG1 and then jumps to
 * escape_label.
 * NOTE(review): g() presumably returns early when the wrapped call
 * fails - confirm against its definition.
 */
372 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
374         g(gen_load_constant(ctx, R_ARG1, ip));
376         gen_insn(INSN_JMP, 0, 0, 0);
377         gen_four(escape_label);
379         return true;
/*
 * Emit the exit sequence: copy R_FRAME into R_ARG0 (which is also R_RET0)
 * and emit INSN_ARM_POP, the counterpart of the INSN_ARM_PUSH emitted by
 * gen_entry().
 * NOTE(review): INSN_ARM_POP presumably also returns to the caller -
 * confirm against the instruction encoder.
 */
382 static bool attr_w gen_escape(struct codegen_context *ctx)
384         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
385         gen_one(R_ARG0);
386         gen_one(R_FRAME);
388         gen_insn(INSN_ARM_POP, OP_SIZE_NATIVE, 0, 0);
390         return true;
393 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
395         if (unlikely(arg >= 4))
396                 internal(file_line, "gen_upcall_argument: only 4 arguments supported");
397         return true;
/*
 * Emit a call through the upcall vector: load a 4-byte value from
 * R_UPCALL + offset into R_SCRATCH_NA_1, call it indirectly, then run
 * gen_upcall_end(ctx, n_args).
 * NOTE(review): gen_address_offset() presumably emits the memory operand
 * prepared by the preceding gen_address() call - confirm.
 */
400 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
402         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
403         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
404         gen_one(R_SCRATCH_NA_1);
405         gen_address_offset();
/* indirect call to the loaded address */
407         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
408         gen_one(R_SCRATCH_NA_1);
410         g(gen_upcall_end(ctx, n_args));
412         return true;
/*
 * Emit a timestamp check: load the ts field of struct cg_upcall_vector_s
 * (addressed through R_UPCALL) into R_SCRATCH_1, load the word at
 * R_SP + 0 into R_SCRATCH_2, compare the two and jump to escape_label
 * when they differ (COND_NE).
 * NOTE(review): the stack slot presumably holds a timestamp saved at
 * entry - confirm against the code that fills it.
 */
415 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
417         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
418         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
419         gen_one(R_SCRATCH_1);
420         gen_address_offset();
/* R_SCRATCH_2 = word at R_SP + 0 */
422         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
423         gen_one(R_SCRATCH_2);
424         gen_one(ARG_ADDRESS_1);
425         gen_one(R_SP);
426         gen_eight(0);
428         gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
429         gen_one(R_SCRATCH_1);
430         gen_one(R_SCRATCH_2);
/* leave through escape_label when the values are not equal */
432         gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
433         gen_four(escape_label);
435         return true;