codegen: improve the generated code on architectures with flags
[ajla.git] / c1-sparc.inc
blob6709cb26be2f50dc84d5d4c08da1a0bed4eb1791
1 /*
2  * Copyright (C) 2024 Mikulas Patocka
3  *
4  * This file is part of Ajla.
5  *
6  * Ajla is free software: you can redistribute it and/or modify it under the
7  * terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * Ajla. If not, see <https://www.gnu.org/licenses/>.
17  */
/*
 * Target configuration for the SPARC code generator.
 *
 * Without ARCH_SPARC64, SPARC_9 is evaluated through cpu_test_feature() and
 * the native operand size follows it, while addresses stay at OP_SIZE_4.
 * With ARCH_SPARC64, SPARC_9 is the constant 1 and both the native and the
 * address size are OP_SIZE_8.  FRAME_SIZE differs between the two builds.
 */
#if !defined(ARCH_SPARC64)
#define SPARC_9                         cpu_test_feature(CPU_FEATURE_sparc9)
#define FRAME_SIZE                      0x60
#define OP_SIZE_NATIVE                  (SPARC_9 ? OP_SIZE_8 : OP_SIZE_4)
#define OP_SIZE_ADDRESS                 OP_SIZE_4
#else
#define SPARC_9                         1
#define FRAME_SIZE                      0xb0
#define OP_SIZE_NATIVE                  OP_SIZE_8
#define OP_SIZE_ADDRESS                 OP_SIZE_8
#endif

#define JMP_LIMIT                       JMP_LONG

#define UNALIGNED_TRAP                  1

/* Every *_WRITES_FLAGS / COND_IS_LOGICAL query evaluates to 0 on this target. */
#define ALU_WRITES_FLAGS(alu, im)       0
#define ALU1_WRITES_FLAGS(alu)          0
#define ROT_WRITES_FLAGS(alu, size, im) 0
#define COND_IS_LOGICAL(cond)           0

/*
 * Capability switches; they are consumed by code outside this chunk
 * (presumably the architecture-independent code generator).
 * Multiplication and division are only reported when SPARC_9 holds.
 */
#define ARCH_PARTIAL_ALU(size)          0
#define ARCH_IS_3ADDRESS(alu, f)        1
#define ARCH_IS_3ADDRESS_IMM(alu, f)    1
#define ARCH_IS_3ADDRESS_ROT(alu, size) 1
#define ARCH_IS_3ADDRESS_ROT_IMM(alu)   1
#define ARCH_IS_2ADDRESS(alu)           1
#define ARCH_IS_3ADDRESS_FP             1
#define ARCH_HAS_JMP_2REGS(cond)        0
#define ARCH_HAS_FLAGS                  1
#define ARCH_PREFERS_SX(size)           0
#define ARCH_HAS_BWX                    1
#define ARCH_HAS_MUL                    SPARC_9
#define ARCH_HAS_DIV                    SPARC_9
#define ARCH_HAS_ANDN                   1
#define ARCH_HAS_SHIFTED_ADD(bits)      0
#define ARCH_HAS_BTX(btx, size, cnst)   0
#define ARCH_SHIFT_SIZE                 OP_SIZE_4
#define ARCH_BOOL_SIZE                  OP_SIZE_NATIVE
#define ARCH_HAS_FP_GP_MOV              0
#define ARCH_NEEDS_BARRIER              0

/*
 * Operand size selection: i_size() ignores its argument and always yields
 * OP_SIZE_NATIVE; the rotate and compare variants pass the requested size
 * together with OP_SIZE_4 to maximum() (presumably the larger of the two).
 */
#define i_size(size)                    OP_SIZE_NATIVE
#define i_size_rot(size)                maximum(size, OP_SIZE_4)
#define i_size_cmp(size)                maximum(size, OP_SIZE_4)

/* Disabled switch: the #ifdef SUPPORT_QUAD_PRECISION branches below are inactive. */
/*#define SUPPORT_QUAD_PRECISION*/
/*
 * Integer register codes 0x00..0x1f, in four groups of eight:
 * R_G* (0x00..0x07), R_O* (0x08..0x0f), R_L* (0x10..0x17), R_I* (0x18..0x1f).
 */
#define R_G0            0x00
#define R_G1            0x01
#define R_G2            0x02
#define R_G3            0x03
#define R_G4            0x04
#define R_G5            0x05
#define R_G6            0x06
#define R_G7            0x07
#define R_O0            0x08
#define R_O1            0x09
#define R_O2            0x0a
#define R_O3            0x0b
#define R_O4            0x0c
#define R_O5            0x0d
#define R_O6            0x0e
#define R_O7            0x0f
#define R_L0            0x10
#define R_L1            0x11
#define R_L2            0x12
#define R_L3            0x13
#define R_L4            0x14
#define R_L5            0x15
#define R_L6            0x16
#define R_L7            0x17
#define R_I0            0x18
#define R_I1            0x19
#define R_I2            0x1a
#define R_I3            0x1b
#define R_I4            0x1c
#define R_I5            0x1d
#define R_I6            0x1e
#define R_I7            0x1f

/* Floating-point register codes: FSR_n is 0x20 + n for n = 0..31. */
#define FSR_0           0x20
#define FSR_1           0x21
#define FSR_2           0x22
#define FSR_3           0x23
#define FSR_4           0x24
#define FSR_5           0x25
#define FSR_6           0x26
#define FSR_7           0x27
#define FSR_8           0x28
#define FSR_9           0x29
#define FSR_10          0x2a
#define FSR_11          0x2b
#define FSR_12          0x2c
#define FSR_13          0x2d
#define FSR_14          0x2e
#define FSR_15          0x2f
#define FSR_16          0x30
#define FSR_17          0x31
#define FSR_18          0x32
#define FSR_19          0x33
#define FSR_20          0x34
#define FSR_21          0x35
#define FSR_22          0x36
#define FSR_23          0x37
#define FSR_24          0x38
#define FSR_25          0x39
#define FSR_26          0x3a
#define FSR_27          0x3b
#define FSR_28          0x3c
#define FSR_29          0x3d
#define FSR_30          0x3e
#define FSR_31          0x3f

/*
 * FDR_n (even n only).  For n < 32 the code is 0x20 + n; for n >= 32 it is
 * 0x20 + (n - 32) + 1, so the upper names land on the odd codes.
 * NOTE(review): this looks like the SPARC V9 register-field encoding where
 * the top bit of the register number is stored in bit 0 - confirm.
 */
#define FDR_0           0x20
#define FDR_2           0x22
#define FDR_4           0x24
#define FDR_6           0x26
#define FDR_8           0x28
#define FDR_10          0x2a
#define FDR_12          0x2c
#define FDR_14          0x2e
#define FDR_16          0x30
#define FDR_18          0x32
#define FDR_20          0x34
#define FDR_22          0x36
#define FDR_24          0x38
#define FDR_26          0x3a
#define FDR_28          0x3c
#define FDR_30          0x3e
#define FDR_32          0x21
#define FDR_34          0x23
#define FDR_36          0x25
#define FDR_38          0x27
#define FDR_40          0x29
#define FDR_42          0x2b
#define FDR_44          0x2d
#define FDR_46          0x2f
#define FDR_48          0x31
#define FDR_50          0x33
#define FDR_52          0x35
#define FDR_54          0x37
#define FDR_56          0x39
#define FDR_58          0x3b
#define FDR_60          0x3d
#define FDR_62          0x3f

/* FQR_n (n a multiple of four); numbered by the same rule as FDR_n. */
#define FQR_0           0x20
#define FQR_4           0x24
#define FQR_8           0x28
#define FQR_12          0x2c
#define FQR_16          0x30
#define FQR_20          0x34
#define FQR_24          0x38
#define FQR_28          0x3c
#define FQR_32          0x21
#define FQR_36          0x25
#define FQR_40          0x29
#define FQR_44          0x2d
#define FQR_48          0x31
#define FQR_52          0x35
#define FQR_56          0x39
#define FQR_60          0x3d
/*
 * Register role assignments used by the code generator.
 */

/* Register code 0x00 (same code as R_G0) serves as the zero source operand. */
#define R_ZERO          0x00
#define R_SP            R_O6

/* Fixed roles kept in R_I0..R_I2. */
#define R_FRAME         R_I0
#define R_UPCALL        R_I1
#define R_TIMESTAMP     R_I2

/*
 * Scratch registers.  They overlap the argument registers below:
 * R_SCRATCH_2 is the same register as R_ARG0 and R_RET0, R_SCRATCH_1 the
 * same as R_ARG1, R_SCRATCH_4 the same as R_ARG2, R_SCRATCH_3 the same as
 * R_ARG3.
 */
#define R_SCRATCH_1     R_O1
#define R_SCRATCH_2     R_O0
#define R_SCRATCH_3     R_O3
#define R_SCRATCH_4     R_O2

/* Further scratch registers; R_O7 is only claimed with HAVE_BITWISE_FRAME. */
#define R_SCRATCH_NA_1  R_O4
#define R_SCRATCH_NA_2  R_O5
#ifdef HAVE_BITWISE_FRAME
#define R_SCRATCH_NA_3  R_O7
#endif

#define R_SAVED_1       R_L0
#define R_SAVED_2       R_L1

/* Call arguments and the return value. */
#define R_ARG0          R_O0
#define R_ARG1          R_O1
#define R_ARG2          R_O2
#define R_ARG3          R_O3
#define R_RET0          R_O0

/*
 * Temporaries for values that do not fit an immediate field:
 * R_OFFSET_IMM is loaded by gen_address(), R_CONST_IMM by gen_imm(), and
 * R_CONST_HELPER holds the upper half inside gen_load_constant().
 */
#define R_OFFSET_IMM    R_G1
#define R_CONST_IMM     R_G2
#define R_CONST_HELPER  R_G3

/*
 * Floating-point scratch registers and SUPPORTED_FP.
 * NOTE(review): SUPPORTED_FP is presumably a bitmask of supported
 * floating-point types (the quad variant additionally sets bit 4) -
 * confirm against its users.
 */
#ifdef SUPPORT_QUAD_PRECISION
#define FR_SCRATCH_1    FDR_0
#define FR_SCRATCH_2    FDR_4
#define SUPPORTED_FP    0x16
#else
#define FR_SCRATCH_1    FDR_0
#define FR_SCRATCH_2    FDR_2
#define SUPPORTED_FP    0x6
#endif
/*
 * reg_is_fp - tell whether a register code names a floating-point register.
 *
 * reg: register code as used by the R_ / FSR_ / FDR_ / FQR_ constants.
 *
 * Returns true for codes 0x20..0x3f (the range of the FSR_, FDR_ and FQR_
 * constants) and false for everything else.
 */
static bool reg_is_fp(unsigned reg)
{
	return reg >= 0x20 && reg < 0x40;
}
229 static const uint8_t regs_saved[] = { R_L2, R_L3, R_L4, R_L5, R_L6, R_L7, R_I3, R_I4, R_I5 };
230 static const uint8_t regs_volatile[] = { R_G4, R_G5,
231 #ifndef HAVE_BITWISE_FRAME
232                 R_O7,
233 #endif
234         };
235 static const uint8_t fp_saved[] = { 0 };
236 #define n_fp_saved 0U
237 #ifdef SUPPORT_QUAD_PRECISION
238 static const uint8_t fp_volatile[] = { FDR_8, FDR_12, FDR_16, FDR_20, FDR_24, FDR_28 };
239 #else
240 static const uint8_t fp_volatile[] = { FDR_4, FDR_6, FDR_8, FDR_10, FDR_12, FDR_14, FDR_16, FDR_18, FDR_20, FDR_22, FDR_24, FDR_26, FDR_28, FDR_30 };
241 #endif
242 #define reg_is_saved(r) ((r) >= R_L0 && (r) <= R_I7)
244 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
246         int32_t cl;
247         if (SPARC_9) {
248                 if (c >= (uint64_t)-4096) {
249                         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
250                         gen_one(reg);
251                         gen_one(R_ZERO);
252                         gen_one(ARG_IMM);
253                         gen_eight(c);
254                         return true;
255                 }
256                 if (c >= 0x100000000ULL) {
257                         int32_t cu = c >> 32;
258                         if (cu < -4096 || cu >= 4096) {
259                                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
260                                 gen_one(R_CONST_HELPER);
261                                 gen_one(ARG_IMM);
262                                 gen_eight(cu & 0xFFFFFC00UL);
263                                 cu &= 0x3FFU;
264                                 if (cu) {
265                                         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
266                                         gen_one(R_CONST_HELPER);
267                                         gen_one(R_CONST_HELPER);
268                                         gen_one(ARG_IMM);
269                                         gen_eight(cu);
270                                 }
271                         } else {
272                                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
273                                 gen_one(R_CONST_HELPER);
274                                 gen_one(R_ZERO);
275                                 gen_one(ARG_IMM);
276                                 gen_eight(cu);
277                         }
278                         if (!(c & 0xFFFFFFFFULL)) {
279                                 gen_insn(INSN_ROT, OP_SIZE_NATIVE, ROT_SHL, 0);
280                                 gen_one(reg);
281                                 gen_one(R_CONST_HELPER);
282                                 gen_one(ARG_IMM);
283                                 gen_eight(32);
284                                 return true;
285                         }
286                         gen_insn(INSN_ROT, OP_SIZE_NATIVE, ROT_SHL, 0);
287                         gen_one(R_CONST_HELPER);
288                         gen_one(R_CONST_HELPER);
289                         gen_one(ARG_IMM);
290                         gen_eight(32);
291                 }
292         }
293         cl = c;
294         if (cl < 0 || cl >= 4096) {
295                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
296                 gen_one(reg);
297                 gen_one(ARG_IMM);
298                 gen_eight(cl & 0xFFFFFC00UL);
299                 cl &= 0x3FFU;
300                 if (cl) {
301                         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
302                         gen_one(reg);
303                         gen_one(reg);
304                         gen_one(ARG_IMM);
305                         gen_eight(cl);
306                 }
307         } else {
308                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
309                 gen_one(reg);
310                 gen_one(R_ZERO);
311                 gen_one(ARG_IMM);
312                 gen_eight(cl);
313         }
314         if (SPARC_9) {
315                 if (c >= 0x100000000ULL) {
316                         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
317                         gen_one(reg);
318                         gen_one(reg);
319                         gen_one(R_CONST_HELPER);
320                 }
321         }
322         return true;
325 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
327         ctx->base_reg = base;
328         ctx->offset_imm = imm;
329         ctx->offset_reg = false;
330         switch (purpose) {
331                 case IMM_PURPOSE_LDR_OFFSET:
332                 case IMM_PURPOSE_LDR_SX_OFFSET:
333                 case IMM_PURPOSE_STR_OFFSET:
334                 case IMM_PURPOSE_LDP_STP_OFFSET:
335                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
336                 case IMM_PURPOSE_MVI_CLI_OFFSET:
337                         if (likely(imm >= -4096) && likely(imm < 4096))
338                                 return true;
339                         break;
340                 default:
341                         internal(file_line, "gen_address: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
342         }
344         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
345         ctx->offset_reg = true;
346         return true;
349 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
351         switch (purpose) {
352                 case IMM_PURPOSE_STORE_VALUE:
353                         if (!imm)
354                                 return true;
355                         break;
356                 case IMM_PURPOSE_ADD:
357                 case IMM_PURPOSE_SUB:
358                 case IMM_PURPOSE_CMP:
359                 case IMM_PURPOSE_CMP_LOGICAL:
360                 case IMM_PURPOSE_AND:
361                 case IMM_PURPOSE_OR:
362                 case IMM_PURPOSE_XOR:
363                 case IMM_PURPOSE_ANDN:
364                 case IMM_PURPOSE_TEST:
365                 case IMM_PURPOSE_MUL:
366                         if (likely(imm >= -4096) && likely(imm < 4096))
367                                 return true;
368                         break;
369                 case IMM_PURPOSE_CMOV:
370                         if (likely(imm >= -1024) && likely(imm < 1024))
371                                 return true;
372                         break;
373                 case IMM_PURPOSE_MOVR:
374                         if (likely(imm >= -512) && likely(imm < 512))
375                                 return true;
376                         break;
377                 default:
378                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
379         }
380         return false;
383 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
385         if (is_direct_const(imm, purpose, size)) {
386                 ctx->const_imm = imm;
387                 ctx->const_reg = false;
388         } else {
389                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
390                 ctx->const_reg = true;
391         }
392         return true;
395 static bool attr_w gen_entry(struct codegen_context *ctx)
397         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_SAVE, 0);
398         gen_one(R_SP);
399         gen_one(R_SP);
400         gen_one(ARG_IMM);
401         gen_eight(-FRAME_SIZE);
403         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
404         gen_one(R_I3);
406         return true;
409 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
411         g(gen_load_constant(ctx, R_I1, ip));
413         gen_insn(INSN_JMP, 0, 0, 0);
414         gen_four(escape_label);
416         return true;
419 static bool attr_w gen_escape(struct codegen_context *ctx)
421         gen_insn(INSN_RET, 0, 0, 0);
423         return true;
426 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
428         return true;
431 static bool attr_w gen_get_upcall_pointer(struct codegen_context *ctx, unsigned offset, unsigned reg)
433         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
434         gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
435         gen_one(reg);
436         gen_address_offset();
438         return true;
441 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
443         g(gen_get_upcall_pointer(ctx, offset, R_SCRATCH_NA_1));
445         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_NATIVE, 0, 0);
446         gen_one(R_SCRATCH_NA_1);
448         g(gen_upcall_end(ctx, n_args));
450         return true;
453 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
455         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
456         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
457         gen_one(R_SCRATCH_1);
458         gen_address_offset();
460         gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
461         gen_one(R_SCRATCH_1);
462         gen_one(R_TIMESTAMP);
464         gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
465         gen_four(escape_label);
467         return true;