codegen: a small improvement in do_bswap and do_brev
[ajla.git] / c1-loong.inc
blob516b632f260f923d2c5bf36905bdd08cb2f60a10
1 /*
2  * Copyright (C) 2024 Mikulas Patocka
3  *
4  * This file is part of Ajla.
5  *
6  * Ajla is free software: you can redistribute it and/or modify it under the
7  * terms of the GNU General Public License as published by the Free Software
8  * Foundation, either version 3 of the License, or (at your option) any later
9  * version.
10  *
11  * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * Ajla. If not, see <https://www.gnu.org/licenses/>.
17  */
/*
 * Target configuration for this code generator backend (the page header names
 * the file c1-loong.inc, so presumably LoongArch -- TODO confirm).
 * These macros are consumed by code outside this part of the file; only the
 * values themselves are established here.
 */
/* The native operand size and the address size are both OP_SIZE_8. */
19 #define OP_SIZE_NATIVE                  OP_SIZE_8
20 #define OP_SIZE_ADDRESS                 OP_SIZE_NATIVE
22 #define JMP_LIMIT                       JMP_LONG
/* True when the CPU does not report the "unaligned" feature. */
24 #define UNALIGNED_TRAP                  (!cpu_test_feature(CPU_FEATURE_unaligned))
/* Every flag-related query answers 0 on this target (see also ARCH_HAS_FLAGS). */
26 #define ALU_WRITES_FLAGS(alu, im)       0
27 #define ALU1_WRITES_FLAGS(alu)          0
28 #define ROT_WRITES_FLAGS(alu, size, im) 0
29 #define COND_IS_LOGICAL(cond)           0
/* Instruction-shape and feature switches; 1 = available, 0 = not available. */
31 #define ARCH_PARTIAL_ALU(size)          0
32 #define ARCH_IS_3ADDRESS(alu, f)        1
33 #define ARCH_IS_3ADDRESS_IMM(alu, f)    1
34 #define ARCH_IS_3ADDRESS_ROT(alu, size) 1
35 #define ARCH_IS_3ADDRESS_ROT_IMM(alu)   1
36 #define ARCH_IS_2ADDRESS(alu)           1
37 #define ARCH_IS_3ADDRESS_FP             1
38 #define ARCH_HAS_FLAGS                  0
39 #define ARCH_PREFERS_SX(size)           0
40 #define ARCH_HAS_BWX                    1
41 #define ARCH_HAS_MUL                    1
42 #define ARCH_HAS_DIV                    1
43 #define ARCH_HAS_ANDN                   1
44 #define ARCH_HAS_SHIFTED_ADD(bits)      0
/* Only BTX_BTR and BTX_BTEXT are accepted, and only when cnst is nonzero. */
45 #define ARCH_HAS_BTX(btx, size, cnst)   (((btx) == BTX_BTR || (btx) == BTX_BTEXT) && (cnst))
46 #define ARCH_SHIFT_SIZE                 OP_SIZE_4
47 #define ARCH_NEEDS_BARRIER              0
/*
 * Operand-size mapping: ALU and compare operations always use the native
 * size; rotates use the requested size but never less than OP_SIZE_4.
 */
49 #define i_size(size)                    OP_SIZE_NATIVE
50 #define i_size_rot(size)                maximum(size, OP_SIZE_4)
51 #define i_size_cmp(size)                OP_SIZE_NATIVE
/*
 * TIMESTAMP_IN_REGISTER is disabled (the define below is commented out).
 * When it is defined, R_TIMESTAMP is mapped to R_S4 and gen_entry copies
 * R_ARG2 into it instead of storing it at the bottom of the frame.
 */
53 /*#define TIMESTAMP_IN_REGISTER*/
/*
 * Register numbers used by this backend.
 * 0x00-0x1f are the integer registers; 0x20-0x3f are the floating-point
 * registers (reg_is_fp tests exactly that range).
 * NOTE(review): the names look like the usual ABI mnemonics (zero, ra, tp,
 * sp, a0-a7, t0-t8, fp, s0-s8) -- confirm against the target's ABI document.
 */
55 #define R_ZERO          0x00
56 #define R_RA            0x01
57 #define R_TP            0x02
58 #define R_SP            0x03
59 #define R_A0            0x04
60 #define R_A1            0x05
61 #define R_A2            0x06
62 #define R_A3            0x07
63 #define R_A4            0x08
64 #define R_A5            0x09
65 #define R_A6            0x0a
66 #define R_A7            0x0b
67 #define R_T0            0x0c
68 #define R_T1            0x0d
69 #define R_T2            0x0e
70 #define R_T3            0x0f
71 #define R_T4            0x10
72 #define R_T5            0x11
73 #define R_T6            0x12
74 #define R_T7            0x13
75 #define R_T8            0x14
76 #define R_RESERVED      0x15
/* R_FP..R_S8 (0x16-0x1f) is the contiguous range saved by gen_entry. */
77 #define R_FP            0x16
78 #define R_S0            0x17
79 #define R_S1            0x18
80 #define R_S2            0x19
81 #define R_S3            0x1a
82 #define R_S4            0x1b
83 #define R_S5            0x1c
84 #define R_S6            0x1d
85 #define R_S7            0x1e
86 #define R_S8            0x1f
/* Floating-point registers start at 0x20. */
88 #define R_FA0           0x20
89 #define R_FA1           0x21
90 #define R_FA2           0x22
91 #define R_FA3           0x23
92 #define R_FA4           0x24
93 #define R_FA5           0x25
94 #define R_FA6           0x26
95 #define R_FA7           0x27
96 #define R_FT0           0x28
97 #define R_FT1           0x29
98 #define R_FT2           0x2a
99 #define R_FT3           0x2b
100 #define R_FT4           0x2c
101 #define R_FT5           0x2d
102 #define R_FT6           0x2e
103 #define R_FT7           0x2f
104 #define R_FT8           0x30
105 #define R_FT9           0x31
106 #define R_FT10          0x32
107 #define R_FT11          0x33
108 #define R_FT12          0x34
109 #define R_FT13          0x35
110 #define R_FT14          0x36
111 #define R_FT15          0x37
112 #define R_FS0           0x38
113 #define R_FS1           0x39
114 #define R_FS2           0x3a
115 #define R_FS3           0x3b
116 #define R_FS4           0x3c
117 #define R_FS5           0x3d
118 #define R_FS6           0x3e
119 #define R_FS7           0x3f
/*
 * Roles assigned to the physical registers by the code generator.
 */
/* gen_entry loads R_FRAME from R_ARG0 and R_UPCALL from R_ARG1. */
121 #define R_FRAME         R_S0
122 #define R_UPCALL        R_S1
123 #ifdef TIMESTAMP_IN_REGISTER
124 #define R_TIMESTAMP     R_S4
125 #endif
/*
 * Scratch registers. R_SCRATCH_4 refers to R_SAVED_2, which is defined a few
 * lines further down; this works because macros are expanded at the point of
 * use, not at the point of definition.
 */
127 #define R_SCRATCH_1     R_A0
128 #define R_SCRATCH_2     R_A1
129 #define R_SCRATCH_3     R_A2
130 #define R_SCRATCH_4     R_SAVED_2
/* R_SCRATCH_NA_1 holds the call target in gen_upcall. */
131 #define R_SCRATCH_NA_1  R_A4
132 #define R_SCRATCH_NA_2  R_A5
133 #define R_SCRATCH_NA_3  R_A6
135 #define R_SAVED_1       R_S2
136 #define R_SAVED_2       R_S3
/* Argument and return-value registers as used by gen_entry / gen_escape. */
138 #define R_ARG0          R_A0
139 #define R_ARG1          R_A1
140 #define R_ARG2          R_A2
141 #define R_ARG3          R_A3
142 #define R_RET0          R_A0
143 #define R_RET1          R_A1
/*
 * Registers reserved for values that do not fit an instruction directly:
 * gen_address loads oversized offsets into R_OFFSET_IMM and gen_imm loads
 * oversized constants into R_CONST_IMM.
 */
145 #define R_OFFSET_IMM    R_T0
146 #define R_CONST_IMM     R_T1
147 #define R_CMP_RESULT    R_T2
149 #define FR_SCRATCH_1    R_FA0
150 #define FR_SCRATCH_2    R_FA1
/* NOTE(review): presumably a bit mask of supported FP formats -- confirm against its users. */
152 #define SUPPORTED_FP    0x6
/* Stack frame size in bytes: gen_entry lowers R_SP by this much and gen_escape raises it again. */
154 #define FRAME_SIZE      0x60
/*
 * Report whether a register number names a floating-point register.
 * The floating-point registers occupy numbers 0x20 through 0x3f.
 */
static bool reg_is_fp(unsigned reg)
{
	return !(reg < 0x20 || reg > 0x3f);
}
/*
 * Register tables exported to the rest of the code generator.
 * NOTE(review): presumably these are the pools the register allocator draws
 * from -- their consumers are not visible in this part of the file.
 */
/* R_S4 is only listed when it is not reserved as R_TIMESTAMP. */
161 static const uint8_t regs_saved[] = {
162 #ifndef TIMESTAMP_IN_REGISTER
163         R_S4,
164 #endif
165         R_S5, R_S6, R_S7, R_S8, R_FP };
166 static const uint8_t regs_volatile[] = { R_RA, R_A3, R_A7, R_T3, R_T4, R_T5, R_T6, R_T7, R_T8 };
/* fp_saved holds a single placeholder entry; its logical length n_fp_saved is 0. */
167 static const uint8_t fp_saved[] = { 0 };
168 #define n_fp_saved 0U
169 static const uint8_t fp_volatile[] = { R_FA2, R_FA3, R_FA4, R_FA5, R_FA6, R_FA7, R_FT0, R_FT1, R_FT2, R_FT3, R_FT4, R_FT5, R_FT6, R_FT7, R_FT8, R_FT9, R_FT10, R_FT11, R_FT12, R_FT13, R_FT14, R_FT15 };
/* True for register numbers in R_FP..R_S8, the range gen_entry stores to the frame. */
170 #define reg_is_saved(r) ((r) >= R_FP && (r) <= R_S8)
172 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
174         uint64_t c0 = c & 0x0000000000000fffULL;
175         uint64_t c1 = c & 0x00000000fffff000ULL;
176         uint64_t c2 = c & 0x000fffff00000000ULL;
177         uint64_t c3 = c & 0xfff0000000000000ULL;
178         uint64_t top_bits = 0;
179         if (!(c0 | c1 | c2)) {
180                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
181                 gen_one(reg);
182                 gen_one(ARG_IMM);
183                 gen_eight(c3);
184                 return true;
185         }
186         if (c0 & 0x800ULL && c1 == 0xfffff000ULL) {
187                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
188                 gen_one(reg);
189                 gen_one(ARG_IMM);
190                 gen_eight(c0 | 0xfffffffffffff000ULL);
191                 top_bits = 0xffffffff00000000ULL;
192         } else {
193                 bool have_reg = false;
194                 if (c1) {
195                         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
196                         gen_one(reg);
197                         gen_one(ARG_IMM);
198                         gen_eight((uint64_t)(int32_t)c1);
199                         top_bits = (uint64_t)(int32_t)c1 & 0xffffffff00000000ULL;
200                         have_reg = true;
201                 }
202                 if (!have_reg || c0) {
203                         if (!have_reg) {
204                                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
205                                 gen_one(reg);
206                                 gen_one(ARG_IMM);
207                                 gen_eight(c0);
208                         } else {
209                                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_OR, 0);
210                                 gen_one(reg);
211                                 gen_one(reg);
212                                 gen_one(ARG_IMM);
213                                 gen_eight(c0);
214                         }
215                 }
216         }
217         if (top_bits != (c2 | c3)) {
218                 uint64_t c2x = c2;
219                 if (c2 & 0x0008000000000000ULL)
220                         c2x |= 0xfff0000000000000ULL;
221                 if (top_bits != c2x) {
222                         gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_32_64, 0);
223                         gen_one(reg);
224                         gen_one(reg);
225                         gen_one(ARG_IMM);
226                         gen_eight(c2x >> 32);
227                 }
228                 top_bits = c2x & 0xfff0000000000000ULL;
229                 if (top_bits != c3) {
230                         gen_insn(INSN_MOV_MASK, OP_SIZE_NATIVE, MOV_MASK_52_64, 0);
231                         gen_one(reg);
232                         gen_one(reg);
233                         gen_one(ARG_IMM);
234                         gen_eight(c3 >> 52);
235                 }
236         }
237         return true;
240 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
242         ctx->base_reg = base;
243         ctx->offset_imm = imm;
244         ctx->offset_reg = false;
245         switch (purpose) {
246                 case IMM_PURPOSE_LDR_OFFSET:
247                 case IMM_PURPOSE_LDR_SX_OFFSET:
248                 case IMM_PURPOSE_STR_OFFSET:
249                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
250                 case IMM_PURPOSE_MVI_CLI_OFFSET:
251                         if (likely(imm >= -0x800) && likely(imm < 0x800)) {
252                                 return true;
253                         }
254                         if (imm >= -0x8000 && imm < 0x8000 && !(imm & 3)) {
255                                 if (size == OP_SIZE_NATIVE)
256                                         return true;
257                                 if (purpose == IMM_PURPOSE_LDR_SX_OFFSET && size == OP_SIZE_4)
258                                         return true;
259                                 if (purpose == IMM_PURPOSE_STR_OFFSET && size == OP_SIZE_4)
260                                         return true;
261                         }
262                         break;
263                 default:
264                         internal(file_line, "gen_address: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
265         }
266         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
267         ctx->offset_reg = true;
268         return true;
271 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
273         switch (purpose) {
274                 case IMM_PURPOSE_STORE_VALUE:
275                         if (!imm)
276                                 return true;
277                         break;
278                 case IMM_PURPOSE_ADD:
279                 case IMM_PURPOSE_CMP:
280                 case IMM_PURPOSE_CMP_LOGICAL:
281                         if (likely(imm >= -0x800) && likely(imm < 0x800))
282                                 return true;
283                         break;
284                 case IMM_PURPOSE_SUB:
285                         if (likely(imm > -0x800) && likely(imm <= 0x800))
286                                 return true;
287                         break;
288                 case IMM_PURPOSE_AND:
289                 case IMM_PURPOSE_OR:
290                 case IMM_PURPOSE_XOR:
291                         if (likely(imm >= 0) && likely(imm < 0x1000))
292                                 return true;
293                         break;
294                 case IMM_PURPOSE_ANDN:
295                         break;
296                 case IMM_PURPOSE_TEST:
297                         break;
298                 case IMM_PURPOSE_JMP_2REGS:
299                         break;
300                 case IMM_PURPOSE_MUL:
301                         break;
302                 case IMM_PURPOSE_BITWISE:
303                         return true;
304                 default:
305                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
306         }
307         return false;
310 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
312         if (is_direct_const(imm, purpose, size)) {
313                 ctx->const_imm = imm;
314                 ctx->const_reg = false;
315         } else {
316                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
317                 ctx->const_reg = true;
318         }
319         return true;
322 static bool attr_w gen_entry(struct codegen_context *ctx)
324         int offset, i;
326         g(gen_imm(ctx, -FRAME_SIZE, IMM_PURPOSE_ADD, OP_SIZE_NATIVE));
327         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
328         gen_one(R_SP);
329         gen_one(R_SP);
330         gen_imm_offset();
332         offset = FRAME_SIZE - (1 << OP_SIZE_NATIVE);
334         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x08, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
335         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
336         gen_address_offset();
337         gen_one(R_RA);
338         offset -= 1 << OP_SIZE_NATIVE;
340         for (i = R_FP; i <= R_S8; i++) {
341                 g(gen_address(ctx, R_SP, offset, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
342                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
343                 gen_address_offset();
344                 gen_one(i);
345                 offset -= 1 << OP_SIZE_NATIVE;
346         }
348 #ifndef TIMESTAMP_IN_REGISTER
349         g(gen_address(ctx, R_SP, offset, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
350         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
351         gen_address_offset();
352         gen_one(R_ARG2);
353 #endif
355         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
356         gen_one(R_FRAME);
357         gen_one(R_ARG0);
359         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
360         gen_one(R_UPCALL);
361         gen_one(R_ARG1);
363 #ifdef TIMESTAMP_IN_REGISTER
364         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
365         gen_one(R_TIMESTAMP);
366         gen_one(R_ARG2);
367 #endif
369         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
370         gen_one(R_ARG3);
372         return true;
375 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
377         g(gen_load_constant(ctx, R_RET1, ip));
379         gen_insn(INSN_JMP, 0, 0, 0);
380         gen_four(escape_label);
382         return true;
385 static bool attr_w gen_escape(struct codegen_context *ctx)
387         int offset, i;
389         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
390         gen_one(R_RET0);
391         gen_one(R_FRAME);
393         offset = FRAME_SIZE - (1 << OP_SIZE_NATIVE);
395         g(gen_address(ctx, R_SP, FRAME_SIZE - 0x08, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
396         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
397         gen_one(R_RA);
398         gen_address_offset();
399         offset -= 1 << OP_SIZE_NATIVE;
401         for (i = R_FP; i <= R_S8; i++) {
402                 g(gen_address(ctx, R_SP, offset, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
403                 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
404                 gen_one(i);
405                 gen_address_offset();
406                 offset -= 1 << OP_SIZE_NATIVE;
407         }
409         g(gen_imm(ctx, FRAME_SIZE, IMM_PURPOSE_ADD, OP_SIZE_NATIVE));
410         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
411         gen_one(R_SP);
412         gen_one(R_SP);
413         gen_imm_offset();
415         gen_insn(INSN_RET, 0, 0, 0);
417         return true;
/*
 * Hook for preparing upcall argument number arg.  Nothing is emitted on this
 * target: both parameters are marked unused and the function just reports
 * success.
 */
420 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
422         return true;
425 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
427         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
428         gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
429         gen_one(R_SCRATCH_NA_1);
430         gen_address_offset();
432         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_ADDRESS, 0, 0);
433         gen_one(R_SCRATCH_NA_1);
435         g(gen_upcall_end(ctx, n_args));
437         return true;
/*
 * Forward declaration: gen_timestamp_test calls gen_cmp_test_jmp, whose
 * definition is not in this part of the file.
 */
440 static bool attr_w gen_cmp_test_jmp(struct codegen_context *ctx, unsigned insn, unsigned op_size, unsigned reg1, unsigned reg2, unsigned cond, uint32_t label);
442 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
444         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
445         gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
446         gen_one(R_SCRATCH_1);
447         gen_address_offset();
449 #ifdef TIMESTAMP_IN_REGISTER
450         g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_1, R_TIMESTAMP, COND_NE, escape_label));
451 #else
452         g(gen_address(ctx, R_SP, 0, IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
453         gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
454         gen_one(R_SCRATCH_2);
455         gen_address_offset();
457         g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_1, R_SCRATCH_2, COND_NE, escape_label));
458 #endif
460         return true;