/*
 * Repository browser header (not part of the original file):
 *   codegen: fix a bug in "bt" on big-endian machines
 *   [ajla.git] / c1-alpha.inc
 *   blob 04fd4383c24847c35d2d707c0a4e8bcd788370bd
 */
/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */

/*
 * Alpha back-end parameters: operand sizes, flag behaviour and
 * instruction-set capabilities, expressed as macros.
 * NOTE(review): these are presumably consumed by the architecture-independent
 * code generator that includes this file - confirm against the includer.
 */

/* Registers and addresses both use OP_SIZE_8. */
#define OP_SIZE_NATIVE                  OP_SIZE_8
#define OP_SIZE_ADDRESS                 OP_SIZE_NATIVE

#define JMP_LIMIT                       JMP_SHORTEST

#define UNALIGNED_TRAP                  1

/* No instruction class is reported as writing flags (ARCH_HAS_FLAGS is 0). */
#define ALU_WRITES_FLAGS(alu, im)       0
#define ALU1_WRITES_FLAGS(alu)          0
#define ROT_WRITES_FLAGS(alu, size, im) 0
#define COND_IS_LOGICAL(cond)           0

/* Instruction forms and optional features. */
#define ARCH_PARTIAL_ALU(size)          0
#define ARCH_IS_3ADDRESS(alu, f)        1
#define ARCH_IS_3ADDRESS_IMM(alu, f)    1
#define ARCH_IS_3ADDRESS_ROT(alu, size) 1
#define ARCH_IS_3ADDRESS_ROT_IMM(alu)   1
#define ARCH_IS_2ADDRESS(alu)           1
#define ARCH_IS_3ADDRESS_FP             1
#define ARCH_HAS_JMP_2REGS(cond)        0
#define ARCH_HAS_FLAGS                  0
#define ARCH_SUPPORTS_TRAPS             OS_SUPPORTS_TRAPS
#define ARCH_TRAP_BEFORE                0
#define ARCH_PREFERS_SX(size)           ((size) == OP_SIZE_4)
/* Decided at run time from the CPU feature bits. */
#define ARCH_HAS_BWX                    cpu_test_feature(CPU_FEATURE_bwx)
#define ARCH_HAS_MUL                    1
#define ARCH_HAS_DIV                    0
#define ARCH_HAS_ANDN                   1
/* Shifted add is available for shift counts 0, 2 and 3 only. */
#define ARCH_HAS_SHIFTED_ADD(bits)      ((bits) == 0 || (bits) == 2 || (bits) == 3)
#define ARCH_HAS_BTX(btx, size, cnst)   0
#define ARCH_SHIFT_SIZE                 OP_SIZE_8
#define ARCH_BOOL_SIZE                  OP_SIZE_8
/* Decided at run time from the CPU feature bits. */
#define ARCH_HAS_FP_GP_MOV              cpu_test_feature(CPU_FEATURE_fix)
#define ARCH_NEEDS_BARRIER              thread_needs_barriers

/* Every integer operation is carried out at the native size, whatever size is requested. */
#define i_size(size)                    OP_SIZE_NATIVE
#define i_size_rot(size)                OP_SIZE_NATIVE
#define i_size_cmp(size)                OP_SIZE_NATIVE

/*
 * Compile-time switch, disabled by default.  When defined, R_S2 is
 * reserved as R_TIMESTAMP; otherwise the timestamp is kept in the stack
 * frame and R_S2 is listed among the saved registers.
 */
/*#define TIMESTAMP_IN_REGISTER*/

/* Integer registers, numbers 0x00..0x1f. */
#define R_V0            0x00
#define R_T0            0x01
#define R_T1            0x02
#define R_T2            0x03
#define R_T3            0x04
#define R_T4            0x05
#define R_T5            0x06
#define R_T6            0x07
#define R_T7            0x08
#define R_S0            0x09
#define R_S1            0x0a
#define R_S2            0x0b
#define R_S3            0x0c
#define R_S4            0x0d
#define R_S5            0x0e
#define R_FP            0x0f
#define R_A0            0x10
#define R_A1            0x11
#define R_A2            0x12
#define R_A3            0x13
#define R_A4            0x14
#define R_A5            0x15
#define R_T8            0x16
#define R_T9            0x17
#define R_T10           0x18
#define R_T11           0x19
#define R_RA            0x1a
#define R_T12           0x1b
#define R_AT            0x1c
#define R_GP            0x1d
#define R_SP            0x1e
#define R_ZERO          0x1f

/* Floating-point registers, numbers 0x20..0x3f. */
#define R_F0            0x20
#define R_F1            0x21
#define R_F2            0x22
#define R_F3            0x23
#define R_F4            0x24
#define R_F5            0x25
#define R_F6            0x26
#define R_F7            0x27
#define R_F8            0x28
#define R_F9            0x29
#define R_F10           0x2a
#define R_F11           0x2b
#define R_F12           0x2c
#define R_F13           0x2d
#define R_F14           0x2e
#define R_F15           0x2f
#define R_F16           0x30
#define R_F17           0x31
#define R_F18           0x32
#define R_F19           0x33
#define R_F20           0x34
#define R_F21           0x35
#define R_F22           0x36
#define R_F23           0x37
#define R_F24           0x38
#define R_F25           0x39
#define R_F26           0x3a
#define R_F27           0x3b
#define R_F28           0x3c
#define R_F29           0x3d
#define R_F30           0x3e
#define R_FZERO         0x3f

/* Registers with a fixed role in generated code. */
#define R_FRAME         R_S0
#define R_UPCALL        R_S1
#ifdef TIMESTAMP_IN_REGISTER
#define R_TIMESTAMP     R_S2
#endif

/* Scratch registers; R_SCRATCH_1..4 coincide with the first four argument registers. */
#define R_SCRATCH_1     R_A0
#define R_SCRATCH_2     R_A1
#define R_SCRATCH_3     R_A2
#define R_SCRATCH_4     R_A3
#define R_SCRATCH_NA_1  R_T0
#define R_SCRATCH_NA_2  R_T1
#define R_SCRATCH_NA_3  R_T2

#define R_SAVED_1       R_S3
#define R_SAVED_2       R_S4

/* Argument and return-value registers. */
#define R_ARG0          R_A0
#define R_ARG1          R_A1
#define R_ARG2          R_A2
#define R_ARG3          R_A3
#define R_ARG4          R_A4
#define R_RET0          R_V0

/* Temporaries for out-of-range address offsets, materialised constants and comparison results. */
#define R_OFFSET_IMM    R_T3
#define R_CONST_IMM     R_T4
#define R_CMP_RESULT    R_T5

#define FR_SCRATCH_1    R_F0
#define FR_SCRATCH_2    R_F1
#define FR_SCRATCH_3    R_F10

#define SUPPORTED_FP    0x6

/* Size in bytes of the stack frame allocated by gen_entry and released by gen_escape. */
#define FRAME_SIZE      0x50

/*
 * Tell whether a register number denotes a floating-point register.
 * Floating-point registers occupy 0x20..0x3f (R_F0..R_FZERO); the integer
 * registers occupy 0x00..0x1f.
 */
static bool reg_is_fp(unsigned reg)
{
	return reg >= 0x20 && reg < 0x40;
}
167 static const uint8_t regs_saved[] = {
168 #ifndef TIMESTAMP_IN_REGISTER
169         R_S2,
170 #endif
171         R_S5, R_FP };
172 static const uint8_t regs_volatile[] = { R_T6, R_T7, R_A4, R_A5, R_T8, R_T9, R_T10, R_T11, R_RA, R_T12, R_AT, R_GP };
173 static const uint8_t fp_saved[] = { 0 };
174 #define n_fp_saved 0U
175 static const uint8_t fp_volatile[] = { R_F11, R_F12, R_F13, R_F14, R_F15, R_F16, R_F17, R_F18, R_F19, R_F20, R_F21, R_F22, R_F23, R_F24, R_F25, R_F26, R_F27, R_F28, R_F29, R_F30 };
176 #define reg_is_saved(r) (((r) >= R_S0 && (r) <= R_FP) || ((r) >= R_F2 && (r) <= R_F9))
178 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
180         unsigned r = R_ZERO;
181         int16_t c1, c2, c3, c4;
182         c1 = (int16_t)c;
183         c &= ~0xffffUL;
184         if (c1 < 0)
185                 c += 0x10000UL;
186         c2 = (int16_t)(c >> 16);
187         c &= ~0xffffffffUL;
188         if (c2 < 0)
189                 c += 0x100000000UL;
190         c3 = (int16_t)(c >> 32);
191         c &= ~0xffffffffffffUL;
192         if (c3 < 0)
193                 c += 0x1000000000000UL;
194         c4 = (int16_t)(c >> 48);
195         if (c4) {
196                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
197                 gen_one(reg);
198                 gen_one(R_ZERO);
199                 gen_one(ARG_IMM);
200                 gen_eight((uint64_t)c4 << 16);
201                 r = reg;
202         }
203         if (c3) {
204                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
205                 gen_one(reg);
206                 gen_one(r);
207                 gen_one(ARG_IMM);
208                 gen_eight(c3);
209                 r = reg;
210         }
211         if (r != R_ZERO) {
212                 gen_insn(INSN_ROT, OP_SIZE_NATIVE, ROT_SHL, 0);
213                 gen_one(reg);
214                 gen_one(reg);
215                 gen_one(ARG_IMM);
216                 gen_eight(32);
217         }
218         if (c2) {
219                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
220                 gen_one(reg);
221                 gen_one(r);
222                 gen_one(ARG_IMM);
223                 gen_eight((uint64_t)c2 << 16);
224                 r = reg;
225         }
226         if (c1 || r == R_ZERO) {
227                 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
228                 gen_one(reg);
229                 gen_one(r);
230                 gen_one(ARG_IMM);
231                 gen_eight(c1);
232         }
233         return true;
236 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned size)
238         ctx->base_reg = base;
239         ctx->offset_imm = imm;
240         ctx->offset_reg = false;
241         switch (purpose) {
242                 case IMM_PURPOSE_LDR_OFFSET:
243                 case IMM_PURPOSE_LDR_SX_OFFSET:
244                 case IMM_PURPOSE_STR_OFFSET:
245                 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
246                 case IMM_PURPOSE_MVI_CLI_OFFSET:
247                         if (likely(imm >= -0x8000) && likely(imm < 0x8000))
248                                 return true;
249                         break;
250                 default:
251                         internal(file_line, "gen_address: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
252         }
253         g(gen_load_constant(ctx, R_OFFSET_IMM, imm));
254         gen_insn(INSN_ALU, OP_SIZE_ADDRESS, ALU_ADD, 0);
255         gen_one(R_OFFSET_IMM);
256         gen_one(R_OFFSET_IMM);
257         gen_one(base);
258         ctx->base_reg = R_OFFSET_IMM;
259         ctx->offset_imm = 0;
260         return true;
263 static bool is_direct_const(int64_t imm, unsigned purpose, unsigned size)
265         int64_t imm_copy = imm;
266         switch (purpose) {
267                 case IMM_PURPOSE_STORE_VALUE:
268                         if (!imm)
269                                 return true;
270                         break;
271                 case IMM_PURPOSE_SUB:
272                         imm_copy = -(uint64_t)imm_copy;
273                         /*-fallthrough*/
274                 case IMM_PURPOSE_ADD:
275                         if (likely(imm_copy >= -0x8000) && likely(imm_copy < 0x8000))
276                                 return true;
277                         if (imm_copy & 0xffff)
278                                 break;
279                         if (likely(imm_copy >= -0x80000000L) && likely(imm_copy < 0x80000000L))
280                                 return true;
281                         break;
282                 case IMM_PURPOSE_CMP:
283                 case IMM_PURPOSE_CMP_LOGICAL:
284                 case IMM_PURPOSE_AND:
285                 case IMM_PURPOSE_OR:
286                 case IMM_PURPOSE_XOR:
287                 case IMM_PURPOSE_ANDN:
288                 case IMM_PURPOSE_TEST:
289                 case IMM_PURPOSE_MUL:
290                 case IMM_PURPOSE_MOVR:
291                 case IMM_PURPOSE_ADD_TRAP:
292                 case IMM_PURPOSE_SUB_TRAP:
293                         if (imm >= 0 && imm < 256)
294                                 return true;
295                         break;
296                 default:
297                         internal(file_line, "is_direct_const: invalid purpose %u (imm %"PRIxMAX", size %u)", purpose, (uintmax_t)imm, size);
298         }
299         return false;
302 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
304         if (is_direct_const(imm, purpose, size)) {
305                 ctx->const_imm = imm;
306                 ctx->const_reg = false;
307         } else {
308                 g(gen_load_constant(ctx, R_CONST_IMM, imm));
309                 ctx->const_reg = true;
310         }
311         return true;
314 static bool attr_w gen_entry(struct codegen_context *ctx)
316         g(gen_imm(ctx, FRAME_SIZE, IMM_PURPOSE_SUB, OP_SIZE_NATIVE));
317         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_SUB, 0);
318         gen_one(R_SP);
319         gen_one(R_SP);
320         gen_imm_offset();
322         g(gen_address(ctx, R_SP, 0, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
323         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
324         gen_address_offset();
325         gen_one(R_RA);
327 #ifndef TIMESTAMP_IN_REGISTER
328         g(gen_address(ctx, R_SP, 8, IMM_PURPOSE_STR_OFFSET, OP_SIZE_4));
329         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
330         gen_address_offset();
331         gen_one(R_ARG3);
332 #endif
334         g(gen_address(ctx, R_SP, 16, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
335         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
336         gen_address_offset();
337         gen_one(R_S0);
339         g(gen_address(ctx, R_SP, 24, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
340         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
341         gen_address_offset();
342         gen_one(R_S1);
344         g(gen_address(ctx, R_SP, 32, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
345         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
346         gen_address_offset();
347         gen_one(R_S2);
349         g(gen_address(ctx, R_SP, 40, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
350         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
351         gen_address_offset();
352         gen_one(R_S3);
354         g(gen_address(ctx, R_SP, 48, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
355         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
356         gen_address_offset();
357         gen_one(R_S4);
359         g(gen_address(ctx, R_SP, 56, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
360         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
361         gen_address_offset();
362         gen_one(R_S5);
364         g(gen_address(ctx, R_SP, 64, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
365         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
366         gen_address_offset();
367         gen_one(R_FP);
369         g(gen_address(ctx, R_SP, 72, IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
370         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
371         gen_address_offset();
372         gen_one(R_ARG0);
374         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
375         gen_one(R_FRAME);
376         gen_one(R_ARG1);
378         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
379         gen_one(R_UPCALL);
380         gen_one(R_ARG2);
382 #ifdef TIMESTAMP_IN_REGISTER
383         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
384         gen_one(R_TIMESTAMP);
385         gen_one(R_ARG3);
386 #endif
388         gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
389         gen_one(R_ARG4);
391         return true;
394 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
396         g(gen_load_constant(ctx, R_SCRATCH_1, (int32_t)ip));
398         gen_insn(INSN_JMP, 0, 0, 0);
399         gen_four(escape_label);
401         return true;
404 static bool attr_w gen_escape(struct codegen_context *ctx)
406         g(gen_address(ctx, R_SP, 72, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
407         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
408         gen_one(R_RET0);
409         gen_address_offset();
411         g(gen_address(ctx, R_RET0, 0, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
412         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
413         gen_address_offset();
414         gen_one(R_FRAME);
416         g(gen_address(ctx, R_RET0, 8, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
417         gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
418         gen_address_offset();
419         gen_one(R_SCRATCH_1);
421         g(gen_address(ctx, R_SP, 0, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
422         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
423         gen_one(R_RA);
424         gen_address_offset();
426         g(gen_address(ctx, R_SP, 16, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
427         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
428         gen_one(R_S0);
429         gen_address_offset();
431         g(gen_address(ctx, R_SP, 24, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
432         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
433         gen_one(R_S1);
434         gen_address_offset();
436         g(gen_address(ctx, R_SP, 32, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
437         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
438         gen_one(R_S2);
439         gen_address_offset();
441         g(gen_address(ctx, R_SP, 40, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
442         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
443         gen_one(R_S3);
444         gen_address_offset();
446         g(gen_address(ctx, R_SP, 48, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
447         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
448         gen_one(R_S4);
449         gen_address_offset();
451         g(gen_address(ctx, R_SP, 56, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
452         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
453         gen_one(R_S5);
454         gen_address_offset();
456         g(gen_address(ctx, R_SP, 64, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
457         gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
458         gen_one(R_FP);
459         gen_address_offset();
461         g(gen_imm(ctx, FRAME_SIZE, IMM_PURPOSE_ADD, OP_SIZE_NATIVE));
462         gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_ADD, 0);
463         gen_one(R_SP);
464         gen_one(R_SP);
465         gen_imm_offset();
467         gen_insn(INSN_RET, 0, 0, 0);
469         return true;
472 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
474         return true;
477 static bool attr_w gen_get_upcall_pointer(struct codegen_context *ctx, unsigned offset, unsigned reg)
479         g(gen_address(ctx, R_UPCALL, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_ADDRESS));
480         gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
481         gen_one(reg);
482         gen_address_offset();
484         return true;
487 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
489         g(gen_get_upcall_pointer(ctx, offset, R_T12));
491         gen_insn(INSN_CALL_INDIRECT, OP_SIZE_ADDRESS, 0, 0);
492         gen_one(R_T12);
494         g(gen_upcall_end(ctx, n_args));
496         return true;
499 static bool attr_w gen_cmp_test_jmp(struct codegen_context *ctx, unsigned insn, unsigned op_size, unsigned reg1, unsigned reg2, unsigned cond, uint32_t label);
501 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
503         g(gen_address(ctx, R_UPCALL, offsetof(struct cg_upcall_vector_s, ts), IMM_PURPOSE_LDR_SX_OFFSET, OP_SIZE_4));
504         gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
505         gen_one(R_SCRATCH_1);
506         gen_address_offset();
508 #ifdef TIMESTAMP_IN_REGISTER
509         g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_1, R_TIMESTAMP, COND_NE, escape_label));
510 #else
511         g(gen_address(ctx, R_SP, 8, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_4));
512         gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
513         gen_one(R_SCRATCH_2);
514         gen_address_offset();
516         g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_1, R_SCRATCH_2, COND_NE, escape_label));
517 #endif
518         return true;