2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/* Native operand width. A 4-byte and an 8-byte alternative are listed; presumably a preprocessor conditional that is not visible here selects one of them - TODO confirm. */
20 #define OP_SIZE_NATIVE OP_SIZE_4
22 #define OP_SIZE_NATIVE OP_SIZE_8
/* Address width, again with a 4-byte and an 8-byte alternative. */
26 #define OP_SIZE_ADDRESS OP_SIZE_4
28 #define OP_SIZE_ADDRESS OP_SIZE_8
/* Jump limit used for this target is JMP_LONG. */
31 #define JMP_LIMIT JMP_LONG
/* UNALIGNED_TRAP is 0 for this target. */
33 #define UNALIGNED_TRAP 0
/* Flag-effect classification of instructions. The numeric results (0, 1, 3) are interpreted by the users of these macros; their exact meaning is not defined in this section. */
/* ALU_WRITES_FLAGS: 0 for ALU_ADD with size >= OP_SIZE_2 on a non-memory operand; otherwise 1 for ALU_ADD/ALU_SUB with an immediate of -1 or 1; otherwise 3. */
35 #define ALU_WRITES_FLAGS(size, alu, is_mem, is_imm, imm) ((alu) == ALU_ADD && (size) >= OP_SIZE_2 && !(is_mem) ? 0 : ((alu) == ALU_ADD || (alu) == ALU_SUB) && (is_imm) && ((imm) == -1 || (imm) == 1) ? 1 : 3)
/* ALU1_WRITES_FLAGS: 0 for ALU1_NOT and ALU1_BSWAP, 3 for every other unary operation. */
36 #define ALU1_WRITES_FLAGS(alu) ((alu) == ALU1_NOT || (alu) == ALU1_BSWAP ? 0 : 3)
/* ROT_WRITES_FLAGS: 0 only when the bmi2 CPU feature is present, the operation is ROT_SHL, ROT_SHR or ROT_SAR, size >= OP_SIZE_4 and im is false; 1 in all other cases. */
/* NOTE(review): alu and size are used unparenthesized in the expansion, so callers should pass simple expressions only. */
37 #define ROT_WRITES_FLAGS(alu, size, im) (cpu_test_feature(CPU_FEATURE_bmi2) && (alu == ROT_SHL || alu == ROT_SHR || alu == ROT_SAR) && size >= OP_SIZE_4 && !(im) ? 0 : 1)
/* COND_IS_LOGICAL is always 0 on this target; the argument is ignored. */
38 #define COND_IS_LOGICAL(cond) 0
/* Architecture capability macros. */
/* Operand sizes up to and including OP_SIZE_2 are flagged as partial ALU operations. */
40 #define ARCH_PARTIAL_ALU(size) ((size) <= OP_SIZE_2)
/* Only ALU_ADD with f equal to zero is reported as three-address, for both the register and the immediate form. */
41 #define ARCH_IS_3ADDRESS(alu, f) ((alu) == ALU_ADD && !(f))
42 #define ARCH_IS_3ADDRESS_IMM(alu, f) ((alu) == ALU_ADD && !(f))
/* A rotate/shift is three-address exactly when ROT_WRITES_FLAGS(alu, size, false) evaluates to 0; the immediate form is never three-address. */
43 #define ARCH_IS_3ADDRESS_ROT(alu, size) (ROT_WRITES_FLAGS(alu, size, false) ? 0 : 1)
44 #define ARCH_IS_3ADDRESS_ROT_IMM(alu) 0
/* Unary operations reported as two-address: BSF, BSR, LZCNT and POPCNT. */
45 #define ARCH_IS_2ADDRESS(alu) ((alu) == ALU1_BSF || (alu) == ALU1_BSR || (alu) == ALU1_LZCNT || (alu) == ALU1_POPCNT)
/* Three-address floating point depends on the avx CPU feature tested at run time. */
46 #define ARCH_IS_3ADDRESS_FP cpu_test_feature(CPU_FEATURE_avx)
/* Fixed capability switches (1 = reported as available, 0 = not). */
47 #define ARCH_HAS_JMP_2REGS(cond) 0
48 #define ARCH_HAS_FLAGS 1
49 #define ARCH_PREFERS_SX(size) 0
50 #define ARCH_HAS_BWX 1
51 #define ARCH_HAS_MUL 1
52 #define ARCH_HAS_DIV 1
53 #define ARCH_HAS_ANDN 0
/* Bit-test operations are reported for everything except BTX_BTEXT and only for sizes of at least OP_SIZE_2; cnst is not consulted. */
54 #define ARCH_HAS_BTX(btx, size, cnst) ((btx) != BTX_BTEXT && (size) >= OP_SIZE_2)
/* Shifted add is reported for shift amounts up to and including 3. */
55 #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3)
56 #define ARCH_SHIFT_SIZE OP_SIZE_4
/* Boolean size is log_2 of sizeof(ajla_flat_option_t). */
57 #define ARCH_BOOL_SIZE log_2(sizeof(ajla_flat_option_t))
/* Moves between FP and general-purpose registers depend on the sse2 CPU feature. */
58 #define ARCH_HAS_FP_GP_MOV cpu_test_feature(CPU_FEATURE_sse2)
59 #define ARCH_NEEDS_BARRIER 0
/* The i_size helpers are identity mappings on this target. */
61 #define i_size(size) (size)
62 #define i_size_rot(size) (size)
63 #define i_size_cmp(size) (size)
/* Register class predicates: numbers below 16 are general-purpose registers; R_XMM0 through R_XMM31 (inclusive) are XMM registers. */
131 #define R_IS_GPR(r) ((r) < 16)
132 #define R_IS_XMM(r) ((r) >= R_XMM0 && (r) <= R_XMM31)
/* TIMESTAMP_IN_REGISTER is an optional build switch; it is commented out here. */
134 /*#define TIMESTAMP_IN_REGISTER*/
/* The upcall register is held in a variable (initially R_R15) rather than a constant, and R_UPCALL reads that variable. */
137 static uint8_t upcall_register = R_R15;
138 #define R_UPCALL upcall_register
/* With TIMESTAMP_IN_REGISTER the timestamp is kept in R_R14. */
139 #ifdef TIMESTAMP_IN_REGISTER
140 #define R_TIMESTAMP R_R14
/* R_CONST_IMM has two alternative definitions (R_R11 or the placeholder 255); the conditional choosing between them is not visible here. */
142 #define R_CONST_IMM R_R11
144 #define R_CONST_IMM 255 /* this should not be used */
146 #define R_OFFSET_IMM 255 /* this should not be used */
/* Scratch and saved register assignment, given in three variants: 32-bit x86, the Windows ABI, and a third group that presumably is the fallback branch of the same conditional (its #else line is not visible here). */
/* In every variant R_SCRATCH_4 aliases R_SAVED_2; defining it before R_SAVED_2 is fine because macros are expanded at the point of use. */
148 #if defined(ARCH_X86_32)
150 #define R_SCRATCH_1 R_AX
151 #define R_SCRATCH_2 R_DX
152 #define R_SCRATCH_3 R_CX
153 #define R_SCRATCH_4 R_SAVED_2
154 #define R_SAVED_1 R_SI
155 #define R_SAVED_2 R_DI
156 #elif defined(ARCH_X86_WIN_ABI)
158 #define R_SCRATCH_1 R_AX
159 #define R_SCRATCH_2 R_DX
160 #define R_SCRATCH_3 R_CX
161 #define R_SCRATCH_4 R_SAVED_2
162 #define R_SAVED_1 R_SI
163 #define R_SAVED_2 R_DI
/* Third variant: the saved registers are R_BP and R_R12 instead of R_SI and R_DI. */
166 #define R_SCRATCH_1 R_AX
167 #define R_SCRATCH_2 R_DX
168 #define R_SCRATCH_3 R_CX
169 #define R_SCRATCH_4 R_SAVED_2
170 #define R_SAVED_1 R_BP
171 #define R_SAVED_2 R_R12
/* Floating-point scratch registers are the first two XMM registers. */
174 #define FR_SCRATCH_1 R_XMM0
175 #define FR_SCRATCH_2 R_XMM1
/* NOTE(review): the bodies of the following two conditional branches are not visible in this section. */
177 #if defined(ARCH_X86_32)
182 #elif defined(ARCH_X86_WIN_ABI)
/* ARG_SPACE and ARG_OFFSET per ABI. ARG_SPACE is the immediate used by the stack adjustment in gen_entry/gen_escape, and both constants are used to form stack-relative offsets there. The modulo requirement on each ARG_SPACE is stated in its own comment. */
195 #if defined(ARCH_X86_32)
196 #define ARG_SPACE 0x1c /* must be 0xc modulo 0x10 */
197 #define ARG_OFFSET 0x14
198 #elif defined(ARCH_X86_WIN_ABI) && !defined(TIMESTAMP_IN_REGISTER)
199 #define ARG_SPACE 0x28 /* must be 0x8 modulo 0x10 */
200 #define ARG_OFFSET 0xa0
201 #elif defined(ARCH_X86_WIN_ABI)
202 #define ARG_SPACE 0x20 /* must be 0x0 modulo 0x10 */
203 #define ARG_OFFSET 0x90
/* Bit masks of supported floating-point types, built from run-time CPU features: sse contributes 0x2 and sse2 contributes 0x4. */
206 #define SUPPORTED_FP (cpu_test_feature(CPU_FEATURE_sse) * 0x2 + cpu_test_feature(CPU_FEATURE_sse2) * 0x4)
/* The x87 mask is the constant 0xe (bits 1, 2 and 3). */
207 #define SUPPORTED_FP_X87 0xe
/* Half-precision conversion contributes 0x1 when the f16c CPU feature is present. */
208 #define SUPPORTED_FP_HALF_CVT (cpu_test_feature(CPU_FEATURE_f16c) * 0x1)
/* Returns true when reg lies in the half-open range [0x20, 0x40), the register numbers treated as floating-point registers. */
210 static bool reg_is_fp(unsigned reg)
212 return reg >= 0x20 && reg < 0x40;
/* Returns true when reg lies in the half-open range [0x18, 0x1e), the register numbers treated as segment registers. */
215 static bool reg_is_segment(unsigned reg)
217 return reg >= 0x18 && reg < 0x1e;
/* Per-ABI register tables: regs_saved / regs_volatile list general-purpose registers, fp_saved / fp_volatile list XMM registers. Presumably these feed the register allocator - TODO confirm against the users of the tables. */
/* A table declared as { 0 } is paired with a count macro of 0U, i.e. it is a placeholder for an empty list. */
220 #if defined(ARCH_X86_32)
222 static const uint8_t regs_saved[] = { R_BX };
223 static const uint8_t regs_volatile[] = { 0 };
224 #define n_regs_volatile 0U
225 static const uint8_t fp_saved[] = { 0 };
226 #define n_fp_saved 0U
227 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7 };
228 #define reg_is_saved(r) ((r) == R_BX)
230 #elif defined(ARCH_X86_WIN_ABI)
/* NOTE(review): the remainder of this initializer (its conditional entries and closing brace) is not visible in this section. */
232 static const uint8_t regs_saved[] = { R_BX, R_R12, R_R13,
233 #ifndef TIMESTAMP_IN_REGISTER
237 static const uint8_t regs_volatile[] = { R_R8, R_R9, R_R10 };
238 static const uint8_t fp_saved[] = { 0 };
239 #define n_fp_saved 0U
240 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5 };
241 #define reg_is_saved(r) ((r) == R_BX || ((r) >= R_R12 && ((r) <= R_R15)))
/* Third variant; as above, the tail of the regs_saved initializer is not visible here. */
245 static const uint8_t regs_saved[] = { R_R13,
246 #ifndef TIMESTAMP_IN_REGISTER
/* The usable count drops by one when R_UPCALL is not a segment register (!reg_is_segment yields 1 in that case). */
251 #define n_regs_saved (n_array_elements(regs_saved) - !reg_is_segment(R_UPCALL))
252 static const uint8_t regs_volatile[] = { R_SI, R_DI, R_R8, R_R9, R_R10 };
253 static const uint8_t fp_saved[] = { 0 };
254 #define n_fp_saved 0U
255 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7, R_XMM8, R_XMM9, R_XMM10, R_XMM11, R_XMM12, R_XMM13, R_XMM14, R_XMM15 };
256 #define reg_is_saved(r) ((r) >= R_R13 && (r) <= R_R15)
/* Returns true when imm fits in a signed 8-bit value, i.e. -0x80 <= imm < 0x80. */
259 static bool attr_w imm_is_8bit(int64_t imm)
261 return imm >= -0x80 && imm < 0x80;
/* Returns true when imm fits in a signed 32-bit value, i.e. -0x80000000 <= imm < 0x80000000. */
/* NOTE(review): imm is marked attr_unused, which suggests another code path that does not read it; that path is not visible in this section. */
264 static bool attr_w imm_is_32bit(int64_t attr_unused imm)
269 return imm >= -0x80000000LL && imm < 0x80000000LL;
/* Emits an INSN_MOV of native operand size for loading the constant c into register reg. */
/* NOTE(review): the statement controlled by the OP_SIZE_NATIVE == OP_SIZE_4 test and the operand emitters after gen_insn are not visible in this section. */
273 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
275 if (OP_SIZE_NATIVE == OP_SIZE_4)
277 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
/* Records a base-register-plus-immediate address in ctx for a following instruction. */
/* ctx: code generation context that receives the address; base: base register; imm: displacement; purpose: one of the IMM_PURPOSE_* values listed below; size: unused on this target. */
284 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned attr_unused size)
/* Store the displacement and base register; no index register is used. */
286 ctx->offset_imm = imm;
287 ctx->offset_reg = false;
288 ctx->base_reg = base;
/* Accepted purposes; any other value is reported through internal() as an invalid purpose. */
290 case IMM_PURPOSE_LDR_OFFSET:
291 case IMM_PURPOSE_LDR_SX_OFFSET:
292 case IMM_PURPOSE_STR_OFFSET:
293 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
294 case IMM_PURPOSE_MVI_CLI_OFFSET:
297 internal(file_line, "gen_address: invalid purpose %d", purpose);
/* A displacement outside the signed 32-bit range is the unlikely path; its handling is not visible in this section. */
300 if (unlikely(!imm_is_32bit(imm)))
/* Reports whether imm can be used directly as an immediate: on the visible path this is exactly imm_is_32bit(imm); purpose and size are not consulted there. */
306 static bool is_direct_const(int64_t attr_unused imm, unsigned attr_unused purpose, unsigned attr_unused size)
311 return imm_is_32bit(imm);
/* Emits the entry sequence of generated code. There is one variant per ABI; each pushes a set of registers, sets up the frame and ends with an indirect jump. */
/* NOTE(review): most operand emitters (gen_one/gen_eight lines naming the registers) are not visible in this section, so the comments below describe only the instruction kinds and the visible immediates. */
315 static bool attr_w gen_entry(struct codegen_context *ctx)
/* 32-bit x86: four 4-byte pushes, a subtraction with immediate ARG_SPACE, a load from offset ARG_SPACE + ARG_OFFSET and an indirect jump through offset ARG_SPACE + ARG_OFFSET + 12. */
317 #if defined(ARCH_X86_32)
318 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
321 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
324 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
327 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
330 gen_insn(INSN_ALU, OP_SIZE_4, ALU_SUB, 1);
334 gen_eight(ARG_SPACE);
336 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
338 gen_one(ARG_ADDRESS_1);
340 gen_eight(ARG_SPACE + ARG_OFFSET);
342 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
343 gen_one(ARG_ADDRESS_1);
345 gen_eight(ARG_SPACE + ARG_OFFSET + 12);
/* Windows ABI: nine 8-byte pushes, plus a tenth when TIMESTAMP_IN_REGISTER is not defined. */
346 #elif defined(ARCH_X86_WIN_ABI)
347 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
350 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
353 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
356 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
359 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
362 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
365 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
368 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
371 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
373 #ifndef TIMESTAMP_IN_REGISTER
374 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
/* Subtraction with immediate ARG_SPACE, followed by two address-sized moves. */
377 gen_insn(INSN_ALU, OP_SIZE_8, ALU_SUB, 1);
381 gen_eight(ARG_SPACE);
383 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
387 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* With TIMESTAMP_IN_REGISTER a 4-byte move targets R_TIMESTAMP. */
390 #ifdef TIMESTAMP_IN_REGISTER
391 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
392 gen_one(R_TIMESTAMP);
/* Indirect jump through a memory operand at offset ARG_OFFSET. */
395 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
396 gen_one(ARG_ADDRESS_1);
398 gen_eight(ARG_OFFSET);
/* Remaining variant (presumably the fallback branch; its #else line is not visible here): seven 8-byte pushes. */
400 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
403 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
406 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
409 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
412 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
415 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
418 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
420 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* An extra address-sized move is emitted only when R_UPCALL is not a segment register. */
424 if (!reg_is_segment(R_UPCALL)) {
425 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
429 #ifdef TIMESTAMP_IN_REGISTER
430 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
431 gen_one(R_TIMESTAMP);
/* x32 builds emit an additional 4-byte move before the final indirect jump. */
434 #if defined(ARCH_X86_X32)
435 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
439 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
/* Emits code that loads an escape argument derived from ip and then jumps to escape_label. */
/* NOTE(review): the operand emitters of the first move and the preprocessor line separating the two move variants are not visible in this section. */
445 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
447 #if defined(ARCH_X86_32) || defined(ARCH_X86_64)
448 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* Alternative form: an 8-byte move whose immediate carries ip in the upper 32 bits. */
453 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
456 gen_eight((uint64_t)ip << 32);
/* Unconditional jump to the escape label. */
458 gen_insn(INSN_JMP, 0, 0, 0);
459 gen_four(escape_label);
/* Emits the exit sequence of generated code: the stack adjustment is undone, registers are popped and a return is emitted. The pop counts match the push counts visible in gen_entry for the same variant. */
/* NOTE(review): most operand emitters naming the registers are not visible in this section. */
464 static bool attr_w gen_escape(struct codegen_context *ctx)
/* 32-bit x86: one move, an addition with immediate ARG_SPACE, four pops and a return. */
466 #if defined(ARCH_X86_32)
467 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
471 gen_insn(INSN_ALU, OP_SIZE_4, ALU_ADD, 1);
475 gen_eight(ARG_SPACE);
477 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
480 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
483 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
486 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
489 gen_insn(INSN_RET, 0, 0, 0);
/* Windows ABI: the addition uses ARG_SPACE when TIMESTAMP_IN_REGISTER is defined and ARG_SPACE + 8 otherwise (presumably to discard the extra slot pushed in gen_entry - TODO confirm). */
490 #elif defined(ARCH_X86_WIN_ABI)
491 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
495 #if defined(TIMESTAMP_IN_REGISTER)
496 gen_eight(ARG_SPACE);
498 gen_eight(ARG_SPACE + 8);
/* One pop, then an 8-byte and a 4-byte move with a memory operand first, then eight more pops and a return. */
500 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
503 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
504 gen_one(ARG_ADDRESS_1);
509 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
510 gen_one(ARG_ADDRESS_1);
515 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
518 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
521 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
524 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
527 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
530 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
533 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
536 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
539 gen_insn(INSN_RET, 0, 0, 0);
/* Remaining variant: an extra addition on x32 builds, one address-sized move, seven pops and a return. */
541 #if defined(ARCH_X86_X32)
542 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
547 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
551 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
554 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
557 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
560 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
563 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
566 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
569 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
572 gen_insn(INSN_RET, 0, 0, 0);
/* Prepares argument number arg for an upcall. Only 32-bit x86 emits code here: it stores the argument with a 4-byte move whose first operand is a memory address. Both parameters are marked attr_unused for the other variants. */
577 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
579 #if defined(ARCH_X86_32)
/* Each argument takes 4 bytes and must lie within the ARG_SPACE area. */
580 ajla_assert_lo(arg * 4 < ARG_SPACE, (file_line, "gen_upcall_argument: argument %u", arg));
581 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
582 gen_one(ARG_ADDRESS_1);
/* Emits an indirect call to an upcall and then finishes it through gen_upcall_end(ctx, offset, n_args). */
/* NOTE(review): several operand emitters, including the ones that would use offset, are not visible in this section. */
590 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
/* 32-bit x86: load a value from offset ARG_SPACE + ARG_OFFSET + 4, then call indirectly through a memory operand. */
592 #if defined(ARCH_X86_32)
593 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
595 gen_one(ARG_ADDRESS_1);
597 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
599 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
600 gen_one(ARG_ADDRESS_1);
/* x32: an address-sized load from memory followed by an indirect call. */
603 #elif defined(ARCH_X86_X32)
604 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
606 gen_one(ARG_ADDRESS_1);
610 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
/* Remaining variant: a single indirect call through a memory operand. */
613 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
614 gen_one(ARG_ADDRESS_1);
/* g() wraps the call to gen_upcall_end; presumably it propagates failure to the caller - TODO confirm against its definition. */
618 g(gen_upcall_end(ctx, offset, n_args));
/* Emits code that loads an upcall pointer into register reg. */
/* NOTE(review): the operand emitters that would use offset and reg are not visible in this section. */
623 static bool attr_w gen_get_upcall_pointer(struct codegen_context *ctx, unsigned offset, unsigned reg)
/* 32-bit x86: two 4-byte loads, the first from offset ARG_SPACE + ARG_OFFSET + 4. */
625 #if defined(ARCH_X86_32)
626 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
628 gen_one(ARG_ADDRESS_1);
630 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
632 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
634 gen_one(ARG_ADDRESS_1);
/* Other variants: a single address-sized load from memory. */
638 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
640 gen_one(ARG_ADDRESS_1);
/* Emits a 4-byte comparison against the ts field of struct cg_upcall_vector_s and a conditional jump to escape_label when the compared values differ (COND_NE). */
/* NOTE(review): the register operands of the moves and compares are mostly not visible in this section. */
647 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
/* 32-bit x86: load from offset ARG_SPACE + ARG_OFFSET + 4 and from ARG_SPACE + ARG_OFFSET + 8, then compare against the ts field. */
649 #if defined(ARCH_X86_32)
650 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
652 gen_one(ARG_ADDRESS_1);
654 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
656 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
658 gen_one(ARG_ADDRESS_1);
660 gen_eight(ARG_SPACE + ARG_OFFSET + 8);
662 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
664 gen_one(ARG_ADDRESS_1);
666 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* With TIMESTAMP_IN_REGISTER: compare R_TIMESTAMP directly against the ts field. */
667 #elif defined(TIMESTAMP_IN_REGISTER)
668 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
669 gen_one(R_TIMESTAMP);
670 gen_one(ARG_ADDRESS_1);
672 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Otherwise: load a 4-byte value from memory (offset ARG_SPACE on the Windows ABI) and compare it against the ts field. */
674 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
676 gen_one(ARG_ADDRESS_1);
678 #if defined(ARCH_X86_WIN_ABI)
679 gen_eight(ARG_SPACE);
683 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
685 gen_one(ARG_ADDRESS_1);
687 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Jump to the escape label when the comparison reports inequality. */
689 gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
690 gen_four(escape_label);