2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/*
 * Operand-size and jump-range configuration for this backend.
 * NOTE(review): OP_SIZE_NATIVE and OP_SIZE_ADDRESS each appear twice below
 * (a 4-byte and an 8-byte variant); the preprocessor conditionals that choose
 * between them are not visible in this excerpt.
 */
20 #define OP_SIZE_NATIVE OP_SIZE_4
22 #define OP_SIZE_NATIVE OP_SIZE_8
26 #define OP_SIZE_ADDRESS OP_SIZE_4
28 #define OP_SIZE_ADDRESS OP_SIZE_8
/* Jump range class used for this target. */
31 #define JMP_LIMIT JMP_LONG
/* NOTE(review): presumably 0 means unaligned accesses need no special handling - confirm against users of this macro. */
33 #define UNALIGNED_TRAP 0
/*
 * Flag-effect code for two-operand ALU operations: ALU_ADD evaluates to 0,
 * every other operation evaluates to 3.  The `im` argument is not consulted.
 * NOTE(review): the meaning of the numeric codes (0/1/3) is defined by the
 * users of these macros, which are not visible here.
 */
35 #define ALU_WRITES_FLAGS(alu, im) ((alu) != ALU_ADD ? 3 : 0)
/*
 * Flag-effect code for single-operand operations: ALU1_INC and ALU1_DEC give
 * 1, ALU1_NOT and ALU1_BSWAP give 0, anything else gives 3.
 */
36 #define ALU1_WRITES_FLAGS(alu) ((alu) == ALU1_INC || (alu) == ALU1_DEC ? 1 : (alu) == ALU1_NOT || (alu) == ALU1_BSWAP ? 0 : 3)
/*
 * Flag-effect code for shifts and rotates.  Evaluates to 0 only when the
 * BMI2 CPU feature is present, the operation is ROT_SHL, ROT_SHR or ROT_SAR,
 * the operand size is at least OP_SIZE_4 and `im` is false; evaluates to 1
 * in every other case.
 * Every macro argument is parenthesized so that a compound expression (for
 * example a conditional expression) passed as `alu` or `size` is compared as
 * a single value instead of being split by operator precedence.
 */
#define ROT_WRITES_FLAGS(alu, size, im) (cpu_test_feature(CPU_FEATURE_bmi2) && ((alu) == ROT_SHL || (alu) == ROT_SHR || (alu) == ROT_SAR) && (size) >= OP_SIZE_4 && !(im) ? 0 : 1)
/* Always evaluates to 0 on this target, whatever the condition. */
38 #define COND_IS_LOGICAL(cond) 0
/* True for operand sizes up to and including OP_SIZE_2. */
40 #define ARCH_PARTIAL_ALU(size) ((size) <= OP_SIZE_2)
/* Three-address form: only ALU_ADD with `f` false, for both the register and the immediate variant. */
41 #define ARCH_IS_3ADDRESS(alu, f) ((alu) == ALU_ADD && !(f))
42 #define ARCH_IS_3ADDRESS_IMM(alu, f) ((alu) == ALU_ADD && !(f))
/* Shifts/rotates are three-address exactly when ROT_WRITES_FLAGS(alu, size, false) is 0; never with an immediate. */
43 #define ARCH_IS_3ADDRESS_ROT(alu, size) (ROT_WRITES_FLAGS(alu, size, false) ? 0 : 1)
44 #define ARCH_IS_3ADDRESS_ROT_IMM(alu) 0
/* Single-operand operations that are treated as two-address: BSF, BSR, LZCNT, POPCNT. */
45 #define ARCH_IS_2ADDRESS(alu) ((alu) == ALU1_BSF || (alu) == ALU1_BSR || (alu) == ALU1_LZCNT || (alu) == ALU1_POPCNT)
/* Three-address floating point depends on the AVX CPU feature test. */
46 #define ARCH_IS_3ADDRESS_FP cpu_test_feature(CPU_FEATURE_avx)
/* Fixed capability switches for this architecture. */
47 #define ARCH_HAS_JMP_2REGS(cond) 0
48 #define ARCH_HAS_FLAGS 1
49 #define ARCH_PREFERS_SX(size) 0
50 #define ARCH_HAS_BWX 1
51 #define ARCH_HAS_MUL 1
52 #define ARCH_HAS_DIV 1
53 #define ARCH_HAS_ANDN 0
/* Every bit-test variant except BTX_BTEXT, and only for sizes of at least OP_SIZE_2; `cnst` is not consulted. */
54 #define ARCH_HAS_BTX(btx, size, cnst) ((btx) != BTX_BTEXT && (size) >= OP_SIZE_2)
/* Shifted add is reported as available for `bits` up to and including 3. */
55 #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3)
56 #define ARCH_SHIFT_SIZE OP_SIZE_4
/* Boolean operand size is derived from the size of ajla_flat_option_t via log_2. */
57 #define ARCH_BOOL_SIZE log_2(sizeof(ajla_flat_option_t))
/* Depends on the SSE2 CPU feature test. */
58 #define ARCH_HAS_FP_GP_MOV cpu_test_feature(CPU_FEATURE_sse2)
59 #define ARCH_NEEDS_BARRIER 0
/* Instruction-size mappings are the identity on this target. */
61 #define i_size(size) (size)
62 #define i_size_rot(size) (size)
63 #define i_size_cmp(size) (size)
/* Register-class predicates: register numbers below 16 are GPRs; XMM registers span R_XMM0 through R_XMM31 inclusive. */
131 #define R_IS_GPR(r) ((r) < 16)
132 #define R_IS_XMM(r) ((r) >= R_XMM0 && (r) <= R_XMM31)
/* Build-time switch, disabled here because the define is commented out. */
134 /*#define TIMESTAMP_IN_REGISTER*/
/*
 * R_UPCALL expands to a mutable file-scope variable initialised to R_R15,
 * so the register it names can be changed after start-up.
 */
137 static uint8_t upcall_register = R_R15;
138 #define R_UPCALL upcall_register
/* R_TIMESTAMP is only defined when TIMESTAMP_IN_REGISTER is enabled. */
139 #ifdef TIMESTAMP_IN_REGISTER
140 #define R_TIMESTAMP R_R14
/*
 * NOTE(review): R_CONST_IMM appears twice (R_R11 and the placeholder 255);
 * the conditionals selecting between the two, and the end of the #ifdef
 * above, are not visible in this excerpt.
 */
142 #define R_CONST_IMM R_R11
144 #define R_CONST_IMM 255 /* this should not be used */
146 #define R_OFFSET_IMM 255 /* this should not be used */
/*
 * Scratch and saved register assignment per ABI.
 * In every variant R_SCRATCH_4 is an alias of R_SAVED_2; it is written
 * before R_SAVED_2 is defined, which is fine because macros are expanded at
 * the point of use.
 */
148 #if defined(ARCH_X86_32)
150 #define R_SCRATCH_1 R_AX
151 #define R_SCRATCH_2 R_DX
152 #define R_SCRATCH_3 R_CX
153 #define R_SCRATCH_4 R_SAVED_2
154 #define R_SAVED_1 R_SI
155 #define R_SAVED_2 R_DI
156 #elif defined(ARCH_X86_WIN_ABI)
158 #define R_SCRATCH_1 R_AX
159 #define R_SCRATCH_2 R_DX
160 #define R_SCRATCH_3 R_CX
161 #define R_SCRATCH_4 R_SAVED_2
162 #define R_SAVED_1 R_SI
163 #define R_SAVED_2 R_DI
/* NOTE(review): third variant (saved registers R_BP and R_R12); the directive introducing it is not visible here - presumably the fallback branch. */
166 #define R_SCRATCH_1 R_AX
167 #define R_SCRATCH_2 R_DX
168 #define R_SCRATCH_3 R_CX
169 #define R_SCRATCH_4 R_SAVED_2
170 #define R_SAVED_1 R_BP
171 #define R_SAVED_2 R_R12
/* Floating-point scratch registers. */
174 #define FR_SCRATCH_1 R_XMM0
175 #define FR_SCRATCH_2 R_XMM1
/* NOTE(review): the bodies of the two conditional branches below are not visible in this excerpt. */
177 #if defined(ARCH_X86_32)
182 #elif defined(ARCH_X86_WIN_ABI)
/*
 * ARG_SPACE and ARG_OFFSET per ABI.  ARG_SPACE is the amount subtracted from
 * the stack pointer operand in gen_entry; the inline comments give the
 * required residue modulo 0x10 for each variant.
 */
195 #if defined(ARCH_X86_32)
196 #define ARG_SPACE 0x1c /* must be 0xc modulo 0x10 */
197 #define ARG_OFFSET 0x14
198 #elif defined(ARCH_X86_WIN_ABI) && !defined(TIMESTAMP_IN_REGISTER)
199 #define ARG_SPACE 0x28 /* must be 0x8 modulo 0x10 */
200 #define ARG_OFFSET 0xa0
201 #elif defined(ARCH_X86_WIN_ABI)
202 #define ARG_SPACE 0x20 /* must be 0x0 modulo 0x10 */
203 #define ARG_OFFSET 0x90
/*
 * Floating-point support bit masks.  SUPPORTED_FP contributes bit 0x2 when
 * the SSE feature test succeeds and bit 0x4 when the SSE2 test succeeds;
 * SUPPORTED_FP_HALF_CVT contributes bit 0x1 when F16C is present.
 * NOTE(review): which floating-point type each bit stands for is not shown here.
 */
206 #define SUPPORTED_FP (cpu_test_feature(CPU_FEATURE_sse) * 0x2 + cpu_test_feature(CPU_FEATURE_sse2) * 0x4)
207 #define SUPPORTED_FP_X87 0xe
208 #define SUPPORTED_FP_HALF_CVT (cpu_test_feature(CPU_FEATURE_f16c) * 0x1)
/*
 * Returns true when `reg` lies in the half-open range [0x20, 0x40), i.e. the
 * register numbers that this backend treats as floating-point registers.
 */
static bool reg_is_fp(unsigned reg)
{
	return reg >= 0x20 && reg < 0x40;
}
/*
 * Returns true when `reg` lies in the half-open range [0x18, 0x1e), i.e. the
 * register numbers that this backend treats as segment registers.
 */
static bool reg_is_segment(unsigned reg)
{
	return reg >= 0x18 && reg < 0x1e;
}
/*
 * Per-ABI register tables: general-purpose and floating-point registers,
 * split into "saved" and "volatile" lists, plus a reg_is_saved() predicate.
 * A table written as { 0 } is paired with an explicit element count of 0U,
 * so the single element is only a placeholder.
 */
220 #if defined(ARCH_X86_32)
222 static const uint8_t regs_saved[] = { R_BX };
223 static const uint8_t regs_volatile[] = { 0 };
224 #define n_regs_volatile 0U
225 static const uint8_t fp_saved[] = { 0 };
226 #define n_fp_saved 0U
227 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7 };
228 #define reg_is_saved(r) ((r) == R_BX)
230 #elif defined(ARCH_X86_WIN_ABI)
/* NOTE(review): the rest of this initializer (including the part guarded by the #ifndef) is not visible in this excerpt. */
232 static const uint8_t regs_saved[] = { R_BX, R_R12, R_R13,
233 #ifndef TIMESTAMP_IN_REGISTER
237 static const uint8_t regs_volatile[] = { R_R8, R_R9, R_R10 };
238 static const uint8_t fp_saved[] = { 0 };
239 #define n_fp_saved 0U
240 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5 };
/* Saved registers here are R_BX and the inclusive range R_R12..R_R15. */
241 #define reg_is_saved(r) ((r) == R_BX || ((r) >= R_R12 && ((r) <= R_R15)))
/* NOTE(review): third variant; its introducing directive and the rest of this initializer are not visible in this excerpt. */
245 static const uint8_t regs_saved[] = { R_R13,
246 #ifndef TIMESTAMP_IN_REGISTER
/* One element is dropped from the count when R_UPCALL is not a segment register. */
251 #define n_regs_saved (n_array_elements(regs_saved) - !reg_is_segment(R_UPCALL))
252 static const uint8_t regs_volatile[] = { R_SI, R_DI, R_R8, R_R9, R_R10 };
253 static const uint8_t fp_saved[] = { 0 };
254 #define n_fp_saved 0U
255 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7, R_XMM8, R_XMM9, R_XMM10, R_XMM11, R_XMM12, R_XMM13, R_XMM14, R_XMM15 };
/* Saved registers here are the inclusive range R_R13..R_R15. */
256 #define reg_is_saved(r) ((r) >= R_R13 && (r) <= R_R15)
259 static bool attr_w imm_is_8bit(int64_t imm)
261 return imm >= -0x80 && imm < 0x80;
/*
 * Reports whether `imm` fits in a signed 32-bit immediate
 * (-0x80000000 .. 0x7fffffff inclusive) on the path shown below.
 * NOTE(review): the parameter is marked attr_unused and several lines of the
 * body are not visible in this excerpt, so there is presumably another,
 * configuration-dependent path that does not look at `imm` - confirm.
 */
264 static bool attr_w imm_is_32bit(int64_t attr_unused imm)
269 return imm >= -0x80000000LL && imm < 0x80000000LL;
/*
 * Emits code that loads the constant `c` into register `reg`, using a MOV of
 * native operand size.
 * NOTE(review): only the size test and the MOV header are visible here; the
 * statement controlled by the OP_SIZE_NATIVE == OP_SIZE_4 test and the operand
 * emission are not shown.
 */
273 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
275 if (OP_SIZE_NATIVE == OP_SIZE_4)
277 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
/*
 * Records a base-plus-displacement address in the codegen context:
 * ctx->base_reg is set to `base`, ctx->offset_imm to `imm`, and
 * ctx->offset_reg is cleared.  `size` is marked attr_unused.
 */
284 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned attr_unused size)
286 ctx->offset_imm = imm;
287 ctx->offset_reg = false;
288 ctx->base_reg = base;
/* Purposes listed as case labels; the enclosing switch statement is not visible in this excerpt. */
290 case IMM_PURPOSE_LDR_OFFSET:
291 case IMM_PURPOSE_LDR_SX_OFFSET:
292 case IMM_PURPOSE_STR_OFFSET:
293 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
294 case IMM_PURPOSE_MVI_CLI_OFFSET:
/* Internal-error report for a purpose value that is not accepted. */
297 internal(file_line, "gen_address: invalid purpose %d", purpose);
/* Displacements outside the signed 32-bit range take a separate path; NOTE(review): that path is not visible here. */
300 if (unlikely(!imm_is_32bit(imm)))
/*
 * Reports whether `imm` can be used directly as an instruction immediate;
 * on the path shown that is the case when it passes imm_is_32bit().
 * NOTE(review): all three parameters are marked attr_unused and several body
 * lines are not visible, so another configuration-dependent path presumably
 * exists - confirm.
 */
306 static bool is_direct_const(int64_t attr_unused imm, unsigned attr_unused purpose, unsigned attr_unused size)
311 return imm_is_32bit(imm);
/*
 * Prepares the immediate operand `imm` for a following instruction.
 * If is_direct_const() accepts it, the value is stored in ctx->const_imm and
 * ctx->const_reg is cleared; otherwise an 8-byte MOV targeting R_CONST_IMM is
 * emitted and ctx->const_reg is set.
 * NOTE(review): several lines (block delimiters, remaining MOV operands, any
 * conditional compilation around the range checks) are not visible here.
 */
315 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
/* Range checks: the immediate must fit the signed range of the given operand size. */
318 if (size == OP_SIZE_1 && (unlikely(imm < -0x80LL) || unlikely(imm >= 0x80LL)))
319 internal(file_line, "invalid imm for size 1: %016llx", (long long)imm);
320 if (size == OP_SIZE_2 && (unlikely(imm < -0x8000LL) || unlikely(imm >= 0x8000LL)))
321 internal(file_line, "invalid imm for size 2 : %016llx", (long long)imm);
322 if (size == OP_SIZE_4 && (unlikely(imm < -0x80000000LL) || unlikely(imm >= 0x80000000LL)))
/* NOTE(review): this message says "size 3" although the check is for OP_SIZE_4 and the other two messages use byte counts - looks like a typo for "size 4"; confirm. */
323 internal(file_line, "invalid imm for size 3: %016llx", (long long)imm);
325 if (is_direct_const(imm, purpose, size)) {
326 ctx->const_imm = imm;
327 ctx->const_reg = false;
/* Fallback: materialise the constant in the R_CONST_IMM register. */
329 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
330 gen_one(R_CONST_IMM);
333 ctx->const_reg = true;
/*
 * Emits the entry sequence of generated code: a series of PUSH instructions,
 * stack adjustment and register set-up MOVs, ending in an indirect jump.
 * There is one variant per ABI.
 * NOTE(review): most operand-emitting lines (which registers are pushed or
 * moved) are not visible in this excerpt, so they are not described here.
 */
338 static bool attr_w gen_entry(struct codegen_context *ctx)
/* 32-bit variant: four 4-byte pushes, then SUB with ARG_SPACE as operand. */
340 #if defined(ARCH_X86_32)
341 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
344 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
347 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
350 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
353 gen_insn(INSN_ALU, OP_SIZE_4, ALU_SUB, 1);
357 gen_eight(ARG_SPACE);
/* MOV with a memory operand at displacement ARG_SPACE + ARG_OFFSET. */
359 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
361 gen_one(ARG_ADDRESS_1);
363 gen_eight(ARG_SPACE + ARG_OFFSET);
/* Indirect jump through a memory operand at displacement ARG_SPACE + ARG_OFFSET + 12. */
365 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
366 gen_one(ARG_ADDRESS_1);
368 gen_eight(ARG_SPACE + ARG_OFFSET + 12);
/* Windows ABI variant: nine 8-byte pushes, plus one more when TIMESTAMP_IN_REGISTER is not defined. */
369 #elif defined(ARCH_X86_WIN_ABI)
370 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
373 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
376 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
379 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
382 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
385 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
388 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
391 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
394 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
396 #ifndef TIMESTAMP_IN_REGISTER
397 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
/* SUB with ARG_SPACE as operand, followed by two address-sized MOVs. */
400 gen_insn(INSN_ALU, OP_SIZE_8, ALU_SUB, 1);
404 gen_eight(ARG_SPACE);
406 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
410 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* With TIMESTAMP_IN_REGISTER, a 4-byte MOV into R_TIMESTAMP. */
413 #ifdef TIMESTAMP_IN_REGISTER
414 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
415 gen_one(R_TIMESTAMP);
/* Indirect jump through a memory operand at displacement ARG_OFFSET. */
418 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
419 gen_one(ARG_ADDRESS_1);
421 gen_eight(ARG_OFFSET);
/* NOTE(review): third variant (seven 8-byte pushes); its introducing directive is not visible - presumably the fallback ABI branch. */
423 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
426 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
429 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
432 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
435 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
438 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
441 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
443 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* An extra address-sized MOV is emitted only when R_UPCALL is not a segment register. */
447 if (!reg_is_segment(R_UPCALL)) {
448 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
452 #ifdef TIMESTAMP_IN_REGISTER
453 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
454 gen_one(R_TIMESTAMP);
/* x32 builds emit an additional 4-byte MOV before the final indirect jump. */
457 #if defined(ARCH_X86_X32)
458 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
462 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
/*
 * Emits code that sets up the escape argument for instruction pointer `ip`
 * and then jumps to `escape_label`.
 * NOTE(review): the destination operands of the MOVs and the branch
 * separating the two MOV forms are not visible in this excerpt.
 */
468 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
470 #if defined(ARCH_X86_32) || defined(ARCH_X86_64)
471 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* Alternative form: an 8-byte MOV whose immediate carries `ip` in the upper 32 bits. */
476 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
479 gen_eight((uint64_t)ip << 32);
/* Unconditional jump to the escape label. */
481 gen_insn(INSN_JMP, 0, 0, 0);
482 gen_four(escape_label);
/*
 * Emits the exit sequence of generated code: stack adjustment by ADD,
 * a series of POP instructions and a final RET.  There is one variant per ABI.
 * NOTE(review): most operand-emitting lines (which registers are popped or
 * moved) are not visible in this excerpt, so they are not described here.
 */
487 static bool attr_w gen_escape(struct codegen_context *ctx)
/* 32-bit variant: a MOV, ADD with ARG_SPACE as operand, four 4-byte pops, RET. */
489 #if defined(ARCH_X86_32)
490 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
494 gen_insn(INSN_ALU, OP_SIZE_4, ALU_ADD, 1);
498 gen_eight(ARG_SPACE);
500 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
503 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
506 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
509 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
512 gen_insn(INSN_RET, 0, 0, 0);
/* Windows ABI variant: the ADD operand is ARG_SPACE with TIMESTAMP_IN_REGISTER; ARG_SPACE + 8 is the other operand shown (presumably the alternative branch). */
513 #elif defined(ARCH_X86_WIN_ABI)
514 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
518 #if defined(TIMESTAMP_IN_REGISTER)
519 gen_eight(ARG_SPACE);
521 gen_eight(ARG_SPACE + 8);
523 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
/* Two stores through a memory operand (8-byte, then 4-byte). */
526 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
527 gen_one(ARG_ADDRESS_1);
532 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
533 gen_one(ARG_ADDRESS_1);
538 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
541 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
544 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
547 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
550 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
553 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
556 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
559 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
562 gen_insn(INSN_RET, 0, 0, 0);
/* NOTE(review): third variant; its introducing directive is not visible. x32 builds emit an 8-byte ADD first. */
564 #if defined(ARCH_X86_X32)
565 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
570 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
574 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
577 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
580 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
583 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
586 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
589 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
592 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
595 gen_insn(INSN_RET, 0, 0, 0);
/*
 * Emits code that places upcall argument number `arg`.  On 32-bit x86 the
 * argument is written with a 4-byte MOV to a memory operand, after asserting
 * that slot `arg` (4 bytes each) lies within ARG_SPACE.
 * Both parameters are marked attr_unused.
 * NOTE(review): the remaining MOV operands and the behaviour on other ABIs
 * are not visible in this excerpt.
 */
600 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
602 #if defined(ARCH_X86_32)
603 ajla_assert_lo(arg * 4 < ARG_SPACE, (file_line, "gen_upcall_argument: argument %u", arg));
604 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
605 gen_one(ARG_ADDRESS_1);
/*
 * Emits an indirect call to an upcall and then finishes it with
 * gen_upcall_end(ctx, n_args).
 * NOTE(review): the use of `offset` and most operand-emitting lines are not
 * visible in this excerpt.
 */
613 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
/* 32-bit variant: a 4-byte load from displacement ARG_SPACE + ARG_OFFSET + 4, then an indirect call through memory. */
615 #if defined(ARCH_X86_32)
616 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
618 gen_one(ARG_ADDRESS_1);
620 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
622 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
623 gen_one(ARG_ADDRESS_1);
/* x32 variant: an address-sized load from memory followed by an 8-byte indirect call. */
626 #elif defined(ARCH_X86_X32)
627 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
629 gen_one(ARG_ADDRESS_1);
633 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
/* NOTE(review): presumably the remaining 64-bit branch - an 8-byte indirect call through memory; the directive introducing it is not visible. */
636 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
637 gen_one(ARG_ADDRESS_1);
/* g() wraps the call to gen_upcall_end; its failure handling is defined elsewhere. */
641 g(gen_upcall_end(ctx, n_args));
/*
 * Emits a 4-byte comparison against the `ts` field of
 * struct cg_upcall_vector_s and a conditional jump to `escape_label` when the
 * two values differ (COND_NE).
 * NOTE(review): several operand-emitting lines and the end of the function
 * are not visible in this excerpt.
 */
646 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
/* 32-bit variant: two 4-byte loads from the stack area (displacements ARG_SPACE + ARG_OFFSET + 4 and + 8) precede the compare. */
648 #if defined(ARCH_X86_32)
649 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
651 gen_one(ARG_ADDRESS_1);
653 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
655 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
657 gen_one(ARG_ADDRESS_1);
659 gen_eight(ARG_SPACE + ARG_OFFSET + 8);
661 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
663 gen_one(ARG_ADDRESS_1);
665 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* With the timestamp kept in a register, R_TIMESTAMP is compared directly against the memory operand. */
666 #elif defined(TIMESTAMP_IN_REGISTER)
667 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
668 gen_one(R_TIMESTAMP);
669 gen_one(ARG_ADDRESS_1);
671 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Otherwise a 4-byte value is first loaded from memory (displacement ARG_SPACE on the Windows ABI) and then compared. */
673 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
675 gen_one(ARG_ADDRESS_1);
677 #if defined(ARCH_X86_WIN_ABI)
678 gen_eight(ARG_SPACE);
682 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
684 gen_one(ARG_ADDRESS_1);
686 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Leave through the escape label when the comparison reports "not equal". */
688 gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
689 gen_four(escape_label);