2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/*
 * Architecture description macros.
 * NOTE(review): OP_SIZE_NATIVE and OP_SIZE_ADDRESS each appear twice below
 * (a 4-byte and an 8-byte variant); the preprocessor conditionals selecting
 * one of them are not visible in this excerpt.
 */
20 #define OP_SIZE_NATIVE OP_SIZE_4
22 #define OP_SIZE_NATIVE OP_SIZE_8
26 #define OP_SIZE_ADDRESS OP_SIZE_4
28 #define OP_SIZE_ADDRESS OP_SIZE_8
31 #define JMP_LIMIT JMP_LONG
33 #define UNALIGNED_TRAP 0
/* Evaluates to 0 for ALU_ADD and to 3 for every other op; "im" is ignored. */
35 #define ALU_WRITES_FLAGS(alu, im) ((alu) != ALU_ADD ? 3 : 0)
/* 1 for ALU1_INC/ALU1_DEC, 0 for ALU1_NOT/ALU1_BSWAP, 3 for any other unary op. */
36 #define ALU1_WRITES_FLAGS(alu) ((alu) == ALU1_INC || (alu) == ALU1_DEC ? 1 : (alu) == ALU1_NOT || (alu) == ALU1_BSWAP ? 0 : 3)
/*
 * 0 only when the bmi2 CPU feature is reported, the op is ROT_SHL, ROT_SHR or
 * ROT_SAR, size >= OP_SIZE_4 and "im" is false; 1 in every other case.
 * NOTE(review): "alu" and "size" are not parenthesized in the expansion, so
 * callers should pass simple expressions only.
 */
37 #define ROT_WRITES_FLAGS(alu, size, im) (cpu_test_feature(CPU_FEATURE_bmi2) && (alu == ROT_SHL || alu == ROT_SHR || alu == ROT_SAR) && size >= OP_SIZE_4 && !(im) ? 0 : 1)
38 #define COND_IS_LOGICAL(cond) 0
/* True for operand sizes up to and including OP_SIZE_2. */
40 #define ARCH_PARTIAL_ALU(size) ((size) <= OP_SIZE_2)
/* Both 3-address predicates hold only for ALU_ADD with a false "f" argument. */
41 #define ARCH_IS_3ADDRESS(alu, f) ((alu) == ALU_ADD && !(f))
42 #define ARCH_IS_3ADDRESS_IMM(alu, f) ((alu) == ALU_ADD && !(f))
/* 1 exactly when ROT_WRITES_FLAGS(alu, size, false) evaluates to 0. */
43 #define ARCH_IS_3ADDRESS_ROT(alu, size) (ROT_WRITES_FLAGS(alu, size, false) ? 0 : 1)
44 #define ARCH_IS_3ADDRESS_ROT_IMM(alu) 0
/* True for ALU1_BSF, ALU1_BSR, ALU1_LZCNT and ALU1_POPCNT. */
45 #define ARCH_IS_2ADDRESS(alu) ((alu) == ALU1_BSF || (alu) == ALU1_BSR || (alu) == ALU1_LZCNT || (alu) == ALU1_POPCNT)
/* Decided at run time from the avx CPU feature. */
46 #define ARCH_IS_3ADDRESS_FP cpu_test_feature(CPU_FEATURE_avx)
47 #define ARCH_HAS_FLAGS 1
48 #define ARCH_PREFERS_SX(size) 0
49 #define ARCH_HAS_BWX 1
50 #define ARCH_HAS_MUL 1
51 #define ARCH_HAS_DIV 1
52 #define ARCH_HAS_ANDN 0
/* True for every btx other than BTX_BTEXT when size >= OP_SIZE_2; "cnst" is ignored. */
53 #define ARCH_HAS_BTX(btx, size, cnst) ((btx) != BTX_BTEXT && (size) >= OP_SIZE_2)
/* True for bits <= 3. */
54 #define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3)
55 #define ARCH_SHIFT_SIZE OP_SIZE_4
/* Decided at run time from the sse2 CPU feature. */
56 #define ARCH_HAS_FP_GP_MOV cpu_test_feature(CPU_FEATURE_sse2)
57 #define ARCH_NEEDS_BARRIER 0
/* The three i_size* mappings are identities: the size is passed through unchanged. */
59 #define i_size(size) (size)
60 #define i_size_rot(size) (size)
61 #define i_size_cmp(size) (size)
/* Register-number classification: values below 16 count as general-purpose registers. */
129 #define R_IS_GPR(r) ((r) < 16)
/* True for register numbers in the inclusive range R_XMM0..R_XMM31. */
130 #define R_IS_XMM(r) ((r) >= R_XMM0 && (r) <= R_XMM31)
132 /*#define TIMESTAMP_IN_REGISTER*/
/* The upcall register is a mutable file-scope variable (initially R_R15) read through R_UPCALL. */
135 static uint8_t upcall_register = R_R15;
136 #define R_UPCALL upcall_register
/*
 * NOTE(review): the conditionals separating the alternative definitions of
 * R_CONST_IMM below (R_R11 versus the 255 placeholder) are only partly
 * visible in this excerpt.
 */
137 #ifdef TIMESTAMP_IN_REGISTER
138 #define R_TIMESTAMP R_R14
140 #define R_CONST_IMM R_R11
142 #define R_CONST_IMM 255 /* this should not be used */
144 #define R_OFFSET_IMM 255 /* this should not be used */
/*
 * Scratch/saved register roles. Three variants are listed: ARCH_X86_32,
 * ARCH_X86_WIN_ABI, and a third one (presumably the remaining #else branch -
 * its directive is not visible here). In every variant R_SCRATCH_4 is an
 * alias of R_SAVED_2.
 */
146 #if defined(ARCH_X86_32)
148 #define R_SCRATCH_1 R_AX
149 #define R_SCRATCH_2 R_DX
150 #define R_SCRATCH_3 R_CX
151 #define R_SCRATCH_4 R_SAVED_2
152 #define R_SAVED_1 R_SI
153 #define R_SAVED_2 R_DI
154 #elif defined(ARCH_X86_WIN_ABI)
156 #define R_SCRATCH_1 R_AX
157 #define R_SCRATCH_2 R_DX
158 #define R_SCRATCH_3 R_CX
159 #define R_SCRATCH_4 R_SAVED_2
160 #define R_SAVED_1 R_SI
161 #define R_SAVED_2 R_DI
164 #define R_SCRATCH_1 R_AX
165 #define R_SCRATCH_2 R_DX
166 #define R_SCRATCH_3 R_CX
167 #define R_SCRATCH_4 R_SAVED_2
168 #define R_SAVED_1 R_BP
169 #define R_SAVED_2 R_R12
/* Floating-point scratch registers. */
172 #define FR_SCRATCH_1 R_XMM0
173 #define FR_SCRATCH_2 R_XMM1
/* NOTE(review): the bodies of this conditional group are not visible in this excerpt. */
175 #if defined(ARCH_X86_32)
180 #elif defined(ARCH_X86_WIN_ABI)
/*
 * ARG_SPACE is the amount subtracted from / added back to the stack pointer
 * by the entry and escape sequences in this file; ARG_OFFSET is used as a
 * displacement when addressing incoming arguments there. Each ARG_SPACE
 * value satisfies the residue modulo 0x10 stated in its trailing comment.
 * NOTE(review): a further (default) variant may exist; it is not visible in
 * this excerpt.
 */
193 #if defined(ARCH_X86_32)
194 #define ARG_SPACE 0x1c /* must be 0xc modulo 0x10 */
195 #define ARG_OFFSET 0x14
196 #elif defined(ARCH_X86_WIN_ABI) && !defined(TIMESTAMP_IN_REGISTER)
197 #define ARG_SPACE 0x28 /* must be 0x8 modulo 0x10 */
198 #define ARG_OFFSET 0xa0
199 #elif defined(ARCH_X86_WIN_ABI)
200 #define ARG_SPACE 0x20 /* must be 0x0 modulo 0x10 */
201 #define ARG_OFFSET 0x90
/*
 * Bit masks built from CPU feature tests: sse contributes 0x2, sse2
 * contributes 0x4 (assumes cpu_test_feature() yields 0 or 1 - TODO confirm).
 * Presumably each bit selects one floating-point type; verify against users.
 */
204 #define SUPPORTED_FP (cpu_test_feature(CPU_FEATURE_sse) * 0x2 + cpu_test_feature(CPU_FEATURE_sse2) * 0x4)
205 #define SUPPORTED_FP_X87 0xe
/* f16c contributes bit 0x1. */
206 #define SUPPORTED_FP_HALF_CVT (cpu_test_feature(CPU_FEATURE_f16c) * 0x1)
/* Returns true when the register number lies in the half-open range [0x20, 0x40). */
208 static bool reg_is_fp(unsigned reg)
210 return reg >= 0x20 && reg < 0x40;
/* Returns true when the register number lies in the half-open range [0x18, 0x1e). */
213 static bool reg_is_segment(unsigned reg)
215 return reg >= 0x18 && reg < 0x1e;
/*
 * Per-ABI tables of saved and volatile registers (integer and floating point).
 * Where a table is empty it is declared with a single 0 placeholder element
 * and its n_* count macro is defined as 0U.
 * NOTE(review): several initializer lines and the directives between the
 * variants are not visible in this excerpt.
 */
218 #if defined(ARCH_X86_32)
220 static const uint8_t regs_saved[] = { R_BX };
221 static const uint8_t regs_volatile[] = { 0 };
222 #define n_regs_volatile 0U
223 static const uint8_t fp_saved[] = { 0 };
224 #define n_fp_saved 0U
225 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7 };
/* Only R_BX counts as saved in this variant. */
226 #define reg_is_saved(r) ((r) == R_BX)
228 #elif defined(ARCH_X86_WIN_ABI)
230 static const uint8_t regs_saved[] = { R_BX, R_R12, R_R13,
231 #ifndef TIMESTAMP_IN_REGISTER
235 static const uint8_t regs_volatile[] = { R_R8, R_R9, R_R10 };
236 static const uint8_t fp_saved[] = { 0 };
237 #define n_fp_saved 0U
238 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5 };
/* R_BX and the inclusive range R_R12..R_R15 count as saved in this variant. */
239 #define reg_is_saved(r) ((r) == R_BX || ((r) >= R_R12 && ((r) <= R_R15)))
243 static const uint8_t regs_saved[] = { R_R13,
244 #ifndef TIMESTAMP_IN_REGISTER
/*
 * The usable count is the array length minus one whenever R_UPCALL is not a
 * segment register (presumably the last array entry is then taken by the
 * upcall register - the initializer tail is not visible; TODO confirm).
 */
249 #define n_regs_saved (n_array_elements(regs_saved) - !reg_is_segment(R_UPCALL))
250 static const uint8_t regs_volatile[] = { R_SI, R_DI, R_R8, R_R9, R_R10 };
251 static const uint8_t fp_saved[] = { 0 };
252 #define n_fp_saved 0U
253 static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7, R_XMM8, R_XMM9, R_XMM10, R_XMM11, R_XMM12, R_XMM13, R_XMM14, R_XMM15 };
/* The inclusive range R_R13..R_R15 counts as saved in this variant. */
254 #define reg_is_saved(r) ((r) >= R_R13 && (r) <= R_R15)
/* Returns true when imm fits a signed 8-bit value, i.e. -0x80 <= imm < 0x80. */
257 static bool attr_w imm_is_8bit(int64_t imm)
259 return imm >= -0x80 && imm < 0x80;
/*
 * Returns true when imm fits a signed 32-bit value
 * (-0x80000000 <= imm < 0x80000000) on the visible path.
 * NOTE(review): the parameter is marked attr_unused and some lines of the
 * body are not visible here, so another (presumably configuration-dependent)
 * path may ignore imm - confirm against the full file.
 */
262 static bool attr_w imm_is_32bit(int64_t attr_unused imm)
267 return imm >= -0x80000000LL && imm < 0x80000000LL;
/*
 * Emits code that loads the constant c into register reg.
 * The visible lines test whether the native operand size is OP_SIZE_4 and
 * emit an INSN_MOV of native size.
 * NOTE(review): the operand-emitting lines and the return are not visible in
 * this excerpt; what the OP_SIZE_4 branch does with c cannot be told from here.
 */
271 static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
273 if (OP_SIZE_NATIVE == OP_SIZE_4)
275 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
/*
 * Records a base-register + displacement address in the codegen context:
 * ctx->offset_imm receives imm, ctx->offset_reg is cleared and ctx->base_reg
 * receives base. The "size" parameter is marked attr_unused.
 */
282 static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned attr_unused size)
284 ctx->offset_imm = imm;
285 ctx->offset_reg = false;
286 ctx->base_reg = base;
/* Purposes listed here are the accepted ones (the enclosing switch line is not visible). */
288 case IMM_PURPOSE_LDR_OFFSET:
289 case IMM_PURPOSE_LDR_SX_OFFSET:
290 case IMM_PURPOSE_STR_OFFSET:
291 case IMM_PURPOSE_VLDR_VSTR_OFFSET:
292 case IMM_PURPOSE_MVI_CLI_OFFSET:
/* Presumably the default case: any other purpose is reported as an internal error. */
295 internal(file_line, "gen_address: invalid purpose %d", purpose);
/* Displacements outside the signed 32-bit range take a separate path whose body is not visible here. */
298 if (unlikely(!imm_is_32bit(imm)))
/*
 * Tells whether imm can be used directly as an instruction immediate.
 * On the visible path this is exactly imm_is_32bit(imm).
 * NOTE(review): all parameters are marked attr_unused and some lines of the
 * body are not visible, so another configuration-dependent path may exist.
 */
304 static bool is_direct_const(int64_t attr_unused imm, unsigned attr_unused purpose, unsigned attr_unused size)
309 return imm_is_32bit(imm);
/*
 * Prepares an immediate operand in the codegen context.
 * When is_direct_const() accepts the value it is stored in ctx->const_imm and
 * ctx->const_reg is cleared; the other visible path emits a 64-bit INSN_MOV
 * whose first operand is R_CONST_IMM and sets ctx->const_reg.
 * NOTE(review): several lines of this function are not visible in this
 * excerpt (including whatever surrounds the range checks and the remaining
 * operands of the MOV).
 */
313 static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
/* Range checks: the immediate must fit the signed range of the operand size. */
316 if (size == OP_SIZE_1 && (unlikely(imm < -0x80LL) || unlikely(imm >= 0x80LL)))
317 internal(file_line, "invalid imm for size 1: %016llx", (long long)imm);
318 if (size == OP_SIZE_2 && (unlikely(imm < -0x8000LL) || unlikely(imm >= 0x8000LL)))
319 internal(file_line, "invalid imm for size 2 : %016llx", (long long)imm);
/*
 * NOTE(review): this check is for OP_SIZE_4 but the message says "size 3"
 * (and the message above has a stray space before the colon) - probably a
 * typo in the diagnostic text; confirm before changing.
 */
320 if (size == OP_SIZE_4 && (unlikely(imm < -0x80000000LL) || unlikely(imm >= 0x80000000LL)))
321 internal(file_line, "invalid imm for size 3: %016llx", (long long)imm);
323 if (is_direct_const(imm, purpose, size)) {
324 ctx->const_imm = imm;
325 ctx->const_reg = false;
/* Otherwise the constant is materialized in R_CONST_IMM. */
327 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
328 gen_one(R_CONST_IMM);
331 ctx->const_reg = true;
/*
 * Emits the entry sequence of generated code. Three ABI variants are visible:
 * ARCH_X86_32, ARCH_X86_WIN_ABI and a third one (presumably the remaining
 * #else branch). Each variant pushes registers, sets up state with MOVs and
 * ends with an indirect jump.
 * NOTE(review): most operand lines (which registers are pushed/moved) are not
 * visible in this excerpt, so they are not described here.
 */
336 static bool attr_w gen_entry(struct codegen_context *ctx)
/* 32-bit variant: four 4-byte pushes, then ARG_SPACE is subtracted (operands not visible). */
338 #if defined(ARCH_X86_32)
339 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
342 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
345 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
348 gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
351 gen_insn(INSN_ALU, OP_SIZE_4, ALU_SUB, 1);
355 gen_eight(ARG_SPACE);
/* Load through a memory operand at displacement ARG_SPACE + ARG_OFFSET. */
357 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
359 gen_one(ARG_ADDRESS_1);
361 gen_eight(ARG_SPACE + ARG_OFFSET);
/* Indirect jump through a memory operand at displacement ARG_SPACE + ARG_OFFSET + 12. */
363 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
364 gen_one(ARG_ADDRESS_1);
366 gen_eight(ARG_SPACE + ARG_OFFSET + 12);
/* Windows ABI variant: nine 8-byte pushes, plus one more when TIMESTAMP_IN_REGISTER is not defined. */
367 #elif defined(ARCH_X86_WIN_ABI)
368 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
371 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
374 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
377 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
380 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
383 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
386 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
389 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
392 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
394 #ifndef TIMESTAMP_IN_REGISTER
395 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
/* ARG_SPACE is subtracted (operands not visible). */
398 gen_insn(INSN_ALU, OP_SIZE_8, ALU_SUB, 1);
402 gen_eight(ARG_SPACE);
404 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
408 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* With TIMESTAMP_IN_REGISTER a 4-byte MOV into R_TIMESTAMP is emitted (source operand not visible). */
411 #ifdef TIMESTAMP_IN_REGISTER
412 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
413 gen_one(R_TIMESTAMP);
/* Indirect jump through a memory operand at displacement ARG_OFFSET. */
416 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
417 gen_one(ARG_ADDRESS_1);
419 gen_eight(ARG_OFFSET);
/* Third variant: seven 8-byte pushes. */
421 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
424 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
427 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
430 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
433 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
436 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
439 gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
441 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* An extra MOV is emitted only when the upcall register is not a segment register. */
445 if (!reg_is_segment(R_UPCALL)) {
446 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
450 #ifdef TIMESTAMP_IN_REGISTER
451 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
452 gen_one(R_TIMESTAMP);
455 gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
/*
 * Emits code that prepares the escape argument for instruction pointer ip and
 * then jumps to escape_label.
 * NOTE(review): the destination operands and the directives between the two
 * MOV variants are not visible in this excerpt.
 */
461 static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
463 #if defined(ARCH_X86_32) || defined(ARCH_X86_64)
464 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
/* Other visible variant: an 8-byte MOV whose constant carries ip in the upper 32 bits. */
469 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
472 gen_eight((uint64_t)ip << 32);
/* Jump to the escape label. */
474 gen_insn(INSN_JMP, 0, 0, 0);
475 gen_four(escape_label);
/*
 * Emits the exit sequence of generated code: the stack adjustment made on
 * entry is undone, registers are popped and a RET is emitted. Three ABI
 * variants are visible, mirroring the entry sequence.
 * NOTE(review): most operand lines are not visible in this excerpt.
 */
480 static bool attr_w gen_escape(struct codegen_context *ctx)
/* 32-bit variant: a MOV, ARG_SPACE added back, four pops, RET. */
482 #if defined(ARCH_X86_32)
483 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
487 gen_insn(INSN_ALU, OP_SIZE_4, ALU_ADD, 1);
491 gen_eight(ARG_SPACE);
493 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
496 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
499 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
502 gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
505 gen_insn(INSN_RET, 0, 0, 0);
/*
 * Windows ABI variant: the added amount is ARG_SPACE with
 * TIMESTAMP_IN_REGISTER and ARG_SPACE + 8 otherwise (presumably skipping the
 * extra slot pushed on entry in that configuration - TODO confirm).
 */
506 #elif defined(ARCH_X86_WIN_ABI)
507 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
511 #if defined(TIMESTAMP_IN_REGISTER)
512 gen_eight(ARG_SPACE);
514 gen_eight(ARG_SPACE + 8);
516 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
/* Two stores through memory operands (an 8-byte and a 4-byte one); remaining operands are not visible. */
519 gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
520 gen_one(ARG_ADDRESS_1);
525 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
526 gen_one(ARG_ADDRESS_1);
/* Eight further pops, then RET. */
531 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
534 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
537 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
540 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
543 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
546 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
549 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
552 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
555 gen_insn(INSN_RET, 0, 0, 0);
/* Third variant: an ADD emitted only for ARCH_X86_X32, a MOV, seven pops, RET. */
557 #if defined(ARCH_X86_X32)
558 gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
563 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
567 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
570 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
573 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
576 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
579 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
582 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
585 gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
588 gen_insn(INSN_RET, 0, 0, 0);
/*
 * Emits code that places upcall argument number arg where the callee expects
 * it. Only the ARCH_X86_32 path is visible: it asserts that arg * 4 is below
 * ARG_SPACE and emits a 4-byte MOV whose first operand is a memory address.
 * Both parameters are marked attr_unused, so other configurations presumably
 * emit nothing - the rest of the body is not visible here; TODO confirm.
 */
593 static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
595 #if defined(ARCH_X86_32)
596 ajla_assert_lo(arg * 4 < ARG_SPACE, (file_line, "gen_upcall_argument: argument %u", arg));
597 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
598 gen_one(ARG_ADDRESS_1);
/*
 * Emits an indirect call to an upcall and then finishes with
 * gen_upcall_end(ctx, n_args).
 * NOTE(review): the operand lines that would show how "offset" is used are
 * not visible in this excerpt.
 */
606 static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
/* 32-bit variant: load via a memory operand at displacement ARG_SPACE + ARG_OFFSET + 4, then a call through memory. */
608 #if defined(ARCH_X86_32)
609 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
611 gen_one(ARG_ADDRESS_1);
613 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
615 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
616 gen_one(ARG_ADDRESS_1);
/* X32 variant: an address-sized load followed by an 8-byte indirect call. */
619 #elif defined(ARCH_X86_X32)
620 gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
622 gen_one(ARG_ADDRESS_1);
626 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
/* Remaining variant: an 8-byte indirect call through a memory operand. */
629 gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
630 gen_one(ARG_ADDRESS_1);
/* g() presumably propagates a failure of the wrapped call - verify against its definition. */
634 g(gen_upcall_end(ctx, n_args));
/*
 * Emits a 4-byte comparison against the "ts" field of struct
 * cg_upcall_vector_s and a conditional jump to escape_label when the
 * comparison result is "not equal".
 * NOTE(review): register operands and the end of the function are not
 * visible in this excerpt.
 */
639 static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
/* 32-bit variant: two loads from displacements ARG_SPACE + ARG_OFFSET + 4 and + 8, then the compare. */
641 #if defined(ARCH_X86_32)
642 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
644 gen_one(ARG_ADDRESS_1);
646 gen_eight(ARG_SPACE + ARG_OFFSET + 4);
648 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
650 gen_one(ARG_ADDRESS_1);
652 gen_eight(ARG_SPACE + ARG_OFFSET + 8);
654 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
656 gen_one(ARG_ADDRESS_1);
658 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* With TIMESTAMP_IN_REGISTER the compare uses R_TIMESTAMP directly. */
659 #elif defined(TIMESTAMP_IN_REGISTER)
660 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
661 gen_one(R_TIMESTAMP);
662 gen_one(ARG_ADDRESS_1);
664 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Otherwise a 4-byte load is emitted first (displacement ARG_SPACE under the Windows ABI), then the compare. */
666 gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
668 gen_one(ARG_ADDRESS_1);
670 #if defined(ARCH_X86_WIN_ABI)
671 gen_eight(ARG_SPACE);
675 gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
677 gen_one(ARG_ADDRESS_1);
679 gen_eight(offsetof(struct cg_upcall_vector_s, ts));
/* Leave the generated code through escape_label when the values differ. */
681 gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
682 gen_four(escape_label);