/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
/* Native and address operand sizes.  The #if/#else guards below are
   reconstructed; the exact original conditions are assumed to select the
   4-byte sizes on 32-bit targets and the 8-byte sizes on 64-bit targets. */
#if defined(ARCH_X86_32)
#define OP_SIZE_NATIVE OP_SIZE_4
#else
#define OP_SIZE_NATIVE OP_SIZE_8
#endif

#if defined(ARCH_X86_32) || defined(ARCH_X86_X32)
#define OP_SIZE_ADDRESS OP_SIZE_4
#else
#define OP_SIZE_ADDRESS OP_SIZE_8
#endif
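
/* Architecture-description macros queried by the generic code generator:
   jump reach, unaligned-access behaviour, which operations clobber the
   flags, which operations have two- or three-address forms, and which
   instruction-set features (MUL, DIV, BTx, shifted add, ...) exist. */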
#define JMP_LIMIT JMP_LONG

#define UNALIGNED_TRAP 0
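
/* Flag-clobbering queries: a non-zero result means the operation destroys
   (some of) the flags.  ADD reports 0, presumably because it can be done
   with LEA; shifts report 0 only when the BMI2 SHLX/SHRX/SARX forms are
   usable (register count, 32/64-bit operands). */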
#define ALU_WRITES_FLAGS(alu, im) ((alu) != ALU_ADD ? 3 : 0)
#define ALU1_WRITES_FLAGS(alu) ((alu) == ALU1_INC || (alu) == ALU1_DEC ? 1 : (alu) == ALU1_NOT || (alu) == ALU1_BSWAP ? 0 : 3)
#define ROT_WRITES_FLAGS(alu, size, im) (cpu_test_feature(CPU_FEATURE_bmi2) && (alu == ROT_SHL || alu == ROT_SHR || alu == ROT_SAR) && size >= OP_SIZE_4 && !(im) ? 0 : 1)
#define COND_IS_LOGICAL(cond) 0
#define ARCH_PARTIAL_ALU(size) ((size) <= OP_SIZE_2)
#define ARCH_IS_3ADDRESS(alu, f) ((alu) == ALU_ADD && !(f))
#define ARCH_IS_3ADDRESS_IMM(alu, f) ((alu) == ALU_ADD && !(f))
#define ARCH_IS_3ADDRESS_ROT(alu, size) (ROT_WRITES_FLAGS(alu, size, false) ? 0 : 1)
#define ARCH_IS_3ADDRESS_ROT_IMM(alu) 0
#define ARCH_IS_2ADDRESS(alu) ((alu) == ALU1_BSF || (alu) == ALU1_BSR || (alu) == ALU1_LZCNT || (alu) == ALU1_POPCNT)
#define ARCH_IS_3ADDRESS_FP cpu_test_feature(CPU_FEATURE_avx)
#define ARCH_HAS_FLAGS 1
#define ARCH_PREFERS_SX(size) 0
#define ARCH_HAS_BWX 1
#define ARCH_HAS_MUL 1
#define ARCH_HAS_DIV 1
#define ARCH_HAS_ANDN 0
#define ARCH_HAS_BTX(btx, size, cnst) ((btx) != BTX_BTEXT && (size) >= OP_SIZE_2)
#define ARCH_HAS_SHIFTED_ADD(bits) ((bits) <= 3)
#define ARCH_SHIFT_SIZE OP_SIZE_4
#define ARCH_NEEDS_BARRIER 0

#define i_size(size) (size)
#define i_size_rot(size) (size)
#define i_size_cmp(size) (size)
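
/* Classification of the backend's internal register numbers:
   general-purpose registers are numbered below 16, the XMM registers
   occupy the R_XMM0..R_XMM31 range. */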
#define R_IS_GPR(r) ((r) < 16)
#define R_IS_XMM(r) ((r) >= R_XMM0 && (r) <= R_XMM31)

/*#define TIMESTAMP_IN_REGISTER*/
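
/* When TIMESTAMP_IN_REGISTER is enabled, the tick timestamp is cached in
   R14 instead of a stack slot; gen_entry, gen_escape and
   gen_timestamp_test below change accordingly.  upcall_register (R15 by
   default, possibly replaced by a segment register) addresses the upcall
   vector, and R11 (R_CONST_IMM) is used to materialize immediates that
   cannot be encoded directly. */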
/* The enclosing conditional is reconstructed; the exact condition that
   selects the 64-bit variant here is assumed. */
#if !defined(ARCH_X86_32)
static uint8_t upcall_register = R_R15;
#define R_UPCALL upcall_register
#ifdef TIMESTAMP_IN_REGISTER
#define R_TIMESTAMP R_R14
#endif
#define R_CONST_IMM R_R11
#else
#define R_CONST_IMM 255 /* this should not be used */
#endif
#define R_OFFSET_IMM 255 /* this should not be used */
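
/* Register roles per ABI: three or four scratch registers and two
   call-saved working registers, chosen to fit each calling convention. */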
#if defined(ARCH_X86_32)

#define R_SCRATCH_1 R_AX
#define R_SCRATCH_2 R_DX
#define R_SCRATCH_3 R_CX
#define R_SCRATCH_4 R_SAVED_2
#define R_SAVED_1 R_SI
#define R_SAVED_2 R_DI

#elif defined(ARCH_X86_WIN_ABI)

#define R_SCRATCH_1 R_AX
#define R_SCRATCH_2 R_DX
#define R_SCRATCH_3 R_CX
#define R_SCRATCH_4 R_SAVED_2
#define R_SAVED_1 R_SI
#define R_SAVED_2 R_DI

#else

#define R_SCRATCH_1 R_AX
#define R_SCRATCH_2 R_DX
#define R_SCRATCH_3 R_CX
#define R_SCRATCH_4 R_SAVED_2
#define R_SAVED_1 R_BP
#define R_SAVED_2 R_R12

#endif
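
/* XMM0 and XMM1 serve as the floating-point scratch registers in all ABIs. */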
#define FR_SCRATCH_1 R_XMM0
#define FR_SCRATCH_2 R_XMM1

#if defined(ARCH_X86_32)
#elif defined(ARCH_X86_WIN_ABI)
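
/* ARG_SPACE is the amount of stack reserved by gen_entry below (the
   "modulo 0x10" notes keep the stack 16-byte aligned after the pushes);
   ARG_OFFSET appears to be the distance from the post-prologue stack
   pointer to the incoming on-stack arguments. */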
#if defined(ARCH_X86_32)
#define ARG_SPACE 0x1c /* must be 0xc modulo 0x10 */
#define ARG_OFFSET 0x14
#elif defined(ARCH_X86_WIN_ABI) && !defined(TIMESTAMP_IN_REGISTER)
#define ARG_SPACE 0x28 /* must be 0x8 modulo 0x10 */
#define ARG_OFFSET 0xa0
#elif defined(ARCH_X86_WIN_ABI)
#define ARG_SPACE 0x20 /* must be 0x0 modulo 0x10 */
#define ARG_OFFSET 0x90
#endif
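
/* Bitmasks of supported floating-point types; the bit assignment appears
   to be 0x1 half, 0x2 single, 0x4 double, 0x8 extended.  Single needs SSE,
   double needs SSE2, the x87 fallback covers single/double/extended (0xe),
   and half-float conversion needs F16C. */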
#define SUPPORTED_FP (cpu_test_feature(CPU_FEATURE_sse) * 0x2 + cpu_test_feature(CPU_FEATURE_sse2) * 0x4)
#define SUPPORTED_FP_X87 0xe
#define SUPPORTED_FP_HALF_CVT (cpu_test_feature(CPU_FEATURE_f16c) * 0x1)
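
/* The backend packs all register classes into one numbering space: the
   ranges tested below correspond to the XMM registers (0x20-0x3f) and the
   segment registers (0x18-0x1d). */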
static bool reg_is_fp(unsigned reg)
{
	return reg >= 0x20 && reg < 0x40;
}

static bool reg_is_segment(unsigned reg)
{
	return reg >= 0x18 && reg < 0x1e;
}
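
/* Per-ABI lists of additional call-saved registers and of call-clobbered
   (volatile) registers, plus the corresponding floating-point sets. */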
#if defined(ARCH_X86_32)

static const uint8_t regs_saved[] = { R_BX };
static const uint8_t regs_volatile[] = { 0 };
#define n_regs_volatile 0U
static const uint8_t fp_saved[] = { 0 };
#define n_fp_saved 0U
static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7 };
#define reg_is_saved(r) ((r) == R_BX)

#elif defined(ARCH_X86_WIN_ABI)

static const uint8_t regs_saved[] = { R_BX, R_R12, R_R13,
#ifndef TIMESTAMP_IN_REGISTER
static const uint8_t regs_volatile[] = { R_R8, R_R9, R_R10 };
static const uint8_t fp_saved[] = { 0 };
#define n_fp_saved 0U
static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5 };
#define reg_is_saved(r) ((r) == R_BX || ((r) >= R_R12 && ((r) <= R_R15)))

#else

static const uint8_t regs_saved[] = { R_R13,
#ifndef TIMESTAMP_IN_REGISTER
#define n_regs_saved (n_array_elements(regs_saved) - !reg_is_segment(R_UPCALL))
static const uint8_t regs_volatile[] = { R_SI, R_DI, R_R8, R_R9, R_R10 };
static const uint8_t fp_saved[] = { 0 };
#define n_fp_saved 0U
static const uint8_t fp_volatile[] = { R_XMM2, R_XMM3, R_XMM4, R_XMM5, R_XMM6, R_XMM7, R_XMM8, R_XMM9, R_XMM10, R_XMM11, R_XMM12, R_XMM13, R_XMM14, R_XMM15 };
#define reg_is_saved(r) ((r) >= R_R13 && (r) <= R_R15)

#endif
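
/* Tests whether an immediate fits the signed 8-bit and signed 32-bit
   fields used by x86 instruction encodings. */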
static bool attr_w imm_is_8bit(int64_t imm)
{
	return imm >= -0x80 && imm < 0x80;
}

static bool attr_w imm_is_32bit(int64_t attr_unused imm)
{
	return imm >= -0x80000000LL && imm < 0x80000000LL;
}
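
/* Emit a MOV that materializes the constant c in the given register. */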
static bool attr_w gen_load_constant(struct codegen_context *ctx, unsigned reg, uint64_t c)
	if (OP_SIZE_NATIVE == OP_SIZE_4)
	gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
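
/* Set up a base-register + displacement operand in the context; the
   purpose is validated and the displacement must fit a signed 32-bit
   field, which is all that x86 addressing requires. */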
static bool attr_w gen_address(struct codegen_context *ctx, unsigned base, int64_t imm, unsigned purpose, unsigned attr_unused size)
{
	ctx->offset_imm = imm;
	ctx->offset_reg = false;
	ctx->base_reg = base;
	switch (purpose) {
		case IMM_PURPOSE_LDR_OFFSET:
		case IMM_PURPOSE_LDR_SX_OFFSET:
		case IMM_PURPOSE_STR_OFFSET:
		case IMM_PURPOSE_VLDR_VSTR_OFFSET:
		case IMM_PURPOSE_MVI_CLI_OFFSET:
			break;
		default:
			internal(file_line, "gen_address: invalid purpose %d", purpose);
	}
	if (unlikely(!imm_is_32bit(imm)))
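
/* Can this immediate be encoded directly in an instruction?  On x86 any
   signed 32-bit value can. */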
static bool is_direct_const(int64_t attr_unused imm, unsigned attr_unused purpose, unsigned attr_unused size)
{
	return imm_is_32bit(imm);
}
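
/* Prepare an immediate operand: sanity-check its range for the operand
   size, use it directly if it is encodable, otherwise load it into
   R_CONST_IMM first. */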
static bool attr_w gen_imm(struct codegen_context *ctx, int64_t imm, unsigned purpose, unsigned size)
{
	if (size == OP_SIZE_1 && (unlikely(imm < -0x80LL) || unlikely(imm >= 0x80LL)))
		internal(file_line, "invalid imm for size 1: %016llx", (long long)imm);
	if (size == OP_SIZE_2 && (unlikely(imm < -0x8000LL) || unlikely(imm >= 0x8000LL)))
		internal(file_line, "invalid imm for size 2: %016llx", (long long)imm);
	if (size == OP_SIZE_4 && (unlikely(imm < -0x80000000LL) || unlikely(imm >= 0x80000000LL)))
		internal(file_line, "invalid imm for size 4: %016llx", (long long)imm);

	if (is_direct_const(imm, purpose, size)) {
		ctx->const_imm = imm;
		ctx->const_reg = false;
	} else {
		gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
		gen_one(R_CONST_IMM);
		ctx->const_reg = true;
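
/* Entry trampoline prologue: push the callee-saved registers, reserve
   ARG_SPACE bytes of stack, move the incoming arguments (the upcall-vector
   pointer and, optionally, the tick timestamp) into their home registers
   or stack slots, and tail-jump to the entry address supplied by the
   caller.  The per-ABI branches differ only in which registers must be
   saved and where the arguments arrive. */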
static bool attr_w gen_entry(struct codegen_context *ctx)
#if defined(ARCH_X86_32)
	gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_4, 0, 0);
	gen_insn(INSN_ALU, OP_SIZE_4, ALU_SUB, 1);
	gen_eight(ARG_SPACE);
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_SPACE + ARG_OFFSET);
	gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_SPACE + ARG_OFFSET + 12);
#elif defined(ARCH_X86_WIN_ABI)
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
#ifndef TIMESTAMP_IN_REGISTER
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_ALU, OP_SIZE_8, ALU_SUB, 1);
	gen_eight(ARG_SPACE);
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
#ifdef TIMESTAMP_IN_REGISTER
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(R_TIMESTAMP);
	gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_OFFSET);
#else
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_PUSH, OP_SIZE_8, 0, 0);
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	if (!reg_is_segment(R_UPCALL)) {
		gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
#ifdef TIMESTAMP_IN_REGISTER
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(R_TIMESTAMP);
	gen_insn(INSN_JMP_INDIRECT, 0, 0, 0);
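
/* Record the instruction pointer of the escaping instruction (packed into
   the upper 32 bits of a 64-bit store) and jump to the escape code at
   escape_label. */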
static bool attr_w gen_escape_arg(struct codegen_context *ctx, ip_t ip, uint32_t escape_label)
#if defined(ARCH_X86_32) || defined(ARCH_X86_64)
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
	gen_eight((uint64_t)ip << 32);
	gen_insn(INSN_JMP, 0, 0, 0);
	gen_four(escape_label);
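
/* Escape path epilogue: undo the prologue built by gen_entry (release
   ARG_SPACE, restore the saved registers) and return to the caller with
   RET. */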
static bool attr_w gen_escape(struct codegen_context *ctx)
#if defined(ARCH_X86_32)
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_insn(INSN_ALU, OP_SIZE_4, ALU_ADD, 1);
	gen_eight(ARG_SPACE);
	gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_4, 0, 0);
	gen_insn(INSN_RET, 0, 0, 0);
#elif defined(ARCH_X86_WIN_ABI)
	gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
#if defined(TIMESTAMP_IN_REGISTER)
	gen_eight(ARG_SPACE);
#else
	gen_eight(ARG_SPACE + 8);
#endif
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_MOV, OP_SIZE_8, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_RET, 0, 0, 0);
#else
#if defined(ARCH_X86_X32)
	gen_insn(INSN_ALU, OP_SIZE_8, ALU_ADD, 1);
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_POP, OP_SIZE_8, 0, 0);
	gen_insn(INSN_RET, 0, 0, 0);
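
/* Place the n-th upcall argument: on x86-32 the argument is stored into
   the outgoing-argument area on the stack; on the 64-bit ABIs the
   arguments are presumably already in the right registers, so nothing
   needs to be stored there. */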
static bool attr_w gen_upcall_argument(struct codegen_context attr_unused *ctx, unsigned attr_unused arg)
#if defined(ARCH_X86_32)
	ajla_assert_lo(arg * 4 < ARG_SPACE, (file_line, "gen_upcall_argument: argument %u", arg));
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
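
/* Call a function from the upcall vector: load the vector pointer (from
   the stack on x86-32, presumably via R_UPCALL on the 64-bit ABIs) and do
   an indirect call through the entry at the given byte offset, followed by
   gen_upcall_end. */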
static bool attr_w gen_upcall(struct codegen_context *ctx, unsigned offset, unsigned n_args)
#if defined(ARCH_X86_32)
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_SPACE + ARG_OFFSET + 4);
	gen_insn(INSN_CALL_INDIRECT, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
#elif defined(ARCH_X86_X32)
	gen_insn(INSN_MOV, OP_SIZE_ADDRESS, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
	gen_insn(INSN_CALL_INDIRECT, OP_SIZE_8, 0, 0);
	gen_one(ARG_ADDRESS_1);
	g(gen_upcall_end(ctx, n_args));
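
/* Preemption/safepoint check: compare the timestamp captured at entry
   (kept in R_TIMESTAMP, or in a stack slot when it is not cached in a
   register) with the current value of cg_upcall_vector_s.ts and escape
   when they differ. */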
static bool attr_w gen_timestamp_test(struct codegen_context *ctx, uint32_t escape_label)
#if defined(ARCH_X86_32)
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_SPACE + ARG_OFFSET + 4);
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
	gen_eight(ARG_SPACE + ARG_OFFSET + 8);
	gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
	gen_one(ARG_ADDRESS_1);
	gen_eight(offsetof(struct cg_upcall_vector_s, ts));
#elif defined(TIMESTAMP_IN_REGISTER)
	gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
	gen_one(R_TIMESTAMP);
	gen_one(ARG_ADDRESS_1);
	gen_eight(offsetof(struct cg_upcall_vector_s, ts));
#else
	gen_insn(INSN_MOV, OP_SIZE_4, 0, 0);
	gen_one(ARG_ADDRESS_1);
#if defined(ARCH_X86_WIN_ABI)
	gen_eight(ARG_SPACE);
	gen_insn(INSN_CMP, OP_SIZE_4, 0, 1);
	gen_one(ARG_ADDRESS_1);
	gen_eight(offsetof(struct cg_upcall_vector_s, ts));
#endif
	gen_insn(INSN_JMP_COND, OP_SIZE_4, COND_NE, 0);
	gen_four(escape_label);