2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/*
 * Base opcodes and field/flag bits for ARM64 (A64) instruction encodings.
 * Each ARM64_<INSN> macro is the 32-bit base encoding; the ARM64_<INSN>_*
 * macros are OR-able modifier bits (e.g. *_SIZE selects 64-bit operation,
 * *_SET_FLAGS sets the S bit).
 * NOTE(review): the leading numerals on each line look like extraction
 * artifacts (original file line numbers) — the real file presumably has
 * plain "#define" lines; confirm against the upstream source.
 */
/* logical ops: AND/ORR/EOR/ANDS, register and immediate forms */
19 #define ARM64_AND_OR_EOR 0x0a000000U
20 #define ARM64_AND_OR_EOR_AND 0x00000000U
21 #define ARM64_AND_OR_EOR_REG_NOT 0x00200000U
22 #define ARM64_AND_OR_EOR_IMM_NOT 0x00400000U
23 #define ARM64_AND_OR_EOR_ORR 0x20000000U
24 #define ARM64_AND_OR_EOR_EOR 0x40000000U
25 #define ARM64_AND_OR_EOR_ANDS 0x60000000U
26 #define ARM64_AND_OR_EOR_SIZE 0x80000000U
/* add/sub: shifted-register, extended-register and immediate forms */
27 #define ARM64_ADDSUB_SHIFTED 0x0b000000U
28 #define ARM64_ADDSUB_EXTENDED 0x0b200000U
/* SIMD population count / horizontal add (used for popcount) */
29 #define ARM64_CNT 0x0e205800U
30 #define ARM64_ADDV 0x0e31b800U
31 #define ARM64_ADDV_SIZE 0x00c00000U
32 #define ARM64_ADDSUB_IMM 0x11000000U
33 #define ARM64_ADDSUB_IMM_SHIFT12 0x00400000U
34 #define ARM64_ADDSUB_SET_FLAGS 0x20000000U
35 #define ARM64_ADDSUB_SUB 0x40000000U
36 #define ARM64_ADDSUB_SIZE 0x80000000U
37 #define ARM64_AND_OR_EOR_IMM 0x12000000U
/* MOVN (move wide with NOT) */
38 #define ARM64_MOVN_IMM16 0x12800000U
39 #define ARM64_MOVN_IMM16_SIZE 0x80000000U
/* bitfield move (SBFM/UBFM) and extract (EXTR) */
40 #define ARM64_SUBFM 0x13000000U
41 #define ARM64_SUBFM_U 0x40000000U
42 #define ARM64_SUBFM_SIZE 0x80400000U
43 #define ARM64_EXTR 0x13800000U
44 #define ARM64_EXTR_SIZE 0x80400000U
/* unconditional branch */
45 #define ARM64_B 0x14000000U
/* add/sub with carry (ADC/SBC) */
46 #define ARM64_ADCSBC 0x1a000000U
47 #define ARM64_ADCSBC_SET_FLAGS 0x20000000U
48 #define ARM64_ADCSBC_SBC 0x40000000U
49 #define ARM64_ADCSBC_SIZE 0x80000000U
/* conditional select family (CSEL/CSINC/CSINV/CSNEG) and CSET */
50 #define ARM64_CSEL 0x1a800000U
51 #define ARM64_CSEL_SEL 0x00000000U
52 #define ARM64_CSEL_INC 0x00000400U
53 #define ARM64_CSEL_INV 0x40000000U
54 #define ARM64_CSEL_NEG 0x40000400U
55 #define ARM64_CSEL_SIZE 0x80000000U
56 #define ARM64_CSET 0x1a9f07e0U
57 #define ARM64_CSET_SIZE 0x80000000U
/* UDIV/SDIV */
58 #define ARM64_SUDIV 0x1ac00800U
59 #define ARM64_SUDIV_SDIV 0x00000400U
60 #define ARM64_SUDIV_SIZE 0x80000000U
/* variable shifts/rotates (LSLV/LSRV/ASRV/RORV) */
61 #define ARM64_ROT 0x1ac02000U
62 #define ARM64_ROT_LSL 0x00000000U
63 #define ARM64_ROT_LSR 0x00000400U
64 #define ARM64_ROT_ASR 0x00000800U
65 #define ARM64_ROT_ROR 0x00000c00U
66 #define ARM64_ROT_SIZE 0x80000000U
/* multiply-add/subtract (MADD/MSUB) */
67 #define ARM64_MADDSUB 0x1b000000U
68 #define ARM64_MADDSUB_MSUB 0x00008000U
69 #define ARM64_MADDSUB_SIZE 0x80000000U
/* scalar FP two-operand ALU (FADD/FSUB/FMUL/FDIV) with precision bits */
70 #define ARM64_FP_ALU 0x1e200800U
71 #define ARM64_FP_ALU_MUL 0x00000000U
72 #define ARM64_FP_ALU_DIV 0x00001000U
73 #define ARM64_FP_ALU_ADD 0x00002000U
74 #define ARM64_FP_ALU_SUB 0x00003000U
75 #define ARM64_FP_ALU_SINGLE 0x00000000U
76 #define ARM64_FP_ALU_DOUBLE 0x00400000U
77 #define ARM64_FP_ALU_HALF 0x00c00000U
/* FCMP */
78 #define ARM64_FCMP 0x1e202000U
79 #define ARM64_FCMP_ZERO 0x00000008U
80 #define ARM64_FCMP_SINGLE 0x00000000U
81 #define ARM64_FCMP_DOUBLE 0x00400000U
82 #define ARM64_FCMP_HALF 0x00c00000U
/* scalar FP one-operand ALU (FMOV/FNEG/FSQRT/FRINT*) */
83 #define ARM64_FP_ALU1 0x1e204000U
84 #define ARM64_FP_ALU1_MOV 0x00000000U
85 #define ARM64_FP_ALU1_NEG 0x00010000U
86 #define ARM64_FP_ALU1_SQRT 0x00018000U
87 #define ARM64_FP_ALU1_RINTN 0x00040000U
88 #define ARM64_FP_ALU1_RINTP 0x00048000U
89 #define ARM64_FP_ALU1_RINTM 0x00050000U
90 #define ARM64_FP_ALU1_RINTZ 0x00058000U
91 #define ARM64_FP_ALU1_SINGLE 0x00000000U
92 #define ARM64_FP_ALU1_DOUBLE 0x00400000U
93 #define ARM64_FP_ALU1_HALF 0x00c00000U
/* int -> FP conversion (SCVTF) */
94 #define ARM64_SCVTF 0x1e220000U
95 #define ARM64_SCVTF_SINGLE 0x00000000U
96 #define ARM64_SCVTF_DOUBLE 0x00400000U
97 #define ARM64_SCVTF_HALF 0x00c00000U
98 #define ARM64_SCVTF_SIZE 0x80000000U
/* FP precision conversion (FCVT) — "FROM" selects source, "TO" destination */
99 #define ARM64_FCVT 0x1e224000U
100 #define ARM64_FCVT_TO_SINGLE 0x00000000U
101 #define ARM64_FCVT_TO_DOUBLE 0x00008000U
102 #define ARM64_FCVT_TO_HALF 0x00018000U
103 #define ARM64_FCVT_FROM_SINGLE 0x00000000U
104 #define ARM64_FCVT_FROM_DOUBLE 0x00400000U
105 #define ARM64_FCVT_FROM_HALF 0x00c00000U
/* GPR <-> FP register moves */
106 #define ARM64_FMOV 0x1e260000U
107 #define ARM64_FMOV_S_W 0x00010000U
108 #define ARM64_FMOV_D_X 0x80410000U
/* FP -> int conversion, round toward zero (FCVTZS) */
109 #define ARM64_FCVTZS 0x1e380000U
110 #define ARM64_FCVTZS_SINGLE 0x00000000U
111 #define ARM64_FCVTZS_DOUBLE 0x00400000U
112 #define ARM64_FCVTZS_HALF 0x00c00000U
113 #define ARM64_FCVTZS_SIZE 0x80000000U
/* load/store pair (LDP/STP) with pre/post/signed-offset indexing */
114 #define ARM64_LDPSTP 0x28000000U
115 #define ARM64_LDPSTP_LD 0x00400000U
116 #define ARM64_LDPSTP_POST_INDEX 0x00800000U
117 #define ARM64_LDPSTP_IMM 0x01000000U
118 #define ARM64_LDPSTP_PRE_INDEX 0x01800000U
119 #define ARM64_LDPSTP_SIZE 0x80000000U
/* register move (ORR Rd, XZR, Rm) */
120 #define ARM64_MOV 0x2a0003e0U
121 #define ARM64_MOV_SIZE 0x80000000U
/* compare-and-branch (CBZ/CBNZ) and test-bit-and-branch (TBZ/TBNZ) */
122 #define ARM64_CB 0x34000000U
123 #define ARM64_CBZ_CBNZ 0x01000000U
124 #define ARM64_CBZ_SIZE 0x80000000U
125 #define ARM64_TB 0x36000000U
126 #define ARM64_TB_TBNZ 0x01000000U
/* single-register load/store family with its addressing-mode bits */
127 #define ARM64_LDST 0x38000000U
128 #define ARM64_LDST_POST_INDEX 0x00000400U
129 #define ARM64_LDST_PRE_INDEX 0x00000c00U
130 #define ARM64_LDST_2REGS 0x00200800U
131 #define ARM64_LDST_2REGS_UXTW 0x00004000U
132 #define ARM64_LDST_2REGS_NORMAL 0x00006000U
133 #define ARM64_LDST_2REGS_SCALE 0x00007000U
134 #define ARM64_LDST_2REGS_SXTW 0x0000c000U
135 #define ARM64_LDST_2REGS_SXTX 0x0000e000U
136 #define ARM64_LDST_ST 0x00000000U
137 #define ARM64_LDST_LD_UX 0x00400000U
138 #define ARM64_LDST_LD_SX 0x00800000U
139 #define ARM64_LDST_LD_SXW 0x00c00000U
140 #define ARM64_LDST_SCALED_12BIT 0x01000000U
141 #define ARM64_LDST_FP 0x04000000U
142 #define ARM64_LDST_SIZE1 0x40000000U
143 #define ARM64_LDST_SIZE 0xc0000000U
144 #define ARM64_LDST_FP_8 0x00000000U
145 #define ARM64_LDST_FP_16 0x40000000U
146 #define ARM64_LDST_FP_32 0x80000000U
147 #define ARM64_LDST_FP_64 0xc0000000U
148 #define ARM64_LDST_FP_128 0x00800000U
/* MOVZ (move wide immediate) */
149 #define ARM64_MOV_IMM16 0x52800000U
150 #define ARM64_MOV_IMM16_SIZE 0x80000000U
/* conditional branch */
151 #define ARM64_B_COND 0x54000000U
/* byte-reverse (RBIT/REV16/REV32/REV) and count-leading-zeros */
152 #define ARM64_REV 0x5ac00000U
153 #define ARM64_REV_1 0x00000000U
154 #define ARM64_REV_16 0x00000400U
155 #define ARM64_REV_32 0x00000800U
156 #define ARM64_REV_64 0x00000c00U
157 #define ARM64_REV_SIZE 0x80000000U
158 #define ARM64_CLZ 0x5ac01000U
159 #define ARM64_CLZ_SIZE 0x80000000U
/* MOVK (move wide, keep) */
160 #define ARM64_MOVK 0x72800000U
161 #define ARM64_MOVK_SIZE 0x80000000U
/* 32x32 -> 64 widening multiply-add (SMADDL/UMADDL) and high multiply */
162 #define ARM64_SMADDSUBL 0x9b200000U
163 #define ARM64_SMADDSUBL_SUB 0x00008000U
164 #define ARM64_SMADDSUBL_U 0x00800000U
165 #define ARM64_SUMULH 0x9b407c00U
166 #define ARM64_SUMULH_U 0x00800000U
/* indirect branch, call, return */
167 #define ARM64_BR 0xd61f0000U
168 #define ARM64_BLR 0xd63f0000U
169 #define ARM64_RET 0xd65f03c0U
/*
 * Maps the generic condition codes used by the portable codegen layer to
 * ARM64 condition-field encodings; -1 marks conditions with no direct
 * ARM64 equivalent.  The second half of the table (index 32+) presumably
 * covers FP comparison conditions — TODO confirm against the COND_*
 * definitions elsewhere in the project.
 * NOTE(review): the table's final row and closing brace are elided in
 * this excerpt.
 */
171 static const int8_t jmp_cond[48] = {
172 0x6, 0x7, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
173 0x4, 0x5, -1, -1, 0xb, 0xa, 0xd, 0xc,
174 -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, -1, -1, -1, -1, -1, -1,
176 -1, -1, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
177 -1, -1, 0x6, 0x7, -1, -1, -1, -1,
180 static const int16_t rot_codes[8] = {
/*
 * Emit one ARM64 load/store instruction.
 *   ldst_mode - load/store + extension selector (ARM64_LDST_ST / _LD_UX /
 *               _LD_SX ..., optionally ORed with ARM64_LDST_FP)
 *   size      - log2 of the operand size (scales offsets and sets size bits)
 *   reg       - target/source register number
 *   address   - serialized address operand: tag byte, base register,
 *               then index register or immediate
 * Supports register+register addressing (with UXTW/SXTW/scaled variants),
 * 9-bit signed unscaled/pre/post-index offsets, and 12-bit scaled
 * unsigned offsets; anything else falls through to internal().
 * NOTE(review): several lines (error gotos, cgen_four emission, returns)
 * are elided in this excerpt.
 */
191 static bool attr_w cgen_ldr_str(struct codegen_context *ctx, unsigned ldst_mode, unsigned size, uint8_t reg, uint8_t *address)
194 uint32_t mc = ARM64_LDST;
196 mc |= ARM64_LDST_SIZE1 * size;
197 if (address[0] >= ARG_ADDRESS_2 && address[0] <= ARG_ADDRESS_2_SXTW) {
198 imm = get_imm(&address[3]);
199 if (unlikely(imm != 0))
201 mc |= ARM64_LDST_2REGS;
202 if (address[0] == ARG_ADDRESS_2) {
203 mc |= ARM64_LDST_2REGS_NORMAL;
204 } else if ((unsigned)address[0] - ARG_ADDRESS_2 == size) {
/* index register shifted by the access size */
205 mc |= ARM64_LDST_2REGS_SCALE;
206 } else if (address[0] == ARG_ADDRESS_2_UXTW) {
207 mc |= ARM64_LDST_2REGS_UXTW;
208 } else if (address[0] == ARG_ADDRESS_2_SXTW) {
209 mc |= ARM64_LDST_2REGS_SXTW;
/* base register -> bits 5..9, index register -> bits 16..20 */
214 mc |= (uint32_t)address[1] << 5;
215 mc |= (uint32_t)address[2] << 16;
/* single-register base with immediate offset */
219 imm = get_imm(&address[2]);
220 if (imm >= -256 && imm <= 255) {
/* 9-bit signed offset: unscaled, pre-index or post-index form */
221 if (address[0] == ARG_ADDRESS_1) {
222 } else if (address[0] == ARG_ADDRESS_1_PRE_I) {
223 mc |= ARM64_LDST_PRE_INDEX;
224 } else if (address[0] == ARG_ADDRESS_1_POST_I) {
225 mc |= ARM64_LDST_POST_INDEX;
230 mc |= (uint32_t)address[1] << 5;
231 mc |= (imm & 0x1ff) << 12;
/* otherwise: 12-bit unsigned offset, scaled by the access size */
235 if (unlikely(address[0] != ARG_ADDRESS_1))
237 if (unlikely((imm & ((1 << size) - 1)) != 0) || unlikely(imm < 0))
240 if (unlikely(imm >= 0x1000))
242 mc |= ARM64_LDST_SCALED_12BIT;
244 mc |= (uint32_t)address[1] << 5;
245 mc |= (imm & 0xfff) << 10;
250 internal(file_line, "cgen_ldr_str: invalid address: %02x, %02x, %"PRIxMAX"", reg, address[0], (uintmax_t)imm);
/*
 * Emit a move: register-register (GPR and FP), immediate-to-register
 * (MOVZ/MOVN for 16-bit-representable values), or a load/store through
 * cgen_ldr_str for memory operands.  `size` is the operand size log2;
 * `sx` requests sign extension on sub-native sizes.
 * Arguments are read from the instruction stream at ctx->code_position.
 * NOTE(review): several branches/returns are elided in this excerpt.
 */
254 static bool attr_w cgen_mov(struct codegen_context *ctx, unsigned size, bool sx)
258 uint8_t *arg1 = ctx->code_position;
259 uint8_t *arg2 = arg1 + arg_size(*arg1);
260 ctx->code_position = arg2 + arg_size(*arg2);
/* sub-word reg-to-reg move: use SBFM/UBFM to sign/zero-extend */
263 if (size < (sx ? OP_SIZE_8 : OP_SIZE_4)) {
264 mc = ARM64_SUBFM | (sx * ARM64_SUBFM_SIZE);
266 mc |= (uint32_t)arg2[0] << 5;
267 mc |= ((8U << size) - 1) << 10;
268 mc |= ARM64_SUBFM_U * !sx;
/* SP is not addressable by the ORR-based MOV; use ADD #0 instead */
272 if (arg1[0] == R_SP || arg2[0] == R_SP) {
273 mc = ARM64_ADDSUB_IMM;
275 mc |= (uint32_t)arg2[0] << 5;
277 /* !!! TODO: handle shifted register */
280 mc |= (uint32_t)arg2[0] << 16;
282 mc |= ARM64_MOV_SIZE * (size == OP_SIZE_8);
286 if (arg2[0] == ARG_IMM) {
287 if (unlikely(size < OP_SIZE_4))
288 internal(file_line, "cgen_mov: unsupported size %u", size);
289 imm = get_imm(&arg2[1]);
/* positive 16-bit immediate: MOVZ */
290 if (imm >= 0 && imm < 0x10000) {
291 mc = ARM64_MOV_IMM16;
292 mc |= ARM64_MOV_IMM16_SIZE * (size == OP_SIZE_8);
294 mc |= (uint32_t)imm << 5;
/* complement fits in 16 bits: MOVN */
298 if (~imm >= 0 && ~imm < 0x10000) {
300 mc = ARM64_MOVN_IMM16;
301 mc |= ARM64_MOVN_IMM16_SIZE * (size == OP_SIZE_8);
303 mc |= (uint32_t)imm << 5;
307 internal(file_line, "cgen_mov: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
/* memory source: zero- or sign-extending load */
309 if (!sx || size == OP_SIZE_NATIVE)
310 return cgen_ldr_str(ctx, ARM64_LDST_LD_UX, size, arg1[0], arg2);
312 return cgen_ldr_str(ctx, ARM64_LDST_LD_SX, size, arg1[0], arg2);
/* FP destination: FMOV from GPR, FMOV between FP regs, or FP load */
314 if (reg_is_fp(arg1[0])) {
316 if (size < OP_SIZE_4)
318 mc = ARM64_FMOV | (size == OP_SIZE_4 ? ARM64_FMOV_S_W : ARM64_FMOV_D_X);
320 mc |= (uint32_t)arg2[0] << 5;
324 if (reg_is_fp(arg2[0])) {
325 mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_MOV;
327 case OP_SIZE_2: mc |= ARM64_FP_ALU1_HALF; break;
328 case OP_SIZE_4: mc |= ARM64_FP_ALU1_SINGLE; break;
329 case OP_SIZE_8: mc |= ARM64_FP_ALU1_DOUBLE; break;
330 default: internal(file_line, "cgen_mov: invalid size %u", size);
333 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
337 return cgen_ldr_str(ctx, ARM64_LDST_LD_UX | ARM64_LDST_FP, size, arg1[0] & 31, arg2);
/* memory destination: store from GPR, FP register, or zero register */
340 return cgen_ldr_str(ctx, ARM64_LDST_ST, size, arg2[0], arg1);
342 if (reg_is_fp(arg2[0])) {
343 return cgen_ldr_str(ctx, ARM64_LDST_ST | ARM64_LDST_FP, size, arg2[0] & 31, arg1);
345 if (arg2[0] == ARG_IMM) {
346 imm = get_imm(&arg2[1]);
/* storing immediate zero: store XZR/WZR (0x1f) */
348 return cgen_ldr_str(ctx, ARM64_LDST_ST, size, 0x1f, arg1);
351 internal(file_line, "cgen_mov: invalid arguments %02x, %02x", arg1[0], arg2[0]);
/*
 * Emit a two/three-operand integer ALU operation (arg1 = arg2 OP arg3).
 * Handles MUL (with widening SMADDL/UMADDL special cases), UMULH/SMULH,
 * UDIV/SDIV, ADC/SBB, ADD/SUB (shifted/extended register and immediate
 * forms), and AND/OR/XOR (register, shifted-register and bitmask-immediate
 * forms).  `not` requests the complemented-operand variant (ORN/BIC/EON);
 * `writes_flags` selects the flag-setting encodings where they exist.
 * NOTE(review): many lines (labels, emission, returns) are elided in this
 * excerpt; the control flow between the visible branches relies on them.
 */
355 static bool attr_w cgen_alu_args(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu, bool not, uint8_t *arg1, uint8_t *arg2, uint8_t *arg3)
360 if (unlikely(arg1[0] >= 32))
362 if (unlikely(alu == ALU_MUL)) {
/* 64-bit multiply of two sign/zero-extended 32-bit values:
   use the widening SMADDL/UMADDL forms */
364 if (size == OP_SIZE_8 &&
365 arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_SXTW &&
366 arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_SXTW) {
369 mc = ARM64_SMADDSUBL;
370 } else if (size == OP_SIZE_8 &&
371 arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_UXTW &&
372 arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_UXTW) {
375 mc = ARM64_SMADDSUBL | ARM64_SMADDSUBL_U;
377 if (unlikely(arg2[0] >= 32) && unlikely(arg3[0] >= 32))
379 mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
387 if (unlikely(arg2[0] >= 32))
/* high half of 64x64 multiply: UMULH/SMULH (64-bit only) */
389 if (unlikely(alu == ALU_UMULH) || unlikely(alu == ALU_SMULH)) {
390 if (unlikely(arg3[0] >= 32))
392 if (unlikely(size != OP_SIZE_8))
395 mc |= ARM64_SUMULH_U * (alu == ALU_UMULH);
397 mc |= (uint32_t)arg2[0] << 5;
398 mc |= (uint32_t)arg3[0] << 16;
/* division: UDIV/SDIV */
402 if (unlikely(alu == ALU_UDIV) || unlikely(alu == ALU_SDIV)) {
403 if (unlikely(arg3[0] >= 32))
406 mc |= ARM64_SUDIV_SDIV * (alu == ALU_SDIV);
407 mc |= ARM64_SUDIV_SIZE * (size == OP_SIZE_8);
409 mc |= (uint32_t)arg2[0] << 5;
410 mc |= (uint32_t)arg3[0] << 16;
/* add/subtract with carry: ADC/SBC (immediate operand must be 0) */
414 if (unlikely(alu == ALU_ADC) || unlikely(alu == ALU_SBB)) {
415 if (arg3[0] == ARG_IMM) {
416 imm = get_imm(&arg3[1]);
417 if (unlikely(imm != 0))
420 } else if (unlikely(arg3[0] >= 32)) {
424 mc |= ARM64_ADCSBC_SBC * (alu == ALU_SBB);
426 mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
428 mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
430 mc |= (uint32_t)arg2[0] << 5;
431 mc |= (uint32_t)arg3[0] << 16;
435 if (alu == ALU_ADD || alu == ALU_SUB) {
437 mc |= ARM64_ADDSUB_SUB * (alu == ALU_SUB);
439 mc |= ARM64_ADDSUB_SHIFTED;
442 if (arg3[0] == ARG_EXTENDED_REGISTER) {
443 mc |= ARM64_ADDSUB_EXTENDED;
444 mc |= (uint32_t)arg3[1] << 10;
448 if (arg3[0] == ARG_SHIFTED_REGISTER) {
/* shift type 3 (ROR) is not valid for add/sub shifted register */
449 if (unlikely((arg3[1] >> 6) == 3))
451 mc |= ARM64_ADDSUB_SHIFTED;
452 mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
453 mc |= (uint32_t)(arg3[1] >> 6) << 22;
457 if (arg3[0] == ARG_IMM) {
458 mc |= ARM64_ADDSUB_IMM;
459 mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
460 imm = get_imm(&arg3[1]);
/* 12-bit immediate, optionally shifted left by 12 */
461 if (likely(imm >= 0) && likely(imm < 0x1000)) {
465 mc |= (uint32_t)arg2[0] << 5;
466 mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
470 if (likely(!(imm & 0xfff))) {
471 imm = (uint64_t)imm >> 12;
472 if (likely(imm < 0x1000)) {
473 mc |= ARM64_ADDSUB_IMM_SHIFT12;
480 if (alu == ALU_AND || alu == ALU_OR || alu == ALU_XOR) {
/* register form uses the NOT bit for ORN/BIC/EON when `not` is set */
483 if (arg3[0] != ARG_IMM)
484 mc |= ARM64_AND_OR_EOR_REG_NOT;
486 if (alu == ALU_AND) {
487 mc |= writes_flags ? ARM64_AND_OR_EOR_ANDS : ARM64_AND_OR_EOR_AND;
489 if (unlikely(writes_flags))
491 mc |= alu == ALU_OR ? ARM64_AND_OR_EOR_ORR : ARM64_AND_OR_EOR_EOR;
494 mc |= ARM64_AND_OR_EOR;
497 if (arg3[0] == ARG_SHIFTED_REGISTER) {
498 mc |= ARM64_AND_OR_EOR;
499 mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
500 mc |= (uint32_t)(arg3[1] >> 6) << 22;
504 if (arg3[0] == ARG_IMM) {
/* logical immediate: encode via the N:immr:imms bitmask scheme */
506 mc |= ARM64_AND_OR_EOR_SIZE * (size == OP_SIZE_8);
507 mc |= ARM64_AND_OR_EOR_IMM;
508 imm = get_imm(&arg3[1]);
511 if (size == OP_SIZE_4)
513 code = value_to_code(size, imm);
514 if (unlikely(code < 0))
515 internal(file_line, "cgen_alu_args: invalid immediate value %"PRIxMAX"", (uintmax_t)imm);
517 mc |= (uint32_t)arg2[0] << 5;
518 mc ^= (uint32_t)code << 10;
526 internal(file_line, "cgen_alu_args: invalid arguments %02x, %02x, %02x, %u, %u", arg1[0], arg2[0], arg3[0], alu, writes_flags);
/*
 * Emit a compare: CMP (SUB discarding result) or, with cmn set, CMN
 * (ADD discarding result).  Delegates to cgen_alu_args with the zero
 * register as destination (`z` is declared in lines elided from this
 * excerpt — presumably 0x1f / XZR; confirm).
 */
533 static bool attr_w cgen_cmp(struct codegen_context *ctx, unsigned size, bool cmn)
533 uint8_t *arg1 = ctx->code_position;
534 uint8_t *arg2 = arg1 + arg_size(*arg1);
535 ctx->code_position = arg2 + arg_size(*arg2);
536 return cgen_alu_args(ctx, size, true, cmn ? ALU_ADD : ALU_SUB, false, &z, arg1, arg2);
/*
 * Emit a TST: flag-setting AND with the result discarded into the zero
 * register (`z` declared in elided lines, as in cgen_cmp).
 */
539 static bool attr_w cgen_test(struct codegen_context *ctx, unsigned size)
542 uint8_t *arg1 = ctx->code_position;
543 uint8_t *arg2 = arg1 + arg_size(*arg1);
544 ctx->code_position = arg2 + arg_size(*arg2);
545 return cgen_alu_args(ctx, size, true, ALU_AND, false, &z, arg1, arg2);
/*
 * Decode three operands from the instruction stream and dispatch a binary
 * ALU op.  The *N variants (ORN/ANDN/XORN) are rewritten to the base op
 * plus the `not` flag, which cgen_alu_args turns into ORN/BIC/EON.
 */
548 static bool attr_w cgen_alu(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
551 uint8_t *arg1 = ctx->code_position;
552 uint8_t *arg2 = arg1 + arg_size(*arg1);
553 uint8_t *arg3 = arg2 + arg_size(*arg2);
554 ctx->code_position = arg3 + arg_size(*arg3);
557 case ALU_ORN: alu = ALU_OR; not = true; break;
558 case ALU_ANDN: alu = ALU_AND; not = true; break;
559 case ALU_XORN: alu = ALU_XOR; not = true; break;
561 return cgen_alu_args(ctx, size, writes_flags, alu, not, arg1, arg2, arg3);
/*
 * Emit a unary ALU op.  NOT/NEG/NGC are synthesized from the binary ops
 * with the zero register (`z`, declared in elided lines); INC/DEC become
 * ADD/SUB with an immediate 1; BREV/BSWAP* map to RBIT/REV16/REV32/REV
 * and BSF-style ops to CLZ.
 * NOTE(review): the switch heads and several case labels are elided in
 * this excerpt.
 */
564 static bool attr_w cgen_alu1(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
568 uint8_t one_imm[9] = { ARG_IMM, 1, 0, 0, 0, 0, 0, 0, 0 };
569 uint8_t *arg1 = ctx->code_position;
570 uint8_t *arg2 = arg1 + arg_size(*arg1);
571 ctx->code_position = arg2 + arg_size(*arg2);
/* NOT: ORN with zero register */
574 return cgen_alu_args(ctx, size, writes_flags, ALU_OR, true, arg1, &z, arg2);
/* NEG: SUB from zero */
576 return cgen_alu_args(ctx, size, writes_flags, ALU_SUB, false, arg1, &z, arg2);
/* NGC: SBC from zero */
578 return cgen_alu_args(ctx, size, writes_flags, ALU_SBB, false, arg1, &z, arg2);
/* INC / DEC via immediate 1 */
580 return cgen_alu_args(ctx, size, writes_flags, ALU_ADD, false, arg1, arg2, one_imm);
582 return cgen_alu_args(ctx, size, writes_flags, ALU_SUB, false, arg1, arg2, one_imm);
/* bit/byte reversal family (RBIT/REV16/REV32/REV) */
587 if (alu == ALU1_BREV) {
589 } else if (alu == ALU1_BSWAP16) {
591 } else if (alu == ALU1_BSWAP) {
592 if (size == OP_SIZE_4)
597 mc |= ARM64_REV_SIZE * (size == OP_SIZE_8);
599 mc |= (uint32_t)arg2[0] << 5;
/* count leading zeros */
604 mc |= ARM64_CLZ_SIZE * (size == OP_SIZE_8);
606 mc |= (uint32_t)arg2[0] << 5;
610 internal(file_line, "cgen_alu1: invalid arguments");
/*
 * Emit a shift/rotate by immediate.  ROR/ROL use EXTR with both source
 * registers equal (ROL is converted to an equivalent ROR amount in elided
 * lines); SHL uses UBFM; SHR/SAR use UBFM/SBFM with the sign bit selected
 * by ARM64_SUBFM_U.  The immediate is masked to the operand bit width.
 * NOTE(review): several branch heads and emission lines are elided.
 */
615 static bool attr_w cgen_rot_imm(struct codegen_context *ctx, unsigned size, uint8_t rot, uint8_t *arg1, uint8_t *arg2, uint8_t imm)
618 if (unlikely(rot == ROT_ROL) || rot == ROT_SHL) {
/* mask shift amount to 0..(bits-1); size is log2 bytes, +3 gives log2 bits */
621 imm &= (1U << (size + 3)) - 1;
623 mc |= (rot == ROT_ROR || rot == ROT_ROL ? ARM64_EXTR_SIZE : ARM64_SUBFM_SIZE) * (size == OP_SIZE_8);
/* EXTR form: source register appears in both Rn and Rm fields */
629 mc |= (uint32_t)arg2[0] << 5;
630 mc |= (uint32_t)arg2[0] << 16;
631 mc |= (uint32_t)imm << 10;
/* shift-left via UBFM */
634 mc |= ARM64_SUBFM | ARM64_SUBFM_U;
636 mc |= (uint32_t)arg2[0] << 5;
637 mc |= (uint32_t)imm << 16;
639 imm &= (1U << (size + 3)) - 1;
640 mc |= (uint32_t)(imm << 10);
/* shift-right: logical (UBFM) vs arithmetic (SBFM) */
645 mc |= (rot == ROT_SHR) * ARM64_SUBFM_U;
647 mc |= (uint32_t)arg2[0] << 5;
648 mc |= (uint32_t)imm << 16;
649 mc |= ((1U << (size + 3)) - 1) << 10;
/*
 * Emit a shift/rotate by register (LSLV/LSRV/ASRV/RORV), or delegate to
 * cgen_rot_imm when the shift amount is an immediate.  rot_codes maps the
 * generic ROT_* code to the ARM64 op2 field; -1 means unsupported.
 */
656 static bool attr_w cgen_rot(struct codegen_context *ctx, unsigned size, unsigned rot)
660 uint8_t *arg1 = ctx->code_position;
661 uint8_t *arg2 = arg1 + arg_size(*arg1);
662 uint8_t *arg3 = arg2 + arg_size(*arg2);
663 ctx->code_position = arg3 + arg_size(*arg3);
664 if (arg3[0] == ARG_IMM)
665 return cgen_rot_imm(ctx, size, rot, arg1, arg2, arg3[1]);
666 arm_rot = rot_codes[rot];
667 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arm_rot < 0))
668 internal(file_line, "cgen_rot: invalid arguments");
670 mc |= ARM64_ROT_SIZE * (size == OP_SIZE_8);
673 mc |= (uint32_t)arg2[0] << 5;
674 mc |= (uint32_t)arg3[0] << 16;
/*
 * Emit MADD/MSUB: arg1 = arg4 +/- arg2 * arg3.  All four operands must be
 * plain general-purpose registers (< 32).
 */
679 static bool attr_w cgen_madd(struct codegen_context *ctx, unsigned size, bool sub)
682 uint8_t *arg1 = ctx->code_position;
683 uint8_t *arg2 = arg1 + arg_size(*arg1);
684 uint8_t *arg3 = arg2 + arg_size(*arg2);
685 uint8_t *arg4 = arg3 + arg_size(*arg3);
686 ctx->code_position = arg4 + arg_size(*arg4);
687 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arg4[0] >= 32))
688 internal(file_line, "cgen_madd: invalid arguments");
690 mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
691 mc |= ARM64_MADDSUB_MSUB * sub;
/* Rn (multiplicand), Rm (multiplier), Ra (addend) fields */
693 mc |= (uint32_t)arg2[0] << 5;
694 mc |= (uint32_t)arg3[0] << 16;
695 mc |= (uint32_t)arg4[0] << 10;
/*
 * Emit CSET: set arg1 to 1 if condition `aux` holds, else 0.  CSET is an
 * alias of CSINC with the inverted condition, hence `cond ^ 1` below.
 */
700 static bool attr_w cgen_set_cond(struct codegen_context *ctx, unsigned size, unsigned aux)
704 uint8_t *arg1 = ctx->code_position;
705 ctx->code_position = arg1 + arg_size(*arg1);
706 cond = jmp_cond[aux];
707 if (unlikely(cond < 0) || unlikely(arg1[0] >= 31))
708 internal(file_line, "cgen_set_cond: invalid arguments: %02x, %u, %u", arg1[0], size, aux);
710 mc |= ARM64_CSET_SIZE * (size == OP_SIZE_8);
711 mc |= (uint32_t)(cond ^ 1) << 12;
/*
 * Emit a conditional select (CSEL/CSINC/CSINV/CSNEG): arg1 = cond ?
 * arg2 : op(arg3).  Immediate operands are only accepted when they are 0
 * (replaced by the zero register in elided lines — presumably).
 */
717 static bool attr_w cgen_csel(struct codegen_context *ctx, uint32_t insn, unsigned size, unsigned aux)
723 uint8_t *arg1 = ctx->code_position;
724 uint8_t *arg2 = arg1 + arg_size(*arg1);
725 uint8_t *arg3 = arg2 + arg_size(*arg2);
726 ctx->code_position = arg3 + arg_size(*arg3);
727 if (arg2[0] == ARG_IMM) {
728 imm = get_imm(&arg2[1]);
729 if (unlikely(imm != 0))
733 if (arg3[0] == ARG_IMM) {
734 imm = get_imm(&arg3[1]);
735 if (unlikely(imm != 0))
739 cond = jmp_cond[aux];
740 if (unlikely(cond < 0))
744 case INSN_CSEL_SEL: mc |= ARM64_CSEL_SEL; break;
745 case INSN_CSEL_INC: mc |= ARM64_CSEL_INC; break;
746 case INSN_CSEL_INV: mc |= ARM64_CSEL_INV; break;
747 case INSN_CSEL_NEG: mc |= ARM64_CSEL_NEG; break;
751 mc |= ARM64_CSEL_SIZE * (size == OP_SIZE_8);
/* note: arg2 goes to Rm (bits 16+), arg3 to Rn (bits 5+) */
753 mc |= (uint32_t)arg2[0] << 16;
754 mc |= (uint32_t)arg3[0] << 5;
755 mc |= (uint32_t)cond << 12;
759 internal(file_line, "cgen_csel: invalid arguments");
/*
 * Emit LDP/STP (load/store register pair).  For loads the destination
 * registers come first in the operand stream; for stores (the `else`
 * branch, head elided) the address comes last and operands are re-read
 * in store order.  Offset must be size-aligned and within the signed
 * 7-bit scaled range (-64..63 units).
 * NOTE(review): several branch heads and the emission lines are elided.
 */
762 static bool attr_w cgen_ldp_stp(struct codegen_context *ctx, bool ldr, unsigned size)
764 uint8_t *arg1, *arg2, *arg3;
769 arg1 = ctx->code_position;
770 arg2 = arg1 + arg_size(*arg1);
771 arg3 = arg2 + arg_size(*arg2);
772 ctx->code_position = arg3 + arg_size(*arg3);
/* only a literal 0 immediate is accepted for a register slot */
773 if (arg2[0] == ARG_IMM) {
774 imm = get_imm(&arg2[1]);
775 if (unlikely(imm != 0))
779 if (arg3[0] == ARG_IMM) {
780 imm = get_imm(&arg3[1]);
781 if (unlikely(imm != 0))
/* store path: operands are (reg, reg, address) — re-scan accordingly */
786 arg2 = ctx->code_position;
787 arg3 = arg2 + arg_size(*arg2);
788 arg1 = arg3 + arg_size(*arg3);
789 ctx->code_position = arg1 + arg_size(*arg1);
792 mc |= ARM64_LDPSTP_LD * (uint32_t)ldr;
793 mc |= ARM64_LDPSTP_SIZE * (size == OP_SIZE_8);
794 if (arg1[0] == ARG_ADDRESS_1) {
795 mc |= ARM64_LDPSTP_IMM;
796 } else if (arg1[0] == ARG_ADDRESS_1_PRE_I) {
797 mc |= ARM64_LDPSTP_PRE_INDEX;
798 } else if (arg1[0] == ARG_ADDRESS_1_POST_I) {
799 mc |= ARM64_LDPSTP_POST_INDEX;
803 if (unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32))
806 mc |= (uint32_t)arg3[0] << 10;
807 mc |= (uint32_t)arg1[1] << 5;
808 imm = get_imm(&arg1[2]);
/* offset must be a multiple of the element size and fit in 7 bits */
809 if (unlikely((imm & ((1 << size) - 1)) != 0))
812 if (unlikely(imm < -64) || unlikely(imm > 63))
814 mc |= (imm & 127) << 15;
819 internal(file_line, "cgen_ldp_stp: invalid arguments %02x, %02x, %02x", arg1[0], arg2[0], arg3[0]);
/*
 * Emit MOVK: insert a 16-bit immediate (arg3) into 64-bit register arg1
 * at half-word position `aux` (0..3, placed into the hw field at bit 21).
 */
823 static bool attr_w cgen_mov_mask(struct codegen_context *ctx, unsigned aux)
827 uint8_t *arg1 = ctx->code_position;
828 uint8_t *arg2 = arg1 + arg_size(*arg1);
829 uint8_t *arg3 = arg2 + arg_size(*arg2);
830 ctx->code_position = arg3 + arg_size(*arg3);
831 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] != ARG_IMM))
832 internal(file_line, "cgen_mov_mask: bad arguments");
834 mc |= ARM64_MOVK_SIZE;
835 mc |= (uint32_t)aux << 21;
836 imm = get_imm(&arg3[1]);
837 if (unlikely(imm >= 0x10000))
838 internal(file_line, "cgen_mov_mask: bad number");
839 mc |= (imm & 0xffff) << 5;
/*
 * Emit FCMP between two FP registers, with precision selected by
 * op_size (half/single/double).
 */
845 static bool attr_w cgen_fp_cmp(struct codegen_context *ctx, unsigned op_size)
848 uint8_t *arg1 = ctx->code_position;
849 uint8_t *arg2 = arg1 + arg_size(*arg1);
850 ctx->code_position = arg2 + arg_size(*arg2);
853 case OP_SIZE_2: mc |= ARM64_FCMP_HALF; break;
854 case OP_SIZE_4: mc |= ARM64_FCMP_SINGLE; break;
855 case OP_SIZE_8: mc |= ARM64_FCMP_DOUBLE; break;
856 default: internal(file_line, "cgen_fp_cmp: invalid size %u", op_size);
/* & 31 strips the FP-register tag bit from the register number */
858 mc |= ((uint32_t)(arg1[0] & 31)) << 5;
859 mc |= ((uint32_t)(arg2[0] & 31)) << 16;
/*
 * Emit a two-operand FP ALU op (FADD/FSUB/FMUL/FDIV), precision selected
 * by op_size, operation by aux.
 */
864 static bool attr_w cgen_fp_alu(struct codegen_context *ctx, unsigned op_size, unsigned aux)
867 uint8_t *arg1 = ctx->code_position;
868 uint8_t *arg2 = arg1 + arg_size(*arg1);
869 uint8_t *arg3 = arg2 + arg_size(*arg2);
870 ctx->code_position = arg3 + arg_size(*arg3);
873 case FP_ALU_ADD: mc |= ARM64_FP_ALU_ADD; break;
874 case FP_ALU_SUB: mc |= ARM64_FP_ALU_SUB; break;
875 case FP_ALU_MUL: mc |= ARM64_FP_ALU_MUL; break;
876 case FP_ALU_DIV: mc |= ARM64_FP_ALU_DIV; break;
877 default: internal(file_line, "cgen_fp_alu: invalid alu %u", aux);
880 case OP_SIZE_2: mc |= ARM64_FP_ALU_HALF; break;
881 case OP_SIZE_4: mc |= ARM64_FP_ALU_SINGLE; break;
882 case OP_SIZE_8: mc |= ARM64_FP_ALU_DOUBLE; break;
883 default: internal(file_line, "cgen_fp_alu: invalid size %u", op_size);
886 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
887 mc |= ((uint32_t)(arg3[0] & 31)) << 16;
/*
 * Emit a one-operand FP op: FNEG/FSQRT/FRINT* (with precision bits), or
 * the SIMD CNT/ADDV pair used to implement popcount — those skip the
 * precision switch via `goto do_regs` (label in elided lines).
 */
892 static bool attr_w cgen_fp_alu1(struct codegen_context *ctx, unsigned op_size, unsigned aux)
895 uint8_t *arg1 = ctx->code_position;
896 uint8_t *arg2 = arg1 + arg_size(*arg1);
897 ctx->code_position = arg2 + arg_size(*arg2);
899 case FP_ALU1_NEG: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_NEG; break;
900 case FP_ALU1_SQRT: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_SQRT; break;
/* ROUND = to-nearest, FLOOR = toward -inf, CEIL = toward +inf, TRUNC = toward zero */
901 case FP_ALU1_ROUND: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTN; break;
902 case FP_ALU1_FLOOR: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTM; break;
903 case FP_ALU1_CEIL: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTP; break;
904 case FP_ALU1_TRUNC: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTZ; break;
905 case FP_ALU1_VCNT8: mc = ARM64_CNT; goto do_regs;
906 case FP_ALU1_ADDV: mc = ARM64_ADDV; goto do_regs;
907 default: internal(file_line, "cgen_fp_alu1: invalid alu %u", aux);
910 case OP_SIZE_2: mc |= ARM64_FP_ALU1_HALF; break;
911 case OP_SIZE_4: mc |= ARM64_FP_ALU1_SINGLE; break;
912 case OP_SIZE_8: mc |= ARM64_FP_ALU1_DOUBLE; break;
913 default: internal(file_line, "cgen_fp_alu1: invalid size %u", op_size);
917 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
/*
 * Emit FCVTZS: convert FP (fp_op_size precision) to a signed integer of
 * int_op_size (32 or 64 bit), rounding toward zero.
 */
922 static bool attr_w cgen_fp_to_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
925 uint8_t *arg1 = ctx->code_position;
926 uint8_t *arg2 = arg1 + arg_size(*arg1);
927 ctx->code_position = arg2 + arg_size(*arg2);
929 switch (int_op_size) {
930 case OP_SIZE_4: break;
931 case OP_SIZE_8: mc |= ARM64_FCVTZS_SIZE; break;
932 default: internal(file_line, "cgen_fp_to_int: invalid int size %u", int_op_size);
934 switch (fp_op_size) {
935 case OP_SIZE_2: mc |= ARM64_FCVTZS_HALF; break;
936 case OP_SIZE_4: mc |= ARM64_FCVTZS_SINGLE; break;
937 case OP_SIZE_8: mc |= ARM64_FCVTZS_DOUBLE; break;
938 default: internal(file_line, "cgen_fp_to_int: invalid fp size %u", fp_op_size);
941 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
/*
 * Emit SCVTF: convert a signed integer of int_op_size (32 or 64 bit) to
 * FP of fp_op_size precision.
 */
946 static bool attr_w cgen_fp_from_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
949 uint8_t *arg1 = ctx->code_position;
950 uint8_t *arg2 = arg1 + arg_size(*arg1);
951 ctx->code_position = arg2 + arg_size(*arg2);
953 switch (int_op_size) {
954 case OP_SIZE_4: break;
955 case OP_SIZE_8: mc |= ARM64_SCVTF_SIZE; break;
956 default: internal(file_line, "cgen_fp_from_int: invalid int size %u", int_op_size);
958 switch (fp_op_size) {
959 case OP_SIZE_2: mc |= ARM64_SCVTF_HALF; break;
960 case OP_SIZE_4: mc |= ARM64_SCVTF_SINGLE; break;
961 case OP_SIZE_8: mc |= ARM64_SCVTF_DOUBLE; break;
962 default: internal(file_line, "cgen_fp_from_int: invalid fp size %u", fp_op_size);
/* source is a GPR here, so no & 31 masking (unlike the FP-source paths) */
965 mc |= ((uint32_t)arg2[0]) << 5;
/*
 * Emit FCVT: convert between FP precisions (half/single/double), source
 * precision from_op_size, destination precision to_op_size.
 */
970 static bool attr_w cgen_fp_cvt(struct codegen_context *ctx, unsigned from_op_size, unsigned to_op_size)
973 uint8_t *arg1 = ctx->code_position;
974 uint8_t *arg2 = arg1 + arg_size(*arg1);
975 ctx->code_position = arg2 + arg_size(*arg2);
977 switch (from_op_size) {
978 case OP_SIZE_2: mc |= ARM64_FCVT_FROM_HALF; break;
979 case OP_SIZE_4: mc |= ARM64_FCVT_FROM_SINGLE; break;
980 case OP_SIZE_8: mc |= ARM64_FCVT_FROM_DOUBLE; break;
981 default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
983 switch (to_op_size) {
984 case OP_SIZE_2: mc |= ARM64_FCVT_TO_HALF; break;
985 case OP_SIZE_4: mc |= ARM64_FCVT_TO_SINGLE; break;
986 case OP_SIZE_8: mc |= ARM64_FCVT_TO_DOUBLE; break;
987 default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
990 mc |= ((uint32_t)arg2[0] & 31) << 5;
/*
 * Emit a conditional branch.  Short form: B.cond with a relocation.
 * Long form: an inverted-condition B.cond skipping over a following
 * unconditional branch (the `| 0x40` encodes the skip offset of 2
 * instructions in imm19), then the long-range branch with relocation.
 * NOTE(review): the switch on `length` and the final return are elided.
 */
995 static bool attr_w cgen_jmp_cond(struct codegen_context *ctx, unsigned aux, unsigned length)
997 int8_t cond = jmp_cond[aux];
998 if (unlikely(cond < 0))
999 internal(file_line, "cgen_jmp_cond: invalid condition %u", aux);
1003 g(add_relocation(ctx, JMP_SHORT, 0, NULL));
1004 cgen_four(ARM64_B_COND | cond);
1007 cgen_four(ARM64_B_COND | (cond ^ 1) | 0x40);
1008 g(add_relocation(ctx, JMP_LONG, 0, NULL));
1012 internal(file_line, "cgen_jmp_cond: invalid length %u", length);
/*
 * Emit CBZ/CBNZ on a register.  `aux` selects zero vs non-zero (the
 * branch head for the CBNZ case is elided).  Long form inverts the test
 * to hop over a following unconditional branch, as in cgen_jmp_cond.
 */
1017 static bool attr_w cgen_jmp_reg(struct codegen_context *ctx, unsigned size, unsigned aux, unsigned length)
1019 uint32_t mc = ARM64_CB;
1020 mc |= ARM64_CBZ_SIZE * (size == OP_SIZE_8);
1021 mc |= cget_one(ctx);
1026 mc |= ARM64_CBZ_CBNZ;
1029 internal(file_line, "cgen_jmp_reg: invalid condition %u", aux);
1034 g(add_relocation(ctx, JMP_SHORT, 1, NULL));
1038 cgen_four((mc ^ ARM64_CBZ_CBNZ) | 0x40);
1039 g(add_relocation(ctx, JMP_LONG, 1, NULL));
1043 internal(file_line, "cgen_jmp_reg: invalid length %u", length);
/*
 * Emit TBZ/TBNZ: branch on a single bit of a register.  Bits 0..4 of the
 * bit index go to b40 (bit 19), bit 5 to b5 (bit 31).  Short form uses
 * the 14-bit branch range (JMP_SHORTEST); long form inverts the test and
 * hops over a following unconditional branch.
 */
1048 static bool attr_w cgen_jmp_reg_bit(struct codegen_context *ctx, unsigned bit, bool jnz, unsigned length)
1050 uint32_t mc = ARM64_TB;
1051 mc |= ARM64_TB_TBNZ * (uint32_t)jnz;
1052 mc |= cget_one(ctx);
1053 mc |= (uint32_t)(bit & 31) << 19;
1054 mc |= (uint32_t)(bit >> 5 << 31);
1057 g(add_relocation(ctx, JMP_SHORTEST, 1, NULL));
1062 cgen_four((mc ^ ARM64_TB_TBNZ) | 0x40);
1063 g(add_relocation(ctx, JMP_LONG, 1, NULL));
1067 internal(file_line, "cgen_jmp_reg_bit: invalid length %u", length);
/*
 * Patch a branch instruction with the final label offset.  Offsets are in
 * instructions (bytes >> 2).  JMP_SHORTEST patches the 14-bit TBZ/TBNZ
 * field, JMP_SHORT the 19-bit B.cond/CBZ field, JMP_LONG the 26-bit B
 * field; each case range-checks first (error paths elided).
 */
1072 static bool attr_w resolve_relocation(struct codegen_context *ctx, struct relocation *reloc)
1075 int64_t offs = (int64_t)(ctx->label_to_pos[reloc->label_id] >> 2) - (int64_t)(reloc->position >> 2);
1076 switch (reloc->length) {
/* 14-bit signed field (TBZ/TBNZ) */
1078 if (unlikely(offs < -0x00002000) || unlikely(offs >= 0x00002000))
1080 memcpy(&mc, ctx->mcode + reloc->position, 4);
1082 mc |= ((uint32_t)offs << 5) & 0x0007ffe0;
1083 memcpy(ctx->mcode + reloc->position, &mc, 4);
/* 19-bit signed field (B.cond, CBZ/CBNZ) */
1086 if (unlikely(offs < -0x00040000) || unlikely(offs >= 0x00040000))
1088 memcpy(&mc, ctx->mcode + reloc->position, 4);
1090 mc |= ((uint32_t)offs << 5) & 0x00ffffe0;
1091 memcpy(ctx->mcode + reloc->position, &mc, 4);
/* 26-bit signed field (B) */
1094 if (unlikely(offs < -0x02000000) || unlikely(offs >= 0x02000000))
1096 memcpy(&mc, ctx->mcode + reloc->position, 4);
1098 mc |= offs & 0x03ffffffU;
1099 memcpy(ctx->mcode + reloc->position, &mc, 4);
1102 internal(file_line, "resolve_relocation: invalid relocation length %u", reloc->length);
/*
 * Top-level dispatcher: decode one portable codegen instruction and emit
 * the corresponding ARM64 machine code via the cgen_* helpers above.
 * Integer ops below OP_SIZE_4 are rejected up front where ARM64 has no
 * sub-word form.
 * NOTE(review): nearly all case labels (INSN_*) and break statements are
 * elided in this excerpt, and the function appears to continue past the
 * end of the visible chunk.
 */
1107 static bool attr_w cgen_insn(struct codegen_context *ctx, uint32_t insn)
1110 /*debug("insn: %08x (%s)", insn, da(ctx->fn,function)->function_name);*/
1111 switch (insn_opcode(insn)) {
1119 cgen_four(ARM64_RET);
1121 case INSN_CALL_INDIRECT:
1122 reg = cget_one(ctx);
1123 cgen_four(ARM64_BLR | (reg << 5));
1126 g(cgen_mov(ctx, insn_op_size(insn), false));
1129 g(cgen_mov(ctx, insn_op_size(insn), true));
1132 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1134 g(cgen_cmp(ctx, insn_op_size(insn), false));
1137 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1139 g(cgen_cmp(ctx, insn_op_size(insn), true));
1142 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1144 g(cgen_test(ctx, insn_op_size(insn)));
1147 case INSN_ALU_FLAGS:
1148 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1150 g(cgen_alu(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
1153 case INSN_ALU1_FLAGS:
1154 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1156 g(cgen_alu1(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
1159 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1161 g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn)));
1164 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1166 g(cgen_madd(ctx, insn_op_size(insn), insn_aux(insn)));
1169 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1171 g(cgen_set_cond(ctx, insn_op_size(insn), insn_aux(insn)));
1175 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1177 g(cgen_csel(ctx, INSN_CSEL_SEL, insn_op_size(insn), insn_aux(insn)));
1183 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1185 g(cgen_csel(ctx, insn_opcode(insn), insn_op_size(insn), insn_aux(insn)));
1189 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1191 g(cgen_ldp_stp(ctx, insn_opcode(insn) == INSN_LDP, insn_op_size(insn)));
1194 if (unlikely(insn_op_size(insn) != OP_SIZE_8))
1196 g(cgen_mov_mask(ctx, insn_aux(insn)));
1199 g(cgen_fp_cmp(ctx, insn_op_size(insn)));
1202 g(cgen_fp_alu(ctx, insn_op_size(insn), insn_aux(insn)));
1205 g(cgen_fp_alu1(ctx, insn_op_size(insn), insn_aux(insn)));
1207 case INSN_FP_TO_INT32:
1208 case INSN_FP_TO_INT64:
1209 g(cgen_fp_to_int(ctx, insn_opcode(insn) == INSN_FP_TO_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
1211 case INSN_FP_FROM_INT32:
1212 case INSN_FP_FROM_INT64:
1213 g(cgen_fp_from_int(ctx, insn_opcode(insn) == INSN_FP_FROM_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
1216 g(cgen_fp_cvt(ctx, insn_op_size(insn), insn_aux(insn)));
1219 g(add_relocation(ctx, JMP_LONG, 0, NULL));
1223 g(cgen_jmp_cond(ctx, insn_aux(insn), insn_jump_size(insn)));
1226 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1228 g(cgen_jmp_reg(ctx, insn_op_size(insn), insn_aux(insn), insn_jump_size(insn)));
1230 case INSN_JMP_REG_BIT:
1231 g(cgen_jmp_reg_bit(ctx, insn_aux(insn) & 63, insn_aux(insn) >> 6, insn_jump_size(insn)));
1233 case INSN_JMP_INDIRECT:
1234 reg = cget_one(ctx);
1235 cgen_four(ARM64_BR | (reg << 5));
1239 internal(file_line, "cgen_insn: invalid insn %08x", insn);