/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
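
/*
 * AArch64 instruction encodings: each ARM64_* constant is the base opcode of
 * an instruction (or instruction group) and the ARM64_*_<variant> constants
 * are modifier bits that the emitters below OR into it.
 */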
#define ARM64_AND_OR_EOR 0x0a000000U
#define ARM64_AND_OR_EOR_AND 0x00000000U
#define ARM64_AND_OR_EOR_REG_NOT 0x00200000U
#define ARM64_AND_OR_EOR_IMM_NOT 0x00400000U
#define ARM64_AND_OR_EOR_ORR 0x20000000U
#define ARM64_AND_OR_EOR_EOR 0x40000000U
#define ARM64_AND_OR_EOR_ANDS 0x60000000U
#define ARM64_AND_OR_EOR_SIZE 0x80000000U
#define ARM64_ADDSUB_SHIFTED 0x0b000000U
#define ARM64_ADDSUB_EXTENDED 0x0b200000U
#define ARM64_CNT 0x0e205800U
#define ARM64_ADDV 0x0e31b800U
#define ARM64_ADDV_SIZE 0x00c00000U
#define ARM64_ADDSUB_IMM 0x11000000U
#define ARM64_ADDSUB_IMM_SHIFT12 0x00400000U
#define ARM64_ADDSUB_SET_FLAGS 0x20000000U
#define ARM64_ADDSUB_SUB 0x40000000U
#define ARM64_ADDSUB_SIZE 0x80000000U
#define ARM64_AND_OR_EOR_IMM 0x12000000U
#define ARM64_MOVN_IMM16 0x12800000U
#define ARM64_MOVN_IMM16_SIZE 0x80000000U
#define ARM64_SUBFM 0x13000000U
#define ARM64_SUBFM_U 0x40000000U
#define ARM64_SUBFM_SIZE 0x80400000U
#define ARM64_EXTR 0x13800000U
#define ARM64_EXTR_SIZE 0x80400000U
#define ARM64_B 0x14000000U
#define ARM64_ADCSBC 0x1a000000U
#define ARM64_ADCSBC_SET_FLAGS 0x20000000U
#define ARM64_ADCSBC_SBC 0x40000000U
#define ARM64_ADCSBC_SIZE 0x80000000U
#define ARM64_CSEL 0x1a800000U
#define ARM64_CSEL_SEL 0x00000000U
#define ARM64_CSEL_INC 0x00000400U
#define ARM64_CSEL_INV 0x40000000U
#define ARM64_CSEL_NEG 0x40000400U
#define ARM64_CSEL_SIZE 0x80000000U
#define ARM64_CSET 0x1a9f07e0U
#define ARM64_CSET_SIZE 0x80000000U
#define ARM64_SUDIV 0x1ac00800U
#define ARM64_SUDIV_SDIV 0x00000400U
#define ARM64_SUDIV_SIZE 0x80000000U
#define ARM64_ROT 0x1ac02000U
#define ARM64_ROT_LSL 0x00000000U
#define ARM64_ROT_LSR 0x00000400U
#define ARM64_ROT_ASR 0x00000800U
#define ARM64_ROT_ROR 0x00000c00U
#define ARM64_ROT_SIZE 0x80000000U
#define ARM64_MADDSUB 0x1b000000U
#define ARM64_MADDSUB_MSUB 0x00008000U
#define ARM64_MADDSUB_SIZE 0x80000000U
#define ARM64_FP_ALU 0x1e200800U
#define ARM64_FP_ALU_MUL 0x00000000U
#define ARM64_FP_ALU_DIV 0x00001000U
#define ARM64_FP_ALU_ADD 0x00002000U
#define ARM64_FP_ALU_SUB 0x00003000U
#define ARM64_FP_ALU_SINGLE 0x00000000U
#define ARM64_FP_ALU_DOUBLE 0x00400000U
#define ARM64_FP_ALU_HALF 0x00c00000U
#define ARM64_FCMP 0x1e202000U
#define ARM64_FCMP_ZERO 0x00000008U
#define ARM64_FCMP_SINGLE 0x00000000U
#define ARM64_FCMP_DOUBLE 0x00400000U
#define ARM64_FCMP_HALF 0x00c00000U
#define ARM64_FP_ALU1 0x1e204000U
#define ARM64_FP_ALU1_MOV 0x00000000U
#define ARM64_FP_ALU1_NEG 0x00010000U
#define ARM64_FP_ALU1_SQRT 0x00018000U
#define ARM64_FP_ALU1_RINTN 0x00040000U
#define ARM64_FP_ALU1_RINTP 0x00048000U
#define ARM64_FP_ALU1_RINTM 0x00050000U
#define ARM64_FP_ALU1_RINTZ 0x00058000U
#define ARM64_FP_ALU1_SINGLE 0x00000000U
#define ARM64_FP_ALU1_DOUBLE 0x00400000U
#define ARM64_FP_ALU1_HALF 0x00c00000U
#define ARM64_SCVTF 0x1e220000U
#define ARM64_SCVTF_SINGLE 0x00000000U
#define ARM64_SCVTF_DOUBLE 0x00400000U
#define ARM64_SCVTF_HALF 0x00c00000U
#define ARM64_SCVTF_SIZE 0x80000000U
#define ARM64_FCVT 0x1e224000U
#define ARM64_FCVT_TO_SINGLE 0x00000000U
#define ARM64_FCVT_TO_DOUBLE 0x00008000U
#define ARM64_FCVT_TO_HALF 0x00018000U
#define ARM64_FCVT_FROM_SINGLE 0x00000000U
#define ARM64_FCVT_FROM_DOUBLE 0x00400000U
#define ARM64_FCVT_FROM_HALF 0x00c00000U
#define ARM64_FMOV 0x1e260000U
#define ARM64_FMOV_S_W 0x00010000U
#define ARM64_FMOV_D_X 0x80410000U
#define ARM64_FCVTZS 0x1e380000U
#define ARM64_FCVTZS_SINGLE 0x00000000U
#define ARM64_FCVTZS_DOUBLE 0x00400000U
#define ARM64_FCVTZS_HALF 0x00c00000U
#define ARM64_FCVTZS_SIZE 0x80000000U
#define ARM64_LDPSTP 0x28000000U
#define ARM64_LDPSTP_LD 0x00400000U
#define ARM64_LDPSTP_POST_INDEX 0x00800000U
#define ARM64_LDPSTP_IMM 0x01000000U
#define ARM64_LDPSTP_PRE_INDEX 0x01800000U
#define ARM64_LDPSTP_SIZE 0x80000000U
#define ARM64_MOV 0x2a0003e0U
#define ARM64_MOV_SIZE 0x80000000U
#define ARM64_CB 0x34000000U
#define ARM64_CBZ_CBNZ 0x01000000U
#define ARM64_CBZ_SIZE 0x80000000U
#define ARM64_TB 0x36000000U
#define ARM64_TB_TBNZ 0x01000000U
#define ARM64_LDST 0x38000000U
#define ARM64_LDST_POST_INDEX 0x00000400U
#define ARM64_LDST_PRE_INDEX 0x00000c00U
#define ARM64_LDST_2REGS 0x00200800U
#define ARM64_LDST_2REGS_UXTW 0x00004000U
#define ARM64_LDST_2REGS_NORMAL 0x00006000U
#define ARM64_LDST_2REGS_SCALE 0x00007000U
#define ARM64_LDST_2REGS_SXTW 0x0000c000U
#define ARM64_LDST_2REGS_SXTX 0x0000e000U
#define ARM64_LDST_ST 0x00000000U
#define ARM64_LDST_LD_UX 0x00400000U
#define ARM64_LDST_LD_SX 0x00800000U
#define ARM64_LDST_LD_SXW 0x00c00000U
#define ARM64_LDST_SCALED_12BIT 0x01000000U
#define ARM64_LDST_FP 0x04000000U
#define ARM64_LDST_SIZE1 0x40000000U
#define ARM64_LDST_SIZE 0xc0000000U
#define ARM64_LDST_FP_8 0x00000000U
#define ARM64_LDST_FP_16 0x40000000U
#define ARM64_LDST_FP_32 0x80000000U
#define ARM64_LDST_FP_64 0xc0000000U
#define ARM64_LDST_FP_128 0x00800000U
#define ARM64_MOV_IMM16 0x52800000U
#define ARM64_MOV_IMM16_SIZE 0x80000000U
#define ARM64_B_COND 0x54000000U
#define ARM64_REV 0x5ac00000U
#define ARM64_REV_1 0x00000000U
#define ARM64_REV_16 0x00000400U
#define ARM64_REV_32 0x00000800U
#define ARM64_REV_64 0x00000c00U
#define ARM64_REV_SIZE 0x80000000U
#define ARM64_CLZ 0x5ac01000U
#define ARM64_CLZ_SIZE 0x80000000U
#define ARM64_MOVK 0x72800000U
#define ARM64_MOVK_SIZE 0x80000000U
#define ARM64_SMADDSUBL 0x9b200000U
#define ARM64_SMADDSUBL_SUB 0x00008000U
#define ARM64_SMADDSUBL_U 0x00800000U
#define ARM64_SUMULH 0x9b407c00U
#define ARM64_SUMULH_U 0x00800000U
#define ARM64_BR 0xd61f0000U
#define ARM64_BLR 0xd63f0000U
#define ARM64_RET 0xd65f03c0U
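
/*
 * jmp_cond maps the generic condition codes (the aux values) to AArch64
 * condition encodings; -1 marks conditions that have no direct equivalent.
 * rot_codes maps the shift/rotate operations to the ARM64_ROT_* selectors,
 * again with -1 for unsupported entries.
 */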
static const int8_t jmp_cond[48] = {
	0x6, 0x7, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
	0x4, 0x5, -1, -1, 0xb, 0xa, 0xd, 0xc,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
	-1, -1, 0x6, 0x7, -1, -1, -1, -1,
};

static const int16_t rot_codes[8] = {
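
/* Emit a single load or store: ldst_mode selects ARM64_LDST_ST or one of the
   ARM64_LDST_LD_* variants (optionally combined with ARM64_LDST_FP), size is
   the operand size and address is the encoded addressing operand. */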
static bool attr_w cgen_ldr_str(struct codegen_context *ctx, unsigned ldst_mode, unsigned size, uint8_t reg, uint8_t *address)
	uint32_t mc = ARM64_LDST;
	mc |= ARM64_LDST_SIZE1 * size;
	if (address[0] >= ARG_ADDRESS_2 && address[0] <= ARG_ADDRESS_2_SXTW) {
		imm = get_imm(&address[3]);
		if (unlikely(imm != 0))
		mc |= ARM64_LDST_2REGS;
		if (address[0] == ARG_ADDRESS_2) {
			mc |= ARM64_LDST_2REGS_NORMAL;
		} else if ((unsigned)address[0] - ARG_ADDRESS_2 == size) {
			mc |= ARM64_LDST_2REGS_SCALE;
		} else if (address[0] == ARG_ADDRESS_2_UXTW) {
			mc |= ARM64_LDST_2REGS_UXTW;
		} else if (address[0] == ARG_ADDRESS_2_SXTW) {
			mc |= ARM64_LDST_2REGS_SXTW;
		mc |= (uint32_t)address[1] << 5;
		mc |= (uint32_t)address[2] << 16;
	imm = get_imm(&address[2]);
	if (imm >= -256 && imm <= 255) {
		if (address[0] == ARG_ADDRESS_1) {
		} else if (address[0] == ARG_ADDRESS_1_PRE_I) {
			mc |= ARM64_LDST_PRE_INDEX;
		} else if (address[0] == ARG_ADDRESS_1_POST_I) {
			mc |= ARM64_LDST_POST_INDEX;
		mc |= (uint32_t)address[1] << 5;
		mc |= (imm & 0x1ff) << 12;
	if (unlikely(address[0] != ARG_ADDRESS_1))
	if (unlikely((imm & ((1 << size) - 1)) != 0) || unlikely(imm < 0))
	if (unlikely(imm >= 0x1000))
	mc |= ARM64_LDST_SCALED_12BIT;
	mc |= (uint32_t)address[1] << 5;
	mc |= (imm & 0xfff) << 10;
	internal(file_line, "cgen_ldr_str: invalid address: %02x, %02x, %"PRIxMAX"", reg, address[0], (uintmax_t)imm);
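
/* Emit a move: sub-word register moves become SBFM/UBFM sign/zero extensions,
   small immediates use MOVZ/MOVN, FP registers use FMOV, and memory operands
   are delegated to cgen_ldr_str. */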
static bool attr_w cgen_mov(struct codegen_context *ctx, unsigned size, bool sx)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (size < (sx ? OP_SIZE_8 : OP_SIZE_4)) {
		mc = ARM64_SUBFM | (sx * ARM64_SUBFM_SIZE);
		mc |= (uint32_t)arg2[0] << 5;
		mc |= ((8U << size) - 1) << 10;
		mc |= ARM64_SUBFM_U * !sx;
	if (arg1[0] == R_SP || arg2[0] == R_SP) {
		mc = ARM64_ADDSUB_IMM;
		mc |= (uint32_t)arg2[0] << 5;
	/* !!! TODO: handle shifted register */
	mc |= (uint32_t)arg2[0] << 16;
	mc |= ARM64_MOV_SIZE * (size == OP_SIZE_8);
	if (arg2[0] == ARG_IMM) {
		if (unlikely(size < OP_SIZE_4))
			internal(file_line, "cgen_mov: unsupported size %u", size);
		imm = get_imm(&arg2[1]);
		if (imm >= 0 && imm < 0x10000) {
			mc = ARM64_MOV_IMM16;
			mc |= ARM64_MOV_IMM16_SIZE * (size == OP_SIZE_8);
			mc |= (uint32_t)imm << 5;
		if (~imm >= 0 && ~imm < 0x10000) {
			mc = ARM64_MOVN_IMM16;
			mc |= ARM64_MOVN_IMM16_SIZE * (size == OP_SIZE_8);
			mc |= (uint32_t)imm << 5;
		internal(file_line, "cgen_mov: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
	if (!sx || size == OP_SIZE_NATIVE)
		return cgen_ldr_str(ctx, ARM64_LDST_LD_UX, size, arg1[0], arg2);
	return cgen_ldr_str(ctx, ARM64_LDST_LD_SX, size, arg1[0], arg2);
	if (reg_is_fp(arg1[0])) {
		if (size < OP_SIZE_4)
		mc = ARM64_FMOV | (size == OP_SIZE_4 ? ARM64_FMOV_S_W : ARM64_FMOV_D_X);
		mc |= (uint32_t)arg2[0] << 5;
		if (reg_is_fp(arg2[0])) {
			mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_MOV;
			case OP_SIZE_2: mc |= ARM64_FP_ALU1_HALF; break;
			case OP_SIZE_4: mc |= ARM64_FP_ALU1_SINGLE; break;
			case OP_SIZE_8: mc |= ARM64_FP_ALU1_DOUBLE; break;
			default: internal(file_line, "cgen_mov: invalid size %u", size);
			mc |= ((uint32_t)(arg2[0] & 31)) << 5;
		return cgen_ldr_str(ctx, ARM64_LDST_LD_UX | ARM64_LDST_FP, size, arg1[0] & 31, arg2);
	return cgen_ldr_str(ctx, ARM64_LDST_ST, size, arg2[0], arg1);
	if (reg_is_fp(arg2[0])) {
		return cgen_ldr_str(ctx, ARM64_LDST_ST | ARM64_LDST_FP, size, arg2[0] & 31, arg1);
	if (arg2[0] == ARG_IMM) {
		imm = get_imm(&arg2[1]);
		return cgen_ldr_str(ctx, ARM64_LDST_ST, size, 0x1f, arg1);
	internal(file_line, "cgen_mov: invalid arguments %02x, %02x", arg1[0], arg2[0]);
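
/* Emit a three-operand ALU operation (arg1 = arg2 op arg3): multiply and
   widening multiply, high multiply, divide, add/subtract with carry, and
   add/subtract and logical operations with register, shifted, extended or
   immediate operands. */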
static bool attr_w cgen_alu_args(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu, bool not, uint8_t *arg1, uint8_t *arg2, uint8_t *arg3)
	if (unlikely(arg1[0] >= 32))
	if (unlikely(alu == ALU_MUL)) {
		if (size == OP_SIZE_8 &&
		    arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_SXTW &&
		    arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_SXTW) {
			mc = ARM64_SMADDSUBL;
		} else if (size == OP_SIZE_8 &&
		    arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_UXTW &&
		    arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_UXTW) {
			mc = ARM64_SMADDSUBL | ARM64_SMADDSUBL_U;
		if (unlikely(arg2[0] >= 32) && unlikely(arg3[0] >= 32))
		mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
	if (unlikely(arg2[0] >= 32))
	if (unlikely(alu == ALU_UMULH) || unlikely(alu == ALU_SMULH)) {
		if (unlikely(arg3[0] >= 32))
		if (unlikely(size != OP_SIZE_8))
		mc |= ARM64_SUMULH_U * (alu == ALU_UMULH);
		mc |= (uint32_t)arg2[0] << 5;
		mc |= (uint32_t)arg3[0] << 16;
	if (unlikely(alu == ALU_UDIV) || unlikely(alu == ALU_SDIV)) {
		if (unlikely(arg3[0] >= 32))
		mc |= ARM64_SUDIV_SDIV * (alu == ALU_SDIV);
		mc |= ARM64_SUDIV_SIZE * (size == OP_SIZE_8);
		mc |= (uint32_t)arg2[0] << 5;
		mc |= (uint32_t)arg3[0] << 16;
	if (unlikely(alu == ALU_ADC) || unlikely(alu == ALU_SBB)) {
		if (arg3[0] == ARG_IMM) {
			imm = get_imm(&arg3[1]);
			if (unlikely(imm != 0))
		} else if (unlikely(arg3[0] >= 32)) {
		mc |= ARM64_ADCSBC_SBC * (alu == ALU_SBB);
		mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
		mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
		mc |= (uint32_t)arg2[0] << 5;
		mc |= (uint32_t)arg3[0] << 16;
	if (alu == ALU_ADD || alu == ALU_SUB) {
		mc |= ARM64_ADDSUB_SUB * (alu == ALU_SUB);
		mc |= ARM64_ADDSUB_SHIFTED;
		if (arg3[0] == ARG_EXTENDED_REGISTER) {
			mc |= ARM64_ADDSUB_EXTENDED;
			mc |= (uint32_t)arg3[1] << 10;
		if (arg3[0] == ARG_SHIFTED_REGISTER) {
			if (unlikely((arg3[1] >> 6) == 3))
			mc |= ARM64_ADDSUB_SHIFTED;
			mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
			mc |= (uint32_t)(arg3[1] >> 6) << 22;
		if (arg3[0] == ARG_IMM) {
			mc |= ARM64_ADDSUB_IMM;
			mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
			imm = get_imm(&arg3[1]);
			if (likely(imm >= 0) && likely(imm < 0x1000)) {
				mc |= (uint32_t)arg2[0] << 5;
				mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
			if (likely(!(imm & 0xfff))) {
				imm = (uint64_t)imm >> 12;
				if (likely(imm < 0x1000)) {
					mc |= ARM64_ADDSUB_IMM_SHIFT12;
	if (alu == ALU_AND || alu == ALU_OR || alu == ALU_XOR) {
		if (arg3[0] != ARG_IMM)
			mc |= ARM64_AND_OR_EOR_REG_NOT;
		if (alu == ALU_AND) {
			mc |= writes_flags ? ARM64_AND_OR_EOR_ANDS : ARM64_AND_OR_EOR_AND;
			if (unlikely(writes_flags))
			mc |= alu == ALU_OR ? ARM64_AND_OR_EOR_ORR : ARM64_AND_OR_EOR_EOR;
		mc |= ARM64_AND_OR_EOR;
		if (arg3[0] == ARG_SHIFTED_REGISTER) {
			mc |= ARM64_AND_OR_EOR;
			mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
			mc |= (uint32_t)(arg3[1] >> 6) << 22;
		if (arg3[0] == ARG_IMM) {
			mc |= ARM64_AND_OR_EOR_SIZE * (size == OP_SIZE_8);
			mc |= ARM64_AND_OR_EOR_IMM;
			imm = get_imm(&arg3[1]);
			if (size == OP_SIZE_4)
			code = value_to_code(size, imm);
			if (unlikely(code < 0))
				internal(file_line, "cgen_alu_args: invalid immediate value %"PRIxMAX"", (uintmax_t)imm);
			mc |= (uint32_t)arg2[0] << 5;
			mc ^= (uint32_t)code << 10;
	internal(file_line, "cgen_alu_args: invalid arguments %02x, %02x, %02x, %u, %u", arg1[0], arg2[0], arg3[0], alu, writes_flags);
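
/* Compare (CMP/CMN): an add or subtract that is only used for its flags,
   with the result discarded. */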
static bool attr_w cgen_cmp(struct codegen_context *ctx, unsigned size, bool cmn)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	return cgen_alu_args(ctx, size, true, cmn ? ALU_ADD : ALU_SUB, false, &z, arg1, arg2);
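
/* Test (TST): an AND that is only used for its flags. */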
static bool attr_w cgen_test(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	return cgen_alu_args(ctx, size, true, ALU_AND, false, &z, arg1, arg2);
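
/* Translate the negated variants (ORN/ANDN/XORN) into the base operation
   plus a "not" flag and hand off to cgen_alu_args. */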
static bool attr_w cgen_alu(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
		case ALU_ORN: alu = ALU_OR; not = true; break;
		case ALU_ANDN: alu = ALU_AND; not = true; break;
		case ALU_XORN: alu = ALU_XOR; not = true; break;
	return cgen_alu_args(ctx, size, writes_flags, alu, not, arg1, arg2, arg3);
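
/* Single-operand ALU operations: some are rewritten as two-operand forms via
   cgen_alu_args, bit and byte reversal use the RBIT/REV16/REV32/REV64 group,
   and count-leading-zeros uses CLZ. */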
static bool attr_w cgen_alu1(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
		return cgen_alu_args(ctx, size, writes_flags, ALU_OR, true, arg1, &z, arg2);
		return cgen_alu_args(ctx, size, writes_flags, ALU_SUB, false, arg1, &z, arg2);
		return cgen_alu_args(ctx, size, writes_flags, ALU_SBB, false, arg1, &z, arg2);
	if (alu == ALU1_BREV) {
	} else if (alu == ALU1_BSWAP16) {
	} else if (alu == ALU1_BSWAP) {
		if (size == OP_SIZE_4)
		mc |= ARM64_REV_SIZE * (size == OP_SIZE_8);
		mc |= (uint32_t)arg2[0] << 5;
	mc |= ARM64_CLZ_SIZE * (size == OP_SIZE_8);
	mc |= (uint32_t)arg2[0] << 5;
	internal(file_line, "cgen_alu1: invalid arguments");
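
/* Shift or rotate by an immediate count: rotates are encoded as EXTR with
   both source operands equal, shifts as UBFM/SBFM bitfield moves. */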
static bool attr_w cgen_rot_imm(struct codegen_context *ctx, unsigned size, uint8_t rot, uint8_t *arg1, uint8_t *arg2, uint8_t imm)
	if (unlikely(rot == ROT_ROL) || rot == ROT_SHL) {
		imm &= (1U << (size + 3)) - 1;
	mc |= (rot == ROT_ROR || rot == ROT_ROL ? ARM64_EXTR_SIZE : ARM64_SUBFM_SIZE) * (size == OP_SIZE_8);
	mc |= (uint32_t)arg2[0] << 5;
	mc |= (uint32_t)arg2[0] << 16;
	mc |= (uint32_t)imm << 10;
	mc |= ARM64_SUBFM | ARM64_SUBFM_U;
	mc |= (uint32_t)arg2[0] << 5;
	mc |= (uint32_t)imm << 16;
	imm &= (1U << (size + 3)) - 1;
	mc |= (uint32_t)(imm << 10);
	mc |= (rot == ROT_SHR) * ARM64_SUBFM_U;
	mc |= (uint32_t)arg2[0] << 5;
	mc |= (uint32_t)imm << 16;
	mc |= ((1U << (size + 3)) - 1) << 10;
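
/* Shift or rotate by a register (the LSLV/LSRV/ASRV/RORV group); immediate
   counts are handled by cgen_rot_imm. */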
static bool attr_w cgen_rot(struct codegen_context *ctx, unsigned size, unsigned rot)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg3[0] == ARG_IMM)
		return cgen_rot_imm(ctx, size, rot, arg1, arg2, arg3[1]);
	arm_rot = rot_codes[rot];
	if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arm_rot < 0))
		internal(file_line, "cgen_rot: invalid arguments");
	mc |= ARM64_ROT_SIZE * (size == OP_SIZE_8);
	mc |= (uint32_t)arg2[0] << 5;
	mc |= (uint32_t)arg3[0] << 16;
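
/* Emit MADD/MSUB: arg1 = arg4 +/- arg2 * arg3. */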
static bool attr_w cgen_madd(struct codegen_context *ctx, unsigned size, bool sub)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	uint8_t *arg4 = arg3 + arg_size(*arg3);
	ctx->code_position = arg4 + arg_size(*arg4);
	if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arg4[0] >= 32))
		internal(file_line, "cgen_madd: invalid arguments");
	mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
	mc |= ARM64_MADDSUB_MSUB * sub;
	mc |= (uint32_t)arg2[0] << 5;
	mc |= (uint32_t)arg3[0] << 16;
	mc |= (uint32_t)arg4[0] << 10;
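
/* CSET: materialize a condition as 0 or 1 (CSINC from the zero register with
   the inverted condition). */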
static bool attr_w cgen_set_cond(struct codegen_context *ctx, unsigned size, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
	cond = jmp_cond[aux];
	if (unlikely(cond < 0) || unlikely(arg1[0] >= 31))
		internal(file_line, "cgen_set_cond: invalid arguments: %02x, %u, %u", arg1[0], size, aux);
	mc |= ARM64_CSET_SIZE * (size == OP_SIZE_8);
	mc |= (uint32_t)(cond ^ 1) << 12;
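
/* Conditional select and its variants (CSEL/CSINC/CSINV/CSNEG); immediate
   source operands are only accepted when they are zero. */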
static bool attr_w cgen_csel(struct codegen_context *ctx, uint32_t insn, unsigned size, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg2[0] == ARG_IMM) {
		imm = get_imm(&arg2[1]);
		if (unlikely(imm != 0))
	if (arg3[0] == ARG_IMM) {
		imm = get_imm(&arg3[1]);
		if (unlikely(imm != 0))
	cond = jmp_cond[aux];
	if (unlikely(cond < 0))
		case INSN_CSEL_SEL: mc |= ARM64_CSEL_SEL; break;
		case INSN_CSEL_INC: mc |= ARM64_CSEL_INC; break;
		case INSN_CSEL_INV: mc |= ARM64_CSEL_INV; break;
		case INSN_CSEL_NEG: mc |= ARM64_CSEL_NEG; break;
	mc |= ARM64_CSEL_SIZE * (size == OP_SIZE_8);
	mc |= (uint32_t)arg2[0] << 16;
	mc |= (uint32_t)arg3[0] << 5;
	mc |= (uint32_t)cond << 12;
	internal(file_line, "cgen_csel: invalid arguments");
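
/* Emit LDP or STP of a register pair with a scaled 7-bit offset and optional
   pre- or post-indexing. */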
static bool attr_w cgen_ldp_stp(struct codegen_context *ctx, bool ldr, unsigned size)
	uint8_t *arg1, *arg2, *arg3;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg2[0] == ARG_IMM) {
		imm = get_imm(&arg2[1]);
		if (unlikely(imm != 0))
	if (arg3[0] == ARG_IMM) {
		imm = get_imm(&arg3[1]);
		if (unlikely(imm != 0))
	arg2 = ctx->code_position;
	arg3 = arg2 + arg_size(*arg2);
	arg1 = arg3 + arg_size(*arg3);
	ctx->code_position = arg1 + arg_size(*arg1);
	mc |= ARM64_LDPSTP_LD * (uint32_t)ldr;
	mc |= ARM64_LDPSTP_SIZE * (size == OP_SIZE_8);
	if (arg1[0] == ARG_ADDRESS_1) {
		mc |= ARM64_LDPSTP_IMM;
	} else if (arg1[0] == ARG_ADDRESS_1_PRE_I) {
		mc |= ARM64_LDPSTP_PRE_INDEX;
	} else if (arg1[0] == ARG_ADDRESS_1_POST_I) {
		mc |= ARM64_LDPSTP_POST_INDEX;
	if (unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32))
	mc |= (uint32_t)arg3[0] << 10;
	mc |= (uint32_t)arg1[1] << 5;
	imm = get_imm(&arg1[2]);
	if (unlikely((imm & ((1 << size) - 1)) != 0))
	if (unlikely(imm < -64) || unlikely(imm > 63))
	mc |= (imm & 127) << 15;
	internal(file_line, "cgen_ldp_stp: invalid arguments %02x, %02x, %02x", arg1[0], arg2[0], arg3[0]);
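
/* MOVK: insert a 16-bit immediate into the 16-bit slot selected by aux. */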
static bool attr_w cgen_mov_mask(struct codegen_context *ctx, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] != ARG_IMM))
		internal(file_line, "cgen_mov_mask: bad arguments");
	mc |= ARM64_MOVK_SIZE;
	mc |= (uint32_t)aux << 21;
	imm = get_imm(&arg3[1]);
	if (unlikely(imm >= 0x10000))
		internal(file_line, "cgen_mov_mask: bad number");
	mc |= (imm & 0xffff) << 5;
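
/* FCMP of two floating-point registers. */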
static bool attr_w cgen_fp_cmp(struct codegen_context *ctx, unsigned op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
		case OP_SIZE_2: mc |= ARM64_FCMP_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FCMP_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FCMP_DOUBLE; break;
		default: internal(file_line, "cgen_fp_cmp: invalid size %u", op_size);
	mc |= ((uint32_t)(arg1[0] & 31)) << 5;
	mc |= ((uint32_t)(arg2[0] & 31)) << 16;
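
/* Two-operand floating-point arithmetic: FADD/FSUB/FMUL/FDIV in half, single
   or double precision. */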
static bool attr_w cgen_fp_alu(struct codegen_context *ctx, unsigned op_size, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
		case FP_ALU_ADD: mc |= ARM64_FP_ALU_ADD; break;
		case FP_ALU_SUB: mc |= ARM64_FP_ALU_SUB; break;
		case FP_ALU_MUL: mc |= ARM64_FP_ALU_MUL; break;
		case FP_ALU_DIV: mc |= ARM64_FP_ALU_DIV; break;
		default: internal(file_line, "cgen_fp_alu: invalid alu %u", aux);
		case OP_SIZE_2: mc |= ARM64_FP_ALU_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FP_ALU_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FP_ALU_DOUBLE; break;
		default: internal(file_line, "cgen_fp_alu: invalid size %u", op_size);
	mc |= ((uint32_t)(arg2[0] & 31)) << 5;
	mc |= ((uint32_t)(arg3[0] & 31)) << 16;
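
/* One-operand floating-point operations: FNEG, FSQRT and the FRINT* rounding
   variants, plus the vector CNT and ADDV instructions. */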
static bool attr_w cgen_fp_alu1(struct codegen_context *ctx, unsigned op_size, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
		case FP_ALU1_NEG: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_NEG; break;
		case FP_ALU1_SQRT: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_SQRT; break;
		case FP_ALU1_ROUND: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTN; break;
		case FP_ALU1_FLOOR: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTM; break;
		case FP_ALU1_CEIL: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTP; break;
		case FP_ALU1_TRUNC: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTZ; break;
		case FP_ALU1_VCNT8: mc = ARM64_CNT; goto do_regs;
		case FP_ALU1_ADDV: mc = ARM64_ADDV; goto do_regs;
		default: internal(file_line, "cgen_fp_alu1: invalid alu %u", aux);
		case OP_SIZE_2: mc |= ARM64_FP_ALU1_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FP_ALU1_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FP_ALU1_DOUBLE; break;
		default: internal(file_line, "cgen_fp_alu1: invalid size %u", op_size);
	mc |= ((uint32_t)(arg2[0] & 31)) << 5;
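
/* FCVTZS: convert floating point to a signed 32-bit or 64-bit integer,
   rounding toward zero. */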
static bool attr_w cgen_fp_to_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	switch (int_op_size) {
		case OP_SIZE_4: break;
		case OP_SIZE_8: mc |= ARM64_FCVTZS_SIZE; break;
		default: internal(file_line, "cgen_fp_to_int: invalid int size %u", int_op_size);
	switch (fp_op_size) {
		case OP_SIZE_2: mc |= ARM64_FCVTZS_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FCVTZS_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FCVTZS_DOUBLE; break;
		default: internal(file_line, "cgen_fp_to_int: invalid fp size %u", fp_op_size);
	mc |= ((uint32_t)(arg2[0] & 31)) << 5;
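
/* SCVTF: convert a signed 32-bit or 64-bit integer to floating point. */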
static bool attr_w cgen_fp_from_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	switch (int_op_size) {
		case OP_SIZE_4: break;
		case OP_SIZE_8: mc |= ARM64_SCVTF_SIZE; break;
		default: internal(file_line, "cgen_fp_from_int: invalid int size %u", int_op_size);
	switch (fp_op_size) {
		case OP_SIZE_2: mc |= ARM64_SCVTF_HALF; break;
		case OP_SIZE_4: mc |= ARM64_SCVTF_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_SCVTF_DOUBLE; break;
		default: internal(file_line, "cgen_fp_from_int: invalid fp size %u", fp_op_size);
	mc |= ((uint32_t)arg2[0]) << 5;
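
/* FCVT: convert between half, single and double precision. */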
static bool attr_w cgen_fp_cvt(struct codegen_context *ctx, unsigned from_op_size, unsigned to_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	switch (from_op_size) {
		case OP_SIZE_2: mc |= ARM64_FCVT_FROM_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FCVT_FROM_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FCVT_FROM_DOUBLE; break;
		default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
	switch (to_op_size) {
		case OP_SIZE_2: mc |= ARM64_FCVT_TO_HALF; break;
		case OP_SIZE_4: mc |= ARM64_FCVT_TO_SINGLE; break;
		case OP_SIZE_8: mc |= ARM64_FCVT_TO_DOUBLE; break;
		default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
	mc |= ((uint32_t)arg2[0] & 31) << 5;
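
/* Conditional branch: a short jump is a single B.cond; a long jump inverts
   the condition and branches over a following unconditional branch that
   carries the long-range relocation. */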
static bool attr_w cgen_jmp_cond(struct codegen_context *ctx, unsigned aux, unsigned length)
	int8_t cond = jmp_cond[aux];
	if (unlikely(cond < 0))
		internal(file_line, "cgen_jmp_cond: invalid condition %u", aux);
		g(add_relocation(ctx, JMP_SHORT, 0, NULL));
		cgen_four(ARM64_B_COND | cond);
		cgen_four(ARM64_B_COND | (cond ^ 1) | 0x40);
		g(add_relocation(ctx, JMP_LONG, 0, NULL));
	internal(file_line, "cgen_jmp_cond: invalid length %u", length);
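
/* CBZ/CBNZ: branch if a register is zero or non-zero; long jumps invert the
   test and skip over an unconditional branch. */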
static bool attr_w cgen_jmp_reg(struct codegen_context *ctx, unsigned size, unsigned aux, unsigned length)
	uint32_t mc = ARM64_CB;
	mc |= ARM64_CBZ_SIZE * (size == OP_SIZE_8);
	mc |= cget_one(ctx);
		mc |= ARM64_CBZ_CBNZ;
		internal(file_line, "cgen_jmp_reg: invalid condition %u", aux);
		g(add_relocation(ctx, JMP_SHORT, 1, NULL));
		cgen_four((mc ^ ARM64_CBZ_CBNZ) | 0x40);
		g(add_relocation(ctx, JMP_LONG, 1, NULL));
	internal(file_line, "cgen_jmp_reg: invalid length %u", length);
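
/* TBZ/TBNZ: branch on a single register bit; the bit number is split between
   the b40 and b5 instruction fields. */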
static bool attr_w cgen_jmp_reg_bit(struct codegen_context *ctx, unsigned bit, bool jnz, unsigned length)
	uint32_t mc = ARM64_TB;
	mc |= ARM64_TB_TBNZ * (uint32_t)jnz;
	mc |= cget_one(ctx);
	mc |= (uint32_t)(bit & 31) << 19;
	mc |= (uint32_t)(bit >> 5 << 31);
		g(add_relocation(ctx, JMP_SHORTEST, 1, NULL));
		cgen_four((mc ^ ARM64_TB_TBNZ) | 0x40);
		g(add_relocation(ctx, JMP_LONG, 1, NULL));
	internal(file_line, "cgen_jmp_reg_bit: invalid length %u", length);
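
/* Patch a branch with the final label offset; the relocation length selects
   the 14-bit, 19-bit or 26-bit offset field. */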
static bool attr_w resolve_relocation(struct codegen_context *ctx, struct relocation *reloc)
	int64_t offs = (int64_t)(ctx->label_to_pos[reloc->label_id] >> 2) - (int64_t)(reloc->position >> 2);
	switch (reloc->length) {
		if (unlikely(offs < -0x00002000) || unlikely(offs >= 0x00002000))
		memcpy(&mc, ctx->mcode + reloc->position, 4);
		mc |= ((uint32_t)offs << 5) & 0x0007ffe0;
		memcpy(ctx->mcode + reloc->position, &mc, 4);
		if (unlikely(offs < -0x00040000) || unlikely(offs >= 0x00040000))
		memcpy(&mc, ctx->mcode + reloc->position, 4);
		mc |= ((uint32_t)offs << 5) & 0x00ffffe0;
		memcpy(ctx->mcode + reloc->position, &mc, 4);
		if (unlikely(offs < -0x02000000) || unlikely(offs >= 0x02000000))
		memcpy(&mc, ctx->mcode + reloc->position, 4);
		mc |= offs & 0x03ffffffU;
		memcpy(ctx->mcode + reloc->position, &mc, 4);
		internal(file_line, "resolve_relocation: invalid relocation length %u", reloc->length);
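
/* Main dispatcher: decode one intermediate-code instruction and emit the
   corresponding AArch64 machine code. */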
static bool attr_w cgen_insn(struct codegen_context *ctx, uint32_t insn)
	/*debug("insn: %08x (%s)", insn, da(ctx->fn,function)->function_name);*/
	switch (insn_opcode(insn)) {
			cgen_four(ARM64_RET);
		case INSN_CALL_INDIRECT:
			reg = cget_one(ctx);
			cgen_four(ARM64_BLR | (reg << 5));
			g(cgen_mov(ctx, insn_op_size(insn), false));
			g(cgen_mov(ctx, insn_op_size(insn), true));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_cmp(ctx, insn_op_size(insn), false));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_cmp(ctx, insn_op_size(insn), true));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_test(ctx, insn_op_size(insn)));
		case INSN_ALU_FLAGS:
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_alu(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
		case INSN_ALU1_FLAGS:
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_alu1(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_madd(ctx, insn_op_size(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_set_cond(ctx, insn_op_size(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_csel(ctx, INSN_CSEL_SEL, insn_op_size(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_csel(ctx, insn_opcode(insn), insn_op_size(insn), insn_aux(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_ldp_stp(ctx, insn_opcode(insn) == INSN_LDP, insn_op_size(insn)));
			if (unlikely(insn_op_size(insn) != OP_SIZE_8))
			g(cgen_mov_mask(ctx, insn_aux(insn)));
			g(cgen_fp_cmp(ctx, insn_op_size(insn)));
			g(cgen_fp_alu(ctx, insn_op_size(insn), insn_aux(insn)));
			g(cgen_fp_alu1(ctx, insn_op_size(insn), insn_aux(insn)));
		case INSN_FP_TO_INT32:
		case INSN_FP_TO_INT64:
			g(cgen_fp_to_int(ctx, insn_opcode(insn) == INSN_FP_TO_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
		case INSN_FP_FROM_INT32:
		case INSN_FP_FROM_INT64:
			g(cgen_fp_from_int(ctx, insn_opcode(insn) == INSN_FP_FROM_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
			g(cgen_fp_cvt(ctx, insn_op_size(insn), insn_aux(insn)));
			g(add_relocation(ctx, JMP_LONG, 0, NULL));
			g(cgen_jmp_cond(ctx, insn_aux(insn), insn_jump_size(insn)));
			if (unlikely(insn_op_size(insn) < OP_SIZE_4))
			g(cgen_jmp_reg(ctx, insn_op_size(insn), insn_aux(insn), insn_jump_size(insn)));
		case INSN_JMP_REG_BIT:
			g(cgen_jmp_reg_bit(ctx, insn_aux(insn) & 63, insn_aux(insn) >> 6, insn_jump_size(insn)));
		case INSN_JMP_INDIRECT:
			reg = cget_one(ctx);
			cgen_four(ARM64_BR | (reg << 5));
		internal(file_line, "cgen_insn: invalid insn %08x", insn);