2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/*
 * AArch64 (A64) instruction encodings.
 *
 * Each ARM64_* macro is the 32-bit base opcode of one instruction or
 * instruction class; the companion macros (..._SIZE, ..._SUB, ..._U, ...)
 * are field bits that the emitters below OR into the base value.
 * Register numbers and immediates are shifted into place by the
 * cgen_* functions, then the finished word is written out.
 */
19 #define ARM64_AND_OR_EOR 0x0a000000U
20 #define ARM64_AND_OR_EOR_AND 0x00000000U
21 #define ARM64_AND_OR_EOR_REG_NOT 0x00200000U
22 #define ARM64_AND_OR_EOR_IMM_NOT 0x00400000U
23 #define ARM64_AND_OR_EOR_ORR 0x20000000U
24 #define ARM64_AND_OR_EOR_EOR 0x40000000U
25 #define ARM64_AND_OR_EOR_ANDS 0x60000000U
26 #define ARM64_AND_OR_EOR_SIZE 0x80000000U
27 #define ARM64_ADDSUB_SHIFTED 0x0b000000U
28 #define ARM64_ADDSUB_EXTENDED 0x0b200000U
29 #define ARM64_CNT 0x0e205800U
30 #define ARM64_ADDV 0x0e31b800U
31 #define ARM64_ADDV_SIZE 0x00c00000U
32 #define ARM64_ADDSUB_IMM 0x11000000U
33 #define ARM64_ADDSUB_IMM_SHIFT12 0x00400000U
34 #define ARM64_ADDSUB_SET_FLAGS 0x20000000U
35 #define ARM64_ADDSUB_SUB 0x40000000U
36 #define ARM64_ADDSUB_SIZE 0x80000000U
37 #define ARM64_AND_OR_EOR_IMM 0x12000000U
38 #define ARM64_MOVN_IMM16 0x12800000U
39 #define ARM64_MOVN_IMM16_SIZE 0x80000000U
40 #define ARM64_SUBFM 0x13000000U
41 #define ARM64_SUBFM_U 0x40000000U
42 #define ARM64_SUBFM_SIZE 0x80400000U
43 #define ARM64_EXTR 0x13800000U
44 #define ARM64_EXTR_SIZE 0x80400000U
45 #define ARM64_B 0x14000000U
46 #define ARM64_ADCSBC 0x1a000000U
47 #define ARM64_ADCSBC_SET_FLAGS 0x20000000U
48 #define ARM64_ADCSBC_SBC 0x40000000U
49 #define ARM64_ADCSBC_SIZE 0x80000000U
50 #define ARM64_CSEL 0x1a800000U
51 #define ARM64_CSEL_SEL 0x00000000U
52 #define ARM64_CSEL_INC 0x00000400U
53 #define ARM64_CSEL_INV 0x40000000U
54 #define ARM64_CSEL_NEG 0x40000400U
55 #define ARM64_CSEL_SIZE 0x80000000U
56 #define ARM64_CSET 0x1a9f07e0U
57 #define ARM64_CSET_SIZE 0x80000000U
58 #define ARM64_SUDIV 0x1ac00800U
59 #define ARM64_SUDIV_SDIV 0x00000400U
60 #define ARM64_SUDIV_SIZE 0x80000000U
61 #define ARM64_ROT 0x1ac02000U
62 #define ARM64_ROT_LSL 0x00000000U
63 #define ARM64_ROT_LSR 0x00000400U
64 #define ARM64_ROT_ASR 0x00000800U
65 #define ARM64_ROT_ROR 0x00000c00U
66 #define ARM64_ROT_SIZE 0x80000000U
67 #define ARM64_MADDSUB 0x1b000000U
68 #define ARM64_MADDSUB_MSUB 0x00008000U
69 #define ARM64_MADDSUB_SIZE 0x80000000U
/* scalar floating point: arithmetic, compare, conversions, FMOV */
70 #define ARM64_FP_ALU 0x1e200800U
71 #define ARM64_FP_ALU_MUL 0x00000000U
72 #define ARM64_FP_ALU_DIV 0x00001000U
73 #define ARM64_FP_ALU_ADD 0x00002000U
74 #define ARM64_FP_ALU_SUB 0x00003000U
75 #define ARM64_FP_ALU_SINGLE 0x00000000U
76 #define ARM64_FP_ALU_DOUBLE 0x00400000U
77 #define ARM64_FP_ALU_HALF 0x00c00000U
78 #define ARM64_FCMP 0x1e202000U
79 #define ARM64_FCMP_ZERO 0x00000008U
80 #define ARM64_FCMP_SINGLE 0x00000000U
81 #define ARM64_FCMP_DOUBLE 0x00400000U
82 #define ARM64_FCMP_HALF 0x00c00000U
83 #define ARM64_FP_ALU1 0x1e204000U
84 #define ARM64_FP_ALU1_NEG 0x00010000U
85 #define ARM64_FP_ALU1_SQRT 0x00018000U
86 #define ARM64_FP_ALU1_RINTN 0x00040000U
87 #define ARM64_FP_ALU1_RINTP 0x00048000U
88 #define ARM64_FP_ALU1_RINTM 0x00050000U
89 #define ARM64_FP_ALU1_RINTZ 0x00058000U
90 #define ARM64_FP_ALU1_SINGLE 0x00000000U
91 #define ARM64_FP_ALU1_DOUBLE 0x00400000U
92 #define ARM64_FP_ALU1_HALF 0x00c00000U
93 #define ARM64_SCVTF 0x1e220000U
94 #define ARM64_SCVTF_SINGLE 0x00000000U
95 #define ARM64_SCVTF_DOUBLE 0x00400000U
96 #define ARM64_SCVTF_HALF 0x00c00000U
97 #define ARM64_SCVTF_SIZE 0x80000000U
98 #define ARM64_FCVT 0x1e224000U
99 #define ARM64_FCVT_TO_SINGLE 0x00000000U
100 #define ARM64_FCVT_TO_DOUBLE 0x00008000U
101 #define ARM64_FCVT_TO_HALF 0x00018000U
102 #define ARM64_FCVT_FROM_SINGLE 0x00000000U
103 #define ARM64_FCVT_FROM_DOUBLE 0x00400000U
104 #define ARM64_FCVT_FROM_HALF 0x00c00000U
105 #define ARM64_FMOV 0x1e260000U
106 #define ARM64_FMOV_S_W 0x00010000U
107 #define ARM64_FMOV_D_X 0x80410000U
108 #define ARM64_FCVTZS 0x1e380000U
109 #define ARM64_FCVTZS_SINGLE 0x00000000U
110 #define ARM64_FCVTZS_DOUBLE 0x00400000U
111 #define ARM64_FCVTZS_HALF 0x00c00000U
112 #define ARM64_FCVTZS_SIZE 0x80000000U
/* loads and stores: register pairs (LDP/STP) and single registers */
113 #define ARM64_LDPSTP 0x28000000U
114 #define ARM64_LDPSTP_LD 0x00400000U
115 #define ARM64_LDPSTP_POST_INDEX 0x00800000U
116 #define ARM64_LDPSTP_IMM 0x01000000U
117 #define ARM64_LDPSTP_PRE_INDEX 0x01800000U
118 #define ARM64_LDPSTP_SIZE 0x80000000U
119 #define ARM64_MOV 0x2a0003e0U
120 #define ARM64_MOV_SIZE 0x80000000U
121 #define ARM64_CB 0x34000000U
122 #define ARM64_CBZ_CBNZ 0x01000000U
123 #define ARM64_CBZ_SIZE 0x80000000U
124 #define ARM64_TB 0x36000000U
125 #define ARM64_TB_TBNZ 0x01000000U
126 #define ARM64_LDST 0x38000000U
127 #define ARM64_LDST_POST_INDEX 0x00000400U
128 #define ARM64_LDST_PRE_INDEX 0x00000c00U
129 #define ARM64_LDST_2REGS 0x00200800U
130 #define ARM64_LDST_2REGS_UXTW 0x00004000U
131 #define ARM64_LDST_2REGS_NORMAL 0x00006000U
132 #define ARM64_LDST_2REGS_SCALE 0x00007000U
133 #define ARM64_LDST_2REGS_SXTW 0x0000c000U
134 #define ARM64_LDST_2REGS_SXTX 0x0000e000U
135 #define ARM64_LDST_ST 0x00000000U
136 #define ARM64_LDST_LD_UX 0x00400000U
137 #define ARM64_LDST_LD_SX 0x00800000U
138 #define ARM64_LDST_LD_SXW 0x00c00000U
139 #define ARM64_LDST_SCALED_12BIT 0x01000000U
140 #define ARM64_LDST_FP 0x04000000U
141 #define ARM64_LDST_SIZE1 0x40000000U
142 #define ARM64_LDST_SIZE 0xc0000000U
143 #define ARM64_LDST_FP_8 0x00000000U
144 #define ARM64_LDST_FP_16 0x40000000U
145 #define ARM64_LDST_FP_32 0x80000000U
146 #define ARM64_LDST_FP_64 0xc0000000U
147 #define ARM64_LDST_FP_128 0x00800000U
/* wide-immediate moves, conditional branch, bit operations, branches */
148 #define ARM64_MOV_IMM16 0x52800000U
149 #define ARM64_MOV_IMM16_SIZE 0x80000000U
150 #define ARM64_B_COND 0x54000000U
151 #define ARM64_REV 0x5ac00000U
152 #define ARM64_REV_1 0x00000000U
153 #define ARM64_REV_16 0x00000400U
154 #define ARM64_REV_32 0x00000800U
155 #define ARM64_REV_64 0x00000c00U
156 #define ARM64_REV_SIZE 0x80000000U
157 #define ARM64_CLZ 0x5ac01000U
158 #define ARM64_CLZ_SIZE 0x80000000U
159 #define ARM64_MOVK 0x72800000U
160 #define ARM64_MOVK_SIZE 0x80000000U
161 #define ARM64_SMADDSUBL 0x9b200000U
162 #define ARM64_SMADDSUBL_SUB 0x00008000U
163 #define ARM64_SMADDSUBL_U 0x00800000U
164 #define ARM64_SUMULH 0x9b407c00U
165 #define ARM64_SUMULH_U 0x00800000U
166 #define ARM64_BR 0xd61f0000U
167 #define ARM64_BLR 0xd63f0000U
168 #define ARM64_RET 0xd65f03c0U
/*
 * Maps the portable condition code (the `aux` field of conditional
 * instructions) to the 4-bit A64 condition field; -1 marks conditions
 * with no direct A64 encoding.  NOTE(review): the second half of the
 * table (index >= 32) appears to hold the mappings used for
 * floating-point comparisons — confirm against the COND_* definitions.
 */
170 static const int8_t jmp_cond[48] = {
171 0x6, 0x7, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
172 0x4, 0x5, -1, -1, 0xb, 0xa, 0xd, 0xc,
173 -1, -1, -1, -1, -1, -1, -1, -1,
174 -1, -1, -1, -1, -1, -1, -1, -1,
175 -1, -1, 0x3, 0x2, 0x0, 0x1, 0x9, 0x8,
176 -1, -1, 0x6, 0x7, -1, -1, -1, -1,
179 static const int16_t rot_codes[8] = {
/*
 * Emit a single-register load/store (LDR/STR family).
 *
 * ldst_mode — ARM64_LDST_ST / ARM64_LDST_LD_* bits, optionally with
 *             ARM64_LDST_FP for an FP/SIMD register.
 * size      — log2 of the access size, placed in the size field.
 * reg       — register number of the data register.
 * address   — serialized addressing operand; three shapes are handled:
 *             base+index register (with optional UXTW/SXTW extend or
 *             scaling), base + 9-bit signed unscaled immediate (with
 *             optional pre/post-index), and base + 12-bit scaled
 *             unsigned immediate.  Anything else aborts via internal().
 */
190 static bool attr_w cgen_ldr_str(struct codegen_context *ctx, unsigned ldst_mode, unsigned size, uint8_t reg, uint8_t *address)
193 uint32_t mc = ARM64_LDST;
195 mc |= ARM64_LDST_SIZE1 * size;
/* register + register addressing; the displacement must be zero */
196 if (address[0] >= ARG_ADDRESS_2 && address[0] <= ARG_ADDRESS_2_SXTW) {
197 imm = get_imm(&address[3]);
198 if (unlikely(imm != 0))
200 mc |= ARM64_LDST_2REGS;
201 if (address[0] == ARG_ADDRESS_2) {
202 mc |= ARM64_LDST_2REGS_NORMAL;
/* index pre-scaled by the access size */
203 } else if ((unsigned)address[0] - ARG_ADDRESS_2 == size) {
204 mc |= ARM64_LDST_2REGS_SCALE;
205 } else if (address[0] == ARG_ADDRESS_2_UXTW) {
206 mc |= ARM64_LDST_2REGS_UXTW;
207 } else if (address[0] == ARG_ADDRESS_2_SXTW) {
208 mc |= ARM64_LDST_2REGS_SXTW;
213 mc |= (uint32_t)address[1] << 5;
214 mc |= (uint32_t)address[2] << 16;
/* base + 9-bit signed unscaled offset, optionally pre/post-indexed */
218 imm = get_imm(&address[2]);
219 if (imm >= -256 && imm <= 255) {
220 if (address[0] == ARG_ADDRESS_1) {
221 } else if (address[0] == ARG_ADDRESS_1_PRE_I) {
222 mc |= ARM64_LDST_PRE_INDEX;
223 } else if (address[0] == ARG_ADDRESS_1_POST_I) {
224 mc |= ARM64_LDST_POST_INDEX;
229 mc |= (uint32_t)address[1] << 5;
230 mc |= (imm & 0x1ff) << 12;
/* base + 12-bit unsigned offset, scaled by the access size */
234 if (unlikely(address[0] != ARG_ADDRESS_1))
236 if (unlikely((imm & ((1 << size) - 1)) != 0) || unlikely(imm < 0))
239 if (unlikely(imm >= 0x1000))
241 mc |= ARM64_LDST_SCALED_12BIT;
243 mc |= (uint32_t)address[1] << 5;
244 mc |= (imm & 0xfff) << 10;
249 internal(file_line, "cgen_ldr_str: invalid address: %02x, %02x, %"PRIxMAX"", reg, address[0], (uintmax_t)imm);
/*
 * Emit a move instruction (INSN_MOV / INSN_MOVSX) for the two operands
 * serialized at ctx->code_position.
 *
 * Cases handled: register-to-register (ADD-immediate form when SP is
 * involved, since ORR cannot address SP; plain MOV/ORR otherwise),
 * 16-bit immediates via MOVZ/MOVN, loads and stores through
 * cgen_ldr_str() (with `sx` selecting a sign-extending load), GPR<->FP
 * transfers via FMOV, and storing an immediate zero via the zero
 * register (0x1f).
 */
253 static bool attr_w cgen_mov(struct codegen_context *ctx, unsigned size, bool sx)
257 uint8_t *arg1 = ctx->code_position;
258 uint8_t *arg2 = arg1 + arg_size(*arg1);
259 ctx->code_position = arg2 + arg_size(*arg2);
263 internal(file_line, "cgen_mov: unsupported sign extension");
264 if (unlikely(size < OP_SIZE_4))
265 internal(file_line, "cgen_mov: unsupported size %u", size);
/* moves to/from SP must use the ADD-immediate encoding */
266 if (arg1[0] == R_SP || arg2[0] == R_SP) {
267 mc = ARM64_ADDSUB_IMM;
269 mc |= (uint32_t)arg2[0] << 5;
271 /* !!! TODO: handle shifted register */
274 mc |= (uint32_t)arg2[0] << 16;
276 mc |= ARM64_MOV_SIZE * (size == OP_SIZE_8);
280 if (arg2[0] == ARG_IMM) {
281 if (unlikely(size < OP_SIZE_4))
282 internal(file_line, "cgen_mov: unsupported size %u", size);
283 imm = get_imm(&arg2[1]);
/* MOVZ for a plain 16-bit value ... */
284 if (imm >= 0 && imm < 0x10000) {
285 mc = ARM64_MOV_IMM16;
286 mc |= ARM64_MOV_IMM16_SIZE * (size == OP_SIZE_8);
288 mc |= (uint32_t)imm << 5;
/* ... MOVN when the bitwise complement fits in 16 bits */
292 if (~imm >= 0 && ~imm < 0x10000) {
294 mc = ARM64_MOVN_IMM16;
295 mc |= ARM64_MOVN_IMM16_SIZE * (size == OP_SIZE_8);
297 mc |= (uint32_t)imm << 5;
301 internal(file_line, "cgen_mov: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
303 if (!sx || size == OP_SIZE_NATIVE)
304 return cgen_ldr_str(ctx, ARM64_LDST_LD_UX, size, arg1[0], arg2);
306 return cgen_ldr_str(ctx, ARM64_LDST_LD_SX, size, arg1[0], arg2);
310 if (size < OP_SIZE_4)
/* GPR <-> FP register transfer */
312 mc = ARM64_FMOV | (size == OP_SIZE_4 ? ARM64_FMOV_S_W : ARM64_FMOV_D_X);
314 mc |= (uint32_t)arg2[0] << 5;
320 return cgen_ldr_str(ctx, ARM64_LDST_LD_UX | ARM64_LDST_FP, size, arg1[0] & 31, arg2);
323 return cgen_ldr_str(ctx, ARM64_LDST_ST, size, arg2[0], arg1);
326 return cgen_ldr_str(ctx, ARM64_LDST_ST | ARM64_LDST_FP, size, arg2[0] & 31, arg1);
328 if (arg2[0] == ARG_IMM) {
329 imm = get_imm(&arg2[1]);
/* store immediate zero using the zero register as the source */
331 return cgen_ldr_str(ctx, ARM64_LDST_ST, size, 0x1f, arg1);
334 internal(file_line, "cgen_mov: invalid arguments %02x, %02x", arg1[0], arg2[0]);
/*
 * Core ALU emitter shared by cgen_alu/cgen_alu1/cgen_cmp/cgen_test.
 *
 * Selects the A64 encoding from the portable `alu` operation and the
 * operand kinds: MUL (MADD, or SMADDL/UMADDL when both sources are
 * sign-/zero-extended 32-bit registers), SMULH/UMULH, SDIV/UDIV,
 * ADC/SBC, ADD/SUB (shifted-register, extended-register, or 12-bit
 * immediate with optional LSL #12), and AND/OR/XOR (register, shifted
 * register, or bitmask immediate).  `not` selects the negated register
 * forms (ORN/BIC/EON); `writes_flags` selects the flag-setting
 * variants.  Invalid combinations abort via internal().
 */
338 static bool attr_w cgen_alu_args(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu, bool not, uint8_t *arg1, uint8_t *arg2, uint8_t *arg3)
343 if (unlikely(arg1[0] >= 32))
345 if (unlikely(alu == ALU_MUL)) {
/* 32x32->64 multiplies: SMADDL when both sources are SXTW ... */
347 if (size == OP_SIZE_8 &&
348 arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_SXTW &&
349 arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_SXTW) {
352 mc = ARM64_SMADDSUBL;
/* ... UMADDL when both are UXTW */
353 } else if (size == OP_SIZE_8 &&
354 arg2[0] == ARG_EXTENDED_REGISTER && arg2[1] == ARG_EXTEND_UXTW &&
355 arg3[0] == ARG_EXTENDED_REGISTER && arg3[1] == ARG_EXTEND_UXTW) {
358 mc = ARM64_SMADDSUBL | ARM64_SMADDSUBL_U;
360 if (unlikely(arg2[0] >= 32) && unlikely(arg3[0] >= 32))
362 mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
370 if (unlikely(arg2[0] >= 32))
/* high half of a 64x64 multiply; only defined for OP_SIZE_8 */
372 if (unlikely(alu == ALU_UMULH) || unlikely(alu == ALU_SMULH)) {
373 if (unlikely(arg3[0] >= 32))
375 if (unlikely(size != OP_SIZE_8))
378 mc |= ARM64_SUMULH_U * (alu == ALU_UMULH);
380 mc |= (uint32_t)arg2[0] << 5;
381 mc |= (uint32_t)arg3[0] << 16;
385 if (unlikely(alu == ALU_UDIV) || unlikely(alu == ALU_SDIV)) {
386 if (unlikely(arg3[0] >= 32))
389 mc |= ARM64_SUDIV_SDIV * (alu == ALU_SDIV);
390 mc |= ARM64_SUDIV_SIZE * (size == OP_SIZE_8);
392 mc |= (uint32_t)arg2[0] << 5;
393 mc |= (uint32_t)arg3[0] << 16;
/* add/subtract with carry; only an immediate of zero is encodable */
397 if (unlikely(alu == ALU_ADC) || unlikely(alu == ALU_SBB)) {
398 if (arg3[0] == ARG_IMM) {
399 imm = get_imm(&arg3[1]);
400 if (unlikely(imm != 0))
403 } else if (unlikely(arg3[0] >= 32)) {
407 mc |= ARM64_ADCSBC_SBC * (alu == ALU_SBB);
409 mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
411 mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
413 mc |= (uint32_t)arg2[0] << 5;
414 mc |= (uint32_t)arg3[0] << 16;
418 if (alu == ALU_ADD || alu == ALU_SUB) {
420 mc |= ARM64_ADDSUB_SUB * (alu == ALU_SUB);
422 mc |= ARM64_ADDSUB_SHIFTED;
425 if (arg3[0] == ARG_EXTENDED_REGISTER) {
426 mc |= ARM64_ADDSUB_EXTENDED;
427 mc |= (uint32_t)arg3[1] << 10;
431 if (arg3[0] == ARG_SHIFTED_REGISTER) {
/* shift type 3 (ROR) is not valid for add/sub */
432 if (unlikely((arg3[1] >> 6) == 3))
434 mc |= ARM64_ADDSUB_SHIFTED;
435 mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
436 mc |= (uint32_t)(arg3[1] >> 6) << 22;
440 if (arg3[0] == ARG_IMM) {
441 mc |= ARM64_ADDSUB_IMM;
442 mc |= ARM64_ADCSBC_SET_FLAGS * (uint32_t)!!writes_flags;
443 imm = get_imm(&arg3[1]);
444 if (likely(imm >= 0) && likely(imm < 0x1000)) {
448 mc |= (uint32_t)arg2[0] << 5;
449 mc |= ARM64_ADCSBC_SIZE * (size == OP_SIZE_8);
/* try the immediate shifted left by 12 bits */
453 if (likely(!(imm & 0xfff))) {
454 imm = (uint64_t)imm >> 12;
455 if (likely(imm < 0x1000)) {
456 mc |= ARM64_ADDSUB_IMM_SHIFT12;
463 if (alu == ALU_AND || alu == ALU_OR || alu == ALU_XOR) {
/* `not` on a register operand selects ORN/BIC/EON */
466 if (arg3[0] != ARG_IMM)
467 mc |= ARM64_AND_OR_EOR_REG_NOT;
469 if (alu == ALU_AND) {
470 mc |= writes_flags ? ARM64_AND_OR_EOR_ANDS : ARM64_AND_OR_EOR_AND;
/* ORR/EOR have no flag-setting form */
472 if (unlikely(writes_flags))
474 mc |= alu == ALU_OR ? ARM64_AND_OR_EOR_ORR : ARM64_AND_OR_EOR_EOR;
477 mc |= ARM64_AND_OR_EOR;
480 if (arg3[0] == ARG_SHIFTED_REGISTER) {
481 mc |= ARM64_AND_OR_EOR;
482 mc |= (uint32_t)(arg3[1] & ARG_SHIFT_AMOUNT) << 10;
483 mc |= (uint32_t)(arg3[1] >> 6) << 22;
/* logical immediate: must be encodable as an A64 bitmask pattern */
487 if (arg3[0] == ARG_IMM) {
489 mc |= ARM64_AND_OR_EOR_SIZE * (size == OP_SIZE_8);
490 mc |= ARM64_AND_OR_EOR_IMM;
491 imm = get_imm(&arg3[1]);
494 if (size == OP_SIZE_4)
496 code = value_to_code(size, imm);
497 if (unlikely(code < 0))
498 internal(file_line, "cgen_alu_args: invalid immediate value %"PRIxMAX"", (uintmax_t)imm);
500 mc |= (uint32_t)arg2[0] << 5;
501 mc ^= (uint32_t)code << 10;
509 internal(file_line, "cgen_alu_args: invalid arguments %02x, %02x, %02x, %u, %u", arg1[0], arg2[0], arg3[0], alu, writes_flags);
/*
 * Emit CMP (SUBS) or CMN (ADDS): a flag-setting add/sub whose result is
 * discarded into `z` (the zero-register destination declared above).
 */
513 static bool attr_w cgen_cmp(struct codegen_context *ctx, unsigned size, bool cmn)
516 uint8_t *arg1 = ctx->code_position;
517 uint8_t *arg2 = arg1 + arg_size(*arg1);
518 ctx->code_position = arg2 + arg_size(*arg2);
519 return cgen_alu_args(ctx, size, true, cmn ? ALU_ADD : ALU_SUB, false, &z, arg1, arg2);
/*
 * Emit TST (ANDS with the result discarded into the zero-register
 * destination `z`): only the condition flags are produced.
 */
522 static bool attr_w cgen_test(struct codegen_context *ctx, unsigned size)
525 uint8_t *arg1 = ctx->code_position;
526 uint8_t *arg2 = arg1 + arg_size(*arg1);
527 ctx->code_position = arg2 + arg_size(*arg2);
528 return cgen_alu_args(ctx, size, true, ALU_AND, false, &z, arg1, arg2);
/*
 * Decode a three-operand ALU instruction and forward it to
 * cgen_alu_args().  The negated operations ORN/ANDN/XORN are rewritten
 * to their base operation with the `not` flag set, which selects the
 * ORN/BIC/EON register forms there.
 */
531 static bool attr_w cgen_alu(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
534 uint8_t *arg1 = ctx->code_position;
535 uint8_t *arg2 = arg1 + arg_size(*arg1);
536 uint8_t *arg3 = arg2 + arg_size(*arg2);
537 ctx->code_position = arg3 + arg_size(*arg3);
540 case ALU_ORN: alu = ALU_OR; not = true; break;
541 case ALU_ANDN: alu = ALU_AND; not = true; break;
542 case ALU_XORN: alu = ALU_XOR; not = true; break;
544 return cgen_alu_args(ctx, size, writes_flags, alu, not, arg1, arg2, arg3);
/*
 * Emit a unary ALU instruction.  Most operations are synthesized from
 * binary ones via cgen_alu_args(): NOT as ORN with zero, NEG as SUB
 * from zero, NGC as SBC from zero, INC/DEC as ADD/SUB of the constant
 * one (`one_imm`).  Byte-reverse operations use the REV family and
 * count-leading-zeros uses CLZ directly.
 */
547 static bool attr_w cgen_alu1(struct codegen_context *ctx, unsigned size, unsigned writes_flags, unsigned alu)
551 uint8_t one_imm[9] = { ARG_IMM, 1, 0, 0, 0, 0, 0, 0, 0 };
552 uint8_t *arg1 = ctx->code_position;
553 uint8_t *arg2 = arg1 + arg_size(*arg1);
554 ctx->code_position = arg2 + arg_size(*arg2);
/* NOT: ORN of the zero register with the source */
557 return cgen_alu_args(ctx, size, writes_flags, ALU_OR, true, arg1, &z, arg2);
/* NEG: subtract from zero */
559 return cgen_alu_args(ctx, size, writes_flags, ALU_SUB, false, arg1, &z, arg2);
/* NGC: subtract-with-borrow from zero */
561 return cgen_alu_args(ctx, size, writes_flags, ALU_SBB, false, arg1, &z, arg2);
563 return cgen_alu_args(ctx, size, writes_flags, ALU_ADD, false, arg1, arg2, one_imm);
565 return cgen_alu_args(ctx, size, writes_flags, ALU_SUB, false, arg1, arg2, one_imm);
/* bit/byte reversal via the REV instruction family */
570 if (alu == ALU1_BREV) {
572 } else if (alu == ALU1_BSWAP16) {
574 } else if (alu == ALU1_BSWAP) {
575 if (size == OP_SIZE_4)
580 mc |= ARM64_REV_SIZE * (size == OP_SIZE_8);
582 mc |= (uint32_t)arg2[0] << 5;
587 mc |= ARM64_CLZ_SIZE * (size == OP_SIZE_8);
589 mc |= (uint32_t)arg2[0] << 5;
593 internal(file_line, "cgen_alu1: invalid arguments");
/*
 * Emit a shift/rotate by a constant amount.  Rotates use EXTR with the
 * same register as both sources; shifts use the S/UBFM (bitfield move)
 * encodings, with ARM64_SUBFM_U selecting the unsigned (logical)
 * variant.  ROL and SHL are first rewritten in terms of ROR/LSR-style
 * encodings; the amount is masked to the operand width
 * ((1 << (size + 3)) - 1 bits).
 */
598 static bool attr_w cgen_rot_imm(struct codegen_context *ctx, unsigned size, uint8_t rot, uint8_t *arg1, uint8_t *arg2, uint8_t imm)
/* ROL/SHL are converted to the complementary right-operating forms */
601 if (unlikely(rot == ROT_ROL) || rot == ROT_SHL) {
604 imm &= (1U << (size + 3)) - 1;
606 mc |= (rot == ROT_ROR || rot == ROT_ROL ? ARM64_EXTR_SIZE : ARM64_SUBFM_SIZE) * (size == OP_SIZE_8);
/* rotate: EXTR with identical source registers */
612 mc |= (uint32_t)arg2[0] << 5;
613 mc |= (uint32_t)arg2[0] << 16;
614 mc |= (uint32_t)imm << 10;
617 mc |= ARM64_SUBFM | ARM64_SUBFM_U;
619 mc |= (uint32_t)arg2[0] << 5;
620 mc |= (uint32_t)imm << 16;
622 imm &= (1U << (size + 3)) - 1;
623 mc |= (uint32_t)(imm << 10);
/* right shifts: UBFM for logical, SBFM for arithmetic */
628 mc |= (rot == ROT_SHR) * ARM64_SUBFM_U;
630 mc |= (uint32_t)arg2[0] << 5;
631 mc |= (uint32_t)imm << 16;
632 mc |= ((1U << (size + 3)) - 1) << 10;
/*
 * Emit a shift/rotate.  A constant amount is delegated to
 * cgen_rot_imm(); a register amount uses the ARM64_ROT variable-shift
 * class, with the shift type looked up in rot_codes[] (negative means
 * the operation has no direct encoding).
 */
639 static bool attr_w cgen_rot(struct codegen_context *ctx, unsigned size, unsigned rot)
643 uint8_t *arg1 = ctx->code_position;
644 uint8_t *arg2 = arg1 + arg_size(*arg1);
645 uint8_t *arg3 = arg2 + arg_size(*arg2);
646 ctx->code_position = arg3 + arg_size(*arg3);
647 if (arg3[0] == ARG_IMM)
648 return cgen_rot_imm(ctx, size, rot, arg1, arg2, arg3[1]);
649 arm_rot = rot_codes[rot];
650 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arm_rot < 0))
651 internal(file_line, "cgen_rot: invalid arguments");
653 mc |= ARM64_ROT_SIZE * (size == OP_SIZE_8);
656 mc |= (uint32_t)arg2[0] << 5;
657 mc |= (uint32_t)arg3[0] << 16;
/*
 * Emit MADD/MSUB: arg1 = arg4 +/- arg2 * arg3.  The encoding places
 * arg2 in Rn (<<5), arg3 in Rm (<<16) and the addend arg4 in Ra (<<10);
 * `sub` selects MSUB.
 */
662 static bool attr_w cgen_madd(struct codegen_context *ctx, unsigned size, bool sub)
665 uint8_t *arg1 = ctx->code_position;
666 uint8_t *arg2 = arg1 + arg_size(*arg1);
667 uint8_t *arg3 = arg2 + arg_size(*arg2);
668 uint8_t *arg4 = arg3 + arg_size(*arg3);
669 ctx->code_position = arg4 + arg_size(*arg4);
670 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32) || unlikely(arg4[0] >= 32))
671 internal(file_line, "cgen_madd: invalid arguments");
673 mc |= ARM64_MADDSUB_SIZE * (size == OP_SIZE_8);
674 mc |= ARM64_MADDSUB_MSUB * sub;
676 mc |= (uint32_t)arg2[0] << 5;
677 mc |= (uint32_t)arg3[0] << 16;
678 mc |= (uint32_t)arg4[0] << 10;
/*
 * Emit CSET: set the destination to 1 if the condition holds, else 0.
 * CSET is an alias of CSINC with the inverted condition, hence the
 * (cond ^ 1) in the condition field.
 */
683 static bool attr_w cgen_set_cond(struct codegen_context *ctx, unsigned size, unsigned aux)
687 uint8_t *arg1 = ctx->code_position;
688 ctx->code_position = arg1 + arg_size(*arg1);
689 cond = jmp_cond[aux];
690 if (unlikely(cond < 0) || unlikely(arg1[0] >= 31))
691 internal(file_line, "cgen_set_cond: invalid arguments: %02x, %u, %u", arg1[0], size, aux);
693 mc |= ARM64_CSET_SIZE * (size == OP_SIZE_8);
694 mc |= (uint32_t)(cond ^ 1) << 12;
/*
 * Emit a conditional select: CSEL, CSINC, CSINV or CSNEG according to
 * the `insn` opcode.  Source operands given as immediates must be zero
 * (they are then encoded via the zero register); the condition comes
 * from jmp_cond[aux].
 */
700 static bool attr_w cgen_csel(struct codegen_context *ctx, uint32_t insn, unsigned size, unsigned aux)
706 uint8_t *arg1 = ctx->code_position;
707 uint8_t *arg2 = arg1 + arg_size(*arg1);
708 uint8_t *arg3 = arg2 + arg_size(*arg2);
709 ctx->code_position = arg3 + arg_size(*arg3);
710 if (arg2[0] == ARG_IMM) {
711 imm = get_imm(&arg2[1]);
712 if (unlikely(imm != 0))
716 if (arg3[0] == ARG_IMM) {
717 imm = get_imm(&arg3[1]);
718 if (unlikely(imm != 0))
722 cond = jmp_cond[aux];
723 if (unlikely(cond < 0))
727 case INSN_CSEL_SEL: mc |= ARM64_CSEL_SEL; break;
728 case INSN_CSEL_INC: mc |= ARM64_CSEL_INC; break;
729 case INSN_CSEL_INV: mc |= ARM64_CSEL_INV; break;
730 case INSN_CSEL_NEG: mc |= ARM64_CSEL_NEG; break;
734 mc |= ARM64_CSEL_SIZE * (size == OP_SIZE_8);
736 mc |= (uint32_t)arg2[0] << 16;
737 mc |= (uint32_t)arg3[0] << 5;
738 mc |= (uint32_t)cond << 12;
742 internal(file_line, "cgen_csel: invalid arguments");
/*
 * Emit LDP/STP (load/store register pair).  The operand order differs
 * between load and store, hence the two decode sequences below.
 * Addressing supports signed-offset, pre-index and post-index forms;
 * the 7-bit immediate must be a multiple of the access size and within
 * [-64, 63] after scaling.  Immediate source operands must be zero
 * (encoded with the zero register).
 */
745 static bool attr_w cgen_ldp_stp(struct codegen_context *ctx, bool ldr, unsigned size)
747 uint8_t *arg1, *arg2, *arg3;
752 arg1 = ctx->code_position;
753 arg2 = arg1 + arg_size(*arg1);
754 arg3 = arg2 + arg_size(*arg2);
755 ctx->code_position = arg3 + arg_size(*arg3);
756 if (arg2[0] == ARG_IMM) {
757 imm = get_imm(&arg2[1]);
758 if (unlikely(imm != 0))
762 if (arg3[0] == ARG_IMM) {
763 imm = get_imm(&arg3[1]);
764 if (unlikely(imm != 0))
/* load form: the two destination registers precede the address */
769 arg2 = ctx->code_position;
770 arg3 = arg2 + arg_size(*arg2);
771 arg1 = arg3 + arg_size(*arg3);
772 ctx->code_position = arg1 + arg_size(*arg1);
775 mc |= ARM64_LDPSTP_LD * (uint32_t)ldr;
776 mc |= ARM64_LDPSTP_SIZE * (size == OP_SIZE_8);
777 if (arg1[0] == ARG_ADDRESS_1) {
778 mc |= ARM64_LDPSTP_IMM;
779 } else if (arg1[0] == ARG_ADDRESS_1_PRE_I) {
780 mc |= ARM64_LDPSTP_PRE_INDEX;
781 } else if (arg1[0] == ARG_ADDRESS_1_POST_I) {
782 mc |= ARM64_LDPSTP_POST_INDEX;
786 if (unlikely(arg2[0] >= 32) || unlikely(arg3[0] >= 32))
789 mc |= (uint32_t)arg3[0] << 10;
790 mc |= (uint32_t)arg1[1] << 5;
791 imm = get_imm(&arg1[2]);
/* offset must be aligned to the access size and fit in 7 bits scaled */
792 if (unlikely((imm & ((1 << size) - 1)) != 0))
795 if (unlikely(imm < -64) || unlikely(imm > 63))
797 mc |= (imm & 127) << 15;
802 internal(file_line, "cgen_ldp_stp: invalid arguments %02x, %02x, %02x", arg1[0], arg2[0], arg3[0]);
/*
 * Emit MOVK: insert a 16-bit immediate into a 64-bit register at the
 * 16-bit lane selected by `aux` (the hw field, << 21), keeping the
 * other bits.  The immediate must fit in 16 bits.
 */
806 static bool attr_w cgen_mov_mask(struct codegen_context *ctx, unsigned aux)
810 uint8_t *arg1 = ctx->code_position;
811 uint8_t *arg2 = arg1 + arg_size(*arg1);
812 uint8_t *arg3 = arg2 + arg_size(*arg2);
813 ctx->code_position = arg3 + arg_size(*arg3);
814 if (unlikely(arg1[0] >= 32) || unlikely(arg2[0] >= 32) || unlikely(arg3[0] != ARG_IMM))
815 internal(file_line, "cgen_mov_mask: bad arguments");
817 mc |= ARM64_MOVK_SIZE;
818 mc |= (uint32_t)aux << 21;
819 imm = get_imm(&arg3[1]);
820 if (unlikely(imm >= 0x10000))
821 internal(file_line, "cgen_mov_mask: bad number");
822 mc |= (imm & 0xffff) << 5;
/*
 * Emit FCMP between two FP registers; op_size selects the half/single/
 * double precision variant of the encoding.
 */
828 static bool attr_w cgen_fp_cmp(struct codegen_context *ctx, unsigned op_size)
831 uint8_t *arg1 = ctx->code_position;
832 uint8_t *arg2 = arg1 + arg_size(*arg1);
833 ctx->code_position = arg2 + arg_size(*arg2);
836 case OP_SIZE_2: mc |= ARM64_FCMP_HALF; break;
837 case OP_SIZE_4: mc |= ARM64_FCMP_SINGLE; break;
838 case OP_SIZE_8: mc |= ARM64_FCMP_DOUBLE; break;
839 default: internal(file_line, "cgen_fp_cmp: invalid size %u", op_size);
841 mc |= ((uint32_t)(arg1[0] & 31)) << 5;
842 mc |= ((uint32_t)(arg2[0] & 31)) << 16;
/*
 * Emit a two-source FP arithmetic instruction (FADD/FSUB/FMUL/FDIV);
 * op_size selects half/single/double precision.
 */
847 static bool attr_w cgen_fp_alu(struct codegen_context *ctx, unsigned op_size, unsigned aux)
850 uint8_t *arg1 = ctx->code_position;
851 uint8_t *arg2 = arg1 + arg_size(*arg1);
852 uint8_t *arg3 = arg2 + arg_size(*arg2);
853 ctx->code_position = arg3 + arg_size(*arg3);
856 case FP_ALU_ADD: mc |= ARM64_FP_ALU_ADD; break;
857 case FP_ALU_SUB: mc |= ARM64_FP_ALU_SUB; break;
858 case FP_ALU_MUL: mc |= ARM64_FP_ALU_MUL; break;
859 case FP_ALU_DIV: mc |= ARM64_FP_ALU_DIV; break;
860 default: internal(file_line, "cgen_fp_alu: invalid alu %u", aux);
863 case OP_SIZE_2: mc |= ARM64_FP_ALU_HALF; break;
864 case OP_SIZE_4: mc |= ARM64_FP_ALU_SINGLE; break;
865 case OP_SIZE_8: mc |= ARM64_FP_ALU_DOUBLE; break;
866 default: internal(file_line, "cgen_fp_alu: invalid size %u", op_size);
869 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
870 mc |= ((uint32_t)(arg3[0] & 31)) << 16;
/*
 * Emit a single-source FP instruction: FNEG, FSQRT, or the FRINT*
 * rounding variants; additionally the SIMD CNT (per-byte popcount) and
 * ADDV (horizontal add) instructions, which skip the precision field
 * (do_regs path).
 */
875 static bool attr_w cgen_fp_alu1(struct codegen_context *ctx, unsigned op_size, unsigned aux)
878 uint8_t *arg1 = ctx->code_position;
879 uint8_t *arg2 = arg1 + arg_size(*arg1);
880 ctx->code_position = arg2 + arg_size(*arg2);
882 case FP_ALU1_NEG: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_NEG; break;
883 case FP_ALU1_SQRT: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_SQRT; break;
884 case FP_ALU1_ROUND: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTN; break;
885 case FP_ALU1_FLOOR: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTM; break;
886 case FP_ALU1_CEIL: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTP; break;
887 case FP_ALU1_TRUNC: mc = ARM64_FP_ALU1 | ARM64_FP_ALU1_RINTZ; break;
888 case FP_ALU1_VCNT8: mc = ARM64_CNT; goto do_regs;
889 case FP_ALU1_ADDV: mc = ARM64_ADDV; goto do_regs;
890 default: internal(file_line, "cgen_fp_alu1: invalid alu %u", aux);
893 case OP_SIZE_2: mc |= ARM64_FP_ALU1_HALF; break;
894 case OP_SIZE_4: mc |= ARM64_FP_ALU1_SINGLE; break;
895 case OP_SIZE_8: mc |= ARM64_FP_ALU1_DOUBLE; break;
896 default: internal(file_line, "cgen_fp_alu1: invalid size %u", op_size);
900 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
/*
 * Emit FCVTZS: FP to signed integer conversion, rounding toward zero.
 * int_op_size selects the 32/64-bit destination GPR; fp_op_size the
 * half/single/double source precision.
 */
905 static bool attr_w cgen_fp_to_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
908 uint8_t *arg1 = ctx->code_position;
909 uint8_t *arg2 = arg1 + arg_size(*arg1);
910 ctx->code_position = arg2 + arg_size(*arg2);
912 switch (int_op_size) {
913 case OP_SIZE_4: break;
914 case OP_SIZE_8: mc |= ARM64_FCVTZS_SIZE; break;
915 default: internal(file_line, "cgen_fp_to_int: invalid int size %u", int_op_size);
917 switch (fp_op_size) {
918 case OP_SIZE_2: mc |= ARM64_FCVTZS_HALF; break;
919 case OP_SIZE_4: mc |= ARM64_FCVTZS_SINGLE; break;
920 case OP_SIZE_8: mc |= ARM64_FCVTZS_DOUBLE; break;
921 default: internal(file_line, "cgen_fp_to_int: invalid fp size %u", fp_op_size);
924 mc |= ((uint32_t)(arg2[0] & 31)) << 5;
/*
 * Emit SCVTF: signed integer to FP conversion.  int_op_size selects
 * the 32/64-bit source GPR; fp_op_size the half/single/double
 * destination precision.
 */
929 static bool attr_w cgen_fp_from_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
932 uint8_t *arg1 = ctx->code_position;
933 uint8_t *arg2 = arg1 + arg_size(*arg1);
934 ctx->code_position = arg2 + arg_size(*arg2);
936 switch (int_op_size) {
937 case OP_SIZE_4: break;
938 case OP_SIZE_8: mc |= ARM64_SCVTF_SIZE; break;
939 default: internal(file_line, "cgen_fp_from_int: invalid int size %u", int_op_size);
941 switch (fp_op_size) {
942 case OP_SIZE_2: mc |= ARM64_SCVTF_HALF; break;
943 case OP_SIZE_4: mc |= ARM64_SCVTF_SINGLE; break;
944 case OP_SIZE_8: mc |= ARM64_SCVTF_DOUBLE; break;
945 default: internal(file_line, "cgen_fp_from_int: invalid fp size %u", fp_op_size);
948 mc |= ((uint32_t)arg2[0]) << 5;
/*
 * Emit FCVT: conversion between FP precisions (half/single/double);
 * the encoding combines a FROM_* source field and a TO_* destination
 * field.
 */
953 static bool attr_w cgen_fp_cvt(struct codegen_context *ctx, unsigned from_op_size, unsigned to_op_size)
956 uint8_t *arg1 = ctx->code_position;
957 uint8_t *arg2 = arg1 + arg_size(*arg1);
958 ctx->code_position = arg2 + arg_size(*arg2);
960 switch (from_op_size) {
961 case OP_SIZE_2: mc |= ARM64_FCVT_FROM_HALF; break;
962 case OP_SIZE_4: mc |= ARM64_FCVT_FROM_SINGLE; break;
963 case OP_SIZE_8: mc |= ARM64_FCVT_FROM_DOUBLE; break;
964 default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
966 switch (to_op_size) {
967 case OP_SIZE_2: mc |= ARM64_FCVT_TO_HALF; break;
968 case OP_SIZE_4: mc |= ARM64_FCVT_TO_SINGLE; break;
969 case OP_SIZE_8: mc |= ARM64_FCVT_TO_DOUBLE; break;
970 default: internal(file_line, "cgen_fp_cvt: invalid types %u, %u", from_op_size, to_op_size);
973 mc |= ((uint32_t)arg2[0] & 31) << 5;
/*
 * Emit a conditional jump.  Short form: a single B.cond with a
 * relocation.  Long form: a B.cond with the inverted condition that
 * skips over (the 0x40 displacement, i.e. one instruction past) an
 * unconditional long branch carrying the relocation.
 */
978 static bool attr_w cgen_jmp_cond(struct codegen_context *ctx, unsigned aux, unsigned length)
980 int8_t cond = jmp_cond[aux];
981 if (unlikely(cond < 0))
982 internal(file_line, "cgen_jmp_cond: invalid condition %u", aux);
986 g(add_relocation(ctx, JMP_SHORT, 0, NULL));
987 cgen_four(ARM64_B_COND | cond);
/* inverted condition skips the long branch that follows */
990 cgen_four(ARM64_B_COND | (cond ^ 1) | 0x40);
991 g(add_relocation(ctx, JMP_LONG, 0, NULL));
995 internal(file_line, "cgen_jmp_cond: invalid length %u", length);
/*
 * Emit a compare-and-branch on a register: CBZ, or CBNZ when `aux`
 * selects the non-zero condition.  The long form inverts the Z/NZ bit
 * to skip (displacement 0x40) over a following long unconditional
 * branch carrying the relocation.
 */
1000 static bool attr_w cgen_jmp_reg(struct codegen_context *ctx, unsigned size, unsigned aux, unsigned length)
1002 uint32_t mc = ARM64_CB;
1003 mc |= ARM64_CBZ_SIZE * (size == OP_SIZE_8);
1004 mc |= cget_one(ctx);
1009 mc |= ARM64_CBZ_CBNZ;
1012 internal(file_line, "cgen_jmp_reg: invalid condition %u", aux);
1017 g(add_relocation(ctx, JMP_SHORT, 1, NULL));
/* inverted CBZ/CBNZ skips the long branch that follows */
1021 cgen_four((mc ^ ARM64_CBZ_CBNZ) | 0x40);
1022 g(add_relocation(ctx, JMP_LONG, 1, NULL));
1026 internal(file_line, "cgen_jmp_reg: invalid length %u", length);
/*
 * Emit a test-bit-and-branch: TBZ/TBNZ on bit `bit` of a register.
 * The low 5 bits of the bit number go into bits 19..23; bit 5 of the
 * bit number is the b5 field at bit 31.  TBZ/TBNZ only has a 14-bit
 * offset, so the short form uses JMP_SHORTEST; the long form inverts
 * the TBZ/TBNZ bit to skip over a following long branch.
 */
1031 static bool attr_w cgen_jmp_reg_bit(struct codegen_context *ctx, unsigned bit, bool jnz, unsigned length)
1033 uint32_t mc = ARM64_TB;
1034 mc |= ARM64_TB_TBNZ * (uint32_t)jnz;
1035 mc |= cget_one(ctx);
1036 mc |= (uint32_t)(bit & 31) << 19;
1037 mc |= (uint32_t)(bit >> 5 << 31);
1040 g(add_relocation(ctx, JMP_SHORTEST, 1, NULL));
/* inverted TBZ/TBNZ skips the long branch that follows */
1045 cgen_four((mc ^ ARM64_TB_TBNZ) | 0x40);
1046 g(add_relocation(ctx, JMP_LONG, 1, NULL));
1050 internal(file_line, "cgen_jmp_reg_bit: invalid length %u", length);
/*
 * Patch a branch instruction with the final label offset.  Offsets are
 * in instruction words (byte positions >> 2).  The three cases are the
 * 14-bit TBZ/TBNZ offset, the 19-bit B.cond/CBZ/CBNZ offset (both
 * placed at bit 5) and the 26-bit unconditional B offset at bit 0;
 * each range is checked before patching.
 */
1055 static bool attr_w resolve_relocation(struct codegen_context *ctx, struct relocation *reloc)
1058 int64_t offs = (int64_t)(ctx->label_to_pos[reloc->label_id] >> 2) - (int64_t)(reloc->position >> 2);
1059 switch (reloc->length) {
/* 14-bit offset (TBZ/TBNZ) */
1061 if (unlikely(offs < -0x00002000) || unlikely(offs >= 0x00002000))
1063 memcpy(&mc, ctx->mcode + reloc->position, 4);
1065 mc |= ((uint32_t)offs << 5) & 0x0007ffe0;
1066 memcpy(ctx->mcode + reloc->position, &mc, 4);
/* 19-bit offset (B.cond, CBZ/CBNZ) */
1069 if (unlikely(offs < -0x00040000) || unlikely(offs >= 0x00040000))
1071 memcpy(&mc, ctx->mcode + reloc->position, 4);
1073 mc |= ((uint32_t)offs << 5) & 0x00ffffe0;
1074 memcpy(ctx->mcode + reloc->position, &mc, 4);
/* 26-bit offset (unconditional B) */
1077 if (unlikely(offs < -0x02000000) || unlikely(offs >= 0x02000000))
1079 memcpy(&mc, ctx->mcode + reloc->position, 4);
1081 mc |= offs & 0x03ffffffU;
1082 memcpy(ctx->mcode + reloc->position, &mc, 4);
1085 internal(file_line, "resolve_relocation: invalid relocation length %u", reloc->length);
/*
 * Top-level dispatcher: decode one portable instruction word and call
 * the matching cgen_* emitter.  Sizes below OP_SIZE_4 are rejected for
 * integer operations (A64 ALU operates on 32/64-bit registers only).
 */
1090 static bool attr_w cgen_insn(struct codegen_context *ctx, uint32_t insn)
1093 /*debug("insn: %08x (%s)", insn, da(ctx->fn,function)->function_name);*/
1094 switch (insn_opcode(insn)) {
1102 cgen_four(ARM64_RET);
1104 case INSN_CALL_INDIRECT:
1105 reg = cget_one(ctx);
1106 cgen_four(ARM64_BLR | (reg << 5));
1109 g(cgen_mov(ctx, insn_op_size(insn), false));
1112 g(cgen_mov(ctx, insn_op_size(insn), true));
1115 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1117 g(cgen_cmp(ctx, insn_op_size(insn), false));
1120 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1122 g(cgen_cmp(ctx, insn_op_size(insn), true));
1125 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1127 g(cgen_test(ctx, insn_op_size(insn)))
1130 case INSN_ALU_FLAGS:
1131 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1133 g(cgen_alu(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
1136 case INSN_ALU1_FLAGS:
1137 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1139 g(cgen_alu1(ctx, insn_op_size(insn), insn_writes_flags(insn), insn_aux(insn)));
1142 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1144 g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn)));
1147 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1149 g(cgen_madd(ctx, insn_op_size(insn), insn_aux(insn)));
1152 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1154 g(cgen_set_cond(ctx, insn_op_size(insn), insn_aux(insn)));
1158 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1160 g(cgen_csel(ctx, INSN_CSEL_SEL, insn_op_size(insn), insn_aux(insn)));
1166 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1168 g(cgen_csel(ctx, insn_opcode(insn), insn_op_size(insn), insn_aux(insn)));
1172 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1174 g(cgen_ldp_stp(ctx, insn_opcode(insn) == INSN_LDP, insn_op_size(insn)));
1177 if (unlikely(insn_op_size(insn) != OP_SIZE_8))
1179 g(cgen_mov_mask(ctx, insn_aux(insn)));
1182 g(cgen_fp_cmp(ctx, insn_op_size(insn)));
1185 g(cgen_fp_alu(ctx, insn_op_size(insn), insn_aux(insn)));
1188 g(cgen_fp_alu1(ctx, insn_op_size(insn), insn_aux(insn)));
1190 case INSN_FP_TO_INT32:
1191 case INSN_FP_TO_INT64:
1192 g(cgen_fp_to_int(ctx, insn_opcode(insn) == INSN_FP_TO_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
1194 case INSN_FP_FROM_INT32:
1195 case INSN_FP_FROM_INT64:
1196 g(cgen_fp_from_int(ctx, insn_opcode(insn) == INSN_FP_FROM_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
1199 g(cgen_fp_cvt(ctx, insn_op_size(insn), insn_aux(insn)));
1202 g(add_relocation(ctx, JMP_LONG, 0, NULL));
1206 g(cgen_jmp_cond(ctx, insn_aux(insn), insn_jump_size(insn)));
1209 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1211 g(cgen_jmp_reg(ctx, insn_op_size(insn), insn_aux(insn), insn_jump_size(insn)));
1213 case INSN_JMP_REG_BIT:
1214 g(cgen_jmp_reg_bit(ctx, insn_aux(insn) & 63, insn_aux(insn) >> 6, insn_jump_size(insn)));
1216 case INSN_JMP_INDIRECT:
1217 reg = cget_one(ctx);
1218 cgen_four(ARM64_BR | (reg << 5));
1222 internal(file_line, "cgen_insn: invalid insn %08x", insn);