2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 static bool attr_w gen_jump(struct codegen_context *ctx, int32_t jmp_offset, unsigned op_size, unsigned cond, unsigned reg1, unsigned reg2);
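/*
 * gen_alu_upcall: implement an operation by calling a C helper from
 * cg_upcall_vector_s instead of generating inline code.  Register-allocated
 * operand slots are spilled back to the frame first; the helper receives the
 * frame address of slot_1, then either an immediate (when slot_2 is a
 * compile-time constant), the frame address of slot_2, or nothing, and the
 * frame address of slot_r.  The constant case also passes the address of the
 * typed helper and appears to dispatch through the INT_binary_const_*
 * entries selected by op_size.  Afterwards the result slot is reloaded if it
 * lives in a register, and the helper's return value in R_RET0 acts as a
 * "no overflow" flag: a zero result branches to label_ovf.
 */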
21 static bool attr_w gen_alu_upcall(struct codegen_context *ctx, size_t upcall, unsigned op_size, frame_t slot_1, frame_t slot_2, frame_t slot_r, uint32_t label_ovf)
23 if (slot_is_register(ctx, slot_1))
24 g(spill(ctx, slot_1));
25 if (slot_2 != NO_FRAME_T && slot_is_register(ctx, slot_2))
26 g(spill(ctx, slot_2));
27 g(gen_upcall_start(ctx, frame_t_is_const(slot_2) ? 4 : slot_2 != NO_FRAME_T ? 3 : 2));
28 g(gen_frame_address(ctx, slot_1, 0, R_ARG0));
29 g(gen_upcall_argument(ctx, 0));
30 if (frame_t_is_const(slot_2)) {
32 g(gen_load_constant(ctx, R_ARG1, frame_t_get_const(slot_2)));
33 g(gen_upcall_argument(ctx, 1));
34 g(gen_frame_address(ctx, slot_r, 0, R_ARG2));
35 g(gen_upcall_argument(ctx, 2));
36 g(gen_get_upcall_pointer(ctx, upcall, R_ARG3));
37 g(gen_upcall_argument(ctx, 3));
38 x_offs = offsetof(struct cg_upcall_vector_s, INT_binary_const_int8_t) + op_size * sizeof(void (*)(void));
39 g(gen_upcall(ctx, x_offs, 4));
40 } else if (slot_2 != NO_FRAME_T) {
41 g(gen_frame_address(ctx, slot_2, 0, R_ARG1));
42 g(gen_upcall_argument(ctx, 1));
43 g(gen_frame_address(ctx, slot_r, 0, R_ARG2));
44 g(gen_upcall_argument(ctx, 2));
45 g(gen_upcall(ctx, upcall, 3));
47 g(gen_frame_address(ctx, slot_r, 0, R_ARG1));
48 g(gen_upcall_argument(ctx, 1));
49 g(gen_upcall(ctx, upcall, 2));
51 if (slot_is_register(ctx, slot_r))
52 g(unspill(ctx, slot_r));
54 g(gen_jmp_on_zero(ctx, OP_SIZE_1, R_RET0, COND_E, label_ovf));
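/*
 * gen_alu_typed_upcall: same as above for helpers that exist once per
 * integer width.  The caller passes the offset of the *_int8_t member and
 * op_size function-pointer slots are added to reach the entry for the
 * actual width, which assumes the int8_t/int16_t/int32_t/... variants are
 * adjacent members of cg_upcall_vector_s (e.g. op_size == OP_SIZE_4 would
 * select the int32_t entry).
 */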
58 static bool attr_w gen_alu_typed_upcall(struct codegen_context *ctx, size_t upcall, unsigned op_size, frame_t slot_1, frame_t slot_2, frame_t slot_r, uint32_t label_ovf)
60 upcall += op_size * sizeof(void (*)(void));
61 return gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, label_ovf);
68 #define MODE_ARRAY_LEN_GT 4
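/*
 * gen_alu: generate code for a binary operation.  mode selects the opcode
 * family (MODE_FIXED / MODE_INT / MODE_BOOL), op_size the operand width and
 * op the operation; the operands come from slot_1 and slot_2 (slot_2 may
 * also encode a compile-time constant) and the result is written to slot_r.
 * MODE_INT operations branch to label_ovf on overflow.
 */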
70 static bool attr_w gen_alu(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, frame_t slot_r)
74 unsigned reg1, reg2, reg3, target;
76 case MODE_FIXED: switch (op) {
77 case OPCODE_FIXED_OP_add: alu = ALU_ADD; goto do_alu;
78 case OPCODE_FIXED_OP_subtract: alu = ALU_SUB; goto do_alu;
79 case OPCODE_FIXED_OP_multiply: goto do_multiply;
80 case OPCODE_FIXED_OP_divide:
81 case OPCODE_FIXED_OP_divide_alt1: sgn = true; mod = false; goto do_divide;
82 case OPCODE_FIXED_OP_udivide:
83 case OPCODE_FIXED_OP_udivide_alt1: sgn = false; mod = false; goto do_divide;
84 case OPCODE_FIXED_OP_modulo:
85 case OPCODE_FIXED_OP_modulo_alt1: sgn = true; mod = true; goto do_divide;
86 case OPCODE_FIXED_OP_umodulo:
87 case OPCODE_FIXED_OP_umodulo_alt1: sgn = false; mod = true; goto do_divide;
88 case OPCODE_FIXED_OP_power: return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_binary_power_int8_t), op_size, slot_1, slot_2, slot_r, 0);
89 case OPCODE_FIXED_OP_and: alu = ALU_AND; goto do_alu;
90 case OPCODE_FIXED_OP_or: alu = ALU_OR; goto do_alu;
91 case OPCODE_FIXED_OP_xor: alu = ALU_XOR; goto do_alu;
92 case OPCODE_FIXED_OP_shl: alu = ROT_SHL; goto do_shift;
93 case OPCODE_FIXED_OP_shr: alu = ROT_SAR; goto do_shift;
94 case OPCODE_FIXED_OP_ushr: alu = ROT_SHR; goto do_shift;
95 case OPCODE_FIXED_OP_rol: alu = ROT_ROL; goto do_shift;
96 case OPCODE_FIXED_OP_ror: alu = ROT_ROR; goto do_shift;
97 case OPCODE_FIXED_OP_bts: alu = BTX_BTS; goto do_bt;
98 case OPCODE_FIXED_OP_btr: alu = BTX_BTR; goto do_bt;
99 case OPCODE_FIXED_OP_btc: alu = BTX_BTC; goto do_bt;
100 case OPCODE_FIXED_OP_equal: alu = COND_E; goto do_compare;
101 case OPCODE_FIXED_OP_not_equal: alu = COND_NE; goto do_compare;
102 case OPCODE_FIXED_OP_less: alu = COND_L; goto do_compare;
103 case OPCODE_FIXED_OP_less_equal: alu = COND_LE; goto do_compare;
104 case OPCODE_FIXED_OP_greater: alu = COND_G; goto do_compare;
105 case OPCODE_FIXED_OP_greater_equal: alu = COND_GE; goto do_compare;
106 case OPCODE_FIXED_OP_uless: alu = COND_B; goto do_compare;
107 case OPCODE_FIXED_OP_uless_equal: alu = COND_BE; goto do_compare;
108 case OPCODE_FIXED_OP_ugreater: alu = COND_A; goto do_compare;
109 case OPCODE_FIXED_OP_ugreater_equal: alu = COND_AE; goto do_compare;
110 case OPCODE_FIXED_OP_bt: alu = BTX_BT; goto do_bt;
111 default: internal(file_line, "gen_alu: unsupported fixed operation %u", op);
113 case MODE_INT: switch (op) {
114 case OPCODE_INT_OP_add: alu = ALU_ADD; goto do_alu;
115 case OPCODE_INT_OP_subtract: alu = ALU_SUB; goto do_alu;
116 case OPCODE_INT_OP_multiply: goto do_multiply;
117 case OPCODE_INT_OP_divide:
118 case OPCODE_INT_OP_divide_alt1: sgn = true; mod = false; goto do_divide;
119 case OPCODE_INT_OP_modulo:
120 case OPCODE_INT_OP_modulo_alt1: sgn = true; mod = true; goto do_divide;
121 case OPCODE_INT_OP_power: return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_power_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf);
122 case OPCODE_INT_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
123 case OPCODE_INT_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
124 case OPCODE_INT_OP_xor: alu = ALU_XOR; mode = MODE_FIXED; goto do_alu;
125 case OPCODE_INT_OP_shl: alu = ROT_SHL; goto do_shift;
126 case OPCODE_INT_OP_shr: alu = ROT_SAR; goto do_shift;
127 case OPCODE_INT_OP_bts: alu = BTX_BTS; goto do_bt;
128 case OPCODE_INT_OP_btr: alu = BTX_BTR; goto do_bt;
129 case OPCODE_INT_OP_btc: alu = BTX_BTC; goto do_bt;
130 case OPCODE_INT_OP_equal: alu = COND_E; goto do_compare;
131 case OPCODE_INT_OP_not_equal: alu = COND_NE; goto do_compare;
132 case OPCODE_INT_OP_less: alu = COND_L; goto do_compare;
133 case OPCODE_INT_OP_less_equal: alu = COND_LE; goto do_compare;
134 case OPCODE_INT_OP_greater: alu = COND_G; goto do_compare;
135 case OPCODE_INT_OP_greater_equal: alu = COND_GE; goto do_compare;
136 case OPCODE_INT_OP_bt: alu = BTX_BT; goto do_bt;
137 default: internal(file_line, "gen_alu: unsupported int operation %u", op);
139 case MODE_BOOL: switch (op) {
140 case OPCODE_BOOL_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
141 case OPCODE_BOOL_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
142 case OPCODE_BOOL_OP_equal: alu = COND_E; goto do_compare;
143 case OPCODE_BOOL_OP_not_equal: alu = ALU_XOR; mode = MODE_FIXED; goto do_alu;
144 case OPCODE_BOOL_OP_less: alu = COND_L; goto do_compare;
145 case OPCODE_BOOL_OP_less_equal: alu = COND_LE; goto do_compare;
146 case OPCODE_BOOL_OP_greater: alu = COND_G; goto do_compare;
147 case OPCODE_BOOL_OP_greater_equal: alu = COND_GE; goto do_compare;
148 default: internal(file_line, "gen_alu: unsupported bool operation %u", op);
151 internal(file_line, "gen_alu: unsupported mode %u", mode);
157 size_t attr_unused offset;
158 uint8_t attr_unused long_imm;
159 unsigned first_flags;
160 unsigned second_flags;
162 unsigned attr_unused op_size_flags;
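/*
 * Double-word (2 x OP_SIZE_NATIVE) add/subtract.  On architectures other
 * than x86/ARM/PA-RISC/POWER/SPARC32 the wide ADD/SUB cases are routed to
 * the FIXED_/INT_binary_add/subtract upcalls; otherwise the low halves are
 * combined with ALU_ADD/ALU_SUB (first_flags apparently requesting that the
 * carry be produced), the high halves with ALU_ADC/ALU_SBB consuming it,
 * and for MODE_INT the signed overflow of the high-half operation is caught
 * either by INSN_ALU_FLAGS_TRAP on PA-RISC or by a COND_O jump to label_ovf.
 */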
164 if (unlikely(op_size > OP_SIZE_NATIVE)) {
165 #if !defined(ARCH_X86) && !defined(ARCH_ARM) && !defined(ARCH_PARISC) && !defined(ARCH_POWER) && !defined(ARCH_SPARC32)
166 if (mode == MODE_FIXED) {
167 if (alu == ALU_ADD) {
168 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_add_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, 0));
170 } else if (alu == ALU_SUB) {
171 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_subtract_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, 0));
174 } else if (mode == MODE_INT) {
175 if (alu == ALU_ADD) {
176 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_binary_add_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, label_ovf));
178 } else if (alu == ALU_SUB) {
179 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_binary_subtract_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, label_ovf));
184 first_flags = alu == ALU_ADD || alu == ALU_SUB ? 2 : 0;
185 second_flags = mode == MODE_INT ? 1 : 0;
186 second_alu = alu == ALU_ADD ? ALU_ADC : alu == ALU_SUB ? ALU_SBB : alu;
187 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
188 #if defined(ARCH_X86)
189 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, alu, first_flags, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
190 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, second_alu, second_flags, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
192 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
193 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, first_flags));
194 #if defined(ARCH_PARISC)
195 if (mode == MODE_INT) {
196 gen_insn(INSN_ALU_FLAGS_TRAP, OP_SIZE_NATIVE, second_alu, ALU_WRITES_FLAGS(second_alu, false));
197 gen_one(R_SCRATCH_2);
198 gen_one(R_SCRATCH_2);
199 gen_one(R_SCRATCH_4);
204 gen_insn(first_flags ? INSN_ALU_FLAGS : INSN_ALU, OP_SIZE_NATIVE, second_alu, second_flags | ALU_WRITES_FLAGS(second_alu, false));
205 gen_one(R_SCRATCH_2);
206 gen_one(R_SCRATCH_2);
207 gen_one(R_SCRATCH_4);
210 #if !defined(ARCH_PARISC)
211 if (mode == MODE_INT) {
212 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_O, 0);
216 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
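/*
 * In-place paths for native-width operands: when the result slot aliases
 * one of the inputs, the operation is applied directly to the slot's
 * register or frame memory.  For MODE_INT an undo record is filled in
 * (alloc_undo_label, ce->undo_opcode/undo_op_size/undo_aux = the inverse
 * operation, plus the copied parameters), and the COND_O / trap path jumps
 * to ce->undo_label so the original slot value can be reconstructed on the
 * overflow path.
 */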
220 if ((ARCH_HAS_FLAGS || ARCH_SUPPORTS_TRAPS) && slot_2 == slot_r && slot_1 != slot_2 && alu_is_commutative(alu)) {
225 if ((ARCH_HAS_FLAGS || ARCH_SUPPORTS_TRAPS) && slot_1 == slot_r && (slot_1 != slot_2 || mode != MODE_INT) && i_size_cmp(op_size) == op_size + zero
226 #if defined(ARCH_POWER)
227 && op_size == OP_SIZE_NATIVE
231 unsigned undo_alu = alu == ALU_ADD ? ALU_SUB : ALU_ADD;
232 if (slot_is_register(ctx, slot_1)) {
233 unsigned reg1 = ctx->registers[slot_1];
234 if (slot_is_register(ctx, slot_2)
235 #if !defined(ARCH_POWER)
236 || frame_t_is_const(slot_2)
239 unsigned reg2 = frame_t_is_const(slot_2) ? 0xff /* avoid warning */ : ctx->registers[slot_2];
240 if (mode == MODE_INT && ARCH_SUPPORTS_TRAPS) {
241 if (frame_t_is_const(slot_2))
242 g(gen_imm(ctx, frame_t_get_const(slot_2), alu_trap_purpose(alu), i_size(op_size)));
243 gen_insn(INSN_ALU_TRAP, op_size, alu, ALU_WRITES_FLAGS(alu, frame_t_is_const(slot_2) && is_imm()));
246 if (frame_t_is_const(slot_2))
250 if (ARCH_TRAP_BEFORE) {
254 ce = alloc_undo_label(ctx);
257 gen_four(ce->undo_label);
261 if (frame_t_is_const(slot_2))
262 g(gen_3address_alu_imm(ctx, i_size(op_size), alu, reg1, reg1, frame_t_get_const(slot_2), mode == MODE_INT));
264 g(gen_3address_alu(ctx, i_size(op_size), alu, reg1, reg1, reg2, mode == MODE_INT));
265 if (mode == MODE_INT) {
267 ce = alloc_undo_label(ctx);
270 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
271 gen_four(ce->undo_label);
273 ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
274 ce->undo_op_size = i_size(op_size);
275 ce->undo_aux = undo_alu;
276 ce->undo_writes_flags = ALU_WRITES_FLAGS(undo_alu, frame_t_is_const(slot_2) && is_imm());
277 m = mark_params(ctx);
280 if (frame_t_is_const(slot_2))
284 copy_params(ctx, ce, m);
288 #if defined(ARCH_S390) || defined(ARCH_X86)
289 else if (!frame_t_is_const(slot_2)) {
291 int64_t offset = (size_t)slot_2 * slot_size;
292 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
293 gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, 1);
296 gen_address_offset();
297 if (mode == MODE_INT) {
298 ce = alloc_undo_label(ctx);
301 ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
302 ce->undo_op_size = i_size(op_size);
303 ce->undo_aux = undo_alu;
304 ce->undo_writes_flags = ARCH_HAS_FLAGS;
305 m = mark_params(ctx);
308 gen_address_offset();
309 copy_params(ctx, ce, m);
310 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
311 gen_four(ce->undo_label);
317 #if defined(ARCH_X86)
321 int64_t offset = (size_t)slot_1 * slot_size;
322 if (!frame_t_is_const(slot_2))
323 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_1, &reg2));
324 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
325 if (frame_t_is_const(slot_2))
326 g(gen_imm(ctx, frame_t_get_const(slot_2), alu_purpose(alu), i_size(op_size)));
327 gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, 1);
328 gen_address_offset();
329 gen_address_offset();
330 if (frame_t_is_const(slot_2))
334 if (mode == MODE_INT) {
335 ce = alloc_undo_label(ctx);
338 ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
339 ce->undo_op_size = i_size(op_size);
340 ce->undo_aux = undo_alu;
341 ce->undo_writes_flags = ARCH_HAS_FLAGS;
342 m = mark_params(ctx);
343 gen_address_offset();
344 gen_address_offset();
345 if (frame_t_is_const(slot_2))
349 copy_params(ctx, ce, m);
350 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
351 gen_four(ce->undo_label);
358 #if defined(ARCH_X86)
360 #elif defined(ARCH_S390)
361 if (op_size >= OP_SIZE_4)
362 #elif ARCH_HAS_FLAGS && !defined(ARCH_POWER)
363 if (op_size == i_size(op_size) + (unsigned)zero && frame_t_is_const(slot_2))
365 if (mode != MODE_INT && op_size == i_size(op_size) + (unsigned)zero && frame_t_is_const(slot_2))
368 if (mode == MODE_INT) {
369 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
371 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
373 g(gen_frame_load(ctx, op_size, garbage, slot_1, 0, false, target));
374 g(gen_frame_load_op(ctx, op_size, garbage, alu, mode == MODE_INT, slot_2, 0, false, target));
375 goto check_ovf_store;
377 op_size_flags = !ARCH_HAS_FLAGS && !ARCH_SUPPORTS_TRAPS ? OP_SIZE_NATIVE : OP_SIZE_4;
378 #if defined(ARCH_POWER)
379 op_size_flags = OP_SIZE_NATIVE;
381 g(gen_frame_get(ctx, op_size, mode == MODE_INT && (op_size < op_size_flags || ARCH_SUPPORTS_TRAPS) ? sign_x : garbage, slot_1, R_SCRATCH_1, &reg1));
382 if (frame_t_is_const(slot_2)
383 #if defined(ARCH_POWER)
390 g(gen_frame_get(ctx, op_size, mode == MODE_INT && (op_size < op_size_flags || ARCH_SUPPORTS_TRAPS) ? sign_x : garbage, slot_2, R_SCRATCH_2, &reg2));
394 if (mode == MODE_INT && op_size >= OP_SIZE_4) {
395 if (ARCH_SUPPORTS_TRAPS) {
396 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
398 g(gen_imm(ctx, frame_t_get_const(slot_2), alu_trap_purpose(alu), op_size));
399 gen_insn(INSN_ALU_TRAP, op_size, alu, ALU_WRITES_FLAGS(alu, c && is_imm()));
407 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
410 if (op_size >= OP_SIZE_NATIVE) {
411 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
413 g(gen_3address_alu_imm(ctx, i_size(op_size), alu, target, reg1, frame_t_get_const(slot_2), 0));
415 g(gen_3address_alu(ctx, i_size(op_size), alu, target, reg1, reg2, 0));
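/*
 * Flagless signed-overflow test (IA64 and the generic branch below),
 * assuming ALU_ANDN computes x & ~y: with r = a op b the code derives
 *     add: overflow iff (r ^ b) & ~(a ^ b) is negative
 *     sub: overflow iff (a ^ b) & ~(r ^ b) is negative
 * i.e. overflow happened iff the result's sign is impossible given the
 * operands' signs; the sign bit of the combined value is then tested with
 * COND_S.  The other branches use an equivalent comparison of the result
 * against the first operand (constant second operand) or a CMP_DEST_REG
 * based variant.
 */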
416 #if defined(ARCH_IA64)
418 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_1, reg1, frame_t_get_const(slot_2), 0));
419 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_2, target, frame_t_get_const(slot_2), 0));
421 g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_1, reg1, reg2, 0));
422 g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_2, target, reg2, 0));
424 if (alu == ALU_ADD) {
425 g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_SCRATCH_1, R_SCRATCH_2, R_SCRATCH_1, 0));
427 g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
429 g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), R_SCRATCH_1, R_SCRATCH_1, COND_S, label_ovf));
432 g(gen_cmp_test_jmp(ctx, INSN_CMP, i_size(op_size), reg1, target, (frame_t_get_const(slot_2) >= 0) ^ (alu != ALU_ADD) ? COND_G : COND_L, label_ovf));
434 gen_insn(INSN_CMP_DEST_REG, i_size(op_size), COND_L, 0);
435 gen_one(R_SCRATCH_1);
436 if (alu == ALU_ADD) {
444 g(gen_imm(ctx, 0, IMM_PURPOSE_CMP, i_size(op_size)));
445 gen_insn(INSN_CMP_DEST_REG, i_size(op_size), COND_L, 0);
446 gen_one(R_SCRATCH_2);
450 g(gen_cmp_test_jmp(ctx, INSN_CMP, i_size(op_size), R_SCRATCH_1, R_SCRATCH_2, COND_NE, label_ovf));
453 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
458 if (mode == MODE_INT) {
459 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
460 } else if (!ARCH_IS_3ADDRESS(alu, mode == MODE_INT && op_size >= op_size_flags) && !alu_is_commutative(alu)) {
461 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
463 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
466 g(gen_3address_alu_imm(ctx, i_size(op_size), alu, target, reg1, frame_t_get_const(slot_2), mode == MODE_INT && op_size >= op_size_flags));
468 g(gen_3address_alu(ctx, i_size(op_size), alu, target, reg1, reg2, mode == MODE_INT && op_size >= op_size_flags));
471 if (mode == MODE_INT && unlikely(op_size < op_size_flags)) {
472 g(gen_cmp_extended(ctx, op_size_flags, op_size, target, R_SCRATCH_2, label_ovf));
475 if (mode == MODE_INT) {
476 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
479 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
487 size_t attr_unused offset;
488 uint8_t attr_unused long_imm;
489 if (unlikely(op_size > OP_SIZE_NATIVE) || unlikely(!ARCH_HAS_MUL)) {
490 if (mode == MODE_INT) {
491 g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf));
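/*
 * Double-word multiply, done in parts: with the operands split as
 * (hi1:lo1) and (hi2:lo2), the low result word is lo(lo1 * lo2) and the
 * high result word is hi(lo1 * lo2) + lo1 * hi2 + hi1 * lo2 (the hi1 * hi2
 * term only affects bits that are discarded).  The x86 path accumulates
 * the cross products in R_CX around a widening MUL_L, ARM32 uses MUL/MADD
 * around MUL_L, and ARM64 uses UMULH plus two MADDs.
 */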
494 #if defined(ARCH_X86)
495 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, hi_word(OP_SIZE_NATIVE), true, R_CX));
496 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, hi_word(OP_SIZE_NATIVE), true, R_AX));
497 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_MUL, true, slot_2, lo_word(OP_SIZE_NATIVE), true, R_CX));
498 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_MUL, true, slot_1, lo_word(OP_SIZE_NATIVE), true, R_AX));
499 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_CX, R_CX, R_AX, 0));
500 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, lo_word(OP_SIZE_NATIVE), true, R_AX));
502 offset = (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE);
503 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
504 gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 1);
508 gen_address_offset();
510 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_DX, R_DX, R_CX, 0));
512 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_AX, R_DX));
515 #elif defined(ARCH_ARM32)
516 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
517 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
519 g(gen_mov(ctx, OP_SIZE_NATIVE, R_SCRATCH_NA_1, R_SCRATCH_1));
521 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, R_SCRATCH_4, R_SCRATCH_1, R_SCRATCH_4, 0));
523 gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
524 gen_one(R_SCRATCH_2);
525 gen_one(R_SCRATCH_3);
526 gen_one(R_SCRATCH_2);
527 gen_one(R_SCRATCH_4);
529 gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 0);
530 gen_one(R_SCRATCH_1);
531 gen_one(R_SCRATCH_4);
532 gen_one(R_SCRATCH_NA_1);
533 gen_one(R_SCRATCH_3);
535 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_4, 0));
537 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
540 #elif defined(ARCH_ARM64)
541 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
542 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
544 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_UMULH, R_SCRATCH_NA_1, R_SCRATCH_1, R_SCRATCH_3, 0));
546 gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
547 gen_one(R_SCRATCH_NA_1);
548 gen_one(R_SCRATCH_2);
549 gen_one(R_SCRATCH_3);
550 gen_one(R_SCRATCH_NA_1);
552 gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
553 gen_one(R_SCRATCH_2);
554 gen_one(R_SCRATCH_1);
555 gen_one(R_SCRATCH_4);
556 gen_one(R_SCRATCH_NA_1);
558 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, 0));
560 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
564 g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, 0));
569 #if defined(ARCH_X86)
570 if (mode == MODE_INT) {
571 if (op_size != OP_SIZE_1 && slot_r == slot_1 && slot_is_register(ctx, slot_1)) {
573 target = ctx->registers[slot_1];
574 g(gen_mov(ctx, op_size, R_SCRATCH_1, target));
575 g(gen_frame_load_op(ctx, op_size, garbage, ALU_MUL, mode == MODE_INT, slot_2, 0, false, target));
576 ce = alloc_undo_label(ctx);
579 ce->undo_opcode = INSN_MOV;
580 ce->undo_op_size = op_size;
582 ce->undo_writes_flags = 0;
583 ce->undo_parameters[0] = target;
584 ce->undo_parameters[1] = R_SCRATCH_1;
585 ce->undo_parameters_len = 2;
586 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
587 gen_four(ce->undo_label);
590 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
592 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
594 if (op_size == OP_SIZE_1)
595 target = R_SCRATCH_1;
596 g(gen_frame_load(ctx, op_size, garbage, slot_1, 0, false, target));
597 if (op_size == OP_SIZE_1 && frame_t_is_const(slot_2)) {
598 g(gen_load_constant(ctx, R_SCRATCH_3, frame_t_get_const(slot_2)));
599 gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(op_size), op_size, ALU_MUL, 1);
602 gen_one(R_SCRATCH_3);
604 g(gen_frame_load_op(ctx, op_size, garbage, ALU_MUL, mode == MODE_INT, slot_2, 0, false, target));
606 if (mode == MODE_INT) {
607 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
610 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
613 #if defined(ARCH_ALPHA)
614 if (mode == MODE_INT && op_size >= OP_SIZE_4 && ARCH_SUPPORTS_TRAPS) {
615 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
616 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
617 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
619 gen_insn(INSN_ALU_TRAP, op_size, ALU_MUL, ALU_WRITES_FLAGS(ALU_MUL, false));
624 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
629 #if defined(ARCH_ARM32)
630 if (mode == MODE_INT && op_size == OP_SIZE_4) {
631 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
632 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
633 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
635 gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 0);
637 gen_one(R_SCRATCH_4);
641 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
642 gen_one(R_SCRATCH_4);
643 gen_one(ARG_SHIFTED_REGISTER);
644 gen_one(ARG_SHIFT_ASR | 0x1f);
647 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
650 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
655 #if defined(ARCH_ARM64)
656 if (mode == MODE_INT && op_size == OP_SIZE_4) {
657 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
658 g(gen_frame_get(ctx, op_size, op_size < OP_SIZE_4 ? sign_x : garbage, slot_1, R_SCRATCH_1, &reg1));
659 g(gen_frame_get(ctx, op_size, op_size < OP_SIZE_4 ? sign_x : garbage, slot_2, R_SCRATCH_2, &reg2));
660 gen_insn(INSN_ALU, OP_SIZE_8, ALU_MUL, ALU_WRITES_FLAGS(ALU_MUL, false));
662 gen_one(ARG_EXTENDED_REGISTER);
663 gen_one(ARG_EXTEND_SXTW);
665 gen_one(ARG_EXTENDED_REGISTER);
666 gen_one(ARG_EXTEND_SXTW);
669 gen_insn(INSN_CMP, OP_SIZE_8, 0, 1);
671 gen_one(ARG_EXTENDED_REGISTER);
672 gen_one(ARG_EXTEND_SXTW);
675 gen_insn(INSN_JMP_COND, OP_SIZE_8, COND_NE, 0);
678 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
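/*
 * Signed multiply overflow check without an overflow flag: form the
 * double-width product (widening multiply or MUL + SMULH/MUL_L) and
 * compare the high word with the low word shifted right arithmetically by
 * width - 1 bits, i.e. with the sign-extension of the low word.  Any
 * difference means the product does not fit and control goes to label_ovf.
 */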
682 if (mode == MODE_INT && op_size == OP_SIZE_8) {
683 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
684 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
685 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
686 g(gen_3address_alu(ctx, OP_SIZE_8, ALU_SMULH, R_SCRATCH_3, reg1, reg2, 0));
688 g(gen_3address_alu(ctx, OP_SIZE_8, ALU_MUL, target, reg1, reg2, 0));
690 gen_insn(INSN_CMP, OP_SIZE_8, 0, 1);
691 gen_one(R_SCRATCH_3);
692 gen_one(ARG_SHIFTED_REGISTER);
693 gen_one(ARG_SHIFT_ASR | 0x3f);
696 gen_insn(INSN_JMP_COND, OP_SIZE_8, COND_NE, 0);
699 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
704 #if defined(ARCH_POWER)
705 if (mode == MODE_INT && op_size >= OP_SIZE_4) {
706 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
707 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
708 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
710 g(gen_3address_alu(ctx, op_size, ALU_MUL, target, reg1, reg2, 1));
712 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
715 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
720 #if defined(ARCH_LOONGARCH64) || (defined(ARCH_MIPS) && MIPS_R6) || defined(ARCH_RISCV64)
721 if (mode == MODE_INT && op_size == OP_SIZE_NATIVE) {
722 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
723 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
724 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
726 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SMULH, R_SCRATCH_3, reg1, reg2, 0));
728 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, target, reg1, reg2, 0));
730 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, (8U << OP_SIZE_NATIVE) - 1, 0));
732 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_3, R_SCRATCH_4, COND_NE, label_ovf));
734 g(gen_frame_store(ctx, OP_SIZE_NATIVE, slot_r, 0, target));
739 #if defined(ARCH_S390)
740 if (mode == MODE_INT && op_size >= OP_SIZE_4 && likely(cpu_test_feature(CPU_FEATURE_misc_insn_ext_2))) {
741 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
742 g(gen_frame_load(ctx, op_size, sign_x, slot_1, 0, false, target));
743 g(gen_frame_load_op(ctx, op_size, sign_x, ALU_MUL, 1, slot_2, 0, false, target));
745 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
748 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
752 #if (defined(ARCH_MIPS) && !MIPS_R6) || defined(ARCH_S390)
753 #if defined(ARCH_MIPS)
754 if (mode == MODE_INT && op_size >= OP_SIZE_4)
756 #if defined(ARCH_S390)
757 if (mode == MODE_INT && op_size == OP_SIZE_4)
760 #if defined(ARCH_S390)
761 target = R_SCRATCH_1;
763 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
765 g(gen_frame_get(ctx, op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
766 g(gen_frame_get(ctx, op_size, sign_x, slot_2, R_SCRATCH_3, &reg2));
768 gen_insn(INSN_MUL_L, op_size, 0, 0);
770 gen_one(R_SCRATCH_2);
774 g(gen_3address_rot_imm(ctx, op_size, ROT_SAR, R_SCRATCH_4, target, (8U << op_size) - 1, false));
776 g(gen_cmp_test_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_2, R_SCRATCH_4, COND_NE, label_ovf));
778 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
782 if (mode == MODE_INT && op_size == OP_SIZE_NATIVE) {
783 g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf));
787 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
788 if (op_size < OP_SIZE_NATIVE && mode == MODE_INT) {
789 g(gen_frame_get(ctx, op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
790 g(gen_frame_get(ctx, op_size, sign_x, slot_2, R_SCRATCH_2, &reg2));
792 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, target, reg1, reg2, 0));
794 g(gen_frame_load(ctx, op_size, sign_x, slot_1, 0, false, target));
795 g(gen_frame_load_op(ctx, op_size, sign_x, ALU_MUL, 0, slot_2, 0, false, target));
798 if (mode == MODE_INT) {
799 g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, op_size, target, R_SCRATCH_2, label_ovf));
802 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
811 uint32_t attr_unused label_skip = 0; /* avoid warning */
812 uint32_t attr_unused label_skip2 = 0; /* avoid warning */
813 uint32_t attr_unused label_end = 0; /* avoid warning */
814 uint32_t attr_unused label_div_0 = 0; /* avoid warning */
815 unsigned attr_unused divide_alu = 0; /* avoid warning */
816 bool attr_unused have_mod = false;
817 bool attr_unused force_sx = false;
818 unsigned attr_unused div_op_size = i_size(op_size);
819 if (unlikely(op_size > OP_SIZE_NATIVE) || unlikely(!ARCH_HAS_DIV)
820 #if defined(ARCH_S390)
821 || !(Z || (op_size <= OP_SIZE_4 && sgn))
825 if (mode == MODE_INT) {
826 upcall = !mod ? offsetof(struct cg_upcall_vector_s, INT_binary_divide_int8_t) : offsetof(struct cg_upcall_vector_s, INT_binary_modulo_int8_t);
828 upcall = !mod ? offsetof(struct cg_upcall_vector_s, FIXED_binary_divide_int8_t) : offsetof(struct cg_upcall_vector_s, FIXED_binary_modulo_int8_t);
830 upcall = !mod ? offsetof(struct cg_upcall_vector_s, FIXED_binary_udivide_int8_t) : offsetof(struct cg_upcall_vector_s, FIXED_binary_umodulo_int8_t);
832 g(gen_alu_typed_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, mode == MODE_INT ? label_ovf : 0));
835 #if defined(ARCH_X86) || defined(ARCH_S390)
836 if (mode == MODE_FIXED) {
837 label_skip = alloc_label(ctx);
838 if (unlikely(!label_skip))
840 label_end = alloc_label(ctx);
841 if (unlikely(!label_end))
844 label_skip2 = alloc_label(ctx);
845 if (unlikely(!label_skip2))
849 #if defined(ARCH_X86)
850 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX || R_SCRATCH_3 != R_CX)
851 internal(file_line, "gen_alu: bad scratch registers");
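/*
 * x86 division: DIV/IDIV raise a fault on a zero divisor and IDIV also on
 * MIN / -1, so both cases are checked explicitly first.  A zero divisor
 * goes to label_ovf for MODE_INT and to label_skip for MODE_FIXED; for
 * signed division a divisor of -1 with the dividend equal to the minimum
 * value of the operand width (val) goes to label_ovf or label_skip2.  The
 * skip labels below store a defined result for the fixed-width variants
 * instead of executing the division.
 */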
853 g(gen_frame_load(ctx, op_size, sgn ? sign_x : zero_x, slot_1, 0, false, R_SCRATCH_1));
854 g(gen_frame_load(ctx, op_size, sgn ? sign_x : zero_x, slot_2, 0, false, R_SCRATCH_3));
856 g(gen_jmp_on_zero(ctx, i_size(op_size), R_SCRATCH_3, COND_E, mode == MODE_INT ? label_ovf : label_skip));
860 uint32_t label_not_minus_1;
861 label_not_minus_1 = alloc_label(ctx);
862 if (unlikely(!label_not_minus_1))
865 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_3, -1, COND_NE, label_not_minus_1));
867 val = -(uint64_t)0x80 << (((1 << op_size) - 1) * 8);
868 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_1, val, COND_E, mode == MODE_INT ? label_ovf : label_skip2));
870 gen_label(label_not_minus_1);
873 #if defined(ARCH_X86)
874 if (op_size >= OP_SIZE_2) {
876 gen_insn(INSN_CWD + ARCH_PARTIAL_ALU(op_size), op_size, 0, 0);
877 gen_one(R_SCRATCH_2);
878 gen_one(R_SCRATCH_1);
879 if (op_size == OP_SIZE_2)
880 gen_one(R_SCRATCH_2);
882 g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_2, 0));
885 gen_insn(INSN_DIV_L, op_size, sgn, 1);
886 gen_one(R_SCRATCH_1);
887 gen_one(i_size(op_size) == OP_SIZE_1 ? R_SCRATCH_1 : R_SCRATCH_2);
888 gen_one(R_SCRATCH_1);
889 gen_one(i_size(op_size) == OP_SIZE_1 ? R_SCRATCH_1 : R_SCRATCH_2);
890 gen_one(R_SCRATCH_3);
893 g(gen_load_constant(ctx, R_SCRATCH_2, 0));
894 } else if (op_size <= OP_SIZE_4) {
895 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, R_SCRATCH_1, (1U << (OP_SIZE_NATIVE + 3)) - 1, false));
897 gen_insn(INSN_DIV_L, i_size(op_size), sgn, 1);
898 gen_one(R_SCRATCH_2);
899 gen_one(R_SCRATCH_1);
900 gen_one(R_SCRATCH_2);
901 gen_one(R_SCRATCH_1);
902 gen_one(R_SCRATCH_3);
904 if (mod && i_size(op_size) == OP_SIZE_1) {
905 g(gen_3address_rot_imm(ctx, OP_SIZE_2, ROT_SHR, R_SCRATCH_1, R_SCRATCH_1, 8, 0));
906 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
908 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_2));
910 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
912 if (mode == MODE_FIXED) {
913 gen_insn(INSN_JMP, 0, 0, 0);
917 gen_label(label_skip2);
920 g(gen_frame_clear(ctx, op_size, slot_r));
922 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
924 gen_insn(INSN_JMP, 0, 0, 0);
928 gen_label(label_skip);
930 g(gen_frame_clear(ctx, op_size, slot_r));
932 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
933 gen_label(label_end);
937 #if defined(ARCH_MIPS)
939 div_op_size = maximum(op_size, OP_SIZE_4);
940 if (op_size == OP_SIZE_4)
943 #if defined(ARCH_POWER)
944 have_mod = cpu_test_feature(CPU_FEATURE_v30);
945 div_op_size = maximum(op_size, OP_SIZE_4);
947 #if defined(ARCH_LOONGARCH64) || defined(ARCH_RISCV64)
949 div_op_size = maximum(op_size, OP_SIZE_4);
951 label_end = alloc_label(ctx);
952 if (unlikely(!label_end))
955 g(gen_frame_get(ctx, op_size, (sgn && op_size < i_size(op_size)) || force_sx ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
956 g(gen_frame_get(ctx, op_size, (sgn && op_size < i_size(op_size)) || force_sx ? sign_x : zero_x, slot_2, R_SCRATCH_2, &reg2));
957 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
959 if (ARCH_PREFERS_SX(op_size) && !sgn && op_size < i_size(op_size)) {
960 g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
962 g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_2, reg2));
966 if (mode == MODE_INT) {
967 g(gen_jmp_on_zero(ctx, i_size(op_size), reg2, COND_E, label_ovf));
970 uint32_t label_not_minus_1;
971 label_not_minus_1 = alloc_label(ctx);
972 if (unlikely(!label_not_minus_1))
975 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg2, -1, COND_NE, label_not_minus_1));
977 val = 0xFFFFFFFFFFFFFF80ULL << (((1 << op_size) - 1) * 8);
978 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg1, val, COND_E, label_ovf));
980 gen_label(label_not_minus_1);
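/*
 * MODE_FIXED on the generic path: unless the divide instruction is known
 * not to trap (ARM with ARM_ASM_DIV_NO_TRAP), a zero divisor and the
 * MIN / -1 combination are intercepted here as well, the target is
 * pre-loaded with a defined result (zero or the dividend, depending on the
 * case) and the division itself is skipped via label_end; MODE_INT already
 * diverted these cases to label_ovf above.
 */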
983 #if !(defined(ARCH_ARM) && ARM_ASM_DIV_NO_TRAP)
985 g(gen_load_constant(ctx, target, 0));
987 g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
989 g(gen_jmp_on_zero(ctx, i_size(op_size), reg2, COND_E, label_end));
992 uint32_t label_not_minus_1;
993 label_not_minus_1 = alloc_label(ctx);
994 if (unlikely(!label_not_minus_1))
997 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg2, -1, COND_NE, label_not_minus_1));
1000 g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
1002 g(gen_load_constant(ctx, target, 0));
1005 val = 0xFFFFFFFFFFFFFF80ULL << (((1 << op_size) - 1) * 8);
1006 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg1, val, COND_E, label_end));
1008 gen_label(label_not_minus_1);
1012 if (mod && have_mod) {
1013 g(gen_3address_alu(ctx, div_op_size, sgn ? ALU_SREM : ALU_UREM, target, reg1, reg2, 0));
1015 g(gen_3address_alu(ctx, div_op_size, sgn ? ALU_SDIV : ALU_UDIV, target, reg1, reg2, 0));
1018 if (mod && !have_mod) {
1019 #if defined(ARCH_ARM)
1020 gen_insn(INSN_MADD, i_size(op_size), 1, 0);
1026 g(gen_3address_alu(ctx, i_size(op_size), ALU_MUL, R_SCRATCH_2, reg2, target, 0));
1027 g(gen_3address_alu(ctx, i_size(op_size), ALU_SUB, target, reg1, R_SCRATCH_2, 0));
1031 gen_label(label_end);
1032 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
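/*
 * Shifts and rotates.  Operands wider than a native word go through the
 * FIXED_/INT_binary_* upcalls; otherwise the count is either a compile-time
 * constant or loaded from slot_2.  For MODE_INT a count above width - 1 is
 * an overflow (constant counts turn into an unconditional jump to
 * label_ovf, register counts into a CMP + COND_A jump); for MODE_FIXED the
 * count is masked to the operand width when the hardware does not already
 * do so (must_mask).
 */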
1043 int64_t cnst = 0; /* avoid warning */
1044 bool c = frame_t_is_const(slot_2);
1045 if (unlikely(op_size > OP_SIZE_NATIVE)) {
1047 if (mode == MODE_FIXED) {
1049 case ROT_SHL: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_shl_,TYPE_INT_MAX));
1051 case ROT_SAR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_shr_,TYPE_INT_MAX));
1053 case ROT_SHR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ushr_,TYPE_INT_MAX));
1055 case ROT_ROL: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_rol_,TYPE_INT_MAX));
1057 case ROT_ROR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ror_,TYPE_INT_MAX));
1059 default: internal(file_line, "do_alu: invalid shift %u", alu);
1063 case ROT_SHL: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_shl_,TYPE_INT_MAX));
1065 case ROT_SAR: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_shr_,TYPE_INT_MAX));
1067 default: internal(file_line, "do_alu: invalid shift %u", alu);
1070 g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, mode == MODE_INT ? label_ovf : 0));
1073 op_s = i_size_rot(op_size);
1074 #if defined(ARCH_X86)
1075 if (slot_1 == slot_r && !slot_is_register(ctx, slot_1) && !(mode == MODE_INT && alu == ROT_SHL)) {
1076 int64_t offset = (size_t)slot_1 * slot_size;
1078 cnst = frame_t_get_const(slot_2);
1079 if (mode == MODE_INT) {
1080 if ((uint64_t)cnst > (8U << op_size) - 1) {
1081 gen_insn(INSN_JMP, 0, 0, 0);
1082 gen_four(label_ovf);
1086 cnst &= (8U << op_size) - 1;
1088 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, op_size));
1089 gen_insn(INSN_ROT + ARCH_PARTIAL_ALU(op_size), op_size, alu, 1);
1090 gen_address_offset();
1091 gen_address_offset();
1095 g(gen_frame_load(ctx, op_size, garbage, slot_2, 0, false, R_SCRATCH_3));
1096 if (mode == MODE_INT) {
1097 int64_t imm = (8U << op_size) - 1;
1098 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_3, imm, COND_A, label_ovf));
1099 } else if ((alu != ROT_ROL && alu != ROT_ROR) && op_size < OP_SIZE_4) {
1100 g(gen_3address_alu_imm(ctx, OP_SIZE_1, ALU_AND, R_SCRATCH_3, R_SCRATCH_3, (8U << op_size) - 1, 0));
1102 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, op_size));
1103 gen_insn(INSN_ROT + ARCH_PARTIAL_ALU(op_size), op_size, alu, 1);
1104 gen_address_offset();
1105 gen_address_offset();
1106 gen_one(R_SCRATCH_3);
1110 if (mode == MODE_INT && alu == ROT_SHL && op_size < OP_SIZE_NATIVE)
1113 must_mask = op_size < ARCH_SHIFT_SIZE;
1114 sx = (alu == ROT_SAR && op_size < op_s) || (alu == ROT_SHL && op_size < OP_SIZE_NATIVE && mode == MODE_INT);
1115 #if defined(ARCH_MIPS)
1116 sx |= op_size == OP_SIZE_4;
1118 g(gen_frame_get(ctx, op_size, sx ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
1120 reg3 = 0xff; /* avoid warning */
1121 cnst = frame_t_get_const(slot_2);
1123 #if defined(ARCH_X86)
1124 if (!ARCH_IS_3ADDRESS_ROT(alu, op_size)) {
1125 g(gen_frame_load(ctx, op_size, garbage, slot_2, 0, false, R_SCRATCH_3));
1129 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_3, &reg3));
1131 if (ARCH_PREFERS_SX(op_size) && !sx && op_size < op_s) {
1132 g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
1136 if (mode == MODE_INT) {
1137 int64_t imm = (8U << op_size) - 1;
1139 if ((uint64_t)cnst > (uint64_t)imm) {
1140 gen_insn(INSN_JMP, 0, 0, 0);
1141 gen_four(label_ovf);
1145 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg3, imm, COND_A, label_ovf));
1148 #if defined(ARCH_ARM)
1149 if (alu == ROT_ROL) {
1151 cnst = -(uint64_t)cnst;
1153 g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_NEG, R_SCRATCH_3, reg3, 0));
1159 #if defined(ARCH_LOONGARCH64)
1160 if (alu == ROT_ROL && op_size >= OP_SIZE_4) {
1162 cnst = -(uint64_t)cnst;
1164 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
1170 #if defined(ARCH_MIPS)
1171 if (MIPS_HAS_ROT && alu == ROT_ROL && op_size >= OP_SIZE_4) {
1173 cnst = -(uint64_t)cnst;
1175 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
1181 #if defined(ARCH_POWER)
1182 if (alu == ROT_ROR && op_size >= OP_SIZE_4) {
1184 cnst = -(uint64_t)cnst;
1186 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
1192 #if defined(ARCH_S390)
1193 if (Z && alu == ROT_ROR && op_size >= OP_SIZE_4) {
1195 cnst = -(uint64_t)cnst;
1197 g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_NEG, R_SCRATCH_3, reg3, 0));
1204 cnst &= (8U << op_size) - 1;
1205 } else if (must_mask) {
1206 g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_3, reg3, (8U << op_size) - 1, 0));
1211 #if defined(ARCH_X86)
1212 if (mode == MODE_INT && alu == ROT_SHL) {
1213 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_2);
1215 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_2);
1218 g(gen_3address_rot_imm(ctx, op_s, alu, target, reg1, cnst, 0));
1220 g(gen_3address_rot(ctx, op_s, alu, target, reg1, reg3));
1223 if (mode == MODE_INT && alu == ROT_SHL) {
1224 if (op_size < OP_SIZE_NATIVE) {
1225 gen_insn(INSN_MOVSX, op_size, 0, 0);
1226 gen_one(R_SCRATCH_4);
1229 g(gen_cmp_test_jmp(ctx, INSN_CMP, op_s, target, R_SCRATCH_4, COND_NE, label_ovf));
1232 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, cnst, 0));
1234 g(gen_3address_rot(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, reg3));
1237 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, reg1, R_SCRATCH_4, COND_NE, label_ovf));
1240 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1243 #if defined(ARCH_ARM)
1244 if (op_size <= OP_SIZE_2 && alu == ROT_ROR) {
1245 gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
1246 gen_one(R_SCRATCH_1);
1248 gen_one(ARG_SHIFTED_REGISTER);
1249 gen_one(ARG_SHIFT_LSL | (8U << op_size));
1251 if (op_size == OP_SIZE_1)
1255 goto do_generic_shift;
1257 #if defined(ARCH_LOONGARCH64)
1258 if (alu == ROT_ROR && op_size >= OP_SIZE_4)
1259 goto do_generic_shift;
1261 #if defined(ARCH_MIPS)
1262 if (MIPS_HAS_ROT && alu == ROT_ROR && op_size >= OP_SIZE_4)
1263 goto do_generic_shift;
1265 #if defined(ARCH_POWER)
1266 if (alu == ROT_ROL && op_size >= OP_SIZE_4)
1267 goto do_generic_shift;
1269 #if defined(ARCH_RISCV64)
1270 if ((alu == ROT_ROL || alu == ROT_ROR) && likely(cpu_test_feature(CPU_FEATURE_zbb))) {
1271 if (likely(op_size >= OP_SIZE_4)) {
1272 goto do_generic_shift;
1276 #if defined(ARCH_S390)
1277 if (Z && alu == ROT_ROL && op_size >= OP_SIZE_4)
1278 goto do_generic_shift;
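/*
 * Rotate fallback when there is no native rotate of this width: build it
 * from two shifts and an OR, roughly
 *     rol(x, n) = (x << n) | (x >> ((width - n) & (width - 1)))
 * with the complementary count folded into the immediate for constant
 * counts, or computed with ALU1_NEG + AND for register counts.
 */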
1280 if (alu == ROT_ROL || alu == ROT_ROR) {
1281 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
1283 g(gen_3address_rot_imm(ctx, op_s, alu == ROT_ROL ? ROT_SHL : ROT_SHR, R_SCRATCH_2, reg1, cnst, 0));
1284 g(gen_3address_rot_imm(ctx, op_s, alu == ROT_ROL ? ROT_SHR : ROT_SHL, target, reg1, -(uint64_t)cnst & ((8U << op_size) - 1), 0));
1286 g(gen_3address_rot(ctx, op_s, alu == ROT_ROL ? ROT_SHL : ROT_SHR, R_SCRATCH_2, reg1, reg3));
1287 g(gen_2address_alu1(ctx, i_size(OP_SIZE_4), ALU1_NEG, R_SCRATCH_3, reg3, 0));
1289 g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_3, R_SCRATCH_3, (8U << op_size) - 1, 0));
1291 g(gen_3address_rot(ctx, op_s, alu == ROT_ROL ? ROT_SHR : ROT_SHL, target, reg1, R_SCRATCH_3));
1293 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, target, target, R_SCRATCH_2, 0));
1294 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1298 goto do_generic_shift;
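/*
 * MODE_INT shift-left overflow check: after the shift, the value is either
 * shifted back arithmetically by the same count and compared with the
 * original operand, or (for operands narrower than a native word) checked
 * to still fit in the operand width via sign extension; a mismatch jumps
 * to label_ovf.  s390 can instead use the overflow flag of ROT_SAL
 * directly.
 */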
1300 if (mode == MODE_INT && alu == ROT_SHL) {
1301 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
1302 #if defined(ARCH_S390)
1303 if (op_size >= OP_SIZE_4) {
1305 g(gen_3address_rot_imm(ctx, op_size, ROT_SAL, target, reg1, cnst, 0));
1307 g(gen_3address_rot(ctx, op_size, ROT_SAL, target, reg1, reg3));
1309 gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
1310 gen_four(label_ovf);
1313 if (op_size <= OP_SIZE_NATIVE - 1) {
1315 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, alu, target, reg1, cnst, 0));
1317 g(gen_3address_rot(ctx, OP_SIZE_NATIVE, alu, target, reg1, reg3));
1319 g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, op_size, target, R_SCRATCH_2, label_ovf));
1322 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_2, reg1, cnst, 0));
1323 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, R_SCRATCH_2, cnst, 0));
1325 g(gen_3address_rot(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_2, reg1, reg3));
1326 g(gen_3address_rot(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, R_SCRATCH_2, reg3));
1329 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, reg1, R_SCRATCH_4, COND_NE, label_ovf));
1331 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_2));
1336 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
1338 g(gen_3address_rot_imm(ctx, op_s, alu, target, reg1, cnst, 0));
1340 g(gen_3address_rot(ctx, op_s, alu, target, reg1, reg3));
1343 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
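/*
 * Bit test/set/reset/complement.  max_imm is the highest valid bit index
 * (width - 1).  For MODE_INT an index above width - 1 is an overflow for
 * BTX_BT, and for the modifying operations an index equal to width - 1
 * (the sign bit) is rejected as well, hence ">" versus ">=" below;
 * constant indices are rejected when the code is generated, register
 * indices with a CMP + COND_A/COND_AE jump to label_ovf.
 */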
1350 unsigned attr_unused op_s;
1352 bool c = frame_t_is_const(slot_2);
1353 int64_t cnst = !c ? 0 : frame_t_get_const(slot_2);
1354 int64_t max_imm = (8U << op_size) - 1;
1356 if (mode == MODE_INT) {
1357 if (alu == BTX_BT ? (uint64_t)cnst > (uint64_t)max_imm : (uint64_t)cnst >= (uint64_t)max_imm) {
1358 gen_insn(INSN_JMP, 0, 0, 0);
1359 gen_four(label_ovf);
1365 #if defined(ARCH_X86)
1366 if ((alu == BTX_BT || slot_1 == slot_r) && !slot_is_register(ctx, slot_1)) {
1368 unsigned n_op_size = minimum(op_size, OP_SIZE_NATIVE);
1369 g(gen_frame_get(ctx, n_op_size, garbage, slot_2, R_SCRATCH_1, &reg2));
1370 if (mode == MODE_INT) {
1372 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, n_op_size, reg2, max_imm, alu == BTX_BT ? COND_A : COND_AE, label_ovf));
1373 if (unlikely(op_size > OP_SIZE_NATIVE)) {
1374 g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
1375 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
1376 gen_address_offset();
1379 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
1380 gen_four(label_ovf);
1385 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_AND, R_SCRATCH_1, reg2, (8U << op_size) - 1, 0));
1389 offset = (size_t)slot_1 * slot_size;
1390 if (c && cnst >= 8U << OP_SIZE_NATIVE) {
1391 offset += 1U << OP_SIZE_NATIVE;
1392 cnst -= 8U << OP_SIZE_NATIVE;
1394 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_STR_OFFSET, maximum(n_op_size, OP_SIZE_2)));
1395 if (alu == BTX_BT) {
1396 gen_insn(INSN_BT, maximum(n_op_size, OP_SIZE_2), 0, 1);
1397 gen_address_offset();
1404 g(gen_frame_set_cond(ctx, maximum(n_op_size, OP_SIZE_2), false, COND_B, slot_r));
1406 gen_insn(INSN_BTX, maximum(n_op_size, OP_SIZE_2), alu, 1);
1407 gen_address_offset();
1408 gen_address_offset();
1419 if (unlikely(op_size > OP_SIZE_NATIVE)) {
1421 if (mode == MODE_FIXED) {
1423 case BTX_BTS: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_bts_,TYPE_INT_MAX));
1425 case BTX_BTR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_btr_,TYPE_INT_MAX));
1427 case BTX_BTC: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_btc_,TYPE_INT_MAX));
1429 case BTX_BT: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_bt_,TYPE_INT_MAX));
1431 default: internal(file_line, "do_alu: invalid bit test %u", alu);
1435 case BTX_BTS: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_bts_,TYPE_INT_MAX));
1437 case BTX_BTR: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_btr_,TYPE_INT_MAX));
1439 case BTX_BTC: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_btc_,TYPE_INT_MAX));
1441 case BTX_BT: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_bt_,TYPE_INT_MAX));
1443 default: internal(file_line, "do_alu: invalid bit test %u", alu);
1446 g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, label_ovf));
1449 op_s = minimum(OP_SIZE_NATIVE, ARCH_SHIFT_SIZE);
1450 op_s = maximum(op_s, op_size);
1451 g(gen_frame_get(ctx, op_size, zero_x, slot_1, R_SCRATCH_1, &reg1));
1453 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1454 if (ARCH_HAS_BTX(alu == BTX_BT ? BTX_BTEXT : alu, OP_SIZE_NATIVE, true)) {
1456 g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
1458 gen_insn(INSN_BTX, OP_SIZE_NATIVE, alu == BTX_BT ? BTX_BTEXT : alu, 1);
1460 gen_insn(INSN_BTX, OP_SIZE_NATIVE, alu == BTX_BT ? BTX_BTEXT : alu, 0);
1466 } else switch (alu) {
1468 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_OR, target, reg1, 1ULL << cnst, 0));
1471 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_AND, target, reg1, ~(1ULL << cnst), 0));
1474 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, target, reg1, 1ULL << cnst, 0));
1478 g(gen_3address_rot_imm(ctx, i_size(op_size), ROT_SHR, target, reg1, cnst, 0));
1480 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_AND, target, target, 1, 0));
1483 internal(file_line, "do_alu: invalid bit test %u", alu);
1486 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
1488 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1491 g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
1492 if (mode == MODE_INT) {
1494 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg2, max_imm, alu == BTX_BT ? COND_A : COND_AE, label_ovf));
1496 if (alu != BTX_BT) {
1497 if (!ARCH_HAS_BTX(alu, OP_SIZE_NATIVE, false))
1499 need_mask = !ARCH_HAS_BTX(alu, op_size, false);
1501 #if defined(ARCH_X86)
1502 need_mask = op_size < OP_SIZE_2;
1504 if (!ARCH_HAS_BTX(BTX_BTEXT, OP_SIZE_NATIVE, false))
1506 need_mask = !ARCH_HAS_BTX(BTX_BTEXT, op_size, false);
1510 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_AND, R_SCRATCH_2, reg2, (8U << op_size) - 1, 0));
1513 if (alu == BTX_BT) {
1514 #if defined(ARCH_X86)
1515 gen_insn(INSN_BT, maximum(op_size, OP_SIZE_2), 0, 1);
1519 g(gen_frame_set_cond(ctx, maximum(op_size, OP_SIZE_2), false, COND_B, slot_r));
1521 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1522 gen_insn(INSN_BTX, need_mask ? OP_SIZE_NATIVE : op_size, BTX_BTEXT, 0);
1527 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
1530 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
1531 #if defined(ARCH_X86)
1533 target = R_SCRATCH_1;
1534 if (target != reg1) {
1535 g(gen_mov(ctx, op_size, target, reg1));
1538 gen_insn(INSN_BTX, maximum(op_size, OP_SIZE_2), alu, 1);
1540 gen_insn(INSN_BTX, need_mask ? OP_SIZE_NATIVE : op_size, alu, 0);
1546 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
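/*
 * Generic bit-operation fallback without BTX instructions: build the mask
 * as 1 << bit and combine, roughly
 *     bt :  (x & mask) != 0   stored as a boolean
 *     bts:   x | mask
 *     btr:   x & ~mask        (ALU_ANDN, or XOR -1 + AND when ANDN is absent)
 *     btc:   x ^ mask
 * mirroring the constant-index forms handled earlier with immediates.
 */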
1552 if (mode == MODE_FIXED && op_size < ARCH_SHIFT_SIZE) {
1553 g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_2, reg2, (8U << op_size) - 1, 0));
1556 g(gen_load_constant(ctx, R_SCRATCH_3, 1));
1558 g(gen_3address_rot(ctx, op_s, ROT_SHL, R_SCRATCH_3, R_SCRATCH_3, reg2));
1563 #if defined(ARCH_S390) || defined(ARCH_POWER)
1564 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_1, reg1, R_SCRATCH_3, 1));
1566 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
1568 gen_one(R_SCRATCH_3);
1570 g(gen_frame_set_cond(ctx, i_size_cmp(op_size), false, COND_NE, slot_r));
1572 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_1, reg1, R_SCRATCH_3, 0));
1573 g(gen_frame_cmp_imm_set_cond_reg(ctx, i_size(op_size), R_SCRATCH_1, 0, COND_NE, slot_r));
1577 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1578 g(gen_3address_alu(ctx, i_size(op_size), ALU_OR, target, reg1, R_SCRATCH_3, 0));
1581 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1582 if (!ARCH_HAS_ANDN) {
1583 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_3, R_SCRATCH_3, -1, 0));
1585 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, target, reg1, R_SCRATCH_3, 0));
1588 g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, target, reg1, R_SCRATCH_3, 0));
1591 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1592 g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, target, reg1, R_SCRATCH_3, 0));
1595 internal(file_line, "gen_alu: unsupported bit test %u", alu);
1598 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
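/*
 * Comparisons.  For double-word operands, equality and inequality XOR the
 * low and high halves of the two values, OR the differences together and
 * test for zero.  The ordered comparisons either chain a CMP on the low
 * halves with an ALU_SBB on the high halves (operands may be swapped first
 * so fewer condition variants are needed) or, where that pattern is not
 * available, call the FIXED_binary_*_TYPE_INT_MAX helpers.  Native-width
 * operands use a single load + compare-and-set-condition sequence.
 */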
1606 if (unlikely(op_size > OP_SIZE_NATIVE)) {
1607 size_t attr_unused upcall;
1608 frame_t attr_unused swap;
1612 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1613 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_XOR, 0, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
1614 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_XOR, 0, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
1615 #if defined(ARCH_ARM64)
1616 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
1618 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
1619 gen_one(R_SCRATCH_1);
1623 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, ARCH_HAS_FLAGS));
1626 g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu, slot_r));
1628 g(gen_frame_cmp_imm_set_cond_reg(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, 0, alu, slot_r));
1631 #if defined(ARCH_X86_64) || defined(ARCH_X86_X32) || defined(ARCH_ARM)
1634 swap = slot_1; slot_1 = slot_2; slot_2 = swap;
1635 alu = alu == COND_G ? COND_L : COND_B;
1639 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
1640 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
1641 g(gen_frame_load_cmp(ctx, OP_SIZE_NATIVE, false, garbage, true, slot_1, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
1642 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_SBB, 1, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
1643 g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu, slot_r));
1647 swap = slot_1; slot_1 = slot_2; slot_2 = swap;
1648 alu = alu == COND_GE ? COND_LE : COND_BE;
1652 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
1653 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
1654 g(gen_frame_load_cmp(ctx, OP_SIZE_NATIVE, false, garbage, true, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
1655 g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_SBB, 1, slot_1, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
1656 g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu == COND_LE ? COND_GE : COND_AE, slot_r));
1659 case COND_L: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_less_,TYPE_INT_MAX)); goto do_upcall;
1660 case COND_LE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_less_equal_,TYPE_INT_MAX)); goto do_upcall;
1661 case COND_G: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_greater_,TYPE_INT_MAX)); goto do_upcall;
1662 case COND_GE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_greater_equal_,TYPE_INT_MAX)); goto do_upcall;
1663 case COND_B: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_uless_,TYPE_INT_MAX)); goto do_upcall;
1664 case COND_BE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_uless_equal_,TYPE_INT_MAX)); goto do_upcall;
1665 case COND_A: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ugreater_,TYPE_INT_MAX)); goto do_upcall;
1666 case COND_AE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ugreater_equal_,TYPE_INT_MAX)); goto do_upcall;
1667 do_upcall: g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, 0));
1671 internal(file_line, "gen_alu: unsupported condition %u", alu);
1675 #if defined(ARCH_X86)
1676 g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
1677 g(gen_frame_load_cmp_set_cond(ctx, op_size, garbage, slot_2, reg1, alu, slot_r));
1679 g(gen_frame_get(ctx, op_size, op_size == i_size_cmp(op_size) + (unsigned)zero ? garbage : alu == COND_L || alu == COND_LE || alu == COND_G || alu == COND_GE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
1680 g(gen_frame_load_cmp_set_cond(ctx, op_size, alu == COND_L || alu == COND_LE || alu == COND_G || alu == COND_GE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x, slot_2, reg1, alu, slot_r));
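/*
 * gen_alu1: single-operand counterpart of gen_alu, covering not/neg,
 * bswap, bit reverse, bsf/bsr/popcnt, boolean not and the fixed/integer
 * conversions; label_ovf is again taken when a MODE_INT operation
 * overflows.
 */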
1686 static bool attr_w gen_alu1(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_r)
1689 unsigned reg1, target;
1691 case MODE_FIXED: switch (op) {
1692 case OPCODE_FIXED_OP_not: alu = ALU1_NOT; goto do_alu;
1693 case OPCODE_FIXED_OP_neg: alu = ALU1_NEG; goto do_alu;
1694 case OPCODE_FIXED_OP_bswap:
1695 case OPCODE_FIXED_OP_bswap_alt1: alu = ALU1_BSWAP; goto do_bswap;
1696 case OPCODE_FIXED_OP_brev:
1697 case OPCODE_FIXED_OP_brev_alt1: alu = ALU1_BREV; goto do_brev;
1698 case OPCODE_FIXED_OP_bsf:
1699 case OPCODE_FIXED_OP_bsf_alt1: alu = ALU1_BSF; goto do_bsf_bsr_popcnt;
1700 case OPCODE_FIXED_OP_bsr:
1701 case OPCODE_FIXED_OP_bsr_alt1: alu = ALU1_BSR; goto do_bsf_bsr_popcnt;
1702 case OPCODE_FIXED_OP_popcnt:
1703 case OPCODE_FIXED_OP_popcnt_alt1: alu = ALU1_POPCNT; goto do_bsf_bsr_popcnt;
1704 case OPCODE_FIXED_OP_to_int: goto do_fixed_conv;
1705 case OPCODE_FIXED_OP_from_int: goto do_fixed_conv;
1706 case OPCODE_FIXED_OP_uto_int: goto conv_uto_int;
1707 case OPCODE_FIXED_OP_ufrom_int: goto conv_ufrom_int;
1708 default: internal(file_line, "gen_alu1: unsupported fixed operation %u", op);
1710 case MODE_INT: switch (op) {
1711 case OPCODE_INT_OP_not: alu = ALU1_NOT; mode = MODE_FIXED; goto do_alu;
1712 case OPCODE_INT_OP_neg: alu = ALU1_NEG; goto do_alu;
1713 case OPCODE_INT_OP_bsf: alu = ALU1_BSF; goto do_bsf_bsr_popcnt;
1714 case OPCODE_INT_OP_bsr: alu = ALU1_BSR; goto do_bsf_bsr_popcnt;
1715 case OPCODE_INT_OP_popcnt:
1716 case OPCODE_INT_OP_popcnt_alt1: alu = ALU1_POPCNT; goto do_bsf_bsr_popcnt;
1717 case OPCODE_INT_OP_to_int: goto do_conv;
1718 case OPCODE_INT_OP_from_int: goto do_conv;
1719 default: internal(file_line, "gen_alu1: unsupported int operation %u", op);
1721 case MODE_BOOL: switch (op) {
1722 case OPCODE_BOOL_OP_not: goto do_bool_not;
1723 default: internal(file_line, "gen_alu1: unsupported bool operation %u", op);
1726 internal(file_line, "gen_alu1: unsupported mode %u", mode);
1732 bool arch_use_flags = ARCH_HAS_FLAGS;
1734 #if defined(ARCH_POWER)
1735 arch_use_flags = false;
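/*
 * Double-word NOT/NEG.  On architectures without a suitable carry chain
 * (everything except x86/ARM/POWER) the wide NEG is routed to the
 * FIXED_/INT_unary_neg_TYPE_INT_MAX upcalls.  Otherwise NOT complements
 * both halves (s390 XORs them with -1), while NEG negates the low half so
 * that the borrow is produced and then finishes the high half with
 * NOT + SBB -1 on x86 or with ALU1_NGC elsewhere, jumping to label_ovf on
 * COND_O for MODE_INT.
 */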
1737 if (op_size > OP_SIZE_NATIVE) {
1738 #if !defined(ARCH_X86) && !defined(ARCH_ARM) && !defined(ARCH_POWER)
1739 if (alu == ALU1_NEG) {
1740 if (mode == MODE_FIXED)
1741 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_unary_neg_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, 0));
1743 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_unary_neg_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf));
1747 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1748 #if defined(ARCH_S390)
1749 if (alu == ALU1_NOT) {
1750 g(gen_load_constant(ctx, R_SCRATCH_3, -1));
1752 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_XOR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, 0));
1753 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_XOR, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_3, 0));
1755 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
1759 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_1, R_SCRATCH_1, alu == ALU1_NEG ? 2 : 0));
1760 if (alu == ALU1_NOT) {
1761 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NOT, R_SCRATCH_2, R_SCRATCH_2, 0));
1763 #if defined(ARCH_X86)
1764 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NOT, R_SCRATCH_2, R_SCRATCH_2, 0));
1766 g(gen_imm(ctx, -1, IMM_PURPOSE_SUB, OP_SIZE_NATIVE));
1767 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_SBB, ALU_WRITES_FLAGS(ALU_SBB, is_imm()));
1768 gen_one(R_SCRATCH_2);
1769 gen_one(R_SCRATCH_2);
1772 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NGC, R_SCRATCH_2, R_SCRATCH_2, (mode == MODE_INT)));
1775 if (mode == MODE_INT) {
1776 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_O, 0);
1777 gen_four(label_ovf);
1779 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
1782 if ((arch_use_flags || ARCH_SUPPORTS_TRAPS) && slot_1 == slot_r && i_size_cmp(op_size) == op_size + zero) {
1784 unsigned undo_alu = alu;
1785 if (slot_is_register(ctx, slot_1)) {
1786 unsigned reg = ctx->registers[slot_1];
1787 if (mode == MODE_INT && ARCH_SUPPORTS_TRAPS) {
1788 gen_insn(INSN_ALU1_TRAP, op_size, alu, ALU1_WRITES_FLAGS(alu));
1791 if (ARCH_TRAP_BEFORE || alu == undo_alu) {
1792 gen_four(label_ovf);
1795 ce = alloc_undo_label(ctx);
1798 gen_four(ce->undo_label);
1799 goto do_undo_opcode;
1802 g(gen_2address_alu1(ctx, i_size(op_size), alu, reg, reg, mode == MODE_INT));
1803 if (mode == MODE_INT) {
1804 if (alu != undo_alu) {
1805 ce = alloc_undo_label(ctx);
1808 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1809 gen_four(ce->undo_label);
1811 ce->undo_opcode = INSN_ALU1 + ARCH_PARTIAL_ALU(op_size);
1812 ce->undo_op_size = i_size(op_size);
1813 ce->undo_aux = undo_alu;
1814 ce->undo_writes_flags = ALU1_WRITES_FLAGS(undo_alu);
1815 ce->undo_parameters[0] = reg;
1816 ce->undo_parameters[1] = reg;
1817 ce->undo_parameters_len = 2;
1819 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1820 gen_four(label_ovf);
1825 #if defined(ARCH_X86)
1828 int64_t offset = (size_t)slot_1 * slot_size;
1829 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
1830 gen_insn(INSN_ALU1 + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, ALU1_WRITES_FLAGS(alu) | (mode == MODE_INT));
1831 gen_address_offset();
1832 gen_address_offset();
1833 if (mode == MODE_INT) {
1834 if (alu != undo_alu) {
1835 ce = alloc_undo_label(ctx);
1838 ce->undo_opcode = INSN_ALU1 + ARCH_PARTIAL_ALU(op_size);
1839 ce->undo_op_size = i_size(op_size);
1840 ce->undo_aux = undo_alu;
1841 ce->undo_writes_flags = ALU1_WRITES_FLAGS(undo_alu);
1842 m = mark_params(ctx);
1843 gen_address_offset();
1844 gen_address_offset();
1845 copy_params(ctx, ce, m);
1846 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1847 gen_four(ce->undo_label);
1849 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1850 gen_four(label_ovf);
1857 target = gen_frame_target(ctx, slot_r, mode == MODE_INT ? slot_1 : NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1858 if (mode == MODE_FIXED) {
1861 ex = ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
1862 if (ARCH_SUPPORTS_TRAPS && op_size >= OP_SIZE_4)
1864 if (op_size == i_size(op_size) + (unsigned)zero)
1867 g(gen_frame_get(ctx, op_size, ex, slot_1, mode == MODE_INT ? R_SCRATCH_2 : target, &reg1));
1868 #if defined(ARCH_S390)
1869 if (alu == ALU1_NOT) {
1870 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, target, reg1, -1, 0));
1872 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1876 #if defined(ARCH_X86)
1877 g(gen_2address_alu1(ctx, op_size, alu, target, reg1, mode == MODE_INT));
1879 if (mode == MODE_INT) {
1880 #if defined(ARCH_POWER)
1881 if (op_size == OP_SIZE_NATIVE) {
1882 g(gen_2address_alu1(ctx, i_size(op_size), alu, target, reg1, 0));
1883 if (alu == ALU1_NEG) {
1884 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_CG_SCRATCH, target, reg1, 1));
1886 gen_insn(INSN_JMP_COND, op_size, COND_L, 0);
1887 gen_four(label_ovf);
1889 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1894 if (!arch_use_flags && !ARCH_SUPPORTS_TRAPS && ARCH_HAS_ANDN && op_size >= OP_SIZE_4) {
1895 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, alu, target, reg1, 0));
1897 if (alu == ALU1_NEG) {
1898 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_3, target, reg1, 0));
1901 if (op_size < OP_SIZE_NATIVE)
1902 g(gen_extend(ctx, op_size, sign_x, R_SCRATCH_3, R_SCRATCH_3));
1904 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_3, COND_S, label_ovf));
1906 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1910 if (op_size <= OP_SIZE_2 || (!arch_use_flags && !ARCH_SUPPORTS_TRAPS)) {
1911 int64_t imm = (ARCH_PREFERS_SX(op_size) ? -0x80ULL : 0x80ULL) << (((1 << op_size) - 1) * 8);
1913 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg1, imm, COND_E, label_ovf));
1917 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1921 if (mode == MODE_INT) {
1922 gen_insn(INSN_ALU1_TRAP, op_size, alu, ALU1_WRITES_FLAGS(alu));
1925 gen_four(label_ovf);
1926 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1930 g(gen_2address_alu1(ctx, i_size(op_size), alu, target, reg1, mode == MODE_INT));
1932 if (mode == MODE_INT) {
1933 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1934 gen_four(label_ovf);
1936 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
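/* do_bool_not: a boolean "not" is just an XOR with 1. */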
1944 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1945 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
1947 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, target, reg1, 1, 0));
1949 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
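/*
 * do_bswap: byte swap. Architectures with a native bswap instruction
 * (subject to the CPU feature tests below) operate on a register, or on a
 * register pair for double-word values; everything else falls back to the
 * generic FIXED_unary_bswap upcall.
 */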
1957 bool attr_unused sx = false;
1958 #if defined(ARCH_X86) || defined(ARCH_ARM) || defined(ARCH_IA64) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_RISCV64) || defined(ARCH_S390)
1959 #if defined(ARCH_ARM32)
1960 if (unlikely(!cpu_test_feature(CPU_FEATURE_armv6)))
1961 goto do_generic_bswap;
1963 #if defined(ARCH_MIPS)
1964 if (unlikely(!MIPS_HAS_ROT))
1965 goto do_generic_bswap;
1966 sx = op_size == OP_SIZE_4;
1968 #if defined(ARCH_RISCV64)
1969 if (unlikely(!cpu_test_feature(CPU_FEATURE_zbb)))
1970 goto do_generic_bswap;
1972 #if defined(ARCH_S390)
1973 if (op_size == OP_SIZE_2)
1974 goto do_generic_bswap;
1976 #if defined(ARCH_X86)
1977 if (op_size >= OP_SIZE_4 && !cpu_test_feature(CPU_FEATURE_bswap))
1978 goto do_generic_bswap;
1980 if (op_size > OP_SIZE_NATIVE) {
1981 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1983 target = R_SCRATCH_1;
1985 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1986 g(gen_frame_get(ctx, op_size, sx ? sign_x : garbage, slot_1, target, &reg1));
1989 if (op_size == OP_SIZE_1) {
1990 #if defined(ARCH_IA64) || defined(ARCH_RISCV64)
1991 } else if (op_size == OP_SIZE_2 || op_size == OP_SIZE_4) {
1992 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSWAP, target, reg1, 0));
1994 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, target, target, op_size == OP_SIZE_2 ? 48 : 32, 0));
1996 } else if (op_size == OP_SIZE_2) {
1997 #if defined(ARCH_X86)
1998 g(gen_3address_rot_imm(ctx, OP_SIZE_2, ROT_ROR, target, reg1, 8, 0));
2000 g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_BSWAP16, target, reg1, 0));
2003 g(gen_2address_alu1(ctx, minimum(op_size, OP_SIZE_NATIVE), ALU1_BSWAP, target, reg1, 0));
2005 if (op_size > OP_SIZE_NATIVE) {
2006 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSWAP, R_SCRATCH_2, R_SCRATCH_2, 0));
2009 if (op_size > OP_SIZE_NATIVE)
2010 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_2, R_SCRATCH_1));
2012 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2015 goto do_generic_bswap;
2017 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bswap_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
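/*
 * do_brev: bit reversal. Only ARM, LoongArch and MIPS r6 have a usable
 * instruction; other architectures use the FIXED_unary_brev upcall.
 */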
2023 #if defined(ARCH_ARM) || defined(ARCH_LOONGARCH64) || (defined(ARCH_MIPS) && MIPS_R6)
2024 #if defined(ARCH_ARM32)
2025 if (unlikely(!cpu_test_feature(CPU_FEATURE_armv6t2)))
2026 goto do_generic_brev;
2028 if (op_size > OP_SIZE_NATIVE) {
2029 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
2031 target = R_SCRATCH_1;
2033 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2034 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
2037 g(gen_2address_alu1(ctx, minimum(maximum(OP_SIZE_4, op_size), OP_SIZE_NATIVE), ALU1_BREV, target, reg1, 0));
2038 if (op_size <= OP_SIZE_2) {
2039 g(gen_3address_rot_imm(ctx, OP_SIZE_4, ROT_SHR, target, target, op_size == OP_SIZE_1 ? 24 : 16, 0));
2041 if (op_size > OP_SIZE_NATIVE) {
2042 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BREV, R_SCRATCH_2, R_SCRATCH_2, 0));
2045 if (op_size > OP_SIZE_NATIVE)
2046 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_2, R_SCRATCH_1));
2048 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2051 goto do_generic_brev;
2053 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_brev_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
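/*
 * do_bsf_bsr_popcnt: bit scan forward/reverse and population count.
 * Each architecture gets its own fast path; where no direct instruction
 * exists, bsf can be emulated as popcnt((x - 1) & ~x), and anything else
 * falls back to the generic upcalls. In MODE_INT, inputs for which the
 * result is undefined (and negative inputs) jump to label_ovf; in
 * MODE_FIXED, bsf/bsr of zero yield -1.
 */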
2058 do_bsf_bsr_popcnt: {
2059 if (op_size > OP_SIZE_NATIVE) {
2060 #if defined(ARCH_X86)
2061 uint32_t label_finish = 0; /* avoid warning */
2062 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_popcnt)))
2063 goto do_generic_bsf_bsr_popcnt;
2064 if (alu == ALU1_BSR || alu == ALU1_POPCNT) {
2065 if (mode == MODE_INT) {
2066 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2067 g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_NATIVE));
2068 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
2069 gen_address_offset();
2072 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_S, 0);
2073 gen_four(label_ovf);
2076 if (alu == ALU1_BSF) {
2077 label_finish = alloc_label(ctx);
2078 if (unlikely(!label_finish))
2081 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2082 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSF, ALU1_WRITES_FLAGS(ALU1_BSF));
2083 gen_one(R_SCRATCH_1);
2084 gen_address_offset();
2086 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2087 gen_four(label_finish);
2089 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2090 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSF, ALU1_WRITES_FLAGS(ALU1_BSF));
2091 gen_one(R_SCRATCH_1);
2092 gen_address_offset();
2094 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, 8U << OP_SIZE_NATIVE, 0));
2096 if (alu == ALU1_BSR) {
2097 label_finish = alloc_label(ctx);
2098 if (unlikely(!label_finish))
2101 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2102 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSR, ALU1_WRITES_FLAGS(ALU1_BSR));
2103 gen_one(R_SCRATCH_1);
2104 gen_address_offset();
2106 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, 8U << OP_SIZE_NATIVE, 0));
2108 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2109 gen_four(label_finish);
2111 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2112 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSR, ALU1_WRITES_FLAGS(ALU1_BSR));
2113 gen_one(R_SCRATCH_1);
2114 gen_address_offset();
2116 if (alu == ALU1_BSF || alu == ALU1_BSR) {
2117 if (mode == MODE_INT) {
2118 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_E, 0);
2119 gen_four(label_ovf);
2121 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2122 gen_four(label_finish);
2124 g(gen_load_constant(ctx, R_SCRATCH_1, -1));
2127 gen_label(label_finish);
2129 if (mode == MODE_INT)
2132 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2133 internal(file_line, "gen_alu1: bad scratch registers");
2134 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2138 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2139 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2140 gen_address_offset();
2141 gen_one(R_SCRATCH_1);
2143 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2144 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2145 gen_address_offset();
2146 gen_one(R_SCRATCH_2);
2150 if (alu == ALU1_POPCNT) {
2151 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2152 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_POPCNT, ALU1_WRITES_FLAGS(ALU1_POPCNT));
2153 gen_one(R_SCRATCH_1);
2154 gen_address_offset();
2156 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2157 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_POPCNT, ALU1_WRITES_FLAGS(ALU1_POPCNT));
2158 gen_one(R_SCRATCH_2);
2159 gen_address_offset();
2161 g(gen_3address_alu(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 1));
2163 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2164 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2165 gen_address_offset();
2166 gen_one(R_SCRATCH_1);
2168 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2169 g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_NATIVE));
2170 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2171 gen_address_offset();
2177 goto do_generic_bsf_bsr_popcnt;
2179 #if defined(ARCH_X86)
2180 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_popcnt)))
2181 goto do_generic_bsf_bsr_popcnt;
2182 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2183 if (op_size == OP_SIZE_1 || ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT)) {
2184 g(gen_frame_get(ctx, op_size, zero_x, slot_1, target, &reg1));
2185 if ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT) {
2186 g(gen_cmp_test_jmp(ctx, INSN_TEST, op_size, reg1, reg1, alu == ALU1_BSR ? COND_LE : COND_S, label_ovf));
2188 g(gen_2address_alu1(ctx, maximum(op_size, OP_SIZE_2), alu, target, reg1, 1));
2189 if ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT)
2190 goto x86_bsf_bsr_popcnt_finish;
2192 g(gen_frame_load_op1(ctx, op_size, alu, 1, slot_1, target));
2194 if (alu == ALU1_POPCNT)
2195 goto x86_bsf_bsr_popcnt_finish;
2196 if (mode == MODE_FIXED) {
2197 uint32_t cmov_label;
2198 gen_insn(INSN_MOV, maximum(op_size, OP_SIZE_4), 0, 0);
2199 gen_one(R_SCRATCH_2);
2202 g(gen_cmov(ctx, maximum(op_size, OP_SIZE_4), COND_E, target, &cmov_label));
2203 gen_one(R_SCRATCH_2);
2205 gen_label(cmov_label);
2208 gen_insn(INSN_JMP_COND, maximum(op_size, OP_SIZE_2), COND_E, 0);
2209 gen_four(label_ovf);
2211 x86_bsf_bsr_popcnt_finish:
2212 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2215 #if defined(ARCH_ARM)
2216 #if defined(ARCH_ARM32)
2217 if (alu == ALU1_BSR && unlikely(!cpu_test_feature(CPU_FEATURE_armv6)))
2218 goto do_generic_bsf_bsr_popcnt;
2219 if (alu == ALU1_BSF && unlikely(!cpu_test_feature(CPU_FEATURE_armv6t2)))
2220 goto do_generic_bsf_bsr_popcnt;
2222 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_neon)))
2223 goto do_generic_bsf_bsr_popcnt;
2224 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2225 if (mode == MODE_INT) {
2226 g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2229 if (alu == ALU1_POPCNT) {
2230 g(gen_mov(ctx, OP_SIZE_NATIVE, FR_SCRATCH_1, reg1));
2231 gen_insn(INSN_FP_ALU1, OP_SIZE_NATIVE, FP_ALU1_VCNT8, 0);
2232 gen_one(FR_SCRATCH_1);
2233 gen_one(FR_SCRATCH_1);
2234 #if defined(ARCH_ARM32)
2235 if (op_size > OP_SIZE_1) {
2236 gen_insn(INSN_FP_ALU1, OP_SIZE_1, FP_ALU1_VPADDL, 0);
2237 gen_one(FR_SCRATCH_1);
2238 gen_one(FR_SCRATCH_1);
2240 if (op_size > OP_SIZE_2) {
2241 gen_insn(INSN_FP_ALU1, OP_SIZE_2, FP_ALU1_VPADDL, 0);
2242 gen_one(FR_SCRATCH_1);
2243 gen_one(FR_SCRATCH_1);
2246 if (op_size > OP_SIZE_1) {
2247 gen_insn(INSN_FP_ALU1, OP_SIZE_1, FP_ALU1_ADDV, 0);
2248 gen_one(FR_SCRATCH_1);
2249 gen_one(FR_SCRATCH_1);
2252 g(gen_frame_store(ctx, op_size, slot_r, 0, FR_SCRATCH_1));
2253 if (slot_is_register(ctx, slot_r))
2254 g(unspill(ctx, slot_r));
2258 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2259 if (mode == MODE_FIXED && alu == ALU1_BSF) {
2260 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
2266 if (alu == ALU1_BSF) {
2267 g(gen_2address_alu1(ctx, i_size(op_size), ALU1_BREV, target, reg1, 0));
2271 g(gen_2address_alu1(ctx, i_size(op_size), ALU1_LZCNT, target, reg1, 0));
2273 if (alu == ALU1_BSR) {
2274 g(gen_load_constant(ctx, R_SCRATCH_2, op_size == OP_SIZE_8 ? 63 : 31));
2275 g(gen_3address_alu(ctx, i_size(op_size), ALU_SUB, target, R_SCRATCH_2, target, 0));
2278 if (mode == MODE_FIXED && alu == ALU1_BSF) {
2279 #if defined(ARCH_ARM32)
2280 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2281 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2286 gen_insn(INSN_CSEL_INV, i_size(op_size), COND_NE, 0);
2294 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2297 #if defined(ARCH_ALPHA)
2298 if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
2299 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2300 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2301 if (mode == MODE_INT) {
2302 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2304 if (alu == ALU1_POPCNT) {
2305 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, reg1, 0));
2307 if (alu == ALU1_BSF) {
2308 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSF, target, reg1, 0));
2310 if (mode == MODE_FIXED) {
2311 g(gen_imm(ctx, -1, IMM_PURPOSE_MOVR, OP_SIZE_INT));
2312 gen_insn(INSN_MOVR, OP_SIZE_NATIVE, COND_E, 0);
2319 if (alu == ALU1_BSR) {
2320 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2322 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2324 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2326 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2330 #if defined(ARCH_MIPS)
2331 if (MIPS_HAS_CLZ && alu != ALU1_POPCNT) {
2332 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2333 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2334 if (mode == MODE_INT) {
2335 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2337 if (alu == ALU1_BSF) {
2338 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, target, reg1, 0));
2340 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, reg1, target, 0));
2343 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2345 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2347 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2349 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2353 #if defined(ARCH_POWER)
2354 if (alu == ALU1_BSF && (unlikely(!cpu_test_feature(CPU_FEATURE_v203)) || unlikely(!cpu_test_feature(CPU_FEATURE_v30))))
2355 goto do_generic_bsf_bsr_popcnt;
2356 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_v206)))
2357 goto do_generic_bsf_bsr_popcnt;
2358 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2359 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2360 if (mode == MODE_INT) {
2361 g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2363 if (alu == ALU1_POPCNT) {
2364 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, reg1, 0));
2366 if (alu == ALU1_BSF) {
2367 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSF, target, reg1, 0));
2369 if (mode == MODE_FIXED) {
2370 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_3, reg1, reg1, 1));
2372 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2373 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2379 if (alu == ALU1_BSR) {
2380 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2382 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2384 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2386 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2389 #if defined(ARCH_LOONGARCH64) || defined(ARCH_RISCV64)
2390 #if defined(ARCH_LOONGARCH64)
2391 if (alu == ALU1_POPCNT)
2392 goto do_generic_bsf_bsr_popcnt;
2394 #if defined(ARCH_RISCV64)
2395 if (unlikely(!cpu_test_feature(CPU_FEATURE_zbb)))
2396 goto do_generic_bsf_bsr_popcnt;
2398 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2399 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2400 if (mode == MODE_INT) {
2401 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2403 if (alu == ALU1_POPCNT) {
2404 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_POPCNT, target, reg1, 0));
2406 if (alu == ALU1_BSF) {
2407 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_BSF, target, reg1, 0));
2409 if (mode == MODE_FIXED) {
2410 g(gen_imm(ctx, 1, IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2411 gen_insn(INSN_CMP_DEST_REG, OP_SIZE_NATIVE, COND_B, 0);
2412 gen_one(R_SCRATCH_3);
2416 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, R_SCRATCH_3, 0));
2418 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, target, target, R_SCRATCH_3, 0));
2421 if (alu == ALU1_BSR) {
2422 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_LZCNT, target, reg1, 0));
2424 g(gen_load_constant(ctx, R_SCRATCH_3, op_size <= OP_SIZE_4 ? 31 : 63));
2426 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2428 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2431 #if defined(ARCH_IA64) || defined(ARCH_S390) || defined(ARCH_SPARC)
2432 if (alu == ALU1_BSF && !ARCH_HAS_ANDN)
2433 goto do_generic_bsf_bsr_popcnt;
2434 #if defined(ARCH_S390)
2435 if (!cpu_test_feature(CPU_FEATURE_misc_45) || !cpu_test_feature(CPU_FEATURE_misc_insn_ext_3))
2436 goto do_generic_bsf_bsr_popcnt;
2438 #if defined(ARCH_SPARC)
2440 goto do_generic_bsf_bsr_popcnt;
2442 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2443 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2444 if (mode == MODE_INT) {
2445 g(gen_cmp_test_jmp(ctx, INSN_TEST, maximum(op_size, OP_SIZE_4), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2447 if (ARCH_PREFERS_SX(op_size) && alu == ALU1_POPCNT && op_size < OP_SIZE_NATIVE) {
2448 g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
2452 if (alu == ALU1_POPCNT) {
2453 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, R_SCRATCH_1, reg1, 0));
2454 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
2457 if (alu == ALU1_BSF) {
2458 g(gen_3address_alu_imm(ctx, OP_SIZE_NATIVE, ALU_SUB, target, reg1, 1, 0));
2460 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ANDN, target, target, reg1, 0));
2462 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, target, 0));
2464 if (mode == MODE_FIXED) {
2465 unsigned attr_unused test_reg = R_SCRATCH_1;
2466 #if defined(ARCH_S390)
2467 g(gen_imm(ctx, 0, COND_IS_LOGICAL(COND_E) ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2468 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1 + COND_IS_LOGICAL(COND_E));
2472 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2473 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2478 #if defined(ARCH_IA64)
2479 g(gen_cmp_dest_reg(ctx, OP_SIZE_NATIVE, reg1, (unsigned)-1, R_CMP_RESULT, 0, COND_NE));
2480 test_reg = R_CMP_RESULT;
2482 g(gen_imm(ctx, -1, IMM_PURPOSE_MOVR, OP_SIZE_NATIVE));
2483 gen_insn(INSN_MOVR, OP_SIZE_NATIVE, COND_E, 0);
2491 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2495 do_generic_bsf_bsr_popcnt:
2496 if (alu == ALU1_BSF) {
2497 if (mode == MODE_FIXED)
2498 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bsf_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2500 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_bsf_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2502 if (alu == ALU1_BSR) {
2503 if (mode == MODE_FIXED)
2504 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bsr_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2506 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_bsr_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2508 if (alu == ALU1_POPCNT) {
2509 if (mode == MODE_FIXED)
2510 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_popcnt_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2512 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_popcnt_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
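/*
 * do_conv (and the fixed-point variant): conversion between integer types
 * of different widths. Widening stores a sign-extended value; narrowing
 * checks that the value fits into the destination type and jumps to
 * label_ovf if it does not.
 */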
2520 unsigned src_op_size, dest_op_size;
2521 const struct type *src_type, *dest_type;
2522 src_type = get_type_of_local(ctx, slot_1);
2523 dest_type = get_type_of_local(ctx, slot_r);
2525 if (TYPE_TAG_IS_FIXED(src_type->tag)) {
2526 src_op_size = TYPE_TAG_IDX_FIXED(src_type->tag) >> 1;
2528 src_op_size = TYPE_TAG_IDX_INT(src_type->tag);
2531 if (TYPE_TAG_IS_FIXED(dest_type->tag)) {
2532 dest_op_size = TYPE_TAG_IDX_FIXED(dest_type->tag) >> 1;
2534 dest_op_size = TYPE_TAG_IDX_INT(dest_type->tag);
2537 if (src_op_size <= OP_SIZE_NATIVE) {
2538 g(gen_frame_get(ctx, src_op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
2540 #if defined(ARCH_X86)
2541 if (dest_op_size < src_op_size)
2542 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, 0, false, R_SCRATCH_1));
2545 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_3));
2549 if (dest_op_size >= src_op_size) {
2550 if (dest_op_size <= OP_SIZE_NATIVE) {
2551 g(gen_frame_store(ctx, dest_op_size, slot_r, 0, reg1));
2553 if (src_op_size <= OP_SIZE_NATIVE) {
2554 #if defined(ARCH_X86)
2555 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2556 internal(file_line, "gen_alu1: bad scratch registers");
2557 if (reg1 == R_SCRATCH_1) {
2558 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2563 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, reg1, (1U << (OP_SIZE_NATIVE + 3)) - 1, false));
2564 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, reg1, R_SCRATCH_2));
2566 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, reg1, R_SCRATCH_3));
2571 if (src_op_size > OP_SIZE_NATIVE) {
2572 #if defined(ARCH_ARM)
2573 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
2574 gen_one(R_SCRATCH_3);
2575 gen_one(ARG_SHIFTED_REGISTER);
2576 gen_one(ARG_SHIFT_ASR | ((1U << (OP_SIZE_NATIVE + 3)) - 1));
2577 gen_one(R_SCRATCH_1);
2579 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2580 gen_four(label_ovf);
2581 #elif defined(ARCH_X86)
2582 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2583 internal(file_line, "gen_alu1: bad scratch registers");
2584 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2588 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2589 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
2590 gen_one(R_SCRATCH_2);
2591 gen_address_offset();
2593 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2594 gen_four(label_ovf);
2596 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, R_SCRATCH_1, (1U << (OP_SIZE_NATIVE + 3)) - 1, 0));
2598 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_3, R_SCRATCH_2, COND_NE, label_ovf));
2601 src_op_size = OP_SIZE_NATIVE;
2603 if (src_op_size > dest_op_size) {
2604 g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, dest_op_size, reg1, R_SCRATCH_2, label_ovf));
2606 g(gen_frame_store(ctx, dest_op_size, slot_r, 0, reg1));
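/* conv_uto_int / conv_ufrom_int: unsigned conversions always go through an upcall. */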
2612 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_uto_int_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2616 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_ufrom_int_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
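/*
 * gen_constant - materialize an immediate constant read from the code
 * stream into slot_r. "shrt" selects the 16-bit sign-extended form;
 * values wider than the native word are stored as two halves, and
 * register-allocated real slots are refreshed with unspill() after the
 * store goes through memory.
 */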
2620 static bool attr_w gen_constant(struct codegen_context *ctx, bool real, unsigned op_size, bool shrt, frame_t slot_r)
2624 c = (int16_t)get_unaligned_16(ctx->current_position);
2625 } else switch (op_size) {
2626 #define fx(n, type, utype, sz, bits) \
2628 c = (type)cat(get_unaligned_,bits)(ctx->current_position);\
2633 internal(file_line, "gen_constant: invalid type %u", op_size);
2635 if (op_size > OP_SIZE_NATIVE) {
2636 unsigned shift = (8U << OP_SIZE_NATIVE) - 1;
2637 g(gen_frame_store_imm_raw(ctx, OP_SIZE_NATIVE, slot_r, lo_word(OP_SIZE_NATIVE), c & ((2ULL << shift) - 1)));
2638 g(gen_frame_store_imm_raw(ctx, OP_SIZE_NATIVE, slot_r, hi_word(OP_SIZE_NATIVE), c >> 1 >> shift));
2639 if (real && slot_is_register(ctx, slot_r))
2640 g(unspill(ctx, slot_r));
2642 } else if (real && slot_is_register(ctx, slot_r)) {
2643 if (ARCH_HAS_FP_GP_MOV) {
2644 g(gen_load_constant(ctx, R_SCRATCH_1, c));
2645 g(gen_mov(ctx, op_size, ctx->registers[slot_r], R_SCRATCH_1));
2647 g(gen_frame_store_imm_raw(ctx, op_size, slot_r, 0, c));
2648 g(unspill(ctx, slot_r));
2651 g(gen_frame_store_imm(ctx, op_size, slot_r, 0, c));
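/*
 * gen_real_constant - load a floating-point literal. Power-of-two sized
 * types reuse gen_constant; other sizes are copied byte by byte from the
 * code stream into the frame slot.
 */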
2656 static bool attr_w gen_real_constant(struct codegen_context *ctx, const struct type *t, frame_t slot_r)
2659 if (is_power_of_2(t->size) && t->size <= sizeof(uintbig_t))
2660 return gen_constant(ctx, true, log_2(t->size), false, slot_r);
2662 g(load_function_offset(ctx, R_SCRATCH_3, offsetof(struct data, u_.function.code)));
2664 offset = (ctx->current_position - da(ctx->fn,function)->code) * sizeof(code_t);
2666 g(gen_memcpy_raw(ctx, R_FRAME, (size_t)slot_r * slot_size, R_SCRATCH_3, offset, t->size, minimum(t->align, sizeof(code_t))));
2667 if (slot_is_register(ctx, slot_r))
2668 g(unspill(ctx, slot_r));
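/* gen_copy - copy a flat value from slot_1 to slot_r. */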
2673 static bool attr_w gen_copy(struct codegen_context *ctx, unsigned op_size, frame_t slot_1, frame_t slot_r)
2676 if (unlikely(op_size > OP_SIZE_NATIVE)) {
2677 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
2678 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
2681 unsigned target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2682 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
2683 g(gen_frame_store(ctx, op_size, slot_r, 0, reg1));
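/*
 * gen_fp_alu - binary floating-point arithmetic and comparisons.
 * Depending on what the target supports this uses native FP registers,
 * the x87 stack, conversion through single precision for half floats, or
 * a software upcall. Comparisons first test for the unordered (NaN) case
 * and jump to label_ovf.
 */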
2688 static bool attr_w gen_fp_alu(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, frame_t slot_r)
2690 unsigned attr_unused fp_alu;
2692 unsigned attr_unused op_size = real_type_to_op_size(real_type);
2693 unsigned reg1, reg2, target;
2695 case OPCODE_REAL_OP_add:
2696 case OPCODE_REAL_OP_add_alt1:
2697 case OPCODE_REAL_OP_add_alt2: fp_alu = FP_ALU_ADD; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_add_real16_t); label_ovf = 0; goto do_alu;
2698 case OPCODE_REAL_OP_subtract:
2699 case OPCODE_REAL_OP_subtract_alt1:
2700 case OPCODE_REAL_OP_subtract_alt2: fp_alu = FP_ALU_SUB; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_subtract_real16_t); label_ovf = 0; goto do_alu;
2701 case OPCODE_REAL_OP_multiply:
2702 case OPCODE_REAL_OP_multiply_alt1:
2703 case OPCODE_REAL_OP_multiply_alt2: fp_alu = FP_ALU_MUL; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_multiply_real16_t); label_ovf = 0; goto do_alu;
2704 case OPCODE_REAL_OP_divide:
2705 case OPCODE_REAL_OP_divide_alt1:
2706 case OPCODE_REAL_OP_divide_alt2: fp_alu = FP_ALU_DIV; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_divide_real16_t); label_ovf = 0; goto do_alu;
2707 case OPCODE_REAL_OP_modulo:
2708 case OPCODE_REAL_OP_power:
2709 case OPCODE_REAL_OP_ldexp:
2710 case OPCODE_REAL_OP_atan2: upc = offsetof(struct cg_upcall_vector_s, REAL_binary_modulo_real16_t) + (op - OPCODE_REAL_OP_modulo) * TYPE_REAL_N * sizeof(void (*)(void)); goto do_upcall;
2711 case OPCODE_REAL_OP_equal:
2712 case OPCODE_REAL_OP_equal_alt1:
2713 case OPCODE_REAL_OP_equal_alt2: fp_alu = FP_COND_E; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_equal_real16_t); goto do_cmp;
2714 case OPCODE_REAL_OP_not_equal:
2715 case OPCODE_REAL_OP_not_equal_alt1:
2716 case OPCODE_REAL_OP_not_equal_alt2: fp_alu = FP_COND_NE; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_not_equal_real16_t); goto do_cmp;
2717 case OPCODE_REAL_OP_less:
2718 case OPCODE_REAL_OP_less_alt1:
2719 case OPCODE_REAL_OP_less_alt2: fp_alu = FP_COND_B; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_less_real16_t); goto do_cmp;
2720 case OPCODE_REAL_OP_less_equal:
2721 case OPCODE_REAL_OP_less_equal_alt1:
2722 case OPCODE_REAL_OP_less_equal_alt2: fp_alu = FP_COND_BE; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_less_equal_real16_t); goto do_cmp;
2723 case OPCODE_REAL_OP_greater:
2724 case OPCODE_REAL_OP_greater_alt1:
2725 case OPCODE_REAL_OP_greater_alt2: fp_alu = FP_COND_A; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_greater_real16_t); goto do_cmp;
2726 case OPCODE_REAL_OP_greater_equal:
2727 case OPCODE_REAL_OP_greater_equal_alt1:
2728 case OPCODE_REAL_OP_greater_equal_alt2: fp_alu = FP_COND_AE; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_greater_equal_real16_t); goto do_cmp;
2729 default: internal(file_line, "gen_fp_alu: unsupported operation %u", op);
2733 if ((SUPPORTED_FP >> real_type) & 1) {
2734 #if defined(ARCH_IA64)
2735 if (unlikely(fp_alu == FP_ALU_DIV))
2738 #if defined(ARCH_X86)
2740 #elif defined(ARCH_S390)
2741 if ((op_size <= OP_SIZE_8 && (size_t)slot_2 * slot_size < 4096) || slot_is_register(ctx, slot_2))
2743 if (slot_is_register(ctx, slot_2))
2746 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, FR_SCRATCH_1);
2747 g(gen_frame_get(ctx, op_size, garbage, slot_1, FR_SCRATCH_1, &reg1));
2748 if (slot_is_register(ctx, slot_2)) {
2749 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, ctx->registers[slot_2]));
2751 if (target != reg1 && !ARCH_IS_3ADDRESS_FP) {
2752 g(gen_mov(ctx, op_size, target, reg1));
2755 g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size, IMM_PURPOSE_VLDR_VSTR_OFFSET, op_size));
2756 gen_insn(INSN_FP_ALU, op_size, fp_alu, 0);
2759 gen_address_offset();
2761 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2764 #if defined(ARCH_ALPHA)
2765 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2766 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2767 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, FR_SCRATCH_3);
2768 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, reg2));
2769 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2771 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2772 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2773 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
2774 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, reg2));
2775 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2779 #ifdef SUPPORTED_FP_X87
2780 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
2781 if (real_type != 3) {
2782 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2783 g(gen_frame_load_x87(ctx, INSN_X87_ALU, op_size, fp_alu, slot_1));
2785 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2786 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2787 gen_insn(INSN_X87_ALUP, op_size, fp_alu, 0);
2790 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
2794 #ifdef SUPPORTED_FP_HALF_CVT
2795 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
2796 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2797 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2798 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
2799 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2800 gen_one(FR_SCRATCH_1);
2802 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2803 gen_one(FR_SCRATCH_2);
2805 gen_insn(INSN_FP_ALU, OP_SIZE_4, fp_alu, 0);
2806 gen_one(FR_SCRATCH_1);
2807 gen_one(FR_SCRATCH_1);
2808 gen_one(FR_SCRATCH_2);
2809 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
2811 gen_one(FR_SCRATCH_1);
2812 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
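/*
 * do_cmp: floating-point comparison producing a flat boolean. An
 * unordered result (a NaN operand) jumps to label_ovf.
 */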
2819 if ((SUPPORTED_FP >> real_type) & 1
2820 #if defined(ARCH_ALPHA)
2821 && ARCH_SUPPORTS_TRAPS
2824 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2825 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2826 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2827 #if defined(ARCH_ALPHA)
2828 gen_insn(INSN_FP_CMP_DEST_REG_TRAP, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
2829 gen_one(FR_SCRATCH_3);
2832 gen_four(label_ovf);
2834 if (!ARCH_HAS_FP_GP_MOV) {
2835 g(gen_frame_store_raw(ctx, OP_SIZE_4, slot_r, 0, FR_SCRATCH_3));
2836 g(gen_frame_load_raw(ctx, OP_SIZE_4, sign_x, slot_r, 0, false, target));
2838 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_3));
2841 if (fp_alu == FP_COND_NE) {
2842 g(gen_imm(ctx, 0, IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2843 gen_insn(INSN_CMP_DEST_REG, OP_SIZE_NATIVE, COND_E, 0);
2848 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SHR, target, target, 30, 0));
2851 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2854 #elif defined(ARCH_IA64)
2855 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
2856 gen_one(R_CMP_RESULT);
2860 gen_insn(INSN_JMP_REG, OP_SIZE_NATIVE, COND_NE, 0);
2861 gen_one(R_CMP_RESULT);
2862 gen_four(label_ovf);
2864 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu, 0);
2865 gen_one(R_CMP_RESULT);
2869 g(gen_mov(ctx, OP_SIZE_NATIVE, target, R_CMP_RESULT));
2871 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2874 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
2875 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
2879 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
2880 gen_four(label_ovf);
2882 gen_insn(INSN_FP_CMP_COND, op_size, fp_alu, 1);
2886 gen_insn(INSN_FP_TEST_REG, OP_SIZE_NATIVE, fp_alu, 0);
2889 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2892 #elif defined(ARCH_RISCV64)
2893 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
2894 gen_one(R_SCRATCH_1);
2898 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
2899 gen_one(R_SCRATCH_2);
2903 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
2905 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, COND_E, label_ovf));
2907 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
2912 if (fp_alu == FP_COND_NE) {
2913 g(gen_imm(ctx, 1, IMM_PURPOSE_XOR, OP_SIZE_NATIVE));
2914 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_AND, false));
2920 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2923 gen_insn(INSN_FP_CMP, op_size, 0, 1);
2926 #if defined(ARCH_ARM32)
2927 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
2929 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
2930 gen_four(label_ovf);
2931 g(gen_frame_set_cond(ctx, op_size, false, fp_alu, slot_r));
2935 #ifdef SUPPORTED_FP_X87
2936 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
2937 if (likely(cpu_test_feature(CPU_FEATURE_cmov))) {
2938 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2939 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2940 gen_insn(INSN_X87_FCOMIP, op_size, 0, 0);
2942 gen_insn(INSN_X87_FSTP, op_size, 0, 0);
2944 gen_insn(INSN_JMP_COND, op_size, COND_P, 0);
2945 gen_four(label_ovf);
2946 g(gen_frame_set_cond(ctx, op_size, false, fp_alu & 0xf, slot_r));
2950 if (real_type != 3) {
2951 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2952 g(gen_frame_load_x87(ctx, INSN_X87_FCOMP, op_size, 0, slot_2));
2954 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2955 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2956 gen_insn(INSN_X87_FCOMPP, op_size, 0, 0);
2959 gen_insn(INSN_X87_FNSTSW, 0, 0, 0);
2963 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2968 gen_insn(INSN_JMP_COND, OP_SIZE_2, COND_NE, 0);
2969 gen_four(label_ovf);
2973 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2977 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
2980 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2984 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_E, slot_r));
2987 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2991 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
2994 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2998 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
3001 internal(file_line, "gen_fp_alu: invalid condition %u", fp_alu);
3006 #ifdef SUPPORTED_FP_HALF_CVT
3007 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3008 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3009 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
3010 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3011 gen_one(FR_SCRATCH_1);
3013 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3014 gen_one(FR_SCRATCH_2);
3016 gen_insn(INSN_FP_CMP, OP_SIZE_4, 0, 1);
3017 gen_one(FR_SCRATCH_1);
3018 gen_one(FR_SCRATCH_2);
3019 #if defined(ARCH_ARM32)
3020 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3022 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3023 gen_four(label_ovf);
3024 g(gen_frame_set_cond(ctx, op_size, false, fp_alu, slot_r));
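/* do_upcall: software fallback for operations without an inline implementation. */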
3030 return gen_alu_typed_upcall(ctx, upc, real_type, slot_1, slot_2, slot_r, label_ovf);
3033 #define OP_IS_ROUND(alu) ((alu) == FP_ALU1_ROUND || (alu) == FP_ALU1_FLOOR || (alu) == FP_ALU1_CEIL || (alu) == FP_ALU1_TRUNC)
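/*
 * gen_fp_alu1 - unary floating-point operations (neg, sqrt, rounding),
 * conversion to and from the default integer type, and the is_exception
 * test (a NaN check). The structure mirrors gen_fp_alu: native FP, x87,
 * half-float via single precision, then the generic upcall.
 */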
3035 static bool attr_w gen_fp_alu1(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_r)
3037 unsigned attr_unused fp_alu;
3039 unsigned attr_unused op_size = real_type_to_op_size(real_type);
3040 unsigned reg1, target;
3042 case OPCODE_REAL_OP_neg:
3043 case OPCODE_REAL_OP_neg_alt1:
3044 case OPCODE_REAL_OP_neg_alt2: fp_alu = FP_ALU1_NEG; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_neg_real16_t); label_ovf = 0; goto do_alu;
3045 case OPCODE_REAL_OP_sqrt:
3046 case OPCODE_REAL_OP_sqrt_alt1:
3047 case OPCODE_REAL_OP_sqrt_alt2: fp_alu = FP_ALU1_SQRT; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_sqrt_real16_t); label_ovf = 0; goto do_alu;
3048 case OPCODE_REAL_OP_round: fp_alu = FP_ALU1_ROUND; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_round_real16_t); label_ovf = 0; goto do_alu;
3049 case OPCODE_REAL_OP_floor: fp_alu = FP_ALU1_FLOOR; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_floor_real16_t); label_ovf = 0; goto do_alu;
3050 case OPCODE_REAL_OP_ceil: fp_alu = FP_ALU1_CEIL; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_ceil_real16_t); label_ovf = 0; goto do_alu;
3051 case OPCODE_REAL_OP_trunc: fp_alu = FP_ALU1_TRUNC; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_trunc_real16_t); label_ovf = 0; goto do_alu;
3052 case OPCODE_REAL_OP_to_int:
3053 case OPCODE_REAL_OP_to_int_alt1:
3054 case OPCODE_REAL_OP_to_int_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_to_int_real16_t); goto do_to_int;
3055 case OPCODE_REAL_OP_from_int:
3056 case OPCODE_REAL_OP_from_int_alt1:
3057 case OPCODE_REAL_OP_from_int_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_from_int_real16_t); label_ovf = 0; goto do_from_int;
3058 case OPCODE_REAL_OP_is_exception:
3059 case OPCODE_REAL_OP_is_exception_alt1:
3060 case OPCODE_REAL_OP_is_exception_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_is_exception_real16_t); label_ovf = 0; goto do_is_exception;
3061 default: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_cbrt_real16_t) + (op - OPCODE_REAL_OP_cbrt) * TYPE_REAL_N * sizeof(void (*)(void)); label_ovf = 0; goto do_upcall;
3065 if ((SUPPORTED_FP >> real_type) & 1 && (
3066 #if defined(ARCH_ALPHA)
3067 fp_alu == FP_ALU1_NEG ||
3068 (fp_alu == FP_ALU1_SQRT && cpu_test_feature(CPU_FEATURE_fix)) ||
3069 #elif defined(ARCH_ARM32)
3070 fp_alu == FP_ALU1_NEG ||
3071 fp_alu == FP_ALU1_SQRT ||
3072 #elif defined(ARCH_ARM64)
3074 #elif defined(ARCH_IA64)
3075 fp_alu == FP_ALU1_NEG ||
3076 #elif defined(ARCH_LOONGARCH64)
3077 fp_alu == FP_ALU1_NEG ||
3078 fp_alu == FP_ALU1_SQRT ||
3079 fp_alu == FP_ALU1_ROUND ||
3080 #elif defined(ARCH_MIPS)
3081 fp_alu == FP_ALU1_NEG ||
3082 (fp_alu == FP_ALU1_SQRT && MIPS_HAS_SQRT) ||
3083 #elif defined(ARCH_PARISC)
3084 (fp_alu == FP_ALU1_NEG && PA_20) ||
3085 fp_alu == FP_ALU1_SQRT ||
3086 #elif defined(ARCH_POWER)
3087 fp_alu == FP_ALU1_NEG ||
3088 (fp_alu == FP_ALU1_SQRT && cpu_test_feature(CPU_FEATURE_p2) && real_type != 4) ||
3089 #elif defined(ARCH_S390)
3091 #elif defined(ARCH_SPARC)
3092 fp_alu == FP_ALU1_NEG ||
3093 fp_alu == FP_ALU1_SQRT ||
3094 #elif defined(ARCH_RISCV64)
3095 fp_alu == FP_ALU1_NEG ||
3096 fp_alu == FP_ALU1_SQRT ||
3097 #elif defined(ARCH_X86)
3098 fp_alu == FP_ALU1_SQRT ||
3099 (OP_IS_ROUND(fp_alu) && cpu_test_feature(CPU_FEATURE_sse41)) ||
3102 #if defined(ARCH_S390)
3103 if (op_size <= OP_SIZE_8 && (size_t)slot_1 * slot_size < 4096 && fp_alu == FP_ALU1_SQRT) {
3104 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3105 if (slot_is_register(ctx, slot_1)) {
3106 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3108 gen_one(ctx->registers[slot_1]);
3109 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3111 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size, IMM_PURPOSE_VLDR_VSTR_OFFSET, op_size));
3112 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3114 gen_address_offset();
3115 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3120 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3121 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_2);
3122 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3125 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3128 #ifdef SUPPORTED_FP_X87
3129 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3130 if (fp_alu == FP_ALU1_NEG) {
3131 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3132 gen_insn(INSN_X87_FCHS, op_size, 0, 0);
3133 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3135 } else if (fp_alu == FP_ALU1_SQRT) {
3136 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3137 gen_insn(INSN_X87_FSQRT, op_size, 0, 0);
3138 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3140 } else if (fp_alu == FP_ALU1_ROUND) {
3141 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3142 gen_insn(INSN_X87_FRNDINT, op_size, 0, 0);
3143 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3148 #ifdef SUPPORTED_FP_HALF_CVT
3149 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1 && (
3150 #if defined(ARCH_ARM32)
3151 fp_alu == FP_ALU1_NEG ||
3152 fp_alu == FP_ALU1_SQRT ||
3153 #elif defined(ARCH_ARM64)
3155 #elif defined(ARCH_X86)
3156 fp_alu == FP_ALU1_SQRT ||
3157 (OP_IS_ROUND(fp_alu) && cpu_test_feature(CPU_FEATURE_sse41)) ||
3160 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3161 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3162 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3165 gen_insn(INSN_FP_ALU1, OP_SIZE_4, fp_alu, 0);
3168 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
3171 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
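/*
 * do_to_int: convert a real to the default integer type. The conversion
 * is followed by a range/NaN check that jumps to label_ovf when the value
 * cannot be represented.
 */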
3178 if ((SUPPORTED_FP >> real_type) & 1
3179 #if defined(ARCH_ALPHA)
3180 && ARCH_SUPPORTS_TRAPS
3182 #if defined(ARCH_MIPS)
3186 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3189 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
3190 #if defined(ARCH_X86)
3191 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3195 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, target, sign_bit(uint_default_t), COND_E, label_ovf));
3197 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3200 #if defined(ARCH_ARM) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS)
3201 #if defined(ARCH_ARM)
3202 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3205 #if defined(ARCH_ARM32)
3206 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3208 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3209 gen_four(label_ovf);
3211 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3215 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
3216 gen_four(label_ovf);
3218 #if defined(ARCH_ARM32) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS)
3219 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3220 gen_one(FR_SCRATCH_1);
3223 g(gen_mov(ctx, OP_SIZE_INT, target, FR_SCRATCH_1));
3225 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3229 g(gen_imm(ctx, (int_default_t)(sign_bit(uint_default_t) + 1), IMM_PURPOSE_ADD, OP_SIZE_INT));
3230 gen_insn(INSN_ALU, OP_SIZE_INT, ALU_ADD, ALU_WRITES_FLAGS(ALU_ADD, is_imm()));
3231 gen_one(R_SCRATCH_2);
3235 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_2, 1, COND_BE, label_ovf));
3237 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3240 #if defined(ARCH_IA64)
3241 gen_insn(INSN_FP_TO_INT64, op_size, 0, 0);
3242 gen_one(FR_SCRATCH_1);
3245 g(gen_mov(ctx, OP_SIZE_NATIVE, target, FR_SCRATCH_1));
3247 if (OP_SIZE_INT == OP_SIZE_4) {
3248 g(gen_extend(ctx, OP_SIZE_4, sign_x, R_SCRATCH_2, target));
3249 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_NE, label_ovf));
3251 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, sign_bit(uint64_t), COND_E, label_ovf));
3254 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3257 #if defined(ARCH_PARISC) || defined(ARCH_POWER) || defined(ARCH_SPARC)
3258 #if defined(ARCH_POWER)
3259 if (!cpu_test_feature(CPU_FEATURE_ppc))
3261 if (OP_SIZE_INT == OP_SIZE_4)
3264 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3265 gen_one(FR_SCRATCH_1);
3268 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_1));
3269 if (slot_is_register(ctx, slot_r))
3270 g(unspill(ctx, slot_r));
3271 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_r, 0, false, target));
3273 g(gen_imm(ctx, sign_bit(uint_default_t) + 1, IMM_PURPOSE_ADD, OP_SIZE_INT));
3274 gen_insn(INSN_ALU, i_size(OP_SIZE_INT), ALU_ADD, ALU_WRITES_FLAGS(ALU_ADD, is_imm()));
3275 gen_one(R_SCRATCH_2);
3279 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_2, 1, COND_BE, label_ovf));
3283 #if defined(ARCH_ALPHA)
3284 gen_insn(INSN_FP_TO_INT64_TRAP, op_size, 0, 0);
3285 gen_one(FR_SCRATCH_2);
3287 gen_four(label_ovf);
3289 if (OP_SIZE_INT == OP_SIZE_4) {
3290 gen_insn(INSN_FP_INT64_TO_INT32_TRAP, 0, 0, 0);
3291 gen_one(FR_SCRATCH_3);
3292 gen_one(FR_SCRATCH_2);
3293 gen_four(label_ovf);
3294 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_3));
3296 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_2));
3298 if (slot_is_register(ctx, slot_r))
3299 g(unspill(ctx, slot_r));
3302 #if defined(ARCH_S390)
3303 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 1);
3307 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3308 gen_four(label_ovf);
3310 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3313 #if defined(ARCH_RISCV64)
3314 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3318 g(gen_load_constant(ctx, R_SCRATCH_2, sign_bit(int_default_t)));
3320 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_E, label_ovf));
3322 g(gen_imm(ctx, -1, IMM_PURPOSE_XOR, i_size(size)));
3323 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_XOR, is_imm()));
3324 gen_one(R_SCRATCH_2);
3325 gen_one(R_SCRATCH_2);
3328 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_E, label_ovf));
3330 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3334 #ifdef SUPPORTED_FP_X87
3335 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3336 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3338 if (likely(cpu_test_feature(CPU_FEATURE_sse3))) {
3339 g(gen_frame_store_x87(ctx, INSN_X87_FISTTP, OP_SIZE_INT, slot_r));
3341 gen_insn(INSN_PUSH, OP_SIZE_NATIVE, 0, 0);
3345 gen_insn(INSN_X87_FLDCW, 0, 0, 0);
3346 gen_one(ARG_ADDRESS_1);
3350 g(gen_frame_store_x87(ctx, INSN_X87_FISTP, OP_SIZE_INT, slot_r));
3352 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
3353 gen_one(ARG_ADDRESS_1);
3359 gen_insn(INSN_X87_FLDCW, 0, 0, 0);
3360 gen_one(ARG_ADDRESS_1);
3364 gen_insn(INSN_ALU, i_size(OP_SIZE_ADDRESS), ALU_ADD, 1);
3368 gen_eight(1 << OP_SIZE_NATIVE);
3370 if (slot_is_register(ctx, slot_r))
3371 g(unspill(ctx, slot_r));
3372 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_r, 0, false, R_SCRATCH_1));
3374 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_1, sign_bit(int_default_t), COND_E, label_ovf));
3379 #ifdef SUPPORTED_FP_HALF_CVT
3380 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3381 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3382 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3383 gen_one(FR_SCRATCH_1);
3385 reg1 = FR_SCRATCH_1;
3387 op_size = real_type_to_op_size(real_type);
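/* do_from_int: convert the default integer type to a real; this cannot overflow. */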
3394 if ((SUPPORTED_FP >> real_type) & 1) {
3395 #if defined(ARCH_ALPHA) || defined(ARCH_ARM32) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC) || defined(ARCH_POWER) || defined(ARCH_SPARC)
3396 int int_op_size = OP_SIZE_INT;
3397 #if defined(ARCH_POWER)
3398 if (int_op_size == OP_SIZE_4)
3400 if (op_size == OP_SIZE_4 && !cpu_test_feature(CPU_FEATURE_v206))
3402 if (op_size == OP_SIZE_8 && !cpu_test_feature(CPU_FEATURE_ppc))
3405 if (slot_is_register(ctx, slot_1))
3406 g(spill(ctx, slot_1));
3407 g(gen_frame_load_raw(ctx, int_op_size, zero_x, slot_1, 0, false, FR_SCRATCH_1));
3408 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_2);
3409 #if defined(ARCH_ALPHA)
3410 if (OP_SIZE_INT == OP_SIZE_4) {
3411 gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
3412 gen_one(FR_SCRATCH_1);
3413 gen_one(FR_SCRATCH_1);
3415 int_op_size = OP_SIZE_8;
3418 gen_insn(int_op_size == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, op_size, 0, 0);
3420 gen_one(FR_SCRATCH_1);
3422 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3424 #elif defined(ARCH_IA64)
3425 g(gen_frame_get(ctx, OP_SIZE_INT, sign_x, slot_1, R_SCRATCH_1, &reg1));
3426 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3428 g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
3430 gen_insn(INSN_FP_FROM_INT64, op_size, 0, 0);
3434 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3437 g(gen_frame_get(ctx, OP_SIZE_INT, garbage, slot_1, R_SCRATCH_1, &reg1));
3438 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3440 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, op_size, 0, 0);
3444 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3448 #ifdef SUPPORTED_FP_X87
3449 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3450 if (slot_is_register(ctx, slot_1))
3451 g(spill(ctx, slot_1));
3452 g(gen_frame_load_x87(ctx, INSN_X87_FILD, OP_SIZE_INT, 0, slot_1));
3453 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3457 #ifdef SUPPORTED_FP_HALF_CVT
3458 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3459 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3460 #if defined(ARCH_ARM32)
3461 g(gen_frame_get(ctx, OP_SIZE_INT, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3463 gen_insn(INSN_FP_FROM_INT32, OP_SIZE_4, 0, 0);
3467 g(gen_frame_get(ctx, OP_SIZE_INT, garbage, slot_1, R_SCRATCH_1, &reg1));
3468 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, OP_SIZE_4, 0, 0);
3472 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
3475 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
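/*
 * do_is_exception: a real is treated as an exception if it is a NaN;
 * test it with an unordered compare of the value against itself.
 */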
3482 if ((SUPPORTED_FP >> real_type) & 1) {
3483 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3484 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
3485 #if defined(ARCH_ALPHA)
3486 gen_insn(INSN_FP_CMP_UNORDERED_DEST_REG, op_size, 0, 0);
3487 gen_one(FR_SCRATCH_2);
3488 gen_one(FR_SCRATCH_1);
3491 if (!cpu_test_feature(CPU_FEATURE_fix)) {
3492 g(gen_frame_store_raw(ctx, OP_SIZE_4, slot_r, 0, FR_SCRATCH_2));
3493 g(gen_frame_load_raw(ctx, OP_SIZE_4, sign_x, slot_r, 0, false, target));
3495 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_2));
3498 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SHR, target, target, 30, 0));
3500 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3503 #elif defined(ARCH_IA64)
3504 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
3505 gen_one(R_CMP_RESULT);
3509 g(gen_mov(ctx, OP_SIZE_NATIVE, target, R_CMP_RESULT));
3511 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3512 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
3513 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3517 gen_insn(INSN_FP_TEST_REG, OP_SIZE_NATIVE, FP_COND_P, 0);
3520 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3521 #elif defined(ARCH_RISCV64)
3522 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3527 g(gen_imm(ctx, 1, IMM_PURPOSE_XOR, OP_SIZE_NATIVE));
3528 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_XOR, is_imm()));
3533 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3535 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3538 #if defined(ARCH_ARM32)
3539 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3541 g(gen_frame_set_cond(ctx, op_size, false, FP_COND_P, slot_r));
3545 #ifdef SUPPORTED_FP_X87
3546 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3547 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3548 if (likely(cpu_test_feature(CPU_FEATURE_cmov))) {
3549 gen_insn(INSN_X87_FCOMIP, op_size, 0, 0);
3552 g(gen_frame_set_cond(ctx, op_size, false, COND_P, slot_r));
3556 gen_insn(INSN_X87_FCOMP, op_size, 0, 0);
3559 gen_insn(INSN_X87_FNSTSW, 0, 0, 0);
3563 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
3568 g(gen_frame_set_cond(ctx, op_size, false, COND_NE, slot_r));
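/* Half precision NaN test: widen to single precision, compare the value with itself and store the unordered (parity) condition. */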
3573 #ifdef SUPPORTED_FP_HALF_CVT
3574 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3575 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3576 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3577 gen_one(FR_SCRATCH_1);
3579 gen_insn(INSN_FP_CMP, OP_SIZE_4, 0, 1);
3580 gen_one(FR_SCRATCH_1);
3581 gen_one(FR_SCRATCH_1);
3582 #if defined(ARCH_ARM32)
3583 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3585 g(gen_frame_set_cond(ctx, op_size, false, FP_COND_P, slot_r));
3591 g(gen_alu_typed_upcall(ctx, upc, real_type, slot_1, NO_FRAME_T, slot_r, label_ovf));
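/* gen_is_exception: if a flat-typed slot really holds a flat value it cannot be an exception, so control jumps to no_ex_label; otherwise the pointer is inspected and anything that might be an exception (a thunk, or flat-tagged data for non-flat types) escapes to the interpreter. The fast path therefore only ever stores false into slot_r. */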
3595 static bool attr_w gen_is_exception(struct codegen_context *ctx, frame_t slot_1, frame_t slot_r)
3597 uint32_t no_ex_label, escape_label;
3598 const struct type *type = get_type_of_local(ctx, slot_1);
3600 no_ex_label = alloc_label(ctx);
3601 if (unlikely(!no_ex_label))
3603 escape_label = alloc_escape_label(ctx);
3604 if (unlikely(!escape_label))
3607 if (TYPE_IS_FLAT(type))
3608 g(gen_test_1_jz_cached(ctx, slot_1, no_ex_label));
3610 g(gen_frame_load(ctx, OP_SIZE_SLOT, zero_x, slot_1, 0, false, R_SCRATCH_1));
3611 g(gen_ptr_is_thunk(ctx, R_SCRATCH_1, slot_1, escape_label));
3613 if (!TYPE_IS_FLAT(type)) {
3614 g(gen_compare_da_tag(ctx, R_SCRATCH_1, DATA_TAG_flat, COND_E, escape_label, R_SCRATCH_1));
3617 gen_label(no_ex_label);
3618 g(gen_frame_clear(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r));
3620 flag_set(ctx, slot_r, false);
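/* gen_system_property: load the integer from slot_1 into the first argument register, call the cg_upcall_ipret_system_property upcall and store the integer result from R_RET0 into slot_r. */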
3625 static bool attr_w gen_system_property(struct codegen_context *ctx, frame_t slot_1, frame_t slot_r)
3627 uint32_t escape_label;
3629 escape_label = alloc_escape_label(ctx);
3630 if (unlikely(!escape_label))
3633 g(gen_test_1_cached(ctx, slot_1, escape_label));
3635 g(gen_upcall_start(ctx, 1));
3637 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_1, 0, false, R_ARG0));
3638 g(gen_upcall_argument(ctx, 0));
3640 g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, cg_upcall_ipret_system_property), 1));
3642 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, R_RET0));
3644 flag_set(ctx, slot_1, false);
3645 flag_set(ctx, slot_r, false);
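/* gen_alu_jmp: fused compare (or boolean ALU) and conditional branch. Combinations that cannot be fused set *failed to true so that the caller can emit the unfused ALU + jump sequence instead. */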
3650 static bool attr_w gen_alu_jmp(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, frame_t slot_1, frame_t slot_2, int32_t jmp_offset, bool *failed)
3655 unsigned attr_unused reg2;
3657 *failed = true; return true;
3660 case MODE_FIXED: switch (op) {
3661 case OPCODE_FIXED_OP_equal: alu = COND_E; goto do_compare;
3662 case OPCODE_FIXED_OP_not_equal: alu = COND_NE; goto do_compare;
3663 case OPCODE_FIXED_OP_less: alu = COND_L; goto do_compare;
3664 case OPCODE_FIXED_OP_less_equal: alu = COND_LE; goto do_compare;
3665 case OPCODE_FIXED_OP_greater: alu = COND_G; goto do_compare;
3666 case OPCODE_FIXED_OP_greater_equal: alu = COND_GE; goto do_compare;
3667 case OPCODE_FIXED_OP_uless: alu = COND_B; goto do_compare;
3668 case OPCODE_FIXED_OP_uless_equal: alu = COND_BE; goto do_compare;
3669 case OPCODE_FIXED_OP_ugreater: alu = COND_A; goto do_compare;
3670 case OPCODE_FIXED_OP_ugreater_equal: alu = COND_AE; goto do_compare;
3671 case OPCODE_FIXED_OP_bt: *failed = true; return true;
3672 default: internal(file_line, "gen_alu_jmp: unsupported fixed operation %u", op);
3674 case MODE_INT: switch (op) {
3675 case OPCODE_INT_OP_equal: alu = COND_E; goto do_compare;
3676 case OPCODE_INT_OP_not_equal: alu = COND_NE; goto do_compare;
3677 case OPCODE_INT_OP_less: alu = COND_L; goto do_compare;
3678 case OPCODE_INT_OP_less_equal: alu = COND_LE; goto do_compare;
3679 case OPCODE_INT_OP_greater: alu = COND_G; goto do_compare;
3680 case OPCODE_INT_OP_greater_equal: alu = COND_GE; goto do_compare;
3681 case OPCODE_INT_OP_bt: *failed = true; return true;
3682 default: internal(file_line, "gen_alu_jmp: unsupported int operation %u", op);
3684 case MODE_BOOL: switch (op) {
3685 case OPCODE_BOOL_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
3686 case OPCODE_BOOL_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
3687 case OPCODE_BOOL_OP_equal: alu = COND_E; mode = MODE_FIXED; goto do_compare;
3688 case OPCODE_BOOL_OP_not_equal: alu = COND_NE; mode = MODE_FIXED; goto do_compare;
3689 case OPCODE_BOOL_OP_less: alu = COND_L; mode = MODE_FIXED; goto do_compare;
3690 case OPCODE_BOOL_OP_less_equal: alu = COND_LE; mode = MODE_FIXED; goto do_compare;
3691 case OPCODE_BOOL_OP_greater: alu = COND_G; mode = MODE_FIXED; goto do_compare;
3692 case OPCODE_BOOL_OP_greater_equal: alu = COND_GE; mode = MODE_FIXED; goto do_compare;
3693 default: internal(file_line, "gen_alu_jmp: unsupported bool operation %u", op);
3696 internal(file_line, "gen_alu_jmp: unsupported mode %u", mode);
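/* do_compare: when slot_2 is register-cached and slot_1 is not, the condition is mirrored (L<->G, LE<->GE, B<->A, BE<->AE) so the comparison can be emitted with the register-resident operand first. */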
3698 bool attr_unused logical;
3699 if (unlikely(op_size > OP_SIZE_NATIVE)) {
3703 if (slot_is_register(ctx, slot_2) && !slot_is_register(ctx, slot_1)) {
3708 case COND_L: alu = COND_G; break;
3709 case COND_LE: alu = COND_GE; break;
3710 case COND_G: alu = COND_L; break;
3711 case COND_GE: alu = COND_LE; break;
3712 case COND_B: alu = COND_A; break;
3713 case COND_BE: alu = COND_AE; break;
3714 case COND_A: alu = COND_B; break;
3715 case COND_AE: alu = COND_BE; break;
3718 ex = op_size == i_size_cmp(op_size) + (unsigned)zero ? garbage : alu == COND_L || alu == COND_LE || alu == COND_G || alu == COND_GE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
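/* Load the operands with the extension chosen above (sign extension for signed comparisons or when the architecture prefers it, zero extension otherwise) and branch on the negated condition (alu ^ 1), i.e. the generated jump is taken when the comparison is false. */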
3719 g(gen_frame_get(ctx, op_size, ex, slot_1, R_SCRATCH_1, &reg1));
3720 if (ARCH_HAS_JMP_2REGS(alu)) {
3721 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3722 g(gen_jump(ctx, jmp_offset, i_size_cmp(op_size), alu ^ 1, reg1, reg2));
3726 logical = COND_IS_LOGICAL(alu ^ 1);
3727 g(gen_frame_load_cmp(ctx, op_size, logical, ex, false, slot_2, 0, false, reg1));
3728 g(gen_jump(ctx, jmp_offset, op_size, alu ^ 1, -1U, -1U));
3730 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3731 g(gen_cmp_dest_reg(ctx, op_size, reg1, reg2, R_CMP_RESULT, 0, alu));
3732 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, R_CMP_RESULT, -1U));
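/* do_alu (boolean and / or): on x86 an AND with a spilled operand is emitted as TEST against memory; flag architectures use TEST or a flags-writing ALU, and the generic path uses a three-address ALU followed by a zero test of the result. */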
3737 if (slot_is_register(ctx, slot_2) && !slot_is_register(ctx, slot_1)) {
3742 ex = op_size == i_size(op_size) + (unsigned)zero ? garbage : ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
3743 g(gen_frame_get(ctx, op_size, ex, slot_1, R_SCRATCH_1, &reg1));
3744 #if defined(ARCH_X86)
3745 if (alu == ALU_AND && !slot_is_register(ctx, slot_2)) {
3746 g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size, IMM_PURPOSE_LDR_OFFSET, op_size));
3747 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
3749 gen_address_offset();
3750 g(gen_jump(ctx, jmp_offset, op_size, COND_E, -1U, -1U));
3754 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3755 #if ARCH_HAS_FLAGS && !defined(ARCH_S390)
3756 if (alu == ALU_AND) {
3757 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
3760 g(gen_jump(ctx, jmp_offset, op_size, COND_E, -1U, -1U));
3764 #if defined(ARCH_ARM64)
3769 g(gen_3address_alu(ctx, i_size(op_size), alu, R_SCRATCH_1, reg1, reg2, 1));
3770 g(gen_jump(ctx, jmp_offset, i_size(op_size), COND_E, -1U, -1U));
3775 g(gen_3address_alu(ctx, i_size(op_size), alu, R_SCRATCH_1, reg1, reg2, 0));
3776 g(gen_jump(ctx, jmp_offset, i_size(op_size), COND_E, R_SCRATCH_1, -1U));
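/* gen_fp_alu_jmp: fused floating-point compare and branch. Operands that compare unordered (NaNs) are routed to label_ovf; otherwise the branch is taken on the negated FP condition. */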
3781 static bool attr_w gen_fp_alu_jmp(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, int32_t jmp_offset, bool *failed)
3783 unsigned attr_unused fp_alu;
3784 unsigned attr_unused op_size = real_type_to_op_size(real_type);
3785 unsigned reg1, reg2;
3786 unsigned attr_unused target;
3788 case OPCODE_REAL_OP_equal:
3789 case OPCODE_REAL_OP_equal_alt1:
3790 case OPCODE_REAL_OP_equal_alt2: fp_alu = FP_COND_E; goto do_cmp;
3791 case OPCODE_REAL_OP_not_equal:
3792 case OPCODE_REAL_OP_not_equal_alt1:
3793 case OPCODE_REAL_OP_not_equal_alt2: fp_alu = FP_COND_NE; goto do_cmp;
3794 case OPCODE_REAL_OP_less:
3795 case OPCODE_REAL_OP_less_alt1:
3796 case OPCODE_REAL_OP_less_alt2: fp_alu = FP_COND_B; goto do_cmp;
3797 case OPCODE_REAL_OP_less_equal:
3798 case OPCODE_REAL_OP_less_equal_alt1:
3799 case OPCODE_REAL_OP_less_equal_alt2: fp_alu = FP_COND_BE; goto do_cmp;
3800 case OPCODE_REAL_OP_greater:
3801 case OPCODE_REAL_OP_greater_alt1:
3802 case OPCODE_REAL_OP_greater_alt2: fp_alu = FP_COND_A; goto do_cmp;
3803 case OPCODE_REAL_OP_greater_equal:
3804 case OPCODE_REAL_OP_greater_equal_alt1:
3805 case OPCODE_REAL_OP_greater_equal_alt2: fp_alu = FP_COND_AE; goto do_cmp;
3806 default: internal(file_line, "gen_fp_alu_jmp: unsupported operation %u", op);
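/* do_cmp: per-architecture comparison. On Alpha the trapping compare apparently delivers NaNs to label_ovf directly; the other targets test the unordered condition explicitly before the real comparison. */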
3810 if ((SUPPORTED_FP >> real_type) & 1
3811 #if defined(ARCH_ALPHA)
3812 && ARCH_SUPPORTS_TRAPS && cpu_test_feature(CPU_FEATURE_fix)
3815 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3816 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
3817 target = R_SCRATCH_1;
3818 #if defined(ARCH_ALPHA)
3819 gen_insn(INSN_FP_CMP_DEST_REG_TRAP, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
3820 gen_one(FR_SCRATCH_3);
3823 gen_four(label_ovf);
3825 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_3));
3827 if (fp_alu == FP_COND_NE) {
3828 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_NE, target, -1U));
3830 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, target, -1U));
3834 #elif defined(ARCH_IA64)
3835 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
3836 gen_one(R_CMP_RESULT);
3840 gen_insn(INSN_JMP_REG, OP_SIZE_NATIVE, COND_NE, 0);
3841 gen_one(R_CMP_RESULT);
3842 gen_four(label_ovf);
3844 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu, 0);
3845 gen_one(R_CMP_RESULT);
3849 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, R_CMP_RESULT, -1U));
3852 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
3853 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3857 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
3858 gen_four(label_ovf);
3860 gen_insn(INSN_FP_CMP_COND, op_size, fp_alu ^ 1, 1);
3864 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, fp_alu ^ 1, -1U, -1U));
3867 #elif defined(ARCH_RISCV64)
3868 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3869 gen_one(R_SCRATCH_1);
3873 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3874 gen_one(R_SCRATCH_2);
3878 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
3880 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, COND_E, label_ovf));
3882 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
3887 if (fp_alu == FP_COND_NE) {
3888 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_NE, target, -1U));
3890 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, target, -1U));
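/* Generic flags path: INSN_FP_CMP sets the condition flags (ARM32 transfers them first with INSN_FP_TO_INT_FLAGS), the parity jump routes NaNs to label_ovf, and the final branch uses the negated condition (fp_alu ^ 1). */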
3894 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3897 #if defined(ARCH_ARM32)
3898 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3900 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3901 gen_four(label_ovf);
3902 g(gen_jump(ctx, jmp_offset, op_size, fp_alu ^ 1, -1U, -1U));