 * Copyright (C) 2024 Mikulas Patocka
 * This file is part of Ajla.
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
static bool attr_w gen_jump(struct codegen_context *ctx, int32_t jmp_offset, unsigned op_size, unsigned cond, unsigned reg1, unsigned reg2);
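/*
 * Slow path for binary operations: evaluate them through the upcall vector.
 * Register-allocated operand slots are spilled first, arguments are passed
 * as frame addresses (a constant second operand is passed by value), and a
 * zero return value from the helper branches to label_ovf.
 */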
static bool attr_w gen_alu_upcall(struct codegen_context *ctx, size_t upcall, unsigned op_size, frame_t slot_1, frame_t slot_2, frame_t slot_r, uint32_t label_ovf)
if (slot_is_register(ctx, slot_1))
g(spill(ctx, slot_1));
if (slot_2 != NO_FRAME_T && slot_is_register(ctx, slot_2))
g(spill(ctx, slot_2));
g(gen_upcall_start(ctx, frame_t_is_const(slot_2) ? 4 : slot_2 != NO_FRAME_T ? 3 : 2));
g(gen_frame_address(ctx, slot_1, 0, R_ARG0));
g(gen_upcall_argument(ctx, 0));
if (frame_t_is_const(slot_2)) {
g(gen_load_constant(ctx, R_ARG1, frame_t_get_const(slot_2)));
g(gen_upcall_argument(ctx, 1));
g(gen_frame_address(ctx, slot_r, 0, R_ARG2));
g(gen_upcall_argument(ctx, 2));
g(gen_get_upcall_pointer(ctx, upcall, R_ARG3));
g(gen_upcall_argument(ctx, 3));
x_offs = offsetof(struct cg_upcall_vector_s, INT_binary_const_int8_t) + op_size * sizeof(void (*)(void));
g(gen_upcall(ctx, x_offs, 4));
} else if (slot_2 != NO_FRAME_T) {
g(gen_frame_address(ctx, slot_2, 0, R_ARG1));
g(gen_upcall_argument(ctx, 1));
g(gen_frame_address(ctx, slot_r, 0, R_ARG2));
g(gen_upcall_argument(ctx, 2));
g(gen_upcall(ctx, upcall, 3));
g(gen_frame_address(ctx, slot_r, 0, R_ARG1));
g(gen_upcall_argument(ctx, 1));
g(gen_upcall(ctx, upcall, 2));
if (slot_is_register(ctx, slot_r))
g(unspill(ctx, slot_r));
g(gen_jmp_on_zero(ctx, OP_SIZE_1, R_RET0, COND_E, label_ovf));
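/*
 * Same as gen_alu_upcall, but the caller passes the offset of the int8_t
 * helper and op_size selects the matching wider variant in the table.
 */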
static bool attr_w gen_alu_typed_upcall(struct codegen_context *ctx, size_t upcall, unsigned op_size, frame_t slot_1, frame_t slot_2, frame_t slot_r, uint32_t label_ovf)
upcall += op_size * sizeof(void (*)(void));
return gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, label_ovf);
#define MODE_ARRAY_LEN_GT 4
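/*
 * Main code generator for binary operations.  The opcode is first mapped to
 * an internal ALU or condition code, then dispatched to the matching code
 * path (alu, multiply, divide, shift, bit test, compare).  MODE_INT variants
 * additionally detect overflow and jump to label_ovf.
 */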
static bool attr_w gen_alu(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, frame_t slot_r)
unsigned reg1, reg2, reg3, target;
case MODE_FIXED: switch (op) {
case OPCODE_FIXED_OP_add: alu = ALU_ADD; goto do_alu;
case OPCODE_FIXED_OP_subtract: alu = ALU_SUB; goto do_alu;
case OPCODE_FIXED_OP_multiply: goto do_multiply;
case OPCODE_FIXED_OP_divide:
case OPCODE_FIXED_OP_divide_alt1: sgn = true; mod = false; goto do_divide;
case OPCODE_FIXED_OP_udivide:
case OPCODE_FIXED_OP_udivide_alt1: sgn = false; mod = false; goto do_divide;
case OPCODE_FIXED_OP_modulo:
case OPCODE_FIXED_OP_modulo_alt1: sgn = true; mod = true; goto do_divide;
case OPCODE_FIXED_OP_umodulo:
case OPCODE_FIXED_OP_umodulo_alt1: sgn = false; mod = true; goto do_divide;
case OPCODE_FIXED_OP_power: return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_binary_power_int8_t), op_size, slot_1, slot_2, slot_r, 0);
case OPCODE_FIXED_OP_and: alu = ALU_AND; goto do_alu;
case OPCODE_FIXED_OP_or: alu = ALU_OR; goto do_alu;
case OPCODE_FIXED_OP_xor: alu = ALU_XOR; goto do_alu;
case OPCODE_FIXED_OP_shl: alu = ROT_SHL; goto do_shift;
case OPCODE_FIXED_OP_shr: alu = ROT_SAR; goto do_shift;
case OPCODE_FIXED_OP_ushr: alu = ROT_SHR; goto do_shift;
case OPCODE_FIXED_OP_rol: alu = ROT_ROL; goto do_shift;
case OPCODE_FIXED_OP_ror: alu = ROT_ROR; goto do_shift;
case OPCODE_FIXED_OP_bts: alu = BTX_BTS; goto do_bt;
case OPCODE_FIXED_OP_btr: alu = BTX_BTR; goto do_bt;
case OPCODE_FIXED_OP_btc: alu = BTX_BTC; goto do_bt;
case OPCODE_FIXED_OP_equal: alu = COND_E; goto do_compare;
case OPCODE_FIXED_OP_not_equal: alu = COND_NE; goto do_compare;
case OPCODE_FIXED_OP_less: alu = COND_L; goto do_compare;
case OPCODE_FIXED_OP_less_equal: alu = COND_LE; goto do_compare;
case OPCODE_FIXED_OP_uless: alu = COND_B; goto do_compare;
case OPCODE_FIXED_OP_uless_equal: alu = COND_BE; goto do_compare;
case OPCODE_FIXED_OP_bt: alu = BTX_BT; goto do_bt;
default: internal(file_line, "gen_alu: unsupported fixed operation %u", op);
case MODE_INT: switch (op) {
case OPCODE_INT_OP_add: alu = ALU_ADD; goto do_alu;
case OPCODE_INT_OP_subtract: alu = ALU_SUB; goto do_alu;
case OPCODE_INT_OP_multiply: goto do_multiply;
case OPCODE_INT_OP_divide:
case OPCODE_INT_OP_divide_alt1: sgn = true; mod = false; goto do_divide;
case OPCODE_INT_OP_modulo:
case OPCODE_INT_OP_modulo_alt1: sgn = true; mod = true; goto do_divide;
case OPCODE_INT_OP_power: return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_power_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf);
case OPCODE_INT_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
case OPCODE_INT_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
case OPCODE_INT_OP_xor: alu = ALU_XOR; mode = MODE_FIXED; goto do_alu;
case OPCODE_INT_OP_shl: alu = ROT_SHL; goto do_shift;
case OPCODE_INT_OP_shr: alu = ROT_SAR; goto do_shift;
case OPCODE_INT_OP_bts: alu = BTX_BTS; goto do_bt;
case OPCODE_INT_OP_btr: alu = BTX_BTR; goto do_bt;
case OPCODE_INT_OP_btc: alu = BTX_BTC; goto do_bt;
case OPCODE_INT_OP_equal: alu = COND_E; goto do_compare;
case OPCODE_INT_OP_not_equal: alu = COND_NE; goto do_compare;
case OPCODE_INT_OP_less: alu = COND_L; goto do_compare;
case OPCODE_INT_OP_less_equal: alu = COND_LE; goto do_compare;
case OPCODE_INT_OP_bt: alu = BTX_BT; goto do_bt;
default: internal(file_line, "gen_alu: unsupported int operation %u", op);
case MODE_BOOL: switch (op) {
case OPCODE_BOOL_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
case OPCODE_BOOL_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
case OPCODE_BOOL_OP_equal: alu = COND_E; goto do_compare;
case OPCODE_BOOL_OP_not_equal: alu = ALU_XOR; mode = MODE_FIXED; goto do_alu;
case OPCODE_BOOL_OP_less: alu = COND_L; goto do_compare;
case OPCODE_BOOL_OP_less_equal: alu = COND_LE; goto do_compare;
default: internal(file_line, "gen_alu: unsupported bool operation %u", op);
internal(file_line, "gen_alu: unsupported mode %u", mode);
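/*
 * Addition, subtraction and bitwise operations.  Operands wider than
 * OP_SIZE_NATIVE are either upcalled or synthesized from a pair of native
 * words using an add/adc (or sub/sbb) sequence.
 */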
size_t attr_unused offset;
uint8_t attr_unused long_imm;
unsigned first_flags;
unsigned second_flags;
unsigned attr_unused op_size_flags;
if (unlikely(op_size > OP_SIZE_NATIVE)) {
#if !defined(ARCH_X86) && !defined(ARCH_ARM) && !defined(ARCH_PARISC) && !defined(ARCH_POWER) && !defined(ARCH_SPARC32)
if (mode == MODE_FIXED) {
if (alu == ALU_ADD) {
g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_add_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, 0));
} else if (alu == ALU_SUB) {
g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_subtract_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, 0));
} else if (mode == MODE_INT) {
if (alu == ALU_ADD) {
g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_binary_add_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, label_ovf));
} else if (alu == ALU_SUB) {
g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_binary_subtract_,TYPE_INT_MAX)), op_size, slot_1, slot_2, slot_r, label_ovf));
first_flags = alu == ALU_ADD || alu == ALU_SUB ? 2 : 0;
second_flags = mode == MODE_INT ? 1 : 0;
second_alu = alu == ALU_ADD ? ALU_ADC : alu == ALU_SUB ? ALU_SBB : alu;
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
#if defined(ARCH_X86)
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, alu, first_flags, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, second_alu, second_flags, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, first_flags));
#if defined(ARCH_PARISC)
if (mode == MODE_INT) {
gen_insn(INSN_ALU_FLAGS_TRAP, OP_SIZE_NATIVE, second_alu, ALU_WRITES_FLAGS(second_alu, false));
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_4);
gen_insn(first_flags ? INSN_ALU_FLAGS : INSN_ALU, OP_SIZE_NATIVE, second_alu, second_flags | ALU_WRITES_FLAGS(second_alu, false));
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_4);
#if !defined(ARCH_PARISC)
if (mode == MODE_INT) {
gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_O, 0);
g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
if ((ARCH_HAS_FLAGS || ARCH_SUPPORTS_TRAPS) && slot_2 == slot_r && slot_1 != slot_2 && alu_is_commutative(alu)) {
if ((ARCH_HAS_FLAGS || ARCH_SUPPORTS_TRAPS) && slot_1 == slot_r && (slot_1 != slot_2 || mode != MODE_INT) && i_size_cmp(op_size) == op_size + zero
#if defined(ARCH_POWER)
&& op_size == OP_SIZE_NATIVE
unsigned undo_alu = alu == ALU_ADD ? ALU_SUB : ALU_ADD;
if (slot_is_register(ctx, slot_1)) {
unsigned reg1 = ctx->registers[slot_1];
if (slot_is_register(ctx, slot_2) || frame_t_is_const(slot_2)) {
unsigned reg2 = frame_t_is_const(slot_2) ? 0xff /* avoid warning */ : ctx->registers[slot_2];
if (mode == MODE_INT && ARCH_SUPPORTS_TRAPS) {
if (frame_t_is_const(slot_2))
g(gen_imm(ctx, frame_t_get_const(slot_2), alu_trap_purpose(alu), i_size(op_size)));
gen_insn(INSN_ALU_TRAP, op_size, alu, ALU_WRITES_FLAGS(alu, frame_t_is_const(slot_2) && is_imm()));
if (frame_t_is_const(slot_2))
if (ARCH_TRAP_BEFORE) {
ce = alloc_undo_label(ctx);
gen_four(ce->undo_label);
if (frame_t_is_const(slot_2))
g(gen_3address_alu_imm(ctx, i_size(op_size), alu, reg1, reg1, frame_t_get_const(slot_2), mode == MODE_INT));
g(gen_3address_alu(ctx, i_size(op_size), alu, reg1, reg1, reg2, mode == MODE_INT));
if (mode == MODE_INT) {
ce = alloc_undo_label(ctx);
gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
gen_four(ce->undo_label);
ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
ce->undo_op_size = i_size(op_size);
ce->undo_aux = undo_alu;
ce->undo_writes_flags = ALU_WRITES_FLAGS(undo_alu, frame_t_is_const(slot_2) && is_imm());
m = mark_params(ctx);
if (frame_t_is_const(slot_2))
copy_params(ctx, ce, m);
#if defined(ARCH_S390) || defined(ARCH_X86)
else if (!frame_t_is_const(slot_2)) {
int64_t offset = (size_t)slot_2 * slot_size;
g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, 1);
gen_address_offset();
if (mode == MODE_INT) {
ce = alloc_undo_label(ctx);
ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
ce->undo_op_size = i_size(op_size);
ce->undo_aux = undo_alu;
ce->undo_writes_flags = ARCH_HAS_FLAGS;
m = mark_params(ctx);
gen_address_offset();
copy_params(ctx, ce, m);
gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
gen_four(ce->undo_label);
#if defined(ARCH_X86)
int64_t offset = (size_t)slot_1 * slot_size;
if (!frame_t_is_const(slot_2))
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_1, &reg2));
g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
if (frame_t_is_const(slot_2))
g(gen_imm(ctx, frame_t_get_const(slot_2), alu_purpose(alu), i_size(op_size)));
gen_insn(INSN_ALU + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, 1);
gen_address_offset();
gen_address_offset();
if (frame_t_is_const(slot_2))
if (mode == MODE_INT) {
ce = alloc_undo_label(ctx);
ce->undo_opcode = INSN_ALU + ARCH_PARTIAL_ALU(op_size);
ce->undo_op_size = i_size(op_size);
ce->undo_aux = undo_alu;
ce->undo_writes_flags = ARCH_HAS_FLAGS;
m = mark_params(ctx);
gen_address_offset();
gen_address_offset();
if (frame_t_is_const(slot_2))
copy_params(ctx, ce, m);
gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
gen_four(ce->undo_label);
#if defined(ARCH_X86)
#elif defined(ARCH_S390)
if (op_size >= OP_SIZE_4)
#elif ARCH_HAS_FLAGS && !defined(ARCH_POWER)
if (op_size == i_size(op_size) + (unsigned)zero && frame_t_is_const(slot_2))
if (mode != MODE_INT && op_size == i_size(op_size) + (unsigned)zero && frame_t_is_const(slot_2))
if (mode == MODE_INT) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
g(gen_frame_load(ctx, op_size, garbage, slot_1, 0, false, target));
g(gen_frame_load_op(ctx, op_size, garbage, alu, mode == MODE_INT, slot_2, 0, false, target));
goto check_ovf_store;
op_size_flags = !ARCH_HAS_FLAGS && !ARCH_SUPPORTS_TRAPS ? OP_SIZE_NATIVE : OP_SIZE_4;
#if defined(ARCH_POWER)
op_size_flags = OP_SIZE_NATIVE;
g(gen_frame_get(ctx, op_size, mode == MODE_INT && (op_size < op_size_flags || ARCH_SUPPORTS_TRAPS) ? sign_x : garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, mode == MODE_INT && (op_size < op_size_flags || ARCH_SUPPORTS_TRAPS) ? sign_x : garbage, slot_2, R_SCRATCH_2, &reg2));
if (mode == MODE_INT && op_size >= OP_SIZE_4) {
if (ARCH_SUPPORTS_TRAPS) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
gen_insn(INSN_ALU_TRAP, op_size, alu, ALU_WRITES_FLAGS(alu, false));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
if (op_size >= OP_SIZE_NATIVE) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
g(gen_3address_alu(ctx, i_size(op_size), alu, target, reg1, reg2, 0));
#if defined(ARCH_IA64)
g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_1, reg1, reg2, 0));
g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_2, reg2, target, 0));
if (alu == ALU_ADD) {
g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_SCRATCH_1, R_SCRATCH_2, R_SCRATCH_1, 0));
g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), R_SCRATCH_1, R_SCRATCH_1, COND_S, label_ovf));
gen_insn(INSN_CMP_DEST_REG, i_size(op_size), COND_L, 0);
gen_one(R_SCRATCH_1);
if (alu == ALU_ADD) {
g(gen_imm(ctx, 0, IMM_PURPOSE_CMP, i_size(op_size)));
gen_insn(INSN_CMP_DEST_REG, i_size(op_size), COND_L, 0);
gen_one(R_SCRATCH_2);
g(gen_cmp_test_jmp(ctx, INSN_CMP, i_size(op_size), R_SCRATCH_1, R_SCRATCH_2, COND_NE, label_ovf));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
if (mode == MODE_INT) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
} else if (!ARCH_IS_3ADDRESS(alu, mode == MODE_INT && op_size >= op_size_flags) && !alu_is_commutative(alu)) {
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
g(gen_3address_alu(ctx, i_size(op_size), alu, target, reg1, reg2, mode == MODE_INT && op_size >= op_size_flags));
if (mode == MODE_INT && unlikely(op_size < op_size_flags)) {
g(gen_cmp_extended(ctx, op_size_flags, op_size, target, R_SCRATCH_2, label_ovf));
if (mode == MODE_INT) {
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
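/*
 * Multiplication.  Double-word products are synthesized from native multiply
 * and multiply-high instructions where available, otherwise upcalled.  In
 * MODE_INT the overflow check is architecture-specific: a trapping multiply,
 * the overflow flag, or comparing the high part against the sign of the low
 * part.
 */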
size_t attr_unused offset;
uint8_t attr_unused long_imm;
if (unlikely(op_size > OP_SIZE_NATIVE) || unlikely(!ARCH_HAS_MUL)) {
if (mode == MODE_INT) {
g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf));
#if defined(ARCH_X86)
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, hi_word(OP_SIZE_NATIVE), true, R_CX));
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, hi_word(OP_SIZE_NATIVE), true, R_AX));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_MUL, true, slot_2, lo_word(OP_SIZE_NATIVE), true, R_CX));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_MUL, true, slot_1, lo_word(OP_SIZE_NATIVE), true, R_AX));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_CX, R_CX, R_AX, 0));
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, lo_word(OP_SIZE_NATIVE), true, R_AX));
offset = (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE);
g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 1);
gen_address_offset();
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_DX, R_DX, R_CX, 0));
g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_AX, R_DX));
#elif defined(ARCH_ARM32)
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
g(gen_mov(ctx, OP_SIZE_NATIVE, R_SCRATCH_NA_1, R_SCRATCH_1));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, R_SCRATCH_4, R_SCRATCH_1, R_SCRATCH_4, 0));
gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_3);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_4);
gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 0);
gen_one(R_SCRATCH_1);
gen_one(R_SCRATCH_4);
gen_one(R_SCRATCH_NA_1);
gen_one(R_SCRATCH_3);
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ADD, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_4, 0));
g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
#elif defined(ARCH_ARM64)
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_2, 0, R_SCRATCH_3, R_SCRATCH_4));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_UMULH, R_SCRATCH_NA_1, R_SCRATCH_1, R_SCRATCH_3, 0));
gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
gen_one(R_SCRATCH_NA_1);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_3);
gen_one(R_SCRATCH_NA_1);
gen_insn(INSN_MADD, OP_SIZE_NATIVE, 0, 0);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_1);
gen_one(R_SCRATCH_4);
gen_one(R_SCRATCH_NA_1);
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, 0));
g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, 0));
#if defined(ARCH_X86)
if (mode == MODE_INT) {
if (op_size != OP_SIZE_1 && slot_r == slot_1 && slot_is_register(ctx, slot_1)) {
target = ctx->registers[slot_1];
g(gen_mov(ctx, op_size, R_SCRATCH_1, target));
g(gen_frame_load_op(ctx, op_size, garbage, ALU_MUL, mode == MODE_INT, slot_2, 0, false, target));
ce = alloc_undo_label(ctx);
ce->undo_opcode = INSN_MOV;
ce->undo_op_size = op_size;
ce->undo_writes_flags = 0;
ce->undo_parameters[0] = target;
ce->undo_parameters[1] = R_SCRATCH_1;
ce->undo_parameters_len = 2;
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
gen_four(ce->undo_label);
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
if (op_size == OP_SIZE_1)
target = R_SCRATCH_1;
g(gen_frame_load(ctx, op_size, garbage, slot_1, 0, false, target));
g(gen_frame_load_op(ctx, op_size, garbage, ALU_MUL, mode == MODE_INT, slot_2, 0, false, target));
if (mode == MODE_INT) {
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_ALPHA)
if (mode == MODE_INT && op_size >= OP_SIZE_4 && ARCH_SUPPORTS_TRAPS) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
gen_insn(INSN_ALU_TRAP, op_size, ALU_MUL, ALU_WRITES_FLAGS(ALU_MUL, false));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_ARM32)
if (mode == MODE_INT && op_size == OP_SIZE_4) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
gen_insn(INSN_MUL_L, OP_SIZE_NATIVE, 0, 0);
gen_one(R_SCRATCH_4);
gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
gen_one(R_SCRATCH_4);
gen_one(ARG_SHIFTED_REGISTER);
gen_one(ARG_SHIFT_ASR | 0x1f);
gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_ARM64)
if (mode == MODE_INT && op_size == OP_SIZE_4) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, op_size < OP_SIZE_4 ? sign_x : garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, op_size < OP_SIZE_4 ? sign_x : garbage, slot_2, R_SCRATCH_2, &reg2));
gen_insn(INSN_ALU, OP_SIZE_8, ALU_MUL, ALU_WRITES_FLAGS(ALU_MUL, false));
gen_one(ARG_EXTENDED_REGISTER);
gen_one(ARG_EXTEND_SXTW);
gen_one(ARG_EXTENDED_REGISTER);
gen_one(ARG_EXTEND_SXTW);
gen_insn(INSN_CMP, OP_SIZE_8, 0, 1);
gen_one(ARG_EXTENDED_REGISTER);
gen_one(ARG_EXTEND_SXTW);
gen_insn(INSN_JMP_COND, OP_SIZE_8, COND_NE, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
if (mode == MODE_INT && op_size == OP_SIZE_8) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
g(gen_3address_alu(ctx, OP_SIZE_8, ALU_SMULH, R_SCRATCH_3, reg1, reg2, 0));
g(gen_3address_alu(ctx, OP_SIZE_8, ALU_MUL, target, reg1, reg2, 0));
gen_insn(INSN_CMP, OP_SIZE_8, 0, 1);
gen_one(R_SCRATCH_3);
gen_one(ARG_SHIFTED_REGISTER);
gen_one(ARG_SHIFT_ASR | 0x3f);
gen_insn(INSN_JMP_COND, OP_SIZE_8, COND_NE, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_POWER)
if (mode == MODE_INT && op_size >= OP_SIZE_4) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
g(gen_3address_alu(ctx, op_size, ALU_MUL, target, reg1, reg2, 1));
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_LOONGARCH64) || (defined(ARCH_MIPS) && MIPS_R6) || defined(ARCH_RISCV64)
if (mode == MODE_INT && op_size == OP_SIZE_NATIVE) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SMULH, R_SCRATCH_3, reg1, reg2, 0));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, target, reg1, reg2, 0));
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, (8U << OP_SIZE_NATIVE) - 1, 0));
g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_3, R_SCRATCH_4, COND_NE, label_ovf));
g(gen_frame_store(ctx, OP_SIZE_NATIVE, slot_r, 0, target));
#if defined(ARCH_S390)
if (mode == MODE_INT && op_size >= OP_SIZE_4 && likely(cpu_test_feature(CPU_FEATURE_misc_insn_ext_2))) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_load(ctx, op_size, sign_x, slot_1, 0, false, target));
g(gen_frame_load_op(ctx, op_size, sign_x, ALU_MUL, 1, slot_2, 0, false, target));
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if (defined(ARCH_MIPS) && !MIPS_R6) || defined(ARCH_S390)
#if defined(ARCH_MIPS)
if (mode == MODE_INT && op_size >= OP_SIZE_4)
#if defined(ARCH_S390)
if (mode == MODE_INT && op_size == OP_SIZE_4)
#if defined(ARCH_S390)
target = R_SCRATCH_1;
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_frame_get(ctx, op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, sign_x, slot_2, R_SCRATCH_3, &reg2));
gen_insn(INSN_MUL_L, op_size, 0, 0);
gen_one(R_SCRATCH_2);
g(gen_3address_rot_imm(ctx, op_size, ROT_SAR, R_SCRATCH_4, target, (8U << op_size) - 1, false));
g(gen_cmp_test_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_2, R_SCRATCH_4, COND_NE, label_ovf));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
if (mode == MODE_INT && op_size == OP_SIZE_NATIVE) {
g(gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_binary_multiply_int8_t), op_size, slot_1, slot_2, slot_r, label_ovf));
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
if (op_size < OP_SIZE_NATIVE && mode == MODE_INT) {
g(gen_frame_get(ctx, op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, sign_x, slot_2, R_SCRATCH_2, &reg2));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_MUL, target, reg1, reg2, 0));
g(gen_frame_load(ctx, op_size, sign_x, slot_1, 0, false, target));
g(gen_frame_load_op(ctx, op_size, sign_x, ALU_MUL, 0, slot_2, 0, false, target));
if (mode == MODE_INT) {
g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, op_size, target, R_SCRATCH_2, label_ovf));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
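/*
 * Division and modulo.  A zero divisor and the INT_MIN / -1 case are checked
 * explicitly: MODE_INT treats them as overflow, while MODE_FIXED skips the
 * divide instruction and stores a well-defined result instead of trapping.
 */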
uint32_t attr_unused label_skip = 0; /* avoid warning */
uint32_t attr_unused label_skip2 = 0; /* avoid warning */
uint32_t attr_unused label_end = 0; /* avoid warning */
uint32_t attr_unused label_div_0 = 0; /* avoid warning */
unsigned attr_unused divide_alu = 0; /* avoid warning */
bool attr_unused have_mod = false;
bool attr_unused force_sx = false;
unsigned attr_unused div_op_size = i_size(op_size);
if (unlikely(op_size > OP_SIZE_NATIVE) || unlikely(!ARCH_HAS_DIV)
#if defined(ARCH_S390)
|| !(Z || (op_size <= OP_SIZE_4 && sgn))
if (mode == MODE_INT) {
upcall = !mod ? offsetof(struct cg_upcall_vector_s, INT_binary_divide_int8_t) : offsetof(struct cg_upcall_vector_s, INT_binary_modulo_int8_t);
upcall = !mod ? offsetof(struct cg_upcall_vector_s, FIXED_binary_divide_int8_t) : offsetof(struct cg_upcall_vector_s, FIXED_binary_modulo_int8_t);
upcall = !mod ? offsetof(struct cg_upcall_vector_s, FIXED_binary_udivide_int8_t) : offsetof(struct cg_upcall_vector_s, FIXED_binary_umodulo_int8_t);
g(gen_alu_typed_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, mode == MODE_INT ? label_ovf : 0));
#if defined(ARCH_X86) || defined(ARCH_S390)
if (mode == MODE_FIXED) {
label_skip = alloc_label(ctx);
if (unlikely(!label_skip))
label_end = alloc_label(ctx);
if (unlikely(!label_end))
label_skip2 = alloc_label(ctx);
if (unlikely(!label_skip2))
#if defined(ARCH_X86)
if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX || R_SCRATCH_3 != R_CX)
internal(file_line, "gen_alu: bad scratch registers");
g(gen_frame_load(ctx, op_size, sgn ? sign_x : zero_x, slot_1, 0, false, R_SCRATCH_1));
g(gen_frame_load(ctx, op_size, sgn ? sign_x : zero_x, slot_2, 0, false, R_SCRATCH_3));
g(gen_jmp_on_zero(ctx, i_size(op_size), R_SCRATCH_3, COND_E, mode == MODE_INT ? label_ovf : label_skip));
uint32_t label_not_minus_1;
label_not_minus_1 = alloc_label(ctx);
if (unlikely(!label_not_minus_1))
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_3, -1, COND_NE, label_not_minus_1));
val = -(uint64_t)0x80 << (((1 << op_size) - 1) * 8);
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_1, val, COND_E, mode == MODE_INT ? label_ovf : label_skip2));
gen_label(label_not_minus_1);
#if defined(ARCH_X86)
if (op_size >= OP_SIZE_2) {
gen_insn(INSN_CWD + ARCH_PARTIAL_ALU(op_size), op_size, 0, 0);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_1);
if (op_size == OP_SIZE_2)
gen_one(R_SCRATCH_2);
g(gen_3address_alu(ctx, OP_SIZE_4, ALU_XOR, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_2, 0));
gen_insn(INSN_DIV_L, op_size, sgn, 1);
gen_one(R_SCRATCH_1);
gen_one(i_size(op_size) == OP_SIZE_1 ? R_SCRATCH_1 : R_SCRATCH_2);
gen_one(R_SCRATCH_1);
gen_one(i_size(op_size) == OP_SIZE_1 ? R_SCRATCH_1 : R_SCRATCH_2);
gen_one(R_SCRATCH_3);
g(gen_load_constant(ctx, R_SCRATCH_2, 0));
} else if (op_size <= OP_SIZE_4) {
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, R_SCRATCH_1, (1U << (OP_SIZE_NATIVE + 3)) - 1, false));
gen_insn(INSN_DIV_L, i_size(op_size), sgn, 1);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_1);
gen_one(R_SCRATCH_2);
gen_one(R_SCRATCH_1);
gen_one(R_SCRATCH_3);
if (mod && i_size(op_size) == OP_SIZE_1) {
g(gen_3address_rot_imm(ctx, OP_SIZE_2, ROT_SHR, R_SCRATCH_1, R_SCRATCH_1, 8, 0));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_2));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
if (mode == MODE_FIXED) {
gen_insn(INSN_JMP, 0, 0, 0);
gen_label(label_skip2);
g(gen_frame_clear(ctx, op_size, slot_r));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
gen_insn(INSN_JMP, 0, 0, 0);
gen_label(label_skip);
g(gen_frame_clear(ctx, op_size, slot_r));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
gen_label(label_end);
#if defined(ARCH_MIPS)
div_op_size = maximum(op_size, OP_SIZE_4);
if (op_size == OP_SIZE_4)
#if defined(ARCH_POWER)
have_mod = cpu_test_feature(CPU_FEATURE_v30);
div_op_size = maximum(op_size, OP_SIZE_4);
#if defined(ARCH_LOONGARCH64) || defined(ARCH_RISCV64)
div_op_size = maximum(op_size, OP_SIZE_4);
label_end = alloc_label(ctx);
if (unlikely(!label_end))
g(gen_frame_get(ctx, op_size, (sgn && op_size < i_size(op_size)) || force_sx ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, (sgn && op_size < i_size(op_size)) || force_sx ? sign_x : zero_x, slot_2, R_SCRATCH_2, &reg2));
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_3);
if (ARCH_PREFERS_SX(op_size) && !sgn && op_size < i_size(op_size)) {
g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_2, reg2));
if (mode == MODE_INT) {
g(gen_jmp_on_zero(ctx, i_size(op_size), reg2, COND_E, label_ovf));
uint32_t label_not_minus_1;
label_not_minus_1 = alloc_label(ctx);
if (unlikely(!label_not_minus_1))
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg2, -1, COND_NE, label_not_minus_1));
val = 0xFFFFFFFFFFFFFF80ULL << (((1 << op_size) - 1) * 8);
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg1, val, COND_E, label_ovf));
gen_label(label_not_minus_1);
#if !(defined(ARCH_ARM) && ARM_ASM_DIV_NO_TRAP)
g(gen_load_constant(ctx, target, 0));
g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
g(gen_jmp_on_zero(ctx, i_size(op_size), reg2, COND_E, label_end));
uint32_t label_not_minus_1;
label_not_minus_1 = alloc_label(ctx);
if (unlikely(!label_not_minus_1))
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg2, -1, COND_NE, label_not_minus_1));
g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
g(gen_load_constant(ctx, target, 0));
val = 0xFFFFFFFFFFFFFF80ULL << (((1 << op_size) - 1) * 8);
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size(op_size), reg1, val, COND_E, label_end));
gen_label(label_not_minus_1);
if (mod && have_mod) {
g(gen_3address_alu(ctx, div_op_size, sgn ? ALU_SREM : ALU_UREM, target, reg1, reg2, 0));
g(gen_3address_alu(ctx, div_op_size, sgn ? ALU_SDIV : ALU_UDIV, target, reg1, reg2, 0));
if (mod && !have_mod) {
#if defined(ARCH_ARM)
gen_insn(INSN_MADD, i_size(op_size), 1, 0);
g(gen_3address_alu(ctx, i_size(op_size), ALU_MUL, R_SCRATCH_2, reg2, target, 0));
g(gen_3address_alu(ctx, i_size(op_size), ALU_SUB, target, reg1, R_SCRATCH_2, 0));
gen_label(label_end);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
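/*
 * Shifts and rotates.  A constant shift amount is folded into the
 * instruction; a variable amount is range-checked in MODE_INT and masked to
 * the operand width where the architecture requires it.  MODE_INT shl also
 * verifies that shifting the result back restores the original value,
 * otherwise it overflows.
 */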
int64_t cnst = 0; /* avoid warning */
bool c = frame_t_is_const(slot_2);
if (unlikely(op_size > OP_SIZE_NATIVE)) {
if (mode == MODE_FIXED) {
case ROT_SHL: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_shl_,TYPE_INT_MAX));
case ROT_SAR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_shr_,TYPE_INT_MAX));
case ROT_SHR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ushr_,TYPE_INT_MAX));
case ROT_ROL: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_rol_,TYPE_INT_MAX));
case ROT_ROR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_ror_,TYPE_INT_MAX));
default: internal(file_line, "do_alu: invalid shift %u", alu);
case ROT_SHL: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_shl_,TYPE_INT_MAX));
case ROT_SAR: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_shr_,TYPE_INT_MAX));
default: internal(file_line, "do_alu: invalid shift %u", alu);
g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, mode == MODE_INT ? label_ovf : 0));
op_s = i_size_rot(op_size);
#if defined(ARCH_X86)
if (slot_1 == slot_r && !slot_is_register(ctx, slot_1) && !(mode == MODE_INT && alu == ROT_SHL)) {
int64_t offset = (size_t)slot_1 * slot_size;
cnst = frame_t_get_const(slot_2);
if (mode == MODE_INT) {
if ((uint64_t)cnst > (8U << op_size) - 1) {
gen_insn(INSN_JMP, 0, 0, 0);
gen_four(label_ovf);
cnst &= (8U << op_size) - 1;
g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, op_size));
gen_insn(INSN_ROT + ARCH_PARTIAL_ALU(op_size), op_size, alu, 1);
gen_address_offset();
gen_address_offset();
g(gen_frame_load(ctx, op_size, garbage, slot_2, 0, false, R_SCRATCH_3));
if (mode == MODE_INT) {
int64_t imm = (8U << op_size) - 1;
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, op_size, R_SCRATCH_3, imm, COND_A, label_ovf));
} else if ((alu != ROT_ROL && alu != ROT_ROR) && op_size < OP_SIZE_4) {
g(gen_3address_alu_imm(ctx, OP_SIZE_1, ALU_AND, R_SCRATCH_3, R_SCRATCH_3, (8U << op_size) - 1, 0));
g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, op_size));
gen_insn(INSN_ROT + ARCH_PARTIAL_ALU(op_size), op_size, alu, 1);
gen_address_offset();
gen_address_offset();
gen_one(R_SCRATCH_3);
if (mode == MODE_INT && alu == ROT_SHL && op_size < OP_SIZE_NATIVE)
must_mask = op_size < ARCH_SHIFT_SIZE;
sx = (alu == ROT_SAR && op_size < op_s) || (alu == ROT_SHL && op_size < OP_SIZE_NATIVE && mode == MODE_INT);
#if defined(ARCH_MIPS)
sx |= op_size == OP_SIZE_4;
g(gen_frame_get(ctx, op_size, sx ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
reg3 = 0xff; /* avoid warning */
cnst = frame_t_get_const(slot_2);
#if defined(ARCH_X86)
if (!ARCH_IS_3ADDRESS_ROT(alu, op_size)) {
g(gen_frame_load(ctx, op_size, garbage, slot_2, 0, false, R_SCRATCH_3));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_3, &reg3));
if (ARCH_PREFERS_SX(op_size) && !sx && op_size < op_s) {
g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
if (mode == MODE_INT) {
int64_t imm = (8U << op_size) - 1;
if ((uint64_t)cnst > (uint64_t)imm) {
gen_insn(INSN_JMP, 0, 0, 0);
gen_four(label_ovf);
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg3, imm, COND_A, label_ovf));
#if defined(ARCH_ARM)
if (alu == ROT_ROL) {
cnst = -(uint64_t)cnst;
g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_NEG, R_SCRATCH_3, reg3, 0));
#if defined(ARCH_LOONGARCH64)
if (alu == ROT_ROL && op_size >= OP_SIZE_4) {
cnst = -(uint64_t)cnst;
g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
#if defined(ARCH_MIPS)
if (MIPS_HAS_ROT && alu == ROT_ROL && op_size >= OP_SIZE_4) {
cnst = -(uint64_t)cnst;
g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
#if defined(ARCH_POWER)
if (alu == ROT_ROR && op_size >= OP_SIZE_4) {
cnst = -(uint64_t)cnst;
g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, reg3, 0));
#if defined(ARCH_S390)
if (Z && alu == ROT_ROR && op_size >= OP_SIZE_4) {
cnst = -(uint64_t)cnst;
g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_NEG, R_SCRATCH_3, reg3, 0));
cnst &= (8U << op_size) - 1;
} else if (must_mask) {
g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_3, reg3, (8U << op_size) - 1, 0));
#if defined(ARCH_X86)
if (mode == MODE_INT && alu == ROT_SHL) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_2);
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_2);
g(gen_3address_rot_imm(ctx, op_s, alu, target, reg1, cnst, 0));
g(gen_3address_rot(ctx, op_s, alu, target, reg1, reg3));
if (mode == MODE_INT && alu == ROT_SHL) {
if (op_size < OP_SIZE_NATIVE) {
gen_insn(INSN_MOVSX, op_size, 0, 0);
gen_one(R_SCRATCH_4);
g(gen_cmp_test_jmp(ctx, INSN_CMP, op_s, target, R_SCRATCH_4, COND_NE, label_ovf));
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, cnst, 0));
g(gen_3address_rot(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, target, reg3));
g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, reg1, R_SCRATCH_4, COND_NE, label_ovf));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
#if defined(ARCH_ARM)
if (op_size <= OP_SIZE_2 && alu == ROT_ROR) {
gen_insn(INSN_ALU, OP_SIZE_4, ALU_OR, ALU_WRITES_FLAGS(ALU_OR, false));
gen_one(R_SCRATCH_1);
gen_one(ARG_SHIFTED_REGISTER);
gen_one(ARG_SHIFT_LSL | (8U << op_size));
if (op_size == OP_SIZE_1)
goto do_generic_shift;
#if defined(ARCH_LOONGARCH64)
if (alu == ROT_ROR && op_size >= OP_SIZE_4)
goto do_generic_shift;
#if defined(ARCH_MIPS)
if (MIPS_HAS_ROT && alu == ROT_ROR && op_size >= OP_SIZE_4)
goto do_generic_shift;
#if defined(ARCH_POWER)
if (alu == ROT_ROL && op_size >= OP_SIZE_4)
goto do_generic_shift;
#if defined(ARCH_RISCV64)
if ((alu == ROT_ROL || alu == ROT_ROR) && likely(cpu_test_feature(CPU_FEATURE_zbb))) {
if (likely(op_size >= OP_SIZE_4)) {
goto do_generic_shift;
#if defined(ARCH_S390)
if (Z && alu == ROT_ROL && op_size >= OP_SIZE_4)
goto do_generic_shift;
if (alu == ROT_ROL || alu == ROT_ROR) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
g(gen_3address_rot_imm(ctx, op_s, alu == ROT_ROL ? ROT_SHL : ROT_SHR, R_SCRATCH_2, reg1, cnst, 0));
g(gen_3address_rot_imm(ctx, op_s, alu == ROT_ROL ? ROT_SHR : ROT_SHL, target, reg1, -(uint64_t)cnst & ((8U << op_size) - 1), 0));
g(gen_3address_rot(ctx, op_s, alu == ROT_ROL ? ROT_SHL : ROT_SHR, R_SCRATCH_2, reg1, reg3));
g(gen_2address_alu1(ctx, i_size(OP_SIZE_4), ALU1_NEG, R_SCRATCH_3, reg3, 0));
g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_3, R_SCRATCH_3, (8U << op_size) - 1, 0));
g(gen_3address_rot(ctx, op_s, alu == ROT_ROL ? ROT_SHR : ROT_SHL, target, reg1, R_SCRATCH_3));
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, target, target, R_SCRATCH_2, 0));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
goto do_generic_shift;
if (mode == MODE_INT && alu == ROT_SHL) {
target = gen_frame_target(ctx, slot_r, slot_1, slot_2, R_SCRATCH_1);
#if defined(ARCH_S390)
if (op_size >= OP_SIZE_4) {
g(gen_3address_rot_imm(ctx, op_size, ROT_SAL, target, reg1, cnst, 0));
g(gen_3address_rot(ctx, op_size, ROT_SAL, target, reg1, reg3));
gen_insn(INSN_JMP_COND, op_size, COND_O, 0);
gen_four(label_ovf);
if (op_size <= OP_SIZE_NATIVE - 1) {
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, alu, target, reg1, cnst, 0));
g(gen_3address_rot(ctx, OP_SIZE_NATIVE, alu, target, reg1, reg3));
g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, op_size, target, R_SCRATCH_2, label_ovf));
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_2, reg1, cnst, 0));
g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, R_SCRATCH_2, cnst, 0));
g(gen_3address_rot(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_2, reg1, reg3));
g(gen_3address_rot(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_4, R_SCRATCH_2, reg3));
g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, reg1, R_SCRATCH_4, COND_NE, label_ovf));
g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_2));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
g(gen_3address_rot_imm(ctx, op_s, alu, target, reg1, cnst, 0));
g(gen_3address_rot(ctx, op_s, alu, target, reg1, reg3));
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
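/*
 * Bit test / set / reset / complement.  The bit index is range-checked in
 * MODE_INT; architectures without native bt/bts/btr/btc synthesize the
 * operation from a shifted one and and/or/andn/xor.
 */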
unsigned attr_unused op_s;
#if defined(ARCH_X86)
if ((alu == BTX_BT || slot_1 == slot_r) && !slot_is_register(ctx, slot_1)) {
unsigned n_op_size = minimum(op_size, OP_SIZE_NATIVE);
g(gen_frame_get(ctx, n_op_size, garbage, slot_2, R_SCRATCH_1, &reg2));
if (mode == MODE_INT) {
int64_t imm = (8U << op_size) - 1;
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, n_op_size, reg2, imm, alu == BTX_BT ? COND_A : COND_AE, label_ovf));
if (unlikely(op_size > OP_SIZE_NATIVE)) {
g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
gen_address_offset();
gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
gen_four(label_ovf);
g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_AND, R_SCRATCH_1, reg2, (8U << op_size) - 1, 0));
g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size, IMM_PURPOSE_STR_OFFSET, maximum(n_op_size, OP_SIZE_2)));
if (alu == BTX_BT) {
gen_insn(INSN_BT, maximum(n_op_size, OP_SIZE_2), 0, 1);
gen_address_offset();
g(gen_frame_set_cond(ctx, maximum(n_op_size, OP_SIZE_2), false, COND_B, slot_r));
gen_insn(INSN_BTX, maximum(n_op_size, OP_SIZE_2), alu, 1);
gen_address_offset();
gen_address_offset();
if (unlikely(op_size > OP_SIZE_NATIVE)) {
if (mode == MODE_FIXED) {
case BTX_BTS: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_bts_,TYPE_INT_MAX));
case BTX_BTR: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_btr_,TYPE_INT_MAX));
case BTX_BTC: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_btc_,TYPE_INT_MAX));
case BTX_BT: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_bt_,TYPE_INT_MAX));
default: internal(file_line, "do_alu: invalid bit test %u", alu);
case BTX_BTS: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_bts_,TYPE_INT_MAX));
case BTX_BTR: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_btr_,TYPE_INT_MAX));
case BTX_BTC: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_btc_,TYPE_INT_MAX));
case BTX_BT: upcall = offsetof(struct cg_upcall_vector_s, cat(INT_binary_bt_,TYPE_INT_MAX));
default: internal(file_line, "do_alu: invalid bit test %u", alu);
g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, label_ovf));
op_s = minimum(OP_SIZE_NATIVE, ARCH_SHIFT_SIZE);
op_s = maximum(op_s, op_size);
g(gen_frame_get(ctx, op_size, zero_x, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_get(ctx, op_size, garbage, slot_2, R_SCRATCH_2, &reg2));
if (mode == MODE_INT) {
int64_t imm = (8U << op_size) - 1;
g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg2, imm, alu == BTX_BT ? COND_A : COND_AE, label_ovf));
if (alu != BTX_BT) {
if (!ARCH_HAS_BTX(alu, OP_SIZE_NATIVE, false))
need_mask = !ARCH_HAS_BTX(alu, op_size, false);
#if defined(ARCH_X86)
need_mask = op_size < OP_SIZE_2;
if (!ARCH_HAS_BTX(BTX_BTEXT, OP_SIZE_NATIVE, false))
need_mask = !ARCH_HAS_BTX(BTX_BTEXT, op_size, false);
g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_AND, R_SCRATCH_2, reg2, (8U << op_size) - 1, 0));
if (alu == BTX_BT) {
#if defined(ARCH_X86)
gen_insn(INSN_BT, maximum(op_size, OP_SIZE_2), 0, 1);
g(gen_frame_set_cond(ctx, maximum(op_size, OP_SIZE_2), false, COND_B, slot_r));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
gen_insn(INSN_BTX, need_mask ? OP_SIZE_NATIVE : op_size, BTX_BTEXT, 0);
g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, R_SCRATCH_1);
#if defined(ARCH_X86)
target = R_SCRATCH_1;
if (target != reg1) {
g(gen_mov(ctx, op_size, target, reg1));
gen_insn(INSN_BTX, maximum(op_size, OP_SIZE_2), alu, 1);
gen_insn(INSN_BTX, need_mask ? OP_SIZE_NATIVE : op_size, alu, 0);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
if (mode == MODE_FIXED && op_size < ARCH_SHIFT_SIZE) {
g(gen_3address_alu_imm(ctx, i_size(OP_SIZE_4), ALU_AND, R_SCRATCH_2, reg2, (8U << op_size) - 1, 0));
g(gen_load_constant(ctx, R_SCRATCH_3, 1));
g(gen_3address_rot(ctx, op_s, ROT_SHL, R_SCRATCH_3, R_SCRATCH_3, reg2));
#if defined(ARCH_S390) || defined(ARCH_POWER)
g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_1, reg1, R_SCRATCH_3, 1));
gen_insn(INSN_TEST, i_size(op_size), 0, 1);
gen_one(R_SCRATCH_3);
g(gen_frame_set_cond(ctx, i_size_cmp(op_size), false, COND_NE, slot_r));
g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_1, reg1, R_SCRATCH_3, 0));
g(gen_frame_cmp_imm_set_cond_reg(ctx, i_size(op_size), R_SCRATCH_1, 0, COND_NE, slot_r));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
g(gen_3address_alu(ctx, i_size(op_size), ALU_OR, target, reg1, R_SCRATCH_3, 0));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
if (!ARCH_HAS_ANDN) {
g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, R_SCRATCH_3, R_SCRATCH_3, -1, 0));
g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, target, reg1, R_SCRATCH_3, 0));
g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, target, reg1, R_SCRATCH_3, 0));
target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
g(gen_3address_alu(ctx, i_size(op_size), ALU_XOR, target, reg1, R_SCRATCH_3, 0));
internal(file_line, "gen_alu: unsupported bit test %u", alu);
g(gen_frame_store(ctx, op_size, slot_r, 0, target));
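/*
 * Comparisons.  Double-word operands use xor+or against zero for equality
 * and a borrow chain (cmp followed by sbb) for ordering, falling back to
 * upcalls where neither is available; native sizes use a single compare and
 * set-condition.
 */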
if (unlikely(op_size > OP_SIZE_NATIVE)) {
size_t attr_unused upcall;
g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_XOR, 0, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_XOR, 0, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
#if defined(ARCH_ARM64)
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
gen_one(R_SCRATCH_1);
g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, ARCH_HAS_FLAGS));
g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu, slot_r));
g(gen_frame_cmp_imm_set_cond_reg(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, 0, alu, slot_r));
#if defined(ARCH_X86_64) || defined(ARCH_X86_X32) || defined(ARCH_ARM)
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_load_cmp(ctx, OP_SIZE_NATIVE, false, garbage, true, slot_1, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_SBB, 1, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu, slot_r));
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_2, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_load_cmp(ctx, OP_SIZE_NATIVE, false, garbage, true, slot_2, lo_word(OP_SIZE_NATIVE), true, R_SCRATCH_2));
g(gen_frame_load_op(ctx, OP_SIZE_NATIVE, garbage, ALU_SBB, 1, slot_1, hi_word(OP_SIZE_NATIVE), true, R_SCRATCH_1));
g(gen_frame_set_cond(ctx, OP_SIZE_NATIVE, false, alu == COND_LE ? COND_GE : COND_AE, slot_r));
case COND_L: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_less_,TYPE_INT_MAX)); goto do_upcall;
case COND_B: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_uless_,TYPE_INT_MAX)); goto do_upcall;
case COND_LE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_less_equal_,TYPE_INT_MAX)); goto do_upcall;
case COND_BE: upcall = offsetof(struct cg_upcall_vector_s, cat(FIXED_binary_uless_equal_,TYPE_INT_MAX)); goto do_upcall;
do_upcall: g(gen_alu_upcall(ctx, upcall, op_size, slot_1, slot_2, slot_r, 0));
internal(file_line, "gen_alu: unsupported condition %u", alu);
#if defined(ARCH_X86)
g(gen_frame_get(ctx, op_size, garbage, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_load_cmp_set_cond(ctx, op_size, garbage, slot_2, reg1, alu, slot_r));
g(gen_frame_get(ctx, op_size, op_size == i_size_cmp(op_size) + (unsigned)zero ? garbage : alu == COND_L || alu == COND_LE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
g(gen_frame_load_cmp_set_cond(ctx, op_size, alu == COND_L || alu == COND_LE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x, slot_2, reg1, alu, slot_r));
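/*
 * Code generator for unary operations (not, neg, inc, dec, bswap, brev,
 * bsf/bsr/popcnt, conversions, boolean not).  The structure mirrors gen_alu:
 * map the opcode to an internal code, then dispatch to the matching path.
 */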
static bool attr_w gen_alu1(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_r)
unsigned reg1, target;
case MODE_FIXED: switch (op) {
case OPCODE_FIXED_OP_not: alu = ALU1_NOT; goto do_alu;
case OPCODE_FIXED_OP_neg: alu = ALU1_NEG; goto do_alu;
case OPCODE_FIXED_OP_inc: alu = ALU1_INC; goto do_alu;
case OPCODE_FIXED_OP_dec: alu = ALU1_DEC; goto do_alu;
case OPCODE_FIXED_OP_bswap:
case OPCODE_FIXED_OP_bswap_alt1: alu = ALU1_BSWAP; goto do_bswap;
case OPCODE_FIXED_OP_brev:
case OPCODE_FIXED_OP_brev_alt1: alu = ALU1_BREV; goto do_brev;
case OPCODE_FIXED_OP_bsf:
case OPCODE_FIXED_OP_bsf_alt1: alu = ALU1_BSF; goto do_bsf_bsr_popcnt;
case OPCODE_FIXED_OP_bsr:
case OPCODE_FIXED_OP_bsr_alt1: alu = ALU1_BSR; goto do_bsf_bsr_popcnt;
case OPCODE_FIXED_OP_popcnt:
case OPCODE_FIXED_OP_popcnt_alt1: alu = ALU1_POPCNT; goto do_bsf_bsr_popcnt;
case OPCODE_FIXED_OP_to_int: goto do_fixed_conv;
case OPCODE_FIXED_OP_from_int: goto do_fixed_conv;
case OPCODE_FIXED_OP_uto_int: goto conv_uto_int;
case OPCODE_FIXED_OP_ufrom_int: goto conv_ufrom_int;
default: internal(file_line, "gen_alu1: unsupported fixed operation %u", op);
case MODE_INT: switch (op) {
case OPCODE_INT_OP_not: alu = ALU1_NOT; mode = MODE_FIXED; goto do_alu;
case OPCODE_INT_OP_neg: alu = ALU1_NEG; goto do_alu;
case OPCODE_INT_OP_inc: alu = ALU1_INC; goto do_alu;
case OPCODE_INT_OP_dec: alu = ALU1_DEC; goto do_alu;
case OPCODE_INT_OP_bsf: alu = ALU1_BSF; goto do_bsf_bsr_popcnt;
case OPCODE_INT_OP_bsr: alu = ALU1_BSR; goto do_bsf_bsr_popcnt;
case OPCODE_INT_OP_popcnt:
case OPCODE_INT_OP_popcnt_alt1: alu = ALU1_POPCNT; goto do_bsf_bsr_popcnt;
case OPCODE_INT_OP_to_int: goto do_conv;
case OPCODE_INT_OP_from_int: goto do_conv;
default: internal(file_line, "gen_alu1: unsupported int operation %u", op);
case MODE_BOOL: switch (op) {
case OPCODE_BOOL_OP_not: goto do_bool_not;
default: internal(file_line, "gen_alu1: unsupported bool operation %u", op);
internal(file_line, "gen_alu1: unsupported mode %u", mode);
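/*
 * Unary not/neg/inc/dec.  Double-word operands propagate the carry or borrow
 * into the high word (adc/sbb or ngc); MODE_INT jumps to label_ovf on
 * overflow.
 */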
1598 bool arch_use_flags = ARCH_HAS_FLAGS;
1600 #if defined(ARCH_POWER)
1601 arch_use_flags = false;
1603 if (op_size > OP_SIZE_NATIVE) {
1604 #if !defined(ARCH_X86) && !defined(ARCH_ARM) && !defined(ARCH_POWER)
1605 if (alu == ALU1_NEG) {
1606 if (mode == MODE_FIXED)
1607 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_unary_neg_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, 0));
1609 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_unary_neg_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf));
1612 if (alu == ALU1_DEC) {
1613 if (mode == MODE_FIXED)
1614 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_unary_dec_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, 0));
1616 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_unary_dec_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf));
1619 if (alu == ALU1_INC) {
1620 if (mode == MODE_FIXED)
1621 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(FIXED_unary_inc_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, 0));
1623 g(gen_alu_upcall(ctx, offsetof(struct cg_upcall_vector_s, cat(INT_unary_inc_,TYPE_INT_MAX)), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf));
1627 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1628 #if defined(ARCH_S390)
1629 if (alu == ALU1_NOT) {
1630 g(gen_load_constant(ctx, R_SCRATCH_3, -1));
1632 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_XOR, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_3, 0));
1633 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_XOR, R_SCRATCH_2, R_SCRATCH_2, R_SCRATCH_3, 0));
1635 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
1639 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, alu, R_SCRATCH_1, R_SCRATCH_1, alu == ALU1_INC || alu == ALU1_DEC || alu == ALU1_NEG ? 2 : 0));
1640 if (alu == ALU1_NOT) {
1641 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NOT, R_SCRATCH_2, R_SCRATCH_2, 0));
1642 } else if (alu == ALU1_INC || alu == ALU1_DEC) {
1643 g(gen_imm(ctx, 0, alu == ALU1_INC ? IMM_PURPOSE_ADD : IMM_PURPOSE_SUB, OP_SIZE_NATIVE));
1644 gen_insn(INSN_ALU, OP_SIZE_NATIVE, alu == ALU1_INC ? ALU_ADC : ALU_SBB, (mode == MODE_INT) | ALU_WRITES_FLAGS(alu == ALU1_INC ? ALU_ADC : ALU_SBB, is_imm()));
1645 gen_one(R_SCRATCH_2);
1646 gen_one(R_SCRATCH_2);
1649 #if defined(ARCH_X86)
1650 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NOT, R_SCRATCH_2, R_SCRATCH_2, 0));
1652 g(gen_imm(ctx, -1, IMM_PURPOSE_SUB, OP_SIZE_NATIVE));
1653 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_SBB, ALU_WRITES_FLAGS(ALU_SBB, is_imm()));
1654 gen_one(R_SCRATCH_2);
1655 gen_one(R_SCRATCH_2);
1658 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NGC, R_SCRATCH_2, R_SCRATCH_2, (mode == MODE_INT)));
1661 if (mode == MODE_INT) {
1662 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_O, 0);
1663 gen_four(label_ovf);
1665 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
1668 if ((arch_use_flags || ARCH_SUPPORTS_TRAPS) && slot_1 == slot_r && i_size_cmp(op_size) == op_size + zero) {
1670 unsigned undo_alu = alu == ALU1_INC ? ALU1_DEC : alu == ALU1_DEC ? ALU1_INC : alu;
1671 if (slot_is_register(ctx, slot_1)) {
1672 unsigned reg = ctx->registers[slot_1];
1673 if (mode == MODE_INT && ARCH_SUPPORTS_TRAPS) {
1674 gen_insn(INSN_ALU1_TRAP, op_size, alu, ALU1_WRITES_FLAGS(alu));
1677 if (ARCH_TRAP_BEFORE || alu == undo_alu) {
1678 gen_four(label_ovf);
1681 ce = alloc_undo_label(ctx);
1684 gen_four(ce->undo_label);
1685 goto do_undo_opcode;
1688 g(gen_2address_alu1(ctx, i_size(op_size), alu, reg, reg, mode == MODE_INT));
1689 if (mode == MODE_INT) {
1690 if (alu != undo_alu) {
1691 ce = alloc_undo_label(ctx);
1694 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1695 gen_four(ce->undo_label);
1697 ce->undo_opcode = INSN_ALU1 + ARCH_PARTIAL_ALU(op_size);
1698 ce->undo_op_size = i_size(op_size);
1699 ce->undo_aux = undo_alu;
1700 ce->undo_writes_flags = ALU1_WRITES_FLAGS(undo_alu);
1701 ce->undo_parameters[0] = reg;
1702 ce->undo_parameters[1] = reg;
1703 ce->undo_parameters_len = 2;
1705 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1706 gen_four(label_ovf);
1711 #if defined(ARCH_X86)
1714 int64_t offset = (size_t)slot_1 * slot_size;
1715 g(gen_address(ctx, R_FRAME, offset, IMM_PURPOSE_LDR_OFFSET, i_size(op_size)));
1716 gen_insn(INSN_ALU1 + ARCH_PARTIAL_ALU(op_size), i_size(op_size), alu, ALU1_WRITES_FLAGS(alu) | (mode == MODE_INT));
1717 gen_address_offset();
1718 gen_address_offset();
1719 if (mode == MODE_INT) {
1720 if (alu != undo_alu) {
1721 ce = alloc_undo_label(ctx);
1724 ce->undo_opcode = INSN_ALU1 + ARCH_PARTIAL_ALU(op_size);
1725 ce->undo_op_size = i_size(op_size);
1726 ce->undo_aux = undo_alu;
1727 ce->undo_writes_flags = ALU1_WRITES_FLAGS(undo_alu);
1728 m = mark_params(ctx);
1729 gen_address_offset();
1730 gen_address_offset();
1731 copy_params(ctx, ce, m);
1732 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1733 gen_four(ce->undo_label);
1735 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1736 gen_four(label_ovf);
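/* General native-size case: pick a target register, load the operand with the architecture's preferred extension, and emit the per-architecture operation and overflow test below. */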
1743 target = gen_frame_target(ctx, slot_r, mode == MODE_INT ? slot_1 : NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1744 if (mode == MODE_FIXED) {
1747 ex = ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
1748 if (ARCH_SUPPORTS_TRAPS && op_size >= OP_SIZE_4)
1750 if (op_size == i_size(op_size) + (unsigned)zero)
1753 g(gen_frame_get(ctx, op_size, ex, slot_1, mode == MODE_INT ? R_SCRATCH_2 : target, &reg1));
1754 #if defined(ARCH_S390)
1755 if (alu == ALU1_NOT) {
1756 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, target, reg1, -1, 0));
1758 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1762 #if defined(ARCH_X86)
1763 g(gen_2address_alu1(ctx, op_size, alu, target, reg1, mode == MODE_INT));
1765 if (mode == MODE_INT) {
1766 #if defined(ARCH_POWER)
1767 if (op_size == OP_SIZE_NATIVE) {
1768 g(gen_2address_alu1(ctx, i_size(op_size), alu, target, reg1, 0));
1769 if (alu == ALU1_NEG) {
1770 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_CG_SCRATCH, target, reg1, 1));
1771 } else if (alu == ALU1_INC) {
1772 g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_CG_SCRATCH, target, reg1, 1));
1773 } else if (alu == ALU1_DEC) {
1774 g(gen_3address_alu(ctx, i_size(op_size), ALU_ANDN, R_CG_SCRATCH, reg1, target, 1));
1776 gen_insn(INSN_JMP_COND, op_size, COND_L, 0);
1777 gen_four(label_ovf);
1779 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1784 if (ARCH_HAS_JMP_2REGS(COND_L) && ARCH_HAS_JMP_2REGS(COND_G) && (alu == ALU1_INC || alu == ALU1_DEC) && op_size == i_size_cmp(op_size) + (unsigned)zero) {
1785 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, alu, target, reg1, 0));
1787 g(gen_cmp_test_jmp(ctx, INSN_CMP, i_size_cmp(op_size), target, reg1, alu == ALU1_INC ? COND_L : COND_G, label_ovf));
1789 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
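/* No usable flags or traps: detect signed overflow from the sign bits of operand and result - for NEG both are negative, for INC the result is negative while the operand was not, for DEC the reverse - then test the sign of the AND/ANDN result. */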
1793 if (!arch_use_flags && !ARCH_SUPPORTS_TRAPS && ARCH_HAS_ANDN && op_size >= OP_SIZE_4) {
1794 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, alu, target, reg1, 0));
1796 if (alu == ALU1_NEG) {
1797 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_3, target, reg1, 0));
1798 } else if (alu == ALU1_INC) {
1799 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ANDN, R_SCRATCH_3, target, reg1, 0));
1800 } else if (alu == ALU1_DEC) {
1801 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ANDN, R_SCRATCH_3, reg1, target, 0));
1804 if (op_size < OP_SIZE_NATIVE)
1805 g(gen_extend(ctx, op_size, sign_x, R_SCRATCH_3, R_SCRATCH_3));
1807 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_3, COND_S, label_ovf));
1809 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
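/* Small operands (or no flags/traps at all): only one input value can overflow INC/DEC/NEG, so compare the operand against that constant up front (e.g. 0x7f for 8-bit inc, 0x80 for 8-bit dec/neg) and escape on equality. */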
1813 if (op_size <= OP_SIZE_2 || (!arch_use_flags && !ARCH_SUPPORTS_TRAPS)) {
1814 int64_t imm = ((alu != ALU1_INC && ARCH_PREFERS_SX(op_size) ? -0x80ULL : 0x80ULL) << (((1 << op_size) - 1) * 8)) - (alu == ALU1_INC);
1816 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, i_size_cmp(op_size), reg1, imm, COND_E, label_ovf));
1820 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1824 if (mode == MODE_INT) {
1825 gen_insn(INSN_ALU1_TRAP, op_size, alu, ALU1_WRITES_FLAGS(alu));
1828 gen_four(label_ovf);
1829 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1833 g(gen_2address_alu1(ctx, i_size(op_size), alu, target, reg1, mode == MODE_INT));
1835 if (mode == MODE_INT) {
1836 gen_insn(INSN_JMP_COND, i_size_cmp(op_size), COND_O, 0);
1837 gen_four(label_ovf);
1839 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1847 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1848 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
1850 g(gen_3address_alu_imm(ctx, i_size(op_size), ALU_XOR, target, reg1, 1, 0));
1852 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
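/* Byte swap: use the native BSWAP/BSWAP16/rotate forms where the CPU provides them; otherwise fall through to the generic upcall below. */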
1860 bool attr_unused sx = false;
1861 #if defined(ARCH_X86) || defined(ARCH_ARM) || defined(ARCH_IA64) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_RISCV64) || defined(ARCH_S390)
1862 #if defined(ARCH_ARM32)
1863 if (unlikely(!cpu_test_feature(CPU_FEATURE_armv6)))
1864 goto do_generic_bswap;
1866 #if defined(ARCH_MIPS)
1867 if (unlikely(!MIPS_HAS_ROT))
1868 goto do_generic_bswap;
1869 sx = op_size == OP_SIZE_4;
1871 #if defined(ARCH_RISCV64)
1872 if (unlikely(!cpu_test_feature(CPU_FEATURE_zbb)))
1873 goto do_generic_bswap;
1875 #if defined(ARCH_S390)
1876 if (op_size == OP_SIZE_2)
1877 goto do_generic_bswap;
1879 #if defined(ARCH_X86)
1880 if (op_size >= OP_SIZE_4 && !cpu_test_feature(CPU_FEATURE_bswap))
1881 goto do_generic_bswap;
1883 if (op_size > OP_SIZE_NATIVE) {
1884 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1886 target = R_SCRATCH_1;
1888 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1889 g(gen_frame_get(ctx, op_size, sx ? sign_x : garbage, slot_1, target, &reg1));
1892 if (op_size == OP_SIZE_1) {
1893 #if defined(ARCH_IA64) || defined(ARCH_RISCV64)
1894 } else if (op_size == OP_SIZE_2 || op_size == OP_SIZE_4) {
1895 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSWAP, target, reg1, 0));
1897 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, target, target, op_size == OP_SIZE_2 ? 48 : 32, 0));
1899 } else if (op_size == OP_SIZE_2) {
1900 #if defined(ARCH_X86)
1901 g(gen_3address_rot_imm(ctx, OP_SIZE_2, ROT_ROR, target, reg1, 8, 0));
1903 g(gen_2address_alu1(ctx, OP_SIZE_4, ALU1_BSWAP16, target, reg1, 0));
1906 g(gen_2address_alu1(ctx, minimum(op_size, OP_SIZE_NATIVE), ALU1_BSWAP, target, reg1, 0));
1908 if (op_size > OP_SIZE_NATIVE) {
1909 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSWAP, R_SCRATCH_2, R_SCRATCH_2, 0));
1912 if (op_size > OP_SIZE_NATIVE)
1913 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_2, R_SCRATCH_1));
1915 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1918 goto do_generic_bswap;
1920 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bswap_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
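/* Bit reverse: only ARM, LoongArch and MIPS R6 have a usable RBIT-style instruction; all other targets take the typed upcall. */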
1926 #if defined(ARCH_ARM) || defined(ARCH_LOONGARCH64) || (defined(ARCH_MIPS) && MIPS_R6)
1927 #if defined(ARCH_ARM32)
1928 if (unlikely(!cpu_test_feature(CPU_FEATURE_armv6t2)))
1929 goto do_generic_brev;
1931 if (op_size > OP_SIZE_NATIVE) {
1932 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
1934 target = R_SCRATCH_1;
1936 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
1937 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
1940 g(gen_2address_alu1(ctx, minimum(maximum(OP_SIZE_4, op_size), OP_SIZE_NATIVE), ALU1_BREV, target, reg1, 0));
1941 if (op_size <= OP_SIZE_2) {
1942 g(gen_3address_rot_imm(ctx, OP_SIZE_4, ROT_SHR, target, target, op_size == OP_SIZE_1 ? 24 : 16, 0));
1944 if (op_size > OP_SIZE_NATIVE) {
1945 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BREV, R_SCRATCH_2, R_SCRATCH_2, 0));
1948 if (op_size > OP_SIZE_NATIVE)
1949 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_2, R_SCRATCH_1));
1951 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
1954 goto do_generic_brev;
1956 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_brev_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
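/* Bit scan forward/reverse and population count. The wider-than-native case is open-coded only on x86; everything else goes through the generic upcall. */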
1961 do_bsf_bsr_popcnt: {
1962 if (op_size > OP_SIZE_NATIVE) {
1963 #if defined(ARCH_X86)
1964 uint32_t label_finish = 0; /* avoid warning */
1965 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_popcnt)))
1966 goto do_generic_bsf_bsr_popcnt;
1967 if (alu == ALU1_BSR || alu == ALU1_POPCNT) {
1968 if (mode == MODE_INT) {
1969 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
1970 g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_NATIVE));
1971 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
1972 gen_address_offset();
1975 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_S, 0);
1976 gen_four(label_ovf);
1979 if (alu == ALU1_BSF) {
1980 label_finish = alloc_label(ctx);
1981 if (unlikely(!label_finish))
1984 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
1985 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSF, ALU1_WRITES_FLAGS(ALU1_BSF));
1986 gen_one(R_SCRATCH_1);
1987 gen_address_offset();
1989 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
1990 gen_four(label_finish);
1992 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
1993 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSF, ALU1_WRITES_FLAGS(ALU1_BSF));
1994 gen_one(R_SCRATCH_1);
1995 gen_address_offset();
1997 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, 8U << OP_SIZE_NATIVE, 0));
1999 if (alu == ALU1_BSR) {
2000 label_finish = alloc_label(ctx);
2001 if (unlikely(!label_finish))
2004 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2005 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSR, ALU1_WRITES_FLAGS(ALU1_BSR));
2006 gen_one(R_SCRATCH_1);
2007 gen_address_offset();
2009 g(gen_3address_alu_imm(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, 8U << OP_SIZE_NATIVE, 0));
2011 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2012 gen_four(label_finish);
2014 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2015 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_BSR, ALU1_WRITES_FLAGS(ALU1_BSR));
2016 gen_one(R_SCRATCH_1);
2017 gen_address_offset();
2019 if (alu == ALU1_BSF || alu == ALU1_BSR) {
2020 if (mode == MODE_INT) {
2021 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_E, 0);
2022 gen_four(label_ovf);
2024 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2025 gen_four(label_finish);
2027 g(gen_load_constant(ctx, R_SCRATCH_1, -1));
2030 gen_label(label_finish);
2032 if (mode == MODE_INT)
2035 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2036 internal(file_line, "gen_alu1: bad scratch registers");
2037 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2041 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2042 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2043 gen_address_offset();
2044 gen_one(R_SCRATCH_1);
2046 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2047 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2048 gen_address_offset();
2049 gen_one(R_SCRATCH_2);
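/* Double-word popcnt: count the bits of each half separately, add the two counts and store zero into the high word of the result. */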
2053 if (alu == ALU1_POPCNT) {
2054 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2055 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_POPCNT, ALU1_WRITES_FLAGS(ALU1_POPCNT));
2056 gen_one(R_SCRATCH_1);
2057 gen_address_offset();
2059 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2060 gen_insn(INSN_ALU1, OP_SIZE_NATIVE, ALU1_POPCNT, ALU1_WRITES_FLAGS(ALU1_POPCNT));
2061 gen_one(R_SCRATCH_2);
2062 gen_address_offset();
2064 g(gen_3address_alu(ctx, OP_SIZE_4, ALU_ADD, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 1));
2066 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + lo_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2067 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2068 gen_address_offset();
2069 gen_one(R_SCRATCH_1);
2071 g(gen_address(ctx, R_FRAME, (size_t)slot_r * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_STR_OFFSET, OP_SIZE_NATIVE));
2072 g(gen_imm(ctx, 0, IMM_PURPOSE_STORE_VALUE, OP_SIZE_NATIVE));
2073 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
2074 gen_address_offset();
2080 goto do_generic_bsf_bsr_popcnt;
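/* Native-size x86 path: BSF/BSR leave the destination undefined for a zero source, so MODE_FIXED substitutes -1 via CMOV and MODE_INT jumps to the overflow label; POPCNT additionally needs its CPU feature bit. */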
2082 #if defined(ARCH_X86)
2083 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_popcnt)))
2084 goto do_generic_bsf_bsr_popcnt;
2085 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2086 if (op_size == OP_SIZE_1 || ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT)) {
2087 g(gen_frame_get(ctx, op_size, zero_x, slot_1, target, &reg1));
2088 if ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT) {
2089 g(gen_cmp_test_jmp(ctx, INSN_TEST, op_size, reg1, reg1, alu == ALU1_BSR ? COND_LE : COND_S, label_ovf));
2091 g(gen_2address_alu1(ctx, maximum(op_size, OP_SIZE_2), alu, target, reg1, 1));
2092 if ((alu == ALU1_BSR || alu == ALU1_POPCNT) && mode == MODE_INT)
2093 goto x86_bsf_bsr_popcnt_finish;
2095 g(gen_frame_load_op1(ctx, op_size, alu, 1, slot_1, target));
2097 if (alu == ALU1_POPCNT)
2098 goto x86_bsf_bsr_popcnt_finish;
2099 if (mode == MODE_FIXED) {
2100 uint32_t cmov_label;
2101 gen_insn(INSN_MOV, maximum(op_size, OP_SIZE_4), 0, 0);
2102 gen_one(R_SCRATCH_2);
2105 g(gen_cmov(ctx, maximum(op_size, OP_SIZE_4), COND_E, target, &cmov_label));
2106 gen_one(R_SCRATCH_2);
2108 gen_label(cmov_label);
2111 gen_insn(INSN_JMP_COND, maximum(op_size, OP_SIZE_2), COND_E, 0);
2112 gen_four(label_ovf);
2114 x86_bsf_bsr_popcnt_finish:
2115 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
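/* ARM: popcnt uses NEON VCNT followed by horizontal adds; bsf is bit-reverse + CLZ, bsr is 31 (or 63) minus CLZ, with the zero-input case handled by a conditional move/select. */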
2118 #if defined(ARCH_ARM)
2119 #if defined(ARCH_ARM32)
2120 if (alu == ALU1_BSR && unlikely(!cpu_test_feature(CPU_FEATURE_armv6)))
2121 goto do_generic_bsf_bsr_popcnt;
2122 if (alu == ALU1_BSF && unlikely(!cpu_test_feature(CPU_FEATURE_armv6t2)))
2123 goto do_generic_bsf_bsr_popcnt;
2125 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_neon)))
2126 goto do_generic_bsf_bsr_popcnt;
2127 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2128 if (mode == MODE_INT) {
2129 g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2132 if (alu == ALU1_POPCNT) {
2133 g(gen_mov(ctx, OP_SIZE_NATIVE, FR_SCRATCH_1, reg1));
2134 gen_insn(INSN_FP_ALU1, OP_SIZE_NATIVE, FP_ALU1_VCNT8, 0);
2135 gen_one(FR_SCRATCH_1);
2136 gen_one(FR_SCRATCH_1);
2137 #if defined(ARCH_ARM32)
2138 if (op_size > OP_SIZE_1) {
2139 gen_insn(INSN_FP_ALU1, OP_SIZE_1, FP_ALU1_VPADDL, 0);
2140 gen_one(FR_SCRATCH_1);
2141 gen_one(FR_SCRATCH_1);
2143 if (op_size > OP_SIZE_2) {
2144 gen_insn(INSN_FP_ALU1, OP_SIZE_2, FP_ALU1_VPADDL, 0);
2145 gen_one(FR_SCRATCH_1);
2146 gen_one(FR_SCRATCH_1);
2149 if (op_size > OP_SIZE_1) {
2150 gen_insn(INSN_FP_ALU1, OP_SIZE_1, FP_ALU1_ADDV, 0);
2151 gen_one(FR_SCRATCH_1);
2152 gen_one(FR_SCRATCH_1);
2155 g(gen_frame_store(ctx, op_size, slot_r, 0, FR_SCRATCH_1));
2156 if (slot_is_register(ctx, slot_r))
2157 g(unspill(ctx, slot_r));
2161 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2162 if (mode == MODE_FIXED && alu == ALU1_BSF) {
2163 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
2169 if (alu == ALU1_BSF) {
2170 g(gen_2address_alu1(ctx, i_size(op_size), ALU1_BREV, target, reg1, 0));
2174 g(gen_2address_alu1(ctx, i_size(op_size), ALU1_LZCNT, target, reg1, 0));
2176 if (alu == ALU1_BSR) {
2177 g(gen_load_constant(ctx, R_SCRATCH_2, op_size == OP_SIZE_8 ? 63 : 31));
2178 g(gen_3address_alu(ctx, i_size(op_size), ALU_SUB, target, R_SCRATCH_2, target, 0));
2181 if (mode == MODE_FIXED && alu == ALU1_BSF) {
2182 #if defined(ARCH_ARM32)
2183 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2184 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2189 gen_insn(INSN_CSEL_INV, i_size(op_size), COND_NE, 0);
2197 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2200 #if defined(ARCH_ALPHA)
2201 if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
2202 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2203 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2204 if (mode == MODE_INT) {
2205 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2207 if (alu == ALU1_POPCNT) {
2208 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, reg1, 0));
2210 if (alu == ALU1_BSF) {
2211 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSF, target, reg1, 0));
2213 if (mode == MODE_FIXED) {
2214 g(gen_imm(ctx, -1, IMM_PURPOSE_MOVR, OP_SIZE_INT));
2215 gen_insn(INSN_MOVR, OP_SIZE_NATIVE, COND_E, 0);
2222 if (alu == ALU1_BSR) {
2223 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2225 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2227 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2229 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2233 #if defined(ARCH_MIPS)
2234 if (MIPS_HAS_CLZ && alu != ALU1_POPCNT) {
2235 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2236 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2237 if (mode == MODE_INT) {
2238 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2240 if (alu == ALU1_BSF) {
2241 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, target, reg1, 0));
2243 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, reg1, target, 0));
2246 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2248 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2250 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2252 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2256 #if defined(ARCH_POWER)
2257 if (alu == ALU1_BSF && (unlikely(!cpu_test_feature(CPU_FEATURE_v203)) || unlikely(!cpu_test_feature(CPU_FEATURE_v30))))
2258 goto do_generic_bsf_bsr_popcnt;
2259 if (alu == ALU1_POPCNT && unlikely(!cpu_test_feature(CPU_FEATURE_v206)))
2260 goto do_generic_bsf_bsr_popcnt;
2261 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2262 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2263 if (mode == MODE_INT) {
2264 g(gen_cmp_test_jmp(ctx, INSN_TEST, i_size(op_size), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2266 if (alu == ALU1_POPCNT) {
2267 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, reg1, 0));
2269 if (alu == ALU1_BSF) {
2270 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_BSF, target, reg1, 0));
2272 if (mode == MODE_FIXED) {
2273 g(gen_3address_alu(ctx, i_size(op_size), ALU_AND, R_SCRATCH_3, reg1, reg1, 1));
2275 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2276 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2282 if (alu == ALU1_BSR) {
2283 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_LZCNT, target, reg1, 0));
2285 g(gen_load_constant(ctx, R_SCRATCH_3, OP_SIZE_NATIVE == OP_SIZE_8 ? 63 : 31));
2287 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2289 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2292 #if defined(ARCH_LOONGARCH64) || defined(ARCH_RISCV64)
2293 #if defined(ARCH_LOONGARCH64)
2294 if (alu == ALU1_POPCNT)
2295 goto do_generic_bsf_bsr_popcnt;
2297 #if defined(ARCH_RISCV64)
2298 if (unlikely(!cpu_test_feature(CPU_FEATURE_zbb)))
2299 goto do_generic_bsf_bsr_popcnt;
2301 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2302 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2303 if (mode == MODE_INT) {
2304 g(gen_cmp_test_jmp(ctx, INSN_TEST, OP_SIZE_NATIVE, reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2306 if (alu == ALU1_POPCNT) {
2307 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_POPCNT, target, reg1, 0));
2309 if (alu == ALU1_BSF) {
2310 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_BSF, target, reg1, 0));
2312 if (mode == MODE_FIXED) {
2313 g(gen_imm(ctx, 1, IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2314 gen_insn(INSN_CMP_DEST_REG, OP_SIZE_NATIVE, COND_B, 0);
2315 gen_one(R_SCRATCH_3);
2319 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_NEG, R_SCRATCH_3, R_SCRATCH_3, 0));
2321 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_OR, target, target, R_SCRATCH_3, 0));
2324 if (alu == ALU1_BSR) {
2325 g(gen_2address_alu1(ctx, maximum(OP_SIZE_4, op_size), ALU1_LZCNT, target, reg1, 0));
2327 g(gen_load_constant(ctx, R_SCRATCH_3, op_size <= OP_SIZE_4 ? 31 : 63));
2329 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_SUB, target, R_SCRATCH_3, target, 0));
2331 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2334 #if defined(ARCH_IA64) || defined(ARCH_S390) || defined(ARCH_SPARC)
2335 if (alu == ALU1_BSF && !ARCH_HAS_ANDN)
2336 goto do_generic_bsf_bsr_popcnt;
2337 #if defined(ARCH_S390)
2338 if (!cpu_test_feature(CPU_FEATURE_misc_45) || !cpu_test_feature(CPU_FEATURE_misc_insn_ext_3))
2339 goto do_generic_bsf_bsr_popcnt;
2341 #if defined(ARCH_SPARC)
2343 goto do_generic_bsf_bsr_popcnt;
2345 g(gen_frame_get(ctx, op_size, mode == MODE_INT ? sign_x : zero_x, slot_1, R_SCRATCH_1, &reg1));
2346 target = gen_frame_target(ctx, slot_r, slot_1, NO_FRAME_T, R_SCRATCH_2);
2347 if (mode == MODE_INT) {
2348 g(gen_cmp_test_jmp(ctx, INSN_TEST, maximum(op_size, OP_SIZE_4), reg1, reg1, alu == ALU1_BSR ? COND_LE : alu == ALU1_BSF ? COND_E : COND_S, label_ovf));
2350 if (ARCH_PREFERS_SX(op_size) && alu == ALU1_POPCNT && op_size < OP_SIZE_NATIVE) {
2351 g(gen_extend(ctx, op_size, zero_x, R_SCRATCH_1, reg1));
2355 if (alu == ALU1_POPCNT) {
2356 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, R_SCRATCH_1, reg1, 0));
2357 g(gen_frame_store(ctx, op_size, slot_r, 0, R_SCRATCH_1));
2360 if (alu == ALU1_BSF) {
2361 g(gen_3address_alu_imm(ctx, OP_SIZE_NATIVE, ALU_SUB, target, reg1, 1, 0));
2363 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_ANDN, target, target, reg1, 0));
2365 g(gen_2address_alu1(ctx, OP_SIZE_NATIVE, ALU1_POPCNT, target, target, 0));
2367 if (mode == MODE_FIXED) {
2368 unsigned attr_unused test_reg = R_SCRATCH_1;
2369 #if defined(ARCH_S390)
2370 g(gen_imm(ctx, 0, COND_IS_LOGICAL(COND_E) ? IMM_PURPOSE_CMP_LOGICAL : IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2371 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1 + COND_IS_LOGICAL(COND_E));
2375 g(gen_imm(ctx, -1, IMM_PURPOSE_CMOV, OP_SIZE_NATIVE));
2376 gen_insn(INSN_CMOV, OP_SIZE_NATIVE, COND_E, 0);
2381 #if defined(ARCH_IA64)
2382 g(gen_cmp_dest_reg(ctx, OP_SIZE_NATIVE, reg1, (unsigned)-1, R_CMP_RESULT, 0, COND_NE));
2383 test_reg = R_CMP_RESULT;
2385 g(gen_imm(ctx, -1, IMM_PURPOSE_MOVR, OP_SIZE_NATIVE));
2386 gen_insn(INSN_MOVR, OP_SIZE_NATIVE, COND_E, 0);
2394 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
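/* Generic fallback for bsf/bsr/popcnt: dispatch to the matching typed upcall (the FIXED_ variants ignore overflow, the INT_ variants take label_ovf). */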
2398 do_generic_bsf_bsr_popcnt:
2399 if (alu == ALU1_BSF) {
2400 if (mode == MODE_FIXED)
2401 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bsf_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2403 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_bsf_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2405 if (alu == ALU1_BSR) {
2406 if (mode == MODE_FIXED)
2407 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_bsr_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2409 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_bsr_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2411 if (alu == ALU1_POPCNT) {
2412 if (mode == MODE_FIXED)
2413 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_unary_popcnt_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, 0);
2415 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, INT_unary_popcnt_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
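/* Conversion between fixed and integer types: derive the source and destination sizes from the slot types, sign-extend when widening, and escape to label_ovf when the value does not fit the narrower destination. */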
2423 unsigned src_op_size, dest_op_size;
2424 const struct type *src_type, *dest_type;
2425 src_type = get_type_of_local(ctx, slot_1);
2426 dest_type = get_type_of_local(ctx, slot_r);
2428 if (TYPE_TAG_IS_FIXED(src_type->tag)) {
2429 src_op_size = TYPE_TAG_IDX_FIXED(src_type->tag) >> 1;
2431 src_op_size = TYPE_TAG_IDX_INT(src_type->tag);
2434 if (TYPE_TAG_IS_FIXED(dest_type->tag)) {
2435 dest_op_size = TYPE_TAG_IDX_FIXED(dest_type->tag) >> 1;
2437 dest_op_size = TYPE_TAG_IDX_INT(dest_type->tag);
2440 if (src_op_size <= OP_SIZE_NATIVE) {
2441 g(gen_frame_get(ctx, src_op_size, sign_x, slot_1, R_SCRATCH_1, &reg1));
2443 #if defined(ARCH_X86)
2444 if (dest_op_size < src_op_size)
2445 g(gen_frame_load(ctx, OP_SIZE_NATIVE, garbage, slot_1, 0, false, R_SCRATCH_1));
2448 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_3));
2452 if (dest_op_size >= src_op_size) {
2453 if (dest_op_size <= OP_SIZE_NATIVE) {
2454 g(gen_frame_store(ctx, dest_op_size, slot_r, 0, reg1));
2456 if (src_op_size <= OP_SIZE_NATIVE) {
2457 #if defined(ARCH_X86)
2458 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2459 internal(file_line, "gen_alu1: bad scratch registers");
2460 if (reg1 == R_SCRATCH_1) {
2461 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2466 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, reg1, (1U << (OP_SIZE_NATIVE + 3)) - 1, false));
2467 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, reg1, R_SCRATCH_2));
2469 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, reg1, R_SCRATCH_3));
2474 if (src_op_size > OP_SIZE_NATIVE) {
2475 #if defined(ARCH_ARM)
2476 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
2477 gen_one(R_SCRATCH_3);
2478 gen_one(ARG_SHIFTED_REGISTER);
2479 gen_one(ARG_SHIFT_ASR | ((1U << (OP_SIZE_NATIVE + 3)) - 1));
2480 gen_one(R_SCRATCH_1);
2482 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2483 gen_four(label_ovf);
2484 #elif defined(ARCH_X86)
2485 if (R_SCRATCH_1 != R_AX || R_SCRATCH_2 != R_DX)
2486 internal(file_line, "gen_alu1: bad scratch registers");
2487 gen_insn(INSN_CWD, OP_SIZE_NATIVE, 0, 0);
2491 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size + hi_word(OP_SIZE_NATIVE), IMM_PURPOSE_LDR_OFFSET, OP_SIZE_NATIVE));
2492 gen_insn(INSN_CMP, OP_SIZE_NATIVE, 0, 1);
2493 gen_one(R_SCRATCH_2);
2494 gen_address_offset();
2496 gen_insn(INSN_JMP_COND, OP_SIZE_NATIVE, COND_NE, 0);
2497 gen_four(label_ovf);
2499 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SAR, R_SCRATCH_2, R_SCRATCH_1, (1U << (OP_SIZE_NATIVE + 3)) - 1, 0));
2501 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, R_SCRATCH_3, R_SCRATCH_2, COND_NE, label_ovf));
2504 src_op_size = OP_SIZE_NATIVE;
2506 if (src_op_size > dest_op_size) {
2507 g(gen_cmp_extended(ctx, OP_SIZE_NATIVE, dest_op_size, reg1, R_SCRATCH_2, label_ovf));
2509 g(gen_frame_store(ctx, dest_op_size, slot_r, 0, reg1));
2515 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_uto_int_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
2519 return gen_alu_typed_upcall(ctx, offsetof(struct cg_upcall_vector_s, FIXED_ufrom_int_int8_t), op_size, slot_1, NO_FRAME_T, slot_r, label_ovf);
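/* gen_constant: store an immediate taken from the instruction stream into slot_r. 'shrt' appears to select a compact 16-bit encoding; for 'real' slots kept in registers the value is moved (or unspilled) into the FP register. */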
2523 static bool attr_w gen_constant(struct codegen_context *ctx, bool real, unsigned op_size, bool shrt, frame_t slot_r)
2527 c = (int16_t)get_unaligned_16(ctx->current_position);
2528 } else switch (op_size) {
2529 #define fx(n, type, utype, sz, bits) \
2531 c = (type)cat(get_unaligned_,bits)(ctx->current_position);\
2536 internal(file_line, "gen_constant: invalid type %u", op_size);
2538 if (op_size > OP_SIZE_NATIVE) {
2539 unsigned shift = (8U << OP_SIZE_NATIVE) - 1;
2540 g(gen_frame_store_imm_raw(ctx, OP_SIZE_NATIVE, slot_r, lo_word(OP_SIZE_NATIVE), c & ((2ULL << shift) - 1)));
2541 g(gen_frame_store_imm_raw(ctx, OP_SIZE_NATIVE, slot_r, hi_word(OP_SIZE_NATIVE), c >> 1 >> shift));
2542 if (real && slot_is_register(ctx, slot_r))
2543 g(unspill(ctx, slot_r));
2545 } else if (real && slot_is_register(ctx, slot_r)) {
2546 if (ARCH_HAS_FP_GP_MOV) {
2547 g(gen_load_constant(ctx, R_SCRATCH_1, c));
2548 g(gen_mov(ctx, op_size, ctx->registers[slot_r], R_SCRATCH_1));
2550 g(gen_frame_store_imm_raw(ctx, op_size, slot_r, 0, c));
2551 g(unspill(ctx, slot_r));
2554 g(gen_frame_store_imm(ctx, op_size, slot_r, 0, c));
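/* gen_real_constant: a power-of-two-sized real constant reuses gen_constant; other sizes are copied from the function's code area with gen_memcpy_raw. */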
2559 static bool attr_w gen_real_constant(struct codegen_context *ctx, const struct type *t, frame_t slot_r)
2562 if (is_power_of_2(t->size) && t->size <= sizeof(uintbig_t))
2563 return gen_constant(ctx, true, log_2(t->size), false, slot_r);
2565 g(load_function_offset(ctx, R_SCRATCH_3, offsetof(struct data, u_.function.code)));
2567 offset = (ctx->current_position - da(ctx->fn,function)->code) * sizeof(code_t);
2569 g(gen_memcpy_raw(ctx, R_FRAME, (size_t)slot_r * slot_size, R_SCRATCH_3, offset, t->size, minimum(t->align, sizeof(code_t))));
2570 if (slot_is_register(ctx, slot_r))
2571 g(unspill(ctx, slot_r));
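/* gen_copy: copy a flat value between slots; values wider than a native word go through a register pair. */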
2576 static bool attr_w gen_copy(struct codegen_context *ctx, unsigned op_size, frame_t slot_1, frame_t slot_r)
2579 if (unlikely(op_size > OP_SIZE_NATIVE)) {
2580 g(gen_frame_load_2(ctx, OP_SIZE_NATIVE, slot_1, 0, R_SCRATCH_1, R_SCRATCH_2));
2581 g(gen_frame_store_2(ctx, OP_SIZE_NATIVE, slot_r, 0, R_SCRATCH_1, R_SCRATCH_2));
2584 unsigned target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2585 g(gen_frame_get(ctx, op_size, garbage, slot_1, target, &reg1));
2586 g(gen_frame_store(ctx, op_size, slot_r, 0, reg1));
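/* gen_fp_alu: binary floating-point operations. Add/subtract/multiply/divide and the comparisons are emitted inline when the real type is supported natively (or via x87 or half-precision conversion); everything else falls back to the typed upcall. */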
2591 static bool attr_w gen_fp_alu(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, frame_t slot_r)
2593 unsigned attr_unused fp_alu;
2595 unsigned attr_unused op_size = real_type_to_op_size(real_type);
2596 unsigned reg1, reg2, target;
2598 case OPCODE_REAL_OP_add:
2599 case OPCODE_REAL_OP_add_alt1:
2600 case OPCODE_REAL_OP_add_alt2: fp_alu = FP_ALU_ADD; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_add_real16_t); label_ovf = 0; goto do_alu;
2601 case OPCODE_REAL_OP_subtract:
2602 case OPCODE_REAL_OP_subtract_alt1:
2603 case OPCODE_REAL_OP_subtract_alt2: fp_alu = FP_ALU_SUB; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_subtract_real16_t); label_ovf = 0; goto do_alu;
2604 case OPCODE_REAL_OP_multiply:
2605 case OPCODE_REAL_OP_multiply_alt1:
2606 case OPCODE_REAL_OP_multiply_alt2: fp_alu = FP_ALU_MUL; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_multiply_real16_t); label_ovf = 0; goto do_alu;
2607 case OPCODE_REAL_OP_divide:
2608 case OPCODE_REAL_OP_divide_alt1:
2609 case OPCODE_REAL_OP_divide_alt2: fp_alu = FP_ALU_DIV; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_divide_real16_t); label_ovf = 0; goto do_alu;
2610 case OPCODE_REAL_OP_modulo:
2611 case OPCODE_REAL_OP_power:
2612 case OPCODE_REAL_OP_ldexp:
2613 case OPCODE_REAL_OP_atan2: upc = offsetof(struct cg_upcall_vector_s, REAL_binary_modulo_real16_t) + (op - OPCODE_REAL_OP_modulo) * TYPE_REAL_N * sizeof(void (*)(void)); goto do_upcall;
2614 case OPCODE_REAL_OP_equal:
2615 case OPCODE_REAL_OP_equal_alt1:
2616 case OPCODE_REAL_OP_equal_alt2: fp_alu = FP_COND_E; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_equal_real16_t); goto do_cmp;
2617 case OPCODE_REAL_OP_not_equal:
2618 case OPCODE_REAL_OP_not_equal_alt1:
2619 case OPCODE_REAL_OP_not_equal_alt2: fp_alu = FP_COND_NE; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_not_equal_real16_t); goto do_cmp;
2620 case OPCODE_REAL_OP_less:
2621 case OPCODE_REAL_OP_less_alt1:
2622 case OPCODE_REAL_OP_less_alt2: fp_alu = FP_COND_B; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_less_real16_t); goto do_cmp;
2623 case OPCODE_REAL_OP_less_equal:
2624 case OPCODE_REAL_OP_less_equal_alt1:
2625 case OPCODE_REAL_OP_less_equal_alt2: fp_alu = FP_COND_BE; upc = offsetof(struct cg_upcall_vector_s, REAL_binary_less_equal_real16_t); goto do_cmp;
2626 default: internal(file_line, "gen_fp_alu: unsupported operation %u", op);
2630 if ((SUPPORTED_FP >> real_type) & 1) {
2631 #if defined(ARCH_IA64)
2632 if (unlikely(fp_alu == FP_ALU_DIV))
2635 #if defined(ARCH_X86)
2637 #elif defined(ARCH_S390)
2638 if ((op_size <= OP_SIZE_8 && (size_t)slot_2 * slot_size < 4096) || slot_is_register(ctx, slot_2))
2640 if (slot_is_register(ctx, slot_2))
2643 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, slot_2, FR_SCRATCH_1);
2644 g(gen_frame_get(ctx, op_size, garbage, slot_1, FR_SCRATCH_1, &reg1));
2645 if (slot_is_register(ctx, slot_2)) {
2646 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, ctx->registers[slot_2]));
2648 if (target != reg1 && !ARCH_IS_3ADDRESS_FP) {
2649 g(gen_mov(ctx, op_size, target, reg1));
2652 g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size, IMM_PURPOSE_VLDR_VSTR_OFFSET, op_size));
2653 gen_insn(INSN_FP_ALU, op_size, fp_alu, 0);
2656 gen_address_offset();
2658 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2661 #if defined(ARCH_ALPHA)
2662 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2663 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2664 target = gen_frame_target(ctx, slot_r, slot_1, slot_2, FR_SCRATCH_3);
2665 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, reg2));
2666 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2668 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2669 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2670 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
2671 g(gen_3address_fp_alu(ctx, op_size, fp_alu, target, reg1, reg2));
2672 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
2676 #ifdef SUPPORTED_FP_X87
2677 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
2678 if (real_type != 3) {
2679 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2680 g(gen_frame_load_x87(ctx, INSN_X87_ALU, op_size, fp_alu, slot_1));
2682 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2683 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2684 gen_insn(INSN_X87_ALUP, op_size, fp_alu, 0);
2687 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
2691 #ifdef SUPPORTED_FP_HALF_CVT
2692 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
2693 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2694 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2695 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
2696 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2697 gen_one(FR_SCRATCH_1);
2699 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2700 gen_one(FR_SCRATCH_2);
2702 gen_insn(INSN_FP_ALU, OP_SIZE_4, fp_alu, 0);
2703 gen_one(FR_SCRATCH_1);
2704 gen_one(FR_SCRATCH_1);
2705 gen_one(FR_SCRATCH_2);
2706 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
2708 gen_one(FR_SCRATCH_1);
2709 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
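/* Floating-point comparisons: each architecture first detects unordered operands (NaN) and escapes to label_ovf, then materializes the boolean result for the requested condition. */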
2716 if ((SUPPORTED_FP >> real_type) & 1
2717 #if defined(ARCH_ALPHA)
2718 && ARCH_SUPPORTS_TRAPS
2721 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2722 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2723 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
2724 #if defined(ARCH_ALPHA)
2725 gen_insn(INSN_FP_CMP_DEST_REG_TRAP, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
2726 gen_one(FR_SCRATCH_3);
2729 gen_four(label_ovf);
2731 if (!ARCH_HAS_FP_GP_MOV) {
2732 g(gen_frame_store_raw(ctx, OP_SIZE_4, slot_r, 0, FR_SCRATCH_3));
2733 g(gen_frame_load_raw(ctx, OP_SIZE_4, sign_x, slot_r, 0, false, target));
2735 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_3));
2738 if (fp_alu == FP_COND_NE) {
2739 g(gen_imm(ctx, 0, IMM_PURPOSE_CMP, OP_SIZE_NATIVE));
2740 gen_insn(INSN_CMP_DEST_REG, OP_SIZE_NATIVE, COND_E, 0);
2745 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SHR, target, target, 30, 0));
2748 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2751 #elif defined(ARCH_IA64)
2752 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
2753 gen_one(R_CMP_RESULT);
2757 gen_insn(INSN_JMP_REG, OP_SIZE_NATIVE, COND_NE, 0);
2758 gen_one(R_CMP_RESULT);
2759 gen_four(label_ovf);
2761 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu, 0);
2762 gen_one(R_CMP_RESULT);
2766 g(gen_mov(ctx, OP_SIZE_NATIVE, target, R_CMP_RESULT));
2768 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2771 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
2772 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
2776 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
2777 gen_four(label_ovf);
2779 gen_insn(INSN_FP_CMP_COND, op_size, fp_alu, 1);
2783 gen_insn(INSN_FP_TEST_REG, OP_SIZE_NATIVE, fp_alu, 0);
2786 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2789 #elif defined(ARCH_RISCV64)
2790 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
2791 gen_one(R_SCRATCH_1);
2795 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
2796 gen_one(R_SCRATCH_2);
2800 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
2802 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, COND_E, label_ovf));
2804 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
2809 if (fp_alu == FP_COND_NE) {
2810 g(gen_imm(ctx, 1, IMM_PURPOSE_XOR, OP_SIZE_NATIVE));
2811 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_AND, false));
2817 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
2820 gen_insn(INSN_FP_CMP, op_size, 0, 1);
2823 #if defined(ARCH_ARM32)
2824 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
2826 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
2827 gen_four(label_ovf);
2828 g(gen_frame_set_cond(ctx, op_size, false, fp_alu, slot_r));
2832 #ifdef SUPPORTED_FP_X87
2833 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
2834 if (likely(cpu_test_feature(CPU_FEATURE_cmov))) {
2835 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2836 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2837 gen_insn(INSN_X87_FCOMIP, op_size, 0, 0);
2839 gen_insn(INSN_X87_FSTP, op_size, 0, 0);
2841 gen_insn(INSN_JMP_COND, op_size, COND_P, 0);
2842 gen_four(label_ovf);
2843 g(gen_frame_set_cond(ctx, op_size, false, fp_alu & 0xf, slot_r));
2847 if (real_type != 3) {
2848 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2849 g(gen_frame_load_x87(ctx, INSN_X87_FCOMP, op_size, 0, slot_2));
2851 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_2));
2852 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
2853 gen_insn(INSN_X87_FCOMPP, op_size, 0, 0);
2856 gen_insn(INSN_X87_FNSTSW, 0, 0, 0);
2860 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2865 gen_insn(INSN_JMP_COND, OP_SIZE_2, COND_NE, 0);
2866 gen_four(label_ovf);
2870 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2874 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
2877 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2881 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_E, slot_r));
2884 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2888 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
2891 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
2895 g(gen_frame_set_cond(ctx, OP_SIZE_2, false, COND_NE, slot_r));
2898 internal(file_line, "gen_fp_alu: invalid condition %u", fp_alu);
2903 #ifdef SUPPORTED_FP_HALF_CVT
2904 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
2905 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
2906 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
2907 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2908 gen_one(FR_SCRATCH_1);
2910 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
2911 gen_one(FR_SCRATCH_2);
2913 gen_insn(INSN_FP_CMP, OP_SIZE_4, 0, 1);
2914 gen_one(FR_SCRATCH_1);
2915 gen_one(FR_SCRATCH_2);
2916 #if defined(ARCH_ARM32)
2917 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
2919 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
2920 gen_four(label_ovf);
2921 g(gen_frame_set_cond(ctx, op_size, false, fp_alu, slot_r));
2927 return gen_alu_typed_upcall(ctx, upc, real_type, slot_1, slot_2, slot_r, label_ovf);
2930 #define OP_IS_ROUND(alu) ((alu) == FP_ALU1_ROUND || (alu) == FP_ALU1_FLOOR || (alu) == FP_ALU1_CEIL || (alu) == FP_ALU1_TRUNC)
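/* gen_fp_alu1: unary floating-point operations (neg, sqrt, rounding), conversion to and from integers, and the is_exception test; unsupported combinations go through the typed upcall. */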
2932 static bool attr_w gen_fp_alu1(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_r)
2934 unsigned attr_unused fp_alu;
2936 unsigned attr_unused op_size = real_type_to_op_size(real_type);
2937 unsigned reg1, target;
2939 case OPCODE_REAL_OP_neg:
2940 case OPCODE_REAL_OP_neg_alt1:
2941 case OPCODE_REAL_OP_neg_alt2: fp_alu = FP_ALU1_NEG; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_neg_real16_t); label_ovf = 0; goto do_alu;
2942 case OPCODE_REAL_OP_sqrt:
2943 case OPCODE_REAL_OP_sqrt_alt1:
2944 case OPCODE_REAL_OP_sqrt_alt2: fp_alu = FP_ALU1_SQRT; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_sqrt_real16_t); label_ovf = 0; goto do_alu;
2945 case OPCODE_REAL_OP_round: fp_alu = FP_ALU1_ROUND; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_round_real16_t); label_ovf = 0; goto do_alu;
2946 case OPCODE_REAL_OP_floor: fp_alu = FP_ALU1_FLOOR; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_floor_real16_t); label_ovf = 0; goto do_alu;
2947 case OPCODE_REAL_OP_ceil: fp_alu = FP_ALU1_CEIL; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_ceil_real16_t); label_ovf = 0; goto do_alu;
2948 case OPCODE_REAL_OP_trunc: fp_alu = FP_ALU1_TRUNC; upc = offsetof(struct cg_upcall_vector_s, REAL_unary_trunc_real16_t); label_ovf = 0; goto do_alu;
2949 case OPCODE_REAL_OP_to_int:
2950 case OPCODE_REAL_OP_to_int_alt1:
2951 case OPCODE_REAL_OP_to_int_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_to_int_real16_t); goto do_to_int;
2952 case OPCODE_REAL_OP_from_int:
2953 case OPCODE_REAL_OP_from_int_alt1:
2954 case OPCODE_REAL_OP_from_int_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_from_int_real16_t); label_ovf = 0; goto do_from_int;
2955 case OPCODE_REAL_OP_is_exception:
2956 case OPCODE_REAL_OP_is_exception_alt1:
2957 case OPCODE_REAL_OP_is_exception_alt2: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_is_exception_real16_t); label_ovf = 0; goto do_is_exception;
2958 default: upc = offsetof(struct cg_upcall_vector_s, REAL_unary_cbrt_real16_t) + (op - OPCODE_REAL_OP_cbrt) * TYPE_REAL_N * sizeof(void (*)(void)); label_ovf = 0; goto do_upcall;
2962 if ((SUPPORTED_FP >> real_type) & 1 && (
2963 #if defined(ARCH_ALPHA)
2964 fp_alu == FP_ALU1_NEG ||
2965 (fp_alu == FP_ALU1_SQRT && cpu_test_feature(CPU_FEATURE_fix)) ||
2966 #elif defined(ARCH_ARM32)
2967 fp_alu == FP_ALU1_NEG ||
2968 fp_alu == FP_ALU1_SQRT ||
2969 #elif defined(ARCH_ARM64)
2971 #elif defined(ARCH_IA64)
2972 fp_alu == FP_ALU1_NEG ||
2973 #elif defined(ARCH_LOONGARCH64)
2974 fp_alu == FP_ALU1_NEG ||
2975 fp_alu == FP_ALU1_SQRT ||
2976 fp_alu == FP_ALU1_ROUND ||
2977 #elif defined(ARCH_MIPS)
2978 fp_alu == FP_ALU1_NEG ||
2979 (fp_alu == FP_ALU1_SQRT && MIPS_HAS_SQRT) ||
2980 #elif defined(ARCH_PARISC)
2981 (fp_alu == FP_ALU1_NEG && PA_20) ||
2982 fp_alu == FP_ALU1_SQRT ||
2983 #elif defined(ARCH_POWER)
2984 fp_alu == FP_ALU1_NEG ||
2985 (fp_alu == FP_ALU1_SQRT && cpu_test_feature(CPU_FEATURE_p2) && real_type != 4) ||
2986 #elif defined(ARCH_S390)
2988 #elif defined(ARCH_SPARC)
2989 fp_alu == FP_ALU1_NEG ||
2990 fp_alu == FP_ALU1_SQRT ||
2991 #elif defined(ARCH_RISCV64)
2992 fp_alu == FP_ALU1_NEG ||
2993 fp_alu == FP_ALU1_SQRT ||
2994 #elif defined(ARCH_X86)
2995 fp_alu == FP_ALU1_SQRT ||
2996 (OP_IS_ROUND(fp_alu) && cpu_test_feature(CPU_FEATURE_sse41)) ||
2999 #if defined(ARCH_S390)
3000 if (op_size <= OP_SIZE_8 && (size_t)slot_1 * slot_size < 4096 && fp_alu == FP_ALU1_SQRT) {
3001 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3002 if (slot_is_register(ctx, slot_1)) {
3003 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3005 gen_one(ctx->registers[slot_1]);
3006 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3008 g(gen_address(ctx, R_FRAME, (size_t)slot_1 * slot_size, IMM_PURPOSE_VLDR_VSTR_OFFSET, op_size));
3009 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3011 gen_address_offset();
3012 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3017 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3018 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_2);
3019 gen_insn(INSN_FP_ALU1, op_size, fp_alu, 0);
3022 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3025 #ifdef SUPPORTED_FP_X87
3026 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3027 if (fp_alu == FP_ALU1_NEG) {
3028 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3029 gen_insn(INSN_X87_FCHS, op_size, 0, 0);
3030 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3032 } else if (fp_alu == FP_ALU1_SQRT) {
3033 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3034 gen_insn(INSN_X87_FSQRT, op_size, 0, 0);
3035 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3037 } else if (fp_alu == FP_ALU1_ROUND) {
3038 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3039 gen_insn(INSN_X87_FRNDINT, op_size, 0, 0);
3040 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3045 #ifdef SUPPORTED_FP_HALF_CVT
3046 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1 && (
3047 #if defined(ARCH_ARM32)
3048 fp_alu == FP_ALU1_NEG ||
3049 fp_alu == FP_ALU1_SQRT ||
3050 #elif defined(ARCH_ARM64)
3052 #elif defined(ARCH_X86)
3053 fp_alu == FP_ALU1_SQRT ||
3054 (OP_IS_ROUND(fp_alu) && cpu_test_feature(CPU_FEATURE_sse41)) ||
3057 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3058 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3059 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3062 gen_insn(INSN_FP_ALU1, OP_SIZE_4, fp_alu, 0);
3065 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
3068 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
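/* Real-to-integer conversion: convert with the native instruction and catch NaN or out-of-range inputs - most targets signal them by producing the integer minimum, others need an explicit unordered compare first - escaping to label_ovf. */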
3075 if ((SUPPORTED_FP >> real_type) & 1
3076 #if defined(ARCH_ALPHA)
3077 && ARCH_SUPPORTS_TRAPS
3079 #if defined(ARCH_MIPS)
3083 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3086 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
3087 #if defined(ARCH_X86)
3088 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3092 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, target, sign_bit(uint_default_t), COND_E, label_ovf));
3094 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3097 #if defined(ARCH_ARM) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS)
3098 #if defined(ARCH_ARM)
3099 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3102 #if defined(ARCH_ARM32)
3103 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3105 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3106 gen_four(label_ovf);
3108 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3112 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
3113 gen_four(label_ovf);
3115 #if defined(ARCH_ARM32) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS)
3116 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3117 gen_one(FR_SCRATCH_1);
3120 g(gen_mov(ctx, OP_SIZE_INT, target, FR_SCRATCH_1));
3122 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3126 g(gen_imm(ctx, (int_default_t)(sign_bit(uint_default_t) + 1), IMM_PURPOSE_ADD, OP_SIZE_INT));
3127 gen_insn(INSN_ALU, OP_SIZE_INT, ALU_ADD, ALU_WRITES_FLAGS(ALU_ADD, is_imm()));
3128 gen_one(R_SCRATCH_2);
3132 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_2, 1, COND_BE, label_ovf));
3134 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3137 #if defined(ARCH_IA64)
3138 gen_insn(INSN_FP_TO_INT64, op_size, 0, 0);
3139 gen_one(FR_SCRATCH_1);
3142 g(gen_mov(ctx, OP_SIZE_NATIVE, target, FR_SCRATCH_1));
3144 if (OP_SIZE_INT == OP_SIZE_4) {
3145 g(gen_extend(ctx, OP_SIZE_4, sign_x, R_SCRATCH_2, target));
3146 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_NE, label_ovf));
3148 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, sign_bit(uint64_t), COND_E, label_ovf));
3151 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3154 #if defined(ARCH_PARISC) || defined(ARCH_POWER) || defined(ARCH_SPARC)
3155 #if defined(ARCH_POWER)
3156 if (!cpu_test_feature(CPU_FEATURE_ppc))
3158 if (OP_SIZE_INT == OP_SIZE_4)
3161 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3162 gen_one(FR_SCRATCH_1);
3165 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_1));
3166 if (slot_is_register(ctx, slot_r))
3167 g(unspill(ctx, slot_r));
3168 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_r, 0, false, target));
3170 g(gen_imm(ctx, sign_bit(uint_default_t) + 1, IMM_PURPOSE_ADD, OP_SIZE_INT));
3171 gen_insn(INSN_ALU, i_size(OP_SIZE_INT), ALU_ADD, ALU_WRITES_FLAGS(ALU_ADD, is_imm()));
3172 gen_one(R_SCRATCH_2);
3176 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_2, 1, COND_BE, label_ovf));
3180 #if defined(ARCH_ALPHA)
3181 gen_insn(INSN_FP_TO_INT64_TRAP, op_size, 0, 0);
3182 gen_one(FR_SCRATCH_2);
3184 gen_four(label_ovf);
3186 if (OP_SIZE_INT == OP_SIZE_4) {
3187 gen_insn(INSN_FP_INT64_TO_INT32_TRAP, 0, 0, 0);
3188 gen_one(FR_SCRATCH_3);
3189 gen_one(FR_SCRATCH_2);
3190 gen_four(label_ovf);
3191 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_3));
3193 g(gen_frame_store_raw(ctx, OP_SIZE_INT, slot_r, 0, FR_SCRATCH_2));
3195 if (slot_is_register(ctx, slot_r))
3196 g(unspill(ctx, slot_r));
3199 #if defined(ARCH_S390)
3200 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 1);
3204 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3205 gen_four(label_ovf);
3207 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3210 #if defined(ARCH_RISCV64)
3211 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_TO_INT32 : INSN_FP_TO_INT64, op_size, 0, 0);
3215 g(gen_load_constant(ctx, R_SCRATCH_2, sign_bit(int_default_t)));
3217 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_E, label_ovf));
3219 g(gen_imm(ctx, -1, IMM_PURPOSE_XOR, i_size(size)));
3220 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_XOR, is_imm()));
3221 gen_one(R_SCRATCH_2);
3222 gen_one(R_SCRATCH_2);
3225 g(gen_cmp_test_jmp(ctx, INSN_CMP, OP_SIZE_NATIVE, target, R_SCRATCH_2, COND_E, label_ovf));
3227 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, target));
3231 #ifdef SUPPORTED_FP_X87
3232 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3233 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3235 if (likely(cpu_test_feature(CPU_FEATURE_sse3))) {
3236 g(gen_frame_store_x87(ctx, INSN_X87_FISTTP, OP_SIZE_INT, slot_r));
3238 gen_insn(INSN_PUSH, OP_SIZE_NATIVE, 0, 0);
3242 gen_insn(INSN_X87_FLDCW, 0, 0, 0);
3243 gen_one(ARG_ADDRESS_1);
3247 g(gen_frame_store_x87(ctx, INSN_X87_FISTP, OP_SIZE_INT, slot_r));
3249 gen_insn(INSN_MOV, OP_SIZE_NATIVE, 0, 0);
3250 gen_one(ARG_ADDRESS_1);
3256 gen_insn(INSN_X87_FLDCW, 0, 0, 0);
3257 gen_one(ARG_ADDRESS_1);
3261 gen_insn(INSN_ALU, i_size(OP_SIZE_ADDRESS), ALU_ADD, 1);
3265 gen_eight(1 << OP_SIZE_NATIVE);
3267 if (slot_is_register(ctx, slot_r))
3268 g(unspill(ctx, slot_r));
3269 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_r, 0, false, R_SCRATCH_1));
3271 g(gen_cmp_test_imm_jmp(ctx, INSN_CMP, OP_SIZE_INT, R_SCRATCH_1, sign_bit(int_default_t), COND_E, label_ovf));
3276 #ifdef SUPPORTED_FP_HALF_CVT
3277 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3278 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3279 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3280 gen_one(FR_SCRATCH_1);
3282 reg1 = FR_SCRATCH_1;
3284 op_size = real_type_to_op_size(real_type);
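/* Integer-to-real conversion: move the (sign-extended) integer into a floating-point register and convert; no overflow check is needed here. */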
3291 if ((SUPPORTED_FP >> real_type) & 1) {
3292 #if defined(ARCH_ALPHA) || defined(ARCH_ARM32) || defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC) || defined(ARCH_POWER) || defined(ARCH_SPARC)
3293 int int_op_size = OP_SIZE_INT;
3294 #if defined(ARCH_POWER)
3295 if (int_op_size == OP_SIZE_4)
3297 if (op_size == OP_SIZE_4 && !cpu_test_feature(CPU_FEATURE_v206))
3299 if (op_size == OP_SIZE_8 && !cpu_test_feature(CPU_FEATURE_ppc))
3302 if (slot_is_register(ctx, slot_1))
3303 g(spill(ctx, slot_1));
3304 g(gen_frame_load_raw(ctx, int_op_size, zero_x, slot_1, 0, false, FR_SCRATCH_1));
3305 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_2);
3306 #if defined(ARCH_ALPHA)
3307 if (OP_SIZE_INT == OP_SIZE_4) {
3308 gen_insn(INSN_MOVSX, OP_SIZE_4, 0, 0);
3309 gen_one(FR_SCRATCH_1);
3310 gen_one(FR_SCRATCH_1);
3312 int_op_size = OP_SIZE_8;
3315 gen_insn(int_op_size == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, op_size, 0, 0);
3317 gen_one(FR_SCRATCH_1);
3319 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3321 #elif defined(ARCH_IA64)
3322 g(gen_frame_get(ctx, OP_SIZE_INT, sign_x, slot_1, R_SCRATCH_1, &reg1));
3323 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3325 g(gen_mov(ctx, OP_SIZE_NATIVE, target, reg1));
3327 gen_insn(INSN_FP_FROM_INT64, op_size, 0, 0);
3331 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3334 g(gen_frame_get(ctx, OP_SIZE_INT, garbage, slot_1, R_SCRATCH_1, &reg1));
3335 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3337 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, op_size, 0, 0);
3341 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
3345 #ifdef SUPPORTED_FP_X87
3346 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3347 if (slot_is_register(ctx, slot_1))
3348 g(spill(ctx, slot_1));
3349 g(gen_frame_load_x87(ctx, INSN_X87_FILD, OP_SIZE_INT, 0, slot_1));
3350 g(gen_frame_store_x87(ctx, INSN_X87_FSTP, op_size, slot_r));
3354 #ifdef SUPPORTED_FP_HALF_CVT
3355 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3356 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, FR_SCRATCH_1);
3357 #if defined(ARCH_ARM32)
3358 g(gen_frame_get(ctx, OP_SIZE_INT, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3360 gen_insn(INSN_FP_FROM_INT32, OP_SIZE_4, 0, 0);
3364 g(gen_frame_get(ctx, OP_SIZE_INT, garbage, slot_1, R_SCRATCH_1, &reg1));
3365 gen_insn(OP_SIZE_INT == OP_SIZE_4 ? INSN_FP_FROM_INT32 : INSN_FP_FROM_INT64, OP_SIZE_4, 0, 0);
3369 gen_insn(INSN_FP_CVT, OP_SIZE_4, op_size, 0);
3372 g(gen_frame_store(ctx, op_size, slot_r, 0, target));
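/* is_exception for reals: an unordered self-comparison (i.e. the value is a NaN) yields true; real exceptions are presumably encoded as NaNs. */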
3379 if ((SUPPORTED_FP >> real_type) & 1) {
3380 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3381 target = gen_frame_target(ctx, slot_r, NO_FRAME_T, NO_FRAME_T, R_SCRATCH_1);
3382 #if defined(ARCH_ALPHA)
3383 gen_insn(INSN_FP_CMP_UNORDERED_DEST_REG, op_size, 0, 0);
3384 gen_one(FR_SCRATCH_2);
3385 gen_one(FR_SCRATCH_1);
3388 if (!cpu_test_feature(CPU_FEATURE_fix)) {
3389 g(gen_frame_store_raw(ctx, OP_SIZE_4, slot_r, 0, FR_SCRATCH_2));
3390 g(gen_frame_load_raw(ctx, OP_SIZE_4, sign_x, slot_r, 0, false, target));
3392 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_2));
3395 g(gen_3address_rot_imm(ctx, OP_SIZE_NATIVE, ROT_SHR, target, target, 30, 0));
3397 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3400 #elif defined(ARCH_IA64)
3401 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
3402 gen_one(R_CMP_RESULT);
3406 g(gen_mov(ctx, OP_SIZE_NATIVE, target, R_CMP_RESULT));
3408 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3409 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
3410 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3414 gen_insn(INSN_FP_TEST_REG, OP_SIZE_NATIVE, FP_COND_P, 0);
3417 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3418 #elif defined(ARCH_RISCV64)
3419 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3424 g(gen_imm(ctx, 1, IMM_PURPOSE_XOR, OP_SIZE_NATIVE));
3425 gen_insn(INSN_ALU, OP_SIZE_NATIVE, ALU_XOR, ALU_WRITES_FLAGS(ALU_XOR, is_imm()));
3430 g(gen_frame_store(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r, 0, target));
3432 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3435 #if defined(ARCH_ARM32)
3436 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3438 g(gen_frame_set_cond(ctx, op_size, false, FP_COND_P, slot_r));
3442 #ifdef SUPPORTED_FP_X87
3443 if ((SUPPORTED_FP_X87 >> real_type) & 1) {
3444 g(gen_frame_load_x87(ctx, INSN_X87_FLD, op_size, 0, slot_1));
3445 if (likely(cpu_test_feature(CPU_FEATURE_cmov))) {
3446 gen_insn(INSN_X87_FCOMIP, op_size, 0, 0);
3449 g(gen_frame_set_cond(ctx, op_size, false, COND_P, slot_r));
3453 gen_insn(INSN_X87_FCOMP, op_size, 0, 0);
3456 gen_insn(INSN_X87_FNSTSW, 0, 0, 0);
3460 gen_insn(INSN_TEST, OP_SIZE_2, 0, 1);
3465 g(gen_frame_set_cond(ctx, op_size, false, COND_NE, slot_r));
3470 #ifdef SUPPORTED_FP_HALF_CVT
3471 if ((SUPPORTED_FP_HALF_CVT >> real_type) & 1) {
3472 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3473 gen_insn(INSN_FP_CVT, op_size, OP_SIZE_4, 0);
3474 gen_one(FR_SCRATCH_1);
3476 gen_insn(INSN_FP_CMP, OP_SIZE_4, 0, 1);
3477 gen_one(FR_SCRATCH_1);
3478 gen_one(FR_SCRATCH_1);
3479 #if defined(ARCH_ARM32)
3480 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3482 g(gen_frame_set_cond(ctx, op_size, false, FP_COND_P, slot_r));
3488 g(gen_alu_typed_upcall(ctx, upc, real_type, slot_1, NO_FRAME_T, slot_r, label_ovf));
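/*
 * gen_is_exception: stores into slot_r whether the value in slot_1 is
 * an exception.  The inline path only handles cases it can prove are
 * not exceptions (flat values and fully evaluated non-thunk pointers),
 * for which it simply clears slot_r; anything that might still be an
 * exception escapes to the interpreter.
 */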
3492 static bool attr_w gen_is_exception(struct codegen_context *ctx, frame_t slot_1, frame_t slot_r)
3494 uint32_t no_ex_label, escape_label;
3495 const struct type *type = get_type_of_local(ctx, slot_1);
3497 no_ex_label = alloc_label(ctx);
3498 if (unlikely(!no_ex_label))
3500 escape_label = alloc_escape_label(ctx);
3501 if (unlikely(!escape_label))
3504 if (TYPE_IS_FLAT(type))
3505 g(gen_test_1_jz_cached(ctx, slot_1, no_ex_label));
3507 g(gen_frame_load(ctx, OP_SIZE_SLOT, zero_x, slot_1, 0, false, R_SCRATCH_1));
3508 g(gen_ptr_is_thunk(ctx, R_SCRATCH_1, slot_1, escape_label));
3510 if (!TYPE_IS_FLAT(type)) {
3511 g(gen_compare_da_tag(ctx, R_SCRATCH_1, DATA_TAG_flat, COND_E, escape_label, R_SCRATCH_1));
3514 gen_label(no_ex_label);
3515 g(gen_frame_clear(ctx, log_2(sizeof(ajla_flat_option_t)), slot_r));
3517 flag_set(ctx, slot_r, false);
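/*
 * gen_system_property: no inline fast path is attempted.  slot_1 is
 * first checked with gen_test_1_cached (a non-flat value escapes),
 * then its integer value is passed to the
 * cg_upcall_ipret_system_property upcall and the returned integer is
 * stored into slot_r.
 */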
3522 static bool attr_w gen_system_property(struct codegen_context *ctx, frame_t slot_1, frame_t slot_r)
3524 uint32_t escape_label;
3526 escape_label = alloc_escape_label(ctx);
3527 if (unlikely(!escape_label))
3530 g(gen_test_1_cached(ctx, slot_1, escape_label));
3532 g(gen_upcall_start(ctx, 1));
3534 g(gen_frame_load(ctx, OP_SIZE_INT, garbage, slot_1, 0, false, R_ARG0));
3535 g(gen_upcall_argument(ctx, 0));
3537 g(gen_upcall(ctx, offsetof(struct cg_upcall_vector_s, cg_upcall_ipret_system_property), 1));
3539 g(gen_frame_store(ctx, OP_SIZE_INT, slot_r, 0, R_RET0));
3541 flag_set(ctx, slot_1, false);
3542 flag_set(ctx, slot_r, false);
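/*
 * gen_alu_jmp: fuses a comparison (or a boolean and/or) with the
 * conditional branch that consumes it, so the flat boolean result
 * never has to be materialized in a frame slot.  For comparisons the
 * emitted branch tests the negated condition (alu ^ 1); the boolean
 * path branches when the combined value is zero.  When the fused form
 * cannot be generated (unsupported operation, op_size wider than a
 * native word, ...), *failed is set and the caller is expected to
 * emit the non-fused sequence instead.
 *
 * A minimal sketch of the assumed calling convention (slot_a, slot_b
 * and jmp_offset are hypothetical locals of the caller):
 *
 *	bool failed;
 *	g(gen_alu_jmp(ctx, MODE_INT, OP_SIZE_4, OPCODE_INT_OP_less,
 *		      slot_a, slot_b, jmp_offset, &failed));
 *	if (failed) {
 *		... fall back to the separate compare + jump path ...
 *	}
 */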
3547 static bool attr_w gen_alu_jmp(struct codegen_context *ctx, unsigned mode, unsigned op_size, unsigned op, frame_t slot_1, frame_t slot_2, int32_t jmp_offset, bool *failed)
3552 unsigned attr_unused reg2;
3554 *failed = true; return true;
3557 case MODE_FIXED: switch (op) {
3558 case OPCODE_FIXED_OP_equal: alu = COND_E; goto do_compare;
3559 case OPCODE_FIXED_OP_not_equal: alu = COND_NE; goto do_compare;
3560 case OPCODE_FIXED_OP_less: alu = COND_L; goto do_compare;
3561 case OPCODE_FIXED_OP_less_equal: alu = COND_LE; goto do_compare;
3562 case OPCODE_FIXED_OP_uless: alu = COND_B; goto do_compare;
3563 case OPCODE_FIXED_OP_uless_equal: alu = COND_BE; goto do_compare;
3564 case OPCODE_FIXED_OP_bt: *failed = true; return true;
3565 default: internal(file_line, "gen_alu_jmp: unsupported fixed operation %u", op);
3567 case MODE_INT: switch (op) {
3568 case OPCODE_INT_OP_equal: alu = COND_E; goto do_compare;
3569 case OPCODE_INT_OP_not_equal: alu = COND_NE; goto do_compare;
3570 case OPCODE_INT_OP_less: alu = COND_L; goto do_compare;
3571 case OPCODE_INT_OP_less_equal: alu = COND_LE; goto do_compare;
3572 case OPCODE_INT_OP_bt: *failed = true; return true;
3573 default: internal(file_line, "gen_alu_jmp: unsupported int operation %u", op);
3575 case MODE_BOOL: switch (op) {
3576 case OPCODE_BOOL_OP_and: alu = ALU_AND; mode = MODE_FIXED; goto do_alu;
3577 case OPCODE_BOOL_OP_or: alu = ALU_OR; mode = MODE_FIXED; goto do_alu;
3578 case OPCODE_BOOL_OP_equal: alu = COND_E; mode = MODE_FIXED; goto do_compare;
3579 case OPCODE_BOOL_OP_not_equal: alu = COND_NE; mode = MODE_FIXED; goto do_compare;
3580 case OPCODE_BOOL_OP_less: alu = COND_L; mode = MODE_FIXED; goto do_compare;
3581 case OPCODE_BOOL_OP_less_equal: alu = COND_LE; mode = MODE_FIXED; goto do_compare;
3582 default: internal(file_line, "gen_alu_jmp: unsupported bool operation %u", op);
3585 internal(file_line, "gen_alu_jmp: unsupported mode %u", mode);
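/*
 * Comparison path: the operands are swapped (and the condition
 * reversed) if only slot_2 is currently held in a register, then one
 * of three forms is emitted depending on the architecture: a
 * two-register compare-and-branch, a compare against the spilled
 * operand followed by a branch on flags, or a compare into
 * R_CMP_RESULT followed by a branch on zero.
 */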
3587 bool attr_unused logical;
3588 if (unlikely(op_size > OP_SIZE_NATIVE)) {
3592 if (slot_is_register(ctx, slot_2) && !slot_is_register(ctx, slot_1)) {
3597 case COND_L: alu = COND_G; break;
3598 case COND_LE: alu = COND_GE; break;
3599 case COND_B: alu = COND_A; break;
3600 case COND_BE: alu = COND_AE; break;
3603 ex = op_size == i_size_cmp(op_size) + (unsigned)zero ? garbage : alu == COND_L || alu == COND_LE || alu == COND_G || alu == COND_GE || ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
3604 g(gen_frame_get(ctx, op_size, ex, slot_1, R_SCRATCH_1, &reg1));
3605 if (ARCH_HAS_JMP_2REGS(alu)) {
3606 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3607 g(gen_jump(ctx, jmp_offset, i_size_cmp(op_size), alu ^ 1, reg1, reg2));
3611 logical = COND_IS_LOGICAL(alu ^ 1);
3612 g(gen_frame_load_cmp(ctx, op_size, logical, ex, false, slot_2, 0, false, reg1));
3613 g(gen_jump(ctx, jmp_offset, op_size, alu ^ 1, -1U, -1U));
3615 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3616 g(gen_cmp_dest_reg(ctx, op_size, reg1, reg2, R_CMP_RESULT, 0, alu));
3617 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, R_CMP_RESULT, -1U));
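/*
 * Boolean and/or path: the two flat booleans are combined and the
 * branch is taken when the result is zero.  On x86 an ALU_AND with a
 * spilled operand becomes a TEST against memory; architectures with
 * flags use an ALU/TEST that sets the flags; the generic form
 * computes the result into R_SCRATCH_1 and branches on it.
 */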
3622 if (slot_is_register(ctx, slot_2) && !slot_is_register(ctx, slot_1)) {
3627 ex = op_size == i_size(op_size) + (unsigned)zero ? garbage : ARCH_PREFERS_SX(op_size) ? sign_x : zero_x;
3628 g(gen_frame_get(ctx, op_size, ex, slot_1, R_SCRATCH_1, &reg1));
3629 #if defined(ARCH_X86)
3630 if (alu == ALU_AND && !slot_is_register(ctx, slot_2)) {
3631 g(gen_address(ctx, R_FRAME, (size_t)slot_2 * slot_size, IMM_PURPOSE_LDR_OFFSET, op_size));
3632 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
3634 gen_address_offset();
3635 g(gen_jump(ctx, jmp_offset, op_size, COND_E, -1U, -1U));
3639 g(gen_frame_get(ctx, op_size, ex, slot_2, R_SCRATCH_2, &reg2));
3640 #if ARCH_HAS_FLAGS && !defined(ARCH_S390)
3641 if (alu == ALU_AND) {
3642 gen_insn(INSN_TEST, i_size(op_size), 0, 1);
3645 g(gen_jump(ctx, jmp_offset, op_size, COND_E, -1U, -1U));
3649 #if defined(ARCH_ARM64)
3654 g(gen_3address_alu(ctx, i_size(op_size), alu, R_SCRATCH_1, reg1, reg2, 1));
3655 g(gen_jump(ctx, jmp_offset, i_size(op_size), COND_E, -1U, -1U));
3660 g(gen_3address_alu(ctx, i_size(op_size), alu, R_SCRATCH_1, reg1, reg2, 0));
3661 g(gen_jump(ctx, jmp_offset, i_size(op_size), COND_E, R_SCRATCH_1, -1U));
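/*
 * gen_fp_alu_jmp: fused floating-point compare-and-branch.  Each
 * architecture first tests the unordered (FP_COND_P) outcome and
 * diverts NaN operands to label_ovf; only then is the requested
 * comparison emitted and the conditional branch generated.
 */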
3666 static bool attr_w gen_fp_alu_jmp(struct codegen_context *ctx, unsigned real_type, unsigned op, uint32_t label_ovf, frame_t slot_1, frame_t slot_2, int32_t jmp_offset, bool *failed)
3668 unsigned attr_unused fp_alu;
3669 unsigned attr_unused op_size = real_type_to_op_size(real_type);
3670 unsigned reg1, reg2;
3671 unsigned attr_unused target;
3673 case OPCODE_REAL_OP_equal:
3674 case OPCODE_REAL_OP_equal_alt1:
3675 case OPCODE_REAL_OP_equal_alt2: fp_alu = FP_COND_E; goto do_cmp;
3676 case OPCODE_REAL_OP_not_equal:
3677 case OPCODE_REAL_OP_not_equal_alt1:
3678 case OPCODE_REAL_OP_not_equal_alt2: fp_alu = FP_COND_NE; goto do_cmp;
3679 case OPCODE_REAL_OP_less:
3680 case OPCODE_REAL_OP_less_alt1:
3681 case OPCODE_REAL_OP_less_alt2: fp_alu = FP_COND_B; goto do_cmp;
3682 case OPCODE_REAL_OP_less_equal:
3683 case OPCODE_REAL_OP_less_equal_alt1:
3684 case OPCODE_REAL_OP_less_equal_alt2: fp_alu = FP_COND_BE; goto do_cmp;
3685 default: internal(file_line, "gen_fp_alu_jmp: unsupported operation %u", op);
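/*
 * On Alpha the inline comparison is only used when the CPU has the
 * FIX extension and trapping compares are supported; other
 * configurations presumably fall through to the generic fallback
 * further down.
 */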
3689 if ((SUPPORTED_FP >> real_type) & 1
3690 #if defined(ARCH_ALPHA)
3691 && ARCH_SUPPORTS_TRAPS && cpu_test_feature(CPU_FEATURE_fix)
3694 g(gen_frame_get(ctx, op_size, zero_x, slot_1, FR_SCRATCH_1, &reg1));
3695 g(gen_frame_get(ctx, op_size, zero_x, slot_2, FR_SCRATCH_2, &reg2));
3696 target = R_SCRATCH_1;
3697 #if defined(ARCH_ALPHA)
3698 gen_insn(INSN_FP_CMP_DEST_REG_TRAP, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
3699 gen_one(FR_SCRATCH_3);
3702 gen_four(label_ovf);
3704 g(gen_mov(ctx, OP_SIZE_4, target, FR_SCRATCH_3));
3706 if (fp_alu == FP_COND_NE) {
3707 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_NE, target, -1U));
3709 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, target, -1U));
3713 #elif defined(ARCH_IA64)
3714 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_P, 0);
3715 gen_one(R_CMP_RESULT);
3719 gen_insn(INSN_JMP_REG, OP_SIZE_NATIVE, COND_NE, 0);
3720 gen_one(R_CMP_RESULT);
3721 gen_four(label_ovf);
3723 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu, 0);
3724 gen_one(R_CMP_RESULT);
3728 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, R_CMP_RESULT, -1U));
3731 #elif defined(ARCH_LOONGARCH64) || defined(ARCH_MIPS) || defined(ARCH_PARISC)
3732 gen_insn(INSN_FP_CMP_COND, op_size, FP_COND_P, 1);
3736 gen_insn(INSN_JMP_FP_TEST, 0, FP_COND_P, 0);
3737 gen_four(label_ovf);
3739 gen_insn(INSN_FP_CMP_COND, op_size, fp_alu ^ 1, 1);
3743 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, fp_alu ^ 1, -1U, -1U));
3746 #elif defined(ARCH_RISCV64)
3747 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3748 gen_one(R_SCRATCH_1);
3752 gen_insn(INSN_FP_CMP_DEST_REG, op_size, FP_COND_E, 0);
3753 gen_one(R_SCRATCH_2);
3757 g(gen_3address_alu(ctx, OP_SIZE_NATIVE, ALU_AND, R_SCRATCH_1, R_SCRATCH_1, R_SCRATCH_2, 0));
3759 g(gen_jmp_on_zero(ctx, OP_SIZE_NATIVE, R_SCRATCH_1, COND_E, label_ovf));
3761 gen_insn(INSN_FP_CMP_DEST_REG, op_size, fp_alu == FP_COND_NE ? FP_COND_E : fp_alu, 0);
3766 if (fp_alu == FP_COND_NE) {
3767 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_NE, target, -1U));
3769 g(gen_jump(ctx, jmp_offset, OP_SIZE_NATIVE, COND_E, target, -1U));
3773 gen_insn(INSN_FP_CMP, op_size, 0, 1);
3776 #if defined(ARCH_ARM32)
3777 gen_insn(INSN_FP_TO_INT_FLAGS, 0, 0, 1);
3779 gen_insn(INSN_JMP_COND, op_size, FP_COND_P, 0);
3780 gen_four(label_ovf);
3781 g(gen_jump(ctx, jmp_offset, op_size, fp_alu ^ 1, -1U, -1U));