/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
25 #if (defined(__HP_cc) && EFFICIENT_WORD_SIZE >= 64) ^ defined(UNUSUAL_ARITHMETICS)
26 #define add_subtract_overflow_test_mode 1
27 #define neg_overflow_test_mode 1
29 #define add_subtract_overflow_test_mode 0
30 #define neg_overflow_test_mode 0
34 #if defined(HAVE_BUILTIN_ADD_SUB_OVERFLOW) && !defined(UNUSUAL)
36 #define gen_generic_addsub(fn, type, utype, mode) \
37 static maybe_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
41 if (unlikely(__builtin_add_overflow(*op1, *op2, &r))) \
44 if (unlikely(__builtin_sub_overflow(*op1, *op2, &r))) \
51 #define gen_generic_inc_dec(type, utype) \
52 static maybe_inline bool attr_unused cat(INT_unary_inc_,type)(const type *op, type *res)\
55 if (unlikely(__builtin_add_overflow(*op, 1, &r))) \
60 static maybe_inline bool attr_unused cat(INT_unary_dec_,type)(const type *op, type *res)\
63 if (unlikely(__builtin_sub_overflow(*op, 1, &r))) \
71 #define gen_generic_addsub(fn, type, utype, mode) \
72 static maybe_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
74 type o1 = *op1, o2 = *op2; \
77 if (sizeof(type) < sizeof(int_efficient_t)) { \
78 int_efficient_t lr = (int_efficient_t)o1 + (int_efficient_t)o2;\
80 if (unlikely(r != lr)) \
83 r = (utype)o1 + (utype)o2; \
84 if (!(add_subtract_overflow_test_mode)) { \
85 if (unlikely((~(o1 ^ o2) & (o2 ^ r) & sign_bit(utype)) != 0))\
88 if ((r >= o1) != (o2 >= 0)) \
93 if (sizeof(type) < sizeof(int_efficient_t)) { \
94 int_efficient_t lr = (int_efficient_t)o1 - (int_efficient_t)o2;\
96 if (unlikely(r != lr)) \
99 r = (utype)o1 - (utype)o2; \
100 if (!(add_subtract_overflow_test_mode)) { \
101 if (unlikely((~(o2 ^ r) & (o1 ^ r) & sign_bit(utype)) != 0))\
104 if ((r <= o1) != (o2 >= 0)) \
113 #define gen_generic_inc_dec(type, utype) \
114 static maybe_inline bool attr_unused cat(INT_unary_inc_,type)(const type *op, type *res)\
117 if (unlikely(o == signed_maximum(type))) \
119 *res = (utype)o + 1; \
122 static maybe_inline bool attr_unused cat(INT_unary_dec_,type)(const type *op, type *res)\
125 if (unlikely(o == sign_bit(type))) \
127 *res = (utype)o - 1; \
134 #if defined(HAVE_BUILTIN_MUL_OVERFLOW) && !defined(UNUSUAL)
136 #define gen_generic_multiply(type, utype) \
137 static maybe_inline bool attr_unused cat(INT_binary_multiply_,type)(const type *op1, const type *op2, type *res)\
140 if (unlikely(__builtin_mul_overflow(*op1, *op2, &r))) \
148 #define generic_multiply_(n, s, u, sz, bits) \
149 if (sz >= sizeof(unsigned) && sizeof(type) * 2 <= sz) { \
150 u lres = (u)o1 * (u)o2; \
151 if (unlikely(lres != (u)(type)lres)) \
153 *res = (type)(s)lres; \
157 #define gen_generic_multiply(type, utype) \
158 static maybe_inline bool attr_unused cat(INT_binary_multiply_,type)(const type *op1, const type *op2, type *res)\
160 const utype half_sign = (utype)1 << (sizeof(type) * 4); \
161 type o1 = *op1, o2 = *op2; \
163 for_all_fixed(generic_multiply_) \
164 r = (utype)o1 * (utype)o2; \
165 if (likely(!(((utype)(o1 + half_sign / 2) | (utype)(o2 + half_sign / 2)) & -half_sign)))\
167 if (likely(o1 != 0)) { \
168 if (unlikely(o1 == -1) && unlikely(r == sign_bit(type)))\
181 #define gen_generic_divmod(fn, type, utype, operator) \
182 static maybe_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
184 type o1 = *op1, o2 = *op2; \
187 if (unlikely(o2 == -1) && unlikely(o1 == sign_bit(type))) \
189 if (DIVIDE_ROUNDS_TO_ZERO) \
190 *res = o1 operator o2; \
192 cat4(FIXED_binary_,fn,_,type)(cast_ptr(const utype *, op1), cast_ptr(const utype *, op2), cast_ptr(utype *, res));\
196 #define gen_generic_divmod_alt1(fn, type, utype) \
197 static maybe_inline bool attr_unused cat4(INT_binary_,fn,_alt1_,type)(const type *op1, const type *op2, type *res)\
199 type o1 = *op1, o2 = *op2; \
202 if (unlikely(o2 == -1) && unlikely(o1 == sign_bit(type))) \
204 cat4(FIXED_binary_,fn,_alt1_,type)(cast_ptr(const utype *, op1), cast_ptr(const utype *, op2), cast_ptr(utype *, res));\
209 #define gen_generic_int_power(type, utype) \
210 static bool attr_unused cat(INT_binary_power_,type)(const type *op1, const type *op2, type *res)\
215 if (unlikely(o2 < 0)) \
219 if (unlikely(!cat(INT_binary_multiply_,type)(&r, &o1, &r)))\
225 if (unlikely(!cat(INT_binary_multiply_,type)(&o1, &o1, &o1))) \
233 #define gen_generic_shr(type, utype) \
234 static maybe_inline bool attr_unused cat(INT_binary_shr_,type)(const type *op1, const type *op2, type *res)\
236 type o1 = *op1, o2 = *op2; \
238 if (unlikely((utype)o2 >= (int)sizeof(type) * 8)) \
240 if (!RIGHT_SHIFT_KEEPS_SIGN) \
241 if (unlikely(o1 < 0)) \
248 #define gen_generic_shl(type, utype) \
249 static maybe_inline bool attr_unused cat(INT_binary_shl_,type)(const type *op1, const type *op2, type *res)\
251 type o1 = *op1, o2 = *op2; \
252 if (unlikely((utype)o2 >= (int)sizeof(type) * 8)) \
254 if (sizeof(type) <= sizeof(int_efficient_t) / 2) { \
255 int_efficient_t r = (int_efficient_t)o1 << o2; \
256 if (unlikely(r != (type)r)) \
261 type r = (utype)o1 << o2; \
262 if (!RIGHT_SHIFT_KEEPS_SIGN) \
263 if (unlikely(r < 0)) \
265 if (unlikely(r >> o2 != o1)) \
272 #define gen_generic_btx(fn, type, utype, mode) \
273 static maybe_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
275 if (unlikely((utype)*op2 >= sizeof(type) * 8 - 1)) { \
276 if (unlikely(*op2 < 0)) \
278 if (mode == 0 && *op1 < 0) { \
282 if (mode == 1 && *op1 >= 0) { \
288 cat4(FIXED_binary_,fn,_,type)(cast_ptr(utype *, op1), cast_ptr(const utype *, op2), cast_ptr(utype *, res));\
292 #define gen_generic_bt(type, utype) \
293 static maybe_inline bool attr_unused cat(INT_binary_bt_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
295 type o1 = *cast_ptr(type *, op1); \
296 type o2 = *cast_ptr(type *, op2); \
297 if (unlikely((utype)o2 >= sizeof(type) * 8)) { \
298 if (unlikely(o2 < 0)) \
302 *res = (ajla_flat_option_t)(((utype)o1 >> o2) & 1); \
307 #define gen_generic_not(type, utype) \
308 static ipret_inline bool attr_unused cat(INT_unary_not_,type)(const type *op, type *res)\
310 *res = ~(utype)*op; \
314 #define gen_generic_neg(type, utype) \
315 static maybe_inline bool attr_unused cat(INT_unary_neg_,type)(const type *op, type *res)\
319 if (!(neg_overflow_test_mode)) { \
320 if (unlikely(o == sign_bit(type))) \
325 if (unlikely((o & neg) < 0)) \
332 #define gen_generic_int_bsfr(fn, type, utype, bits, mode) \
333 static maybe_inline bool attr_unused cat4(INT_unary_,fn,_,type)(const type *op, type *res)\
335 if (!(mode) && unlikely(!*op)) \
337 if ((mode) && unlikely(*op <= 0)) \
340 cat(FIXED_unary_bsf_,type)(cast_ptr(const utype *, op), cast_ptr(utype *, res));\
342 cat(FIXED_unary_bsr_,type)(cast_ptr(const utype *, op), cast_ptr(utype *, res));\
346 #define gen_generic_int_popcnt(type, utype, bits) \
347 static maybe_inline bool attr_unused cat(INT_unary_popcnt_,type)(const type *op, type *res)\
349 if (unlikely(*op < 0)) \
351 cat(FIXED_unary_popcnt_,type)(cast_ptr(const utype *, op), cast_ptr(utype *, res));\
355 #define gen_generic_int_popcnt_alt1(type, utype, bits) \
356 static ipret_inline bool attr_unused cat(INT_unary_popcnt_alt1_,type)(const type *op, type *res)\
358 if (unlikely(*op < 0)) \
360 cat(FIXED_unary_popcnt_alt1_,type)(cast_ptr(const utype *, op), cast_ptr(utype *, res));\
368 #if defined(INLINE_ASM_GCC_X86)
370 #if defined(INLINE_ASM_GCC_LABELS)
	/*
	 * This is a trick. The asm goto syntax doesn't allow us to
	 * specify that the %0 register changed.
	 *
	 * We copy the variable op1 to o1 using an asm statement,
	 * so that the compiler doesn't know that *op1 == o1. We
	 * never ever reference o1 again, so the compiler won't
	 * reuse the value in the register %0.
	 */
381 #define gen_x86_binary(fn, type, utype, instr, suffix, c1, c2, c3) \
382 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
385 asm_copy(o1, *op1); \
387 "#instr #suffix" %1, %0 \n\
389 mov"#suffix" %0, %2 \n\
390 " : : c2(o1), c3(*op2), "m"(*res) : "memory", "cc" : overflow); \
396 #define gen_x86_binary_2reg(fn, type, utype, instr1, instr2, suffix, reg)\
397 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
400 asm_copy(o1, *op1); \
402 "#instr1 #suffix" %1, %%"#reg"ax \n\
403 "#instr2 #suffix" %2, %%"#reg"dx \n\
405 mov"#suffix" %%"#reg"ax, %3 \n\
406 mov"#suffix" %%"#reg"dx, %4 \n\
408 "m"(*op2), "m"(*(cast_ptr(char *, op2) + sizeof(type) / 2)),\
409 "m"(*res), "m"(*(cast_ptr(char *, res) + sizeof(type) / 2))\
410 : "memory", "cc" : overflow); \
419 #define gen_x86_binary(fn, type, utype, instr, suffix, c1, c2, c3) \
420 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
425 "#instr #suffix" %2, %1 \n\
427 " : "=q"X86_ASM_M(overflow), c1(r) : c3(*op2), "1"(*op1) : "cc");\
428 if (unlikely(overflow)) \
434 #define gen_x86_binary_2reg(fn, type, utype, instr1, instr2, suffix, reg)\
435 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
440 "#instr1 #suffix" %2, %%"#reg"ax \n\
441 "#instr2 #suffix" %3, %%"#reg"dx \n\
443 " : "=q"X86_ASM_M(overflow), "=A"(r) \
444 : "m"(*op2), "m"(*(cast_ptr(char *, op2) + sizeof(type) / 2)),\
447 if (unlikely(overflow)) \
455 #if defined(INLINE_ASM_GCC_LABELS)
457 #define gen_x86_neg(type, utype, suffix, constr) \
458 static ipret_inline bool attr_unused cat(INT_unary_neg_,type)(const type *op, type *res)\
465 mov"#suffix" %0, %1 \n\
466 " : : constr(o), "m"(*res) : "memory", "cc" : overflow); \
472 #define gen_x86_neg_2reg(type, utype, suffix, reg) \
473 static ipret_inline bool attr_unused cat(INT_unary_neg_,type)(const type *op, type *res)\
478 neg"#suffix" %%"#reg"ax \n\
479 not"#suffix" %%"#reg"dx \n\
480 sbb"#suffix" $-1, %%"#reg"dx \n\
482 mov"#suffix" %%"#reg"ax, %1 \n\
483 mov"#suffix" %%"#reg"dx, %2 \n\
485 "m"(*res), "m"(*(cast_ptr(char *, res) + sizeof(type) / 2))\
486 : "memory", "cc" : overflow); \
492 #define gen_x86_inc_dec(fn, type, utype, suffix, constr) \
493 static ipret_inline bool attr_unused cat4(INT_unary_,fn,_,type)(const type *op, type *res)\
498 "#fn""#suffix" %0 \n\
500 mov"#suffix" %0, %1 \n\
501 " : : constr(o), "m"(*res) : "memory", "cc" : overflow); \
515 #if defined(INLINE_ASM_GCC_ARM) || defined(INLINE_ASM_GCC_ARM64)
517 #if defined(INLINE_ASM_GCC_LABELS)
519 #define gen_arm_addsub(fn, type, utype, instr, s) \
520 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
523 asm_copy(o1, *op1); \
524 __asm__ goto (ARM_ASM_PREFIX " \n\
525 "#instr" %"s"0, %"s"0, %"s"1 \n\
528 " : : "r"(o1), "r"(*op2), "m"(*res) : "memory", "cc" : overflow);\
536 #define gen_arm_addsub(fn, type, utype, instr, s) \
537 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
540 unsigned long overflow; \
541 __asm__ (ARM_ASM_PREFIX " \n\
542 "#instr" %"s"1, %"s"2, %"s"3 \n\
543 mrs %0, "ARM_ASM_APSR" \n\
544 " : "=r"(overflow), "=r"(r) : "r"(*op1), "r"(*op2) : "cc"); \
545 if (unlikely(overflow & (1 << 28))) \
553 #if defined(INLINE_ASM_GCC_LABELS) && defined(ARM_ASM_STRD)
555 #define gen_arm_addsub_2reg(fn, type, utype, instr, instr2) \
556 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
559 asm_copy(o1, *op1); \
560 __asm__ goto (ARM_ASM_PREFIX " \n\
561 "#instr" %"ARM_ASM_LO"0, %"ARM_ASM_LO"0, %"ARM_ASM_LO"1 \n\
562 "#instr2" %"ARM_ASM_HI"0, %"ARM_ASM_HI"0, %"ARM_ASM_HI"1 \n\
564 "ARM_ASM_STRD" %"ARM_ASM_LO"0, %"ARM_ASM_HI"0, [ %2 ] \n\
565 " : : "r"(o1), "r"(*op2), "r"(res) : "memory", "cc" : overflow);\
573 #define gen_arm_addsub_2reg(fn, type, utype, instr, instr2) \
574 static ipret_inline bool attr_unused cat4(INT_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
577 unsigned long overflow; \
578 __asm__ (ARM_ASM_PREFIX " \n\
579 "#instr" %"ARM_ASM_LO"1, %"ARM_ASM_LO"2, %"ARM_ASM_LO"3 \n\
580 "#instr2" %"ARM_ASM_HI"1, %"ARM_ASM_HI"2, %"ARM_ASM_HI"3 \n\
581 mrs %0, "ARM_ASM_APSR" \n\
582 " : "=r"(overflow), "=r"(r) : "1"(*op1), "r"(*op2) : "cc"); \
583 if (unlikely(overflow & (1 << 28))) \
591 #if defined(INLINE_ASM_GCC_LABELS) && ARM_VERSION >= 6
593 #define gen_arm_multiply(type, utype) \
594 static ipret_inline bool attr_unused cat(INT_binary_multiply_,type)(const type *op1, const type *op2, type *res)\
597 asm_copy(o1, *op1); \
598 asm_copy(o2, *op2); \
599 __asm__ goto (ARM_ASM_PREFIX " \n\
600 smull %0, %1, %0, %1 \n\
601 cmp %1, %0, asr #31 \n\
604 " : : "r"(o1), "r"(o2), "m"(*res) : "memory", "cc" : overflow); \
612 #define gen_arm_multiply(type, utype) \
613 static ipret_inline bool attr_unused cat(INT_binary_multiply_,type)(const type *op1, const type *op2, type *res)\
615 uint32_t r, overflow; \
616 __asm__ (ARM_ASM_PREFIX " \n\
617 smull %0, %1, %2, %3 \n\
618 eor %1, %1, %0, asr #31 \n\
619 " : "=&r"(r), "=&r"(overflow) : "r"(*op1), "r"(*op2)); \
620 if (unlikely(overflow != 0)) \
628 #if defined(INLINE_ASM_GCC_LABELS)
630 #define gen_arm_neg(type, utype, s) \
631 static ipret_inline bool attr_unused cat(INT_unary_neg_,type)(const type *op, type *res)\
635 __asm__ goto (ARM_ASM_PREFIX " \n\
636 negs %"s"0, %"s"0 \n\
639 " : : "r"(o), "m"(*res) : "memory", "cc" : overflow); \
645 #if defined(INLINE_ASM_GCC_ARM64)
646 #define arm_neg_2nd "ngcs %"ARM_ASM_HI"0, %"ARM_ASM_HI"0"
648 #elif defined(INLINE_ASM_GCC_ARM_THUMB2)
649 #define arm_neg_2nd "sbcs %"ARM_ASM_HI"0, %2, %"ARM_ASM_HI"0"
650 #define arm_neg_zreg , "r"(0L)
652 #define arm_neg_2nd "rscs %"ARM_ASM_HI"0, %"ARM_ASM_HI"0, #0"
656 #define gen_arm_neg_2reg(type, utype) \
657 static ipret_inline bool attr_unused cat(INT_unary_neg_,type)(const type *op, type *res)\
661 __asm__ goto (ARM_ASM_PREFIX " \n\
662 negs %"ARM_ASM_LO"0, %"ARM_ASM_LO"0 \n\
665 "ARM_ASM_STRD" %"ARM_ASM_LO"0, %"ARM_ASM_HI"0, [ %1 ] \n\
666 " : : "r"(o), "r"(res) arm_neg_zreg : "memory", "cc" : overflow);\
676 #ifdef FIXED_DIVIDE_ALT1_TYPES
677 #define INT_DIVIDE_ALT1_TYPES FIXED_DIVIDE_ALT1_TYPES
678 #define INT_DIVIDE_ALT1_FEATURES FIXED_DIVIDE_ALT1_FEATURES
680 #ifdef FIXED_MODULO_ALT1_TYPES
681 #define INT_MODULO_ALT1_TYPES FIXED_MODULO_ALT1_TYPES
682 #define INT_MODULO_ALT1_FEATURES FIXED_MODULO_ALT1_FEATURES
684 #ifdef FIXED_POPCNT_ALT1_TYPES
685 #define INT_POPCNT_ALT1_TYPES FIXED_POPCNT_ALT1_TYPES
686 #define INT_POPCNT_ALT1_FEATURES FIXED_POPCNT_ALT1_FEATURES
689 #define file_inc "arithm-i.inc"
690 #include "for-int.inc"