2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 #ifndef AJLA_ARITHM_R_H
20 #define AJLA_ARITHM_R_H
/*
 * Operand-formatting helpers passed as the `p` argument of the gen_sse_*
 * generators. sse_one_param stringifies its argument once; avx_two_params
 * stringifies it twice, separated by ", " (gen_sse_ops passes the former,
 * gen_avx_ops the latter).
 */
25 #define sse_one_param(x) stringify(x)
26 #define avx_two_params(x) stringify(x)", "stringify(x)
/*
 * Generates a static function cat4(REAL_binary_,fn,_,type)(op1, op2, res)
 * for a scalar binary operation. The asm loads *op1 into xmm0, applies
 * <v><instr>s<s> with *op2 as the source operand and stores xmm0 to *res.
 *   v     - mnemonic prefix string ("" or "v" in the users of this macro)
 *   instr - operation mnemonic stem (add, sub, mul, div)
 *   s     - scalar-type suffix of the mnemonics
 *   p     - macro that formats the destination register operand(s)
 */
28 #define gen_sse_binary(fn, type, v, instr, s, p) \
29 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
32 "v"movs"#s" %1, %%xmm0 \n\
33 "v""#instr"s"#s" %2, "p(%%xmm0)" \n\
34 "v"movs"#s" %%xmm0, %0 \n\
35 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB); \
39 #ifdef INLINE_ASM_GCC_LABELS
/*
 * Comparison generator used when INLINE_ASM_GCC_LABELS is defined.
 * Generates cat4(REAL_binary_,fn,_,type)(op1, op2, res): the asm loads *op1
 * into xmm0 and compares it with *op2 using <v>ucomis<s>. The asm has no
 * output operands; *res is passed as a memory input together with a
 * "memory" clobber, and `unordered` is listed as the asm's jump label.
 * The function is guarded by a check that ajla_flat_option_t is one byte.
 */
40 #define gen_sse_logical(fn, type, v, instr, s) \
41 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
43 if (sizeof(ajla_flat_option_t) != 1) \
46 "v"movs"#s" %1, %%xmm0 \n\
47 "v"ucomis"#s" %2, %%xmm0 \n\
50 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC : unordered);\
/*
 * Comparison generator variant without asm jump labels. Generates
 * cat4(REAL_binary_,fn,_,type)(op1, op2, res): the asm loads *op1 into xmm0,
 * compares it with *op2 using <v>ucomis<s>, and produces two byte-sized
 * register outputs, `r` (%0) and `unordered` (%1). The C code then tests
 * `unordered` as the unlikely case.
 */
56 #define gen_sse_logical(fn, type, v, instr, s) \
57 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
59 unsigned char unordered, r; \
61 "v"movs"#s" %2, %%xmm0 \n\
62 "v"ucomis"#s" %3, %%xmm0 \n\
65 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC);\
66 if (unlikely(unordered)) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a negation helper.
 * A static constant x = -0.0 is loaded into xmm1 and XORed (<v>xorp<s>)
 * into xmm0, which holds *op1; the result is stored to *res.
 * Both xmm0 and xmm1 are listed as clobbered.
 */
73 #define gen_sse_neg(fn, type, v, s, p) \
74 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
76 static const type x = -0.0; \
78 "v"movs"#s" %1, %%xmm0 \n\
79 "v"movs"#s" %2, %%xmm1 \n\
80 "v"xorp"#s" %%xmm1, "p(%%xmm0)" \n\
81 "v"movs"#s" %%xmm0, %0 \n\
82 " : "=m"(*res) : "m"(*op1), "m"(x) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a square-root helper:
 * <v>sqrts<s> reads *op1 from memory into xmm0 and the result is stored
 * to *res. The clobber list names xmm0 and xmm1.
 */
85 #define gen_sse_sqrt(fn, type, v, s, p) \
86 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
89 "v"sqrts"#s" %1, "p(%%xmm0)" \n\
90 "v"movs"#s" %%xmm0, %0 \n\
91 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a real-to-integer
 * conversion helper. <v>cvtts<s>2si converts *op1 into the register
 * output `r`; the C code then treats r == sign_bit(int_default_t) as the
 * unlikely special case.
 */
94 #define gen_sse_to_int(fn, type, v, s) \
95 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
99 "v"cvtts"#s"2si %1, %0 \n\
100 " : "=r"(r) : "m"(*op1)); \
101 if (unlikely(r == sign_bit(int_default_t))) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), an integer-to-real
 * conversion helper. <v>cvtsi2s<s><z> converts *op1 (register or memory
 * operand) into xmm0, which is then stored to *res.
 *   z - extra mnemonic suffix appended after the type suffix
 */
107 #define gen_sse_from_int(fn, type, v, s, z, p) \
108 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
111 "v"cvtsi2s"#s""#z" %1, "p(%%xmm0)" \n\
112 "v"movs"#s" %%xmm0, %0 \n\
113 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res). The asm loads *op1 into
 * xmm0 and compares xmm0 with itself using <v>ucomis<s>; *res is the asm's
 * memory output. The function is guarded by a check that
 * ajla_flat_option_t is one byte.
 */
116 #define gen_sse_is_exception(fn, type, v, s) \
117 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, ajla_flat_option_t *res)\
119 if (sizeof(ajla_flat_option_t) != 1) \
122 "v"movs"#s" %1, %%xmm0 \n\
123 "v"ucomis"#s" %%xmm0, %%xmm0 \n\
125 " : "=m"(*res) : "m"(*op1) : "cc" X86_ASM_XMM0_CLOBC); \
/*
 * Generates cat4(REAL_binary_,fn,_,type)(op1, op2, res) for 16-bit reals
 * using conversion to single precision. vpinsrw inserts *op1 and *op2 into
 * word 0 of xmm0 and xmm1 (xmm7 is the merge source), vcvtph2ps widens
 * both, v<instr>ss computes the result in xmm0, vcvtps2ph narrows it and
 * vpextrw stores word 0 to *res. xmm0 and xmm1 are listed as clobbered.
 */
128 #define gen_f16c_binary(fn, type, instr) \
129 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
132 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
133 vpinsrw $0, %2, %%xmm7, %%xmm1 \n\
134 vcvtph2ps %%xmm0, %%xmm0 \n\
135 vcvtph2ps %%xmm1, %%xmm1 \n\
136 v"#instr"ss %%xmm1, %%xmm0, %%xmm0 \n\
137 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
138 vpextrw $0, %%xmm0, %0 \n\
139 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a square root for
 * 16-bit reals: *op1 is inserted into word 0 of xmm0, widened with
 * vcvtph2ps, passed through vsqrtss, narrowed with vcvtps2ph and word 0 is
 * stored to *res.
 */
143 #define gen_f16c_sqrt(fn, type) \
144 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
147 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
148 vcvtph2ps %%xmm0, %%xmm0 \n\
149 vsqrtss %%xmm0, %%xmm0, %%xmm0 \n\
150 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
151 vpextrw $0, %%xmm0, %0 \n\
152 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB); \
156 #ifdef INLINE_ASM_GCC_LABELS
/*
 * Comparison generator for 16-bit reals, used when INLINE_ASM_GCC_LABELS
 * is defined. *op1 and *op2 are inserted into word 0 of xmm0/xmm1, widened
 * with vcvtph2ps and compared with vucomiss. The asm has no outputs: *res
 * is a memory input covered by the "memory" clobber, xmm0 and xmm1 are
 * clobbered, and `unordered` is the asm's jump label.
 */
157 #define gen_f16c_logical(fn, type, instr) \
158 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
160 if (sizeof(ajla_flat_option_t) != 1) \
163 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
164 vpinsrw $0, %2, %%xmm7, %%xmm1 \n\
165 vcvtph2ps %%xmm0, %%xmm0 \n\
166 vcvtph2ps %%xmm1, %%xmm1 \n\
167 vucomiss %%xmm1, %%xmm0 \n\
170 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC X86_ASM_XMM1_CLOBC : unordered);\
/*
 * Comparison generator for 16-bit reals, variant without asm jump labels.
 * *op1 and *op2 are inserted into word 0 of xmm0/xmm1 (xmm7 is the merge
 * source), widened with vcvtph2ps and compared with vucomiss. The asm
 * produces two byte-sized register outputs, `r` (%0) and `unordered` (%1);
 * the C code then tests `unordered` as the unlikely case.
 *
 * The asm overwrites both xmm0 and xmm1, so both must appear in the clobber
 * list; otherwise the compiler may keep a live value in xmm1 across the asm.
 */
176 #define gen_f16c_logical(fn, type, instr) \
177 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
179 unsigned char unordered, r; \
181 vpinsrw $0, %2, %%xmm7, %%xmm0 \n\
182 vpinsrw $0, %3, %%xmm7, %%xmm1 \n\
183 vcvtph2ps %%xmm0, %%xmm0 \n\
184 vcvtph2ps %%xmm1, %%xmm1 \n\
185 vucomiss %%xmm1, %%xmm0 \n\
188 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC X86_ASM_XMM1_CLOBC);\
189 if (unlikely(unordered)) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a 16-bit-real to integer
 * conversion: *op1 is inserted into word 0 of xmm0, widened with vcvtph2ps
 * and converted with vcvttss2si into the register output `r`. The C code
 * then treats r == sign_bit(int_default_t) as the unlikely special case.
 */
196 #define gen_f16c_to_int(fn, type) \
197 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
201 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
202 vcvtph2ps %%xmm0, %%xmm0 \n\
203 vcvttss2si %%xmm0, %0 \n\
204 " : "=r"(r) : "m"(*op1) X86_ASM_XMM0_CLOB); \
205 if (unlikely(r == sign_bit(int_default_t))) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), an integer to 16-bit
 * real conversion: vcvtsi2ss<z> converts *op1 into xmm0 (xmm7 is the merge
 * source), vcvtps2ph narrows it and vpextrw stores word 0 to *res.
 *   z - extra mnemonic suffix appended to vcvtsi2ss
 */
211 #define gen_f16c_from_int(fn, type, z) \
212 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
215 vcvtsi2ss"#z" %1, %%xmm7, %%xmm0 \n\
216 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
217 vpextrw $0, %%xmm0, %0 \n\
218 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/*
 * Generates cat4(REAL_binary_,fn,_,type)(op1, op2, res) using native
 * half-precision instructions: vmovsh loads *op1 into xmm0, v<instr>sh
 * combines it with *op2, and vmovsh stores xmm0 to *res.
 */
221 #define gen_fp16_binary(fn, type, instr) \
222 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
225 vmovsh %1, %%xmm0 \n\
226 v"#instr"sh %2, %%xmm0, %%xmm0 \n\
227 vmovsh %%xmm0, %0 \n\
228 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB); \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a half-precision square
 * root: vsqrtsh reads *op1 from memory into xmm0 (xmm7 is the merge
 * source) and vmovsh stores xmm0 to *res.
 */
232 #define gen_fp16_sqrt(fn, type) \
233 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
236 vsqrtsh %1, %%xmm7, %%xmm0 \n\
237 vmovsh %%xmm0, %0 \n\
238 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB); \
242 #ifdef INLINE_ASM_GCC_LABELS
/*
 * Half-precision comparison generator used when INLINE_ASM_GCC_LABELS is
 * defined. vmovsh loads *op1 into xmm0 and vucomish compares it with *op2.
 * The asm has no outputs: *res is a memory input covered by the "memory"
 * clobber, and `unordered` is the asm's jump label. The function is guarded
 * by a check that ajla_flat_option_t is one byte.
 */
243 #define gen_fp16_logical(fn, type, instr) \
244 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
246 if (sizeof(ajla_flat_option_t) != 1) \
249 vmovsh %1, %%xmm0 \n\
250 vucomish %2, %%xmm0 \n\
253 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC : unordered);\
/*
 * Half-precision comparison generator, variant without asm jump labels.
 * vmovsh loads *op1 into xmm0 and vucomish compares it with *op2; the asm
 * produces two byte-sized register outputs, `r` (%0) and `unordered` (%1).
 * The C code then tests `unordered` as the unlikely case.
 */
259 #define gen_fp16_logical(fn, type, instr) \
260 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
262 unsigned char unordered, r; \
264 vmovsh %2, %%xmm0 \n\
265 vucomish %3, %%xmm0 \n\
268 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC);\
269 if (unlikely(unordered)) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), a half-precision to
 * integer conversion: vcvttsh2si converts *op1 directly into the register
 * output `r`. The C code then treats r == sign_bit(int_default_t) as the
 * unlikely special case.
 */
276 #define gen_fp16_to_int(fn, type) \
277 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
281 vcvttsh2si %1, %0 \n\
282 " : "=r"(r) : "m"(*op1)); \
283 if (unlikely(r == sign_bit(int_default_t))) \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), an integer to
 * half-precision conversion: vcvtsi2sh<z> converts *op1 into xmm0 (xmm7 is
 * the merge source) and vmovsh stores xmm0 to *res.
 *   z - extra mnemonic suffix appended to vcvtsi2sh
 */
289 #define gen_fp16_from_int(fn, type, z) \
290 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
293 vcvtsi2sh"#z" %1, %%xmm7, %%xmm0 \n\
294 vmovsh %%xmm0, %0 \n\
295 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/*
 * Generates cat4(REAL_binary_,fn,_,type)(op1, op2, res) for ARM VFP.
 * The pointers are passed in registers; vldr loads the operands into
 * <s>0 and <s>1, <op>.<f> computes into <s>0 and vstr stores it through
 * res. Because the asm stores through a register pointer, "memory" is
 * clobbered.
 *   op - instruction mnemonic string (e.g. "vadd")
 *   f  - type-suffix string for the mnemonic
 *   s  - register-name prefix string
 */
298 #define gen_vfp_binary(fn, type, op, f, s) \
299 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
301 __asm__ (ARM_ASM_PREFIX " \n\
302 vldr "s"0, [ %1 ] \n\
303 vldr "s"1, [ %2 ] \n\
304 "op"."f" "s"0, "s"0, "s"1 \n\
305 vstr "s"0, [ %0 ] \n\
306 " :: "r"(res), "r"(op1), "r"(op2) : s"0", s"1", "memory"); \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res) for ARM VFP: vldr loads
 * *op1 into <s>0, <op>.<f> is applied in place and vstr stores the result
 * through res. Parameters op, f and s are strings, as in gen_vfp_binary.
 */
310 #define gen_vfp_unary(fn, type, op, f, s) \
311 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
313 __asm__ (ARM_ASM_PREFIX " \n\
314 vldr "s"0, [ %1 ] \n\
315 "op"."f" "s"0, "s"0 \n\
316 vstr "s"0, [ %0 ] \n\
317 " :: "r"(res), "r"(op1) : s"0", "memory"); \
321 #ifdef INLINE_ASM_GCC_LABELS
/*
 * ARM VFP comparison generator used when INLINE_ASM_GCC_LABELS is defined.
 * The operands are loaded into <s>0 and <s>1 and compared with vcmp.<f>;
 * vmrs copies the FP status flags into APSR and `bvs` jumps to the
 * `unordered` label. mov<cond> conditionally sets r0 to 1, so r0 is listed
 * as clobbered.
 *   cond - ARM condition-code suffix (stringified) for the conditional mov
 */
322 #define gen_vfp_logical(fn, type, cond, f, s) \
323 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
325 __asm__ goto (ARM_ASM_PREFIX " \n\
326 vldr "s"0, [ %1 ] \n\
327 vldr "s"1, [ %2 ] \n\
329 vcmp."f" "s"0, "s"1 \n\
330 vmrs APSR_nzcv, fpscr \n\
331 bvs %l[unordered] \n\
333 mov"#cond" r0, #1 \n\
335 " : : "r"(res), "r"(op1), "r"(op2) : s"0", s"1", "r0", "memory", "cc" : unordered);\
/*
 * ARM VFP real-to-integer generator used with asm jump labels. *op1 is
 * loaded into <s>0 and compared with itself; `bvs` jumps to `unordered`
 * when that comparison is unordered. Otherwise vcvt.s32.<f> converts into
 * s1, 0x80000001 is added to r0 in two steps (0x80000000, then 1), and
 * `bls` jumps to `unordered` as well.
 */
340 #define gen_vfp_to_int(fn, type, f, s) \
341 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
343 __asm__ goto (ARM_ASM_PREFIX " \n\
344 vldr "s"0, [ %1 ] \n\
345 vcmp."f" "s"0, "s"0 \n\
346 vmrs APSR_nzcv, fpscr \n\
347 bvs %l[unordered] \n\
348 vcvt.s32."f" s1, "s"0 \n\
350 add r0, r0, #0x80000000 \n\
351 add r0, r0, #0x00000001 \n\
353 bls %l[unordered] \n\
355 " : : "r"(res), "r"(op1) : s"0", s"1", "r0", "memory", "cc" : unordered);\
/*
 * ARM VFP comparison generator, variant without asm jump labels. The
 * operands are loaded into <s>0 and <s>1 and compared with vcmp.<f>; vmrs
 * copies the FP status flags into APSR. The asm has two register outputs,
 * `unordered` (%0) and `r` (%1); mov<cond> conditionally sets %1 to 1.
 * The C code then tests `unordered` as the unlikely case.
 */
361 #define gen_vfp_logical(fn, type, cond, f, s) \
362 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
364 unsigned unordered, r; \
365 __asm__ (ARM_ASM_PREFIX " \n\
366 vldr "s"0, [ %2 ] \n\
367 vldr "s"1, [ %3 ] \n\
370 vcmp."f" "s"0, "s"1 \n\
371 vmrs APSR_nzcv, fpscr \n\
375 mov"#cond" %1, #1 \n\
376 " : "=r"(unordered), "=r"(r) : "r"(op1), "r"(op2) : s"0", s"1", "r0", "memory", "cc");\
377 if (unlikely(unordered)) \
/*
 * ARM VFP real-to-integer generator, variant without asm jump labels.
 * *op1 is loaded into <s>0, compared with itself, and converted with
 * vcvt.s32.<f> into s0. The asm has two register outputs, `unordered` (%0)
 * and `r` (%1). The C code rejects the result when `unordered` is set or
 * when (unsigned)r + 0x80000001U < 1.
 *
 * NOTE(review): with 32-bit unsigned arithmetic that last test is true only
 * for r == 0x7fffffff. The asm-goto variant of this macro branches with
 * `bls` after adding the same constant, which may also reject
 * r == 0x80000000 - confirm that the two variants are meant to agree.
 */
382 #define gen_vfp_to_int(fn, type, f, s) \
383 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
385 unsigned unordered; \
387 __asm__ (ARM_ASM_PREFIX " \n\
388 vldr "s"0, [ %2 ] \n\
390 vcmp."f" "s"0, "s"0 \n\
391 vmrs APSR_nzcv, fpscr \n\
394 vcvt.s32."f" s0, "s"0 \n\
396 " : "=r"(unordered), "=r"(r) : "r"(op1) : s"0", s"1", "r0", "memory", "cc");\
397 if (unlikely(unordered) || (unlikely((unsigned)r + 0x80000001U < 1)))\
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), an integer-to-real
 * conversion for ARM VFP: vcvt.<f>.s32 converts the integer held in s0
 * into <s>0 and vstr stores it through res. d0 and "memory" are clobbered.
 */
404 #define gen_vfp_from_int(fn, type, f, s) \
405 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
409 vcvt."f".s32 "s"0, s0 \n\
410 vstr "s"0, [ %0 ] \n\
411 " : : "r"(res), "r"(op1) : "d0", "memory"); \
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res) for ARM VFP. *op1 is
 * loaded into <s>0 and compared with itself using vcmp.<f>; vmrs copies
 * the FP status flags into APSR. The asm's only output is the register
 * value `unordered`.
 */
415 #define gen_vfp_is_exception(fn, type, f, s) \
416 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, ajla_flat_option_t *res)\
418 unsigned unordered; \
419 __asm__ (ARM_ASM_PREFIX " \n\
420 vldr "s"0, [ %1 ] \n\
422 vcmp."f" "s"0, "s"0 \n\
423 vmrs APSR_nzcv, fpscr \n\
426 " : "=r"(unordered) : "r"(op1) : s"0", s"1", "cc"); \
/*
 * Generates cat4(REAL_binary_,fn,_,type)(op1, op2, res) for 16-bit reals on
 * ARM. vld1.16 loads *op1 into lane d0[0] and *op2 into lane d0[2];
 * vcvtb.f32.f16 widens s0 and s1 in place, <op>.f32 computes into s0,
 * vcvtb.f16.f32 narrows it and vst1.16 stores lane d0[0] through res.
 *   op - instruction mnemonic string (e.g. "vadd")
 */
430 #define gen_vfp_half_binary(fn, type, op) \
431 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
433 __asm__ (ARM_ASM_PREFIX " \n\
434 vld1.16 d0[0], [ %1 ] \n\
435 vld1.16 d0[2], [ %2 ] \n\
436 vcvtb.f32.f16 s0, s0 \n\
437 vcvtb.f32.f16 s1, s1 \n\
438 "op".f32 s0, s0, s1 \n\
439 vcvtb.f16.f32 s0, s0 \n\
440 vst1.16 d0[0], [ %0 ] \n\
441 " :: "r"(res), "r"(op1), "r"(op2) : "d0", "memory"); \
445 #ifdef INLINE_ASM_GCC_LABELS
/*
 * 16-bit-real comparison generator for ARM, used when
 * INLINE_ASM_GCC_LABELS is defined. The operands are loaded into lanes
 * d0[0] and d0[2] and widened in s0 and s1; vmrs copies the FP status
 * flags into APSR and `bvs` jumps to the `unordered` label. mov<cond>
 * conditionally sets r0 to 1, so r0 is listed as clobbered.
 */
446 #define gen_vfp_half_logical(fn, type, cond) \
447 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
449 __asm__ goto (ARM_ASM_PREFIX " \n\
450 vld1.16 d0[0], [ %1 ] \n\
451 vld1.16 d0[2], [ %2 ] \n\
453 vcvtb.f32.f16 s0, s0 \n\
454 vcvtb.f32.f16 s1, s1 \n\
456 vmrs APSR_nzcv, fpscr \n\
457 bvs %l[unordered] \n\
459 mov"#cond" r0, #1 \n\
461 " : : "r"(res), "r"(op1), "r"(op2) : "d0", "r0", "memory", "cc" : unordered);\
/*
 * 16-bit-real to integer generator for ARM, used with asm jump labels.
 * *op1 is loaded into lane d0[0] and widened in s0; `bvs` jumps to
 * `unordered` after the FP flags are copied into APSR. Otherwise
 * vcvt.s32.f32 converts s0 into s1, 0x80000001 is added to r0 in two steps
 * (0x80000000, then 1), and `bls` jumps to `unordered` as well.
 */
466 #define gen_vfp_half_to_int(fn, type) \
467 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
469 __asm__ goto (ARM_ASM_PREFIX " \n\
470 vld1.16 d0[0], [ %1 ] \n\
471 vcvtb.f32.f16 s0, s0 \n\
473 vmrs APSR_nzcv, fpscr \n\
474 bvs %l[unordered] \n\
475 vcvt.s32.f32 s1, s0 \n\
477 add r0, r0, #0x80000000 \n\
478 add r0, r0, #0x00000001 \n\
480 bls %l[unordered] \n\
482 " : : "r"(res), "r"(op1) : "d0", "r0", "memory", "cc" : unordered);\
/*
 * 16-bit-real comparison generator for ARM, variant without asm jump
 * labels. The operands are loaded into lanes d0[0] and d0[2] and widened
 * in s0 and s1; vmrs copies the FP status flags into APSR. The asm has two
 * register outputs, `unordered` (%0) and `r` (%1); mov<cond> conditionally
 * sets %1 to 1. The C code then tests `unordered` as the unlikely case.
 */
488 #define gen_vfp_half_logical(fn, type, cond) \
489 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
491 unsigned unordered, r; \
492 __asm__ (ARM_ASM_PREFIX " \n\
493 vld1.16 d0[0], [ %2 ] \n\
494 vld1.16 d0[2], [ %3 ] \n\
497 vcvtb.f32.f16 s0, s0 \n\
498 vcvtb.f32.f16 s1, s1 \n\
500 vmrs APSR_nzcv, fpscr \n\
504 mov"#cond" %1, #1 \n\
505 " : "=r"(unordered), "=r"(r) : "r"(op1), "r"(op2) : "d0", "memory", "cc");\
506 if (unlikely(unordered)) \
/*
 * 16-bit-real to integer generator for ARM, variant without asm jump
 * labels. *op1 is loaded into lane d0[0], widened in s0 and converted with
 * vcvt.s32.f32 into s0. The asm has two register outputs, `unordered` (%0)
 * and `r` (%1). The C code rejects the result when `unordered` is set or
 * when (unsigned)r + 0x80000001U < 1.
 *
 * NOTE(review): with 32-bit unsigned arithmetic that last test is true only
 * for r == 0x7fffffff. The asm-goto variant of this macro branches with
 * `bls` after adding the same constant, which may also reject
 * r == 0x80000000 - confirm that the two variants are meant to agree.
 */
511 #define gen_vfp_half_to_int(fn, type) \
512 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
514 unsigned unordered; \
516 __asm__ (ARM_ASM_PREFIX " \n\
517 vld1.16 d0[0], [ %2 ] \n\
519 vcvtb.f32.f16 s0, s0 \n\
521 vmrs APSR_nzcv, fpscr \n\
524 vcvt.s32.f32 s0, s0 \n\
526 " : "=r"(unordered), "=r"(r) : "r"(op1) : "d0", "r0", "memory", "cc");\
527 if (unlikely(unordered) || (unlikely((unsigned)r + 0x80000001U < 1)))\
/*
 * Generates cat4(REAL_unary_,fn,_,type)(op1, res), an integer to 16-bit
 * real conversion for ARM: vcvt.f32.s32 converts the integer held in s0 to
 * single precision, vcvtb.f16.f32 narrows it and vst1.16 stores lane d0[0]
 * through res.
 */
534 #define gen_vfp_half_from_int(fn, type) \
535 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
537 __asm__ (ARM_ASM_PREFIX " \n\
539 vcvt.f32.s32 s0, s0 \n\
540 vcvtb.f16.f32 s0, s0 \n\
541 vst1.16 d0[0], [ %0 ] \n\
542 " : : "r"(res), "r"(op1) : "d0", "memory"); \
547 #ifdef INT_DEFAULT_BITS
/*
 * Instantiates the full set of "_alt1" helpers for `type` with the gen_sse_*
 * generators, using an empty mnemonic prefix ("") and sse_one_param as the
 * operand formatter. The comparisons are selected by the setcc mnemonic
 * passed as `instr` (sete, setne, setb, setbe, seta, setae).
 *   s - scalar-type suffix forwarded to the generators
 *   z - extra suffix forwarded to gen_sse_from_int
 */
549 #define gen_sse_ops(type, s, z) \
550 gen_sse_binary(add_alt1, type, "", add, s, sse_one_param) \
551 gen_sse_binary(subtract_alt1, type, "", sub, s, sse_one_param) \
552 gen_sse_binary(multiply_alt1, type, "", mul, s, sse_one_param) \
553 gen_sse_binary(divide_alt1, type, "", div, s, sse_one_param) \
554 gen_sse_logical(equal_alt1, type, "", sete, s) \
555 gen_sse_logical(not_equal_alt1, type, "", setne, s) \
556 gen_sse_logical(less_alt1, type, "", setb, s) \
557 gen_sse_logical(less_equal_alt1, type, "", setbe, s) \
558 gen_sse_logical(greater_alt1, type, "", seta, s) \
559 gen_sse_logical(greater_equal_alt1, type, "", setae, s) \
560 gen_sse_neg(neg_alt1, type, "", s, sse_one_param) \
561 gen_sse_sqrt(sqrt_alt1, type, "", s, sse_one_param) \
562 gen_sse_to_int(to_int_alt1, type, "", s) \
563 gen_sse_from_int(from_int_alt1, type, "", s, z, sse_one_param) \
564 gen_sse_is_exception(is_exception_alt1, type, "", s)
/*
 * Instantiates the full set of "_alt2" helpers for `type` with the same
 * gen_sse_* generators, but with the "v" mnemonic prefix and
 * avx_two_params as the operand formatter (the destination register is
 * emitted twice, giving the three-operand form).
 *   s - scalar-type suffix forwarded to the generators
 *   z - extra suffix forwarded to gen_sse_from_int
 */
566 #define gen_avx_ops(type, s, z) \
567 gen_sse_binary(add_alt2, type, "v", add, s, avx_two_params) \
568 gen_sse_binary(subtract_alt2, type, "v", sub, s, avx_two_params) \
569 gen_sse_binary(multiply_alt2, type, "v", mul, s, avx_two_params) \
570 gen_sse_binary(divide_alt2, type, "v", div, s, avx_two_params) \
571 gen_sse_logical(equal_alt2, type, "v", sete, s) \
572 gen_sse_logical(not_equal_alt2, type, "v", setne, s) \
573 gen_sse_logical(less_alt2, type, "v", setb, s) \
574 gen_sse_logical(less_equal_alt2, type, "v", setbe, s) \
575 gen_sse_logical(greater_alt2, type, "v", seta, s) \
576 gen_sse_logical(greater_equal_alt2, type, "v", setae, s) \
577 gen_sse_neg(neg_alt2, type, "v", s, avx_two_params) \
578 gen_sse_sqrt(sqrt_alt2, type, "v", s, avx_two_params) \
579 gen_sse_to_int(to_int_alt2, type, "v", s) \
580 gen_sse_from_int(from_int_alt2, type, "v", s, z, avx_two_params) \
581 gen_sse_is_exception(is_exception_alt2, type, "v", s)
/*
 * Instantiates the "_alt1" helpers for real16_t with the gen_f16c_*
 * generators: four arithmetic operations, sqrt, six comparisons and the
 * integer conversions. There is no neg or is_exception entry in this list.
 *   z - extra suffix forwarded to gen_f16c_from_int
 */
583 #define gen_f16c_ops(z) \
584 gen_f16c_binary(add_alt1, real16_t, add) \
585 gen_f16c_binary(subtract_alt1, real16_t, sub) \
586 gen_f16c_binary(multiply_alt1, real16_t, mul) \
587 gen_f16c_binary(divide_alt1, real16_t, div) \
588 gen_f16c_sqrt(sqrt_alt1, real16_t) \
589 gen_f16c_logical(equal_alt1, real16_t, sete) \
590 gen_f16c_logical(not_equal_alt1, real16_t, setne) \
591 gen_f16c_logical(less_alt1, real16_t, setb) \
592 gen_f16c_logical(less_equal_alt1, real16_t, setbe) \
593 gen_f16c_logical(greater_alt1, real16_t, seta) \
594 gen_f16c_logical(greater_equal_alt1, real16_t, setae) \
595 gen_f16c_to_int(to_int_alt1, real16_t) \
596 gen_f16c_from_int(from_int_alt1, real16_t, z)
/*
 * Instantiates the "_alt2" helpers for real16_t with the gen_fp16_*
 * generators: four arithmetic operations, sqrt, six comparisons and the
 * integer conversions.
 *   z - extra suffix forwarded to gen_fp16_from_int
 */
598 #define gen_fp16_ops(z) \
599 gen_fp16_binary(add_alt2, real16_t, add) \
600 gen_fp16_binary(subtract_alt2, real16_t, sub) \
601 gen_fp16_binary(multiply_alt2, real16_t, mul) \
602 gen_fp16_binary(divide_alt2, real16_t, div) \
603 gen_fp16_sqrt(sqrt_alt2, real16_t) \
604 gen_fp16_logical(equal_alt2, real16_t, sete) \
605 gen_fp16_logical(not_equal_alt2, real16_t, setne) \
606 gen_fp16_logical(less_alt2, real16_t, setb) \
607 gen_fp16_logical(less_equal_alt2, real16_t, setbe) \
608 gen_fp16_logical(greater_alt2, real16_t, seta) \
609 gen_fp16_logical(greater_equal_alt2, real16_t, setae) \
610 gen_fp16_to_int(to_int_alt2, real16_t) \
611 gen_fp16_from_int(from_int_alt2, real16_t, z)
/*
 * Instantiates the "_alt1" helpers for `type` with the gen_vfp_*
 * generators. Arithmetic, neg and sqrt pass the mnemonic as a string;
 * the comparisons pass the ARM condition code used by the conditional mov
 * (eq, ne, mi, ls, gt, ge).
 *   f - type-suffix string forwarded to the generators
 *   s - register-name prefix string forwarded to the generators
 */
613 #define gen_vfp_ops(type, f, s) \
614 gen_vfp_binary(add_alt1, type, "vadd", f, s) \
615 gen_vfp_binary(subtract_alt1, type, "vsub", f, s) \
616 gen_vfp_binary(multiply_alt1, type, "vmul", f, s) \
617 gen_vfp_binary(divide_alt1, type, "vdiv", f, s) \
618 gen_vfp_unary(neg_alt1, type, "vneg", f, s) \
619 gen_vfp_unary(sqrt_alt1, type, "vsqrt", f, s) \
620 gen_vfp_logical(equal_alt1, type, eq, f, s) \
621 gen_vfp_logical(not_equal_alt1, type, ne, f, s) \
622 gen_vfp_logical(less_alt1, type, mi, f, s) \
623 gen_vfp_logical(less_equal_alt1, type, ls, f, s) \
624 gen_vfp_logical(greater_alt1, type, gt, f, s) \
625 gen_vfp_logical(greater_equal_alt1, type, ge, f, s) \
626 gen_vfp_to_int(to_int_alt1, type, f, s) \
627 gen_vfp_from_int(from_int_alt1, type, f, s) \
628 gen_vfp_is_exception(is_exception_alt1, type, f, s)
/*
 * Instantiates the "_alt1" helpers for real16_t with the gen_vfp_half_*
 * generators: four arithmetic operations, six comparisons (selected by ARM
 * condition code) and the integer conversions. There is no neg, sqrt or
 * is_exception entry in this list.
 */
630 #define gen_vfp_half_ops() \
631 gen_vfp_half_binary(add_alt1, real16_t, "vadd") \
632 gen_vfp_half_binary(subtract_alt1, real16_t, "vsub") \
633 gen_vfp_half_binary(multiply_alt1, real16_t, "vmul") \
634 gen_vfp_half_binary(divide_alt1, real16_t, "vdiv") \
635 gen_vfp_half_logical(equal_alt1, real16_t, eq) \
636 gen_vfp_half_logical(not_equal_alt1, real16_t, ne) \
637 gen_vfp_half_logical(less_alt1, real16_t, mi) \
638 gen_vfp_half_logical(less_equal_alt1, real16_t, ls) \
639 gen_vfp_half_logical(greater_alt1, real16_t, gt) \
640 gen_vfp_half_logical(greater_equal_alt1, real16_t, ge) \
641 gen_vfp_half_to_int(to_int_alt1, real16_t) \
642 gen_vfp_half_from_int(from_int_alt1, real16_t)
/*
 * Empty definitions of all gen_*_ops macros, so that invoking them expands
 * to nothing. Presumably the alternative branch of the INT_DEFAULT_BITS
 * conditional above - confirm against the surrounding #if structure.
 */
646 #define gen_sse_ops(type, s, z)
647 #define gen_avx_ops(type, s, z)
648 #define gen_f16c_ops(z)
649 #define gen_fp16_ops(z)
650 #define gen_vfp_ops(type, f, s)
651 #define gen_vfp_half_ops()
/*
 * Names "arithm-r.inc" as file_inc and then includes for-real.inc.
 * Presumably for-real.inc includes file_inc once per real type - verify
 * against for-real.inc.
 */
655 #define file_inc "arithm-r.inc"
656 #include "for-real.inc"