rework the verifier to prepare for loop cutting
[ajla.git] / arithm-r.h
blob8fb88a64ce21f71e45137822841962b8b9dca627
1 /*
2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
9 * version.
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 #ifndef AJLA_ARITHM_R_H
20 #define AJLA_ARITHM_R_H
22 #include "asm.h"
23 #include "arithm-b.h"
/* Operand-form helpers for the SSE/AVX macro family below: SSE scalar ops
 * take one register operand ("xmm0"), AVX (VEX) forms take a separate source
 * and destination ("xmm0, xmm0").  Passed as the "p" macro parameter. */
25 #define sse_one_param(x) stringify(x)
26 #define avx_two_params(x) stringify(x)", "stringify(x)
/* gen_sse_binary(fn, type, v, instr, s, p):
 * defines REAL_binary_<fn>_<type>(op1, op2, res) as scalar SSE/AVX code:
 * load *op1 into xmm0, apply <instr>s<s> with *op2, store xmm0 to *res.
 * "v" is "" for SSE or "v" for the AVX encoding; "p" selects the one- vs
 * two-operand register form (see sse_one_param/avx_two_params).
 * Always returns true (these ops cannot signal failure).
 * NOTE(review): the closing "}" continuation lines of the macros in this
 * file appear to have been lost in extraction -- confirm against upstream. */
28 #define gen_sse_binary(fn, type, v, instr, s, p) \
29 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
30 { \
31 __asm__ (" \n\
32 "v"movs"#s" %1, %%xmm0 \n\
33 "v""#instr"s"#s" %2, "p(%%xmm0)" \n\
34 "v"movs"#s" %%xmm0, %0 \n\
35 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB); \
36 return true; \
/* gen_sse_logical(fn, type, v, instr, s):
 * defines REAL_binary_<fn>_<type> for FP comparisons.  ucomis sets the
 * parity flag on an unordered (NaN) comparison; the result is materialized
 * with the setcc <instr> into *res.  Returns false when unordered so the
 * caller can raise an exception instead.
 * The asm-goto variant branches (jp) straight to an "unordered" C label;
 * the fallback captures PF with setp into a flag checked afterwards.
 * *res is pre-zeroed when ajla_flat_option_t is wider than the single
 * byte written by the setcc. */
39 #ifdef INLINE_ASM_GCC_LABELS
40 #define gen_sse_logical(fn, type, v, instr, s) \
41 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
42 { \
43 if (sizeof(ajla_flat_option_t) != 1) \
44 *res = 0; \
45 __asm__ goto (" \n\
46 "v"movs"#s" %1, %%xmm0 \n\
47 "v"ucomis"#s" %2, %%xmm0 \n\
48 jp %l[unordered] \n\
49 "#instr" %0 \n\
50 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC : unordered);\
51 return true; \
52 unordered: \
53 return false; \
55 #else
56 #define gen_sse_logical(fn, type, v, instr, s) \
57 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
58 { \
59 unsigned char unordered, r; \
60 __asm__ (" \n\
61 "v"movs"#s" %2, %%xmm0 \n\
62 "v"ucomis"#s" %3, %%xmm0 \n\
63 setp %1 \n\
64 "#instr" %0 \n\
65 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC);\
66 if (unlikely(unordered)) \
67 return false; \
68 *res = r; \
69 return true; \
71 #endif
/* gen_sse_neg(fn, type, v, s, p):
 * defines REAL_unary_<fn>_<type>: negates a scalar by XORing its sign bit
 * with a static -0.0 constant of the same type (xorp<s> on xmm0/xmm1). */
73 #define gen_sse_neg(fn, type, v, s, p) \
74 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
75 { \
76 static const type x = -0.0; \
77 __asm__ (" \
78 "v"movs"#s" %1, %%xmm0 \n\
79 "v"movs"#s" %2, %%xmm1 \n\
80 "v"xorp"#s" %%xmm1, "p(%%xmm0)" \n\
81 "v"movs"#s" %%xmm0, %0 \n\
82 " : "=m"(*res) : "m"(*op1), "m"(x) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
/* gen_sse_sqrt(fn, type, v, s, p):
 * defines REAL_unary_<fn>_<type>: scalar square root via sqrts<s> into
 * xmm0, then stored to *res. */
85 #define gen_sse_sqrt(fn, type, v, s, p) \
86 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
87 { \
88 __asm__ (" \
89 "v"sqrts"#s" %1, "p(%%xmm0)" \n\
90 "v"movs"#s" %%xmm0, %0 \n\
91 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
/* gen_sse_to_int(fn, type, v, s):
 * defines REAL_unary_<fn>_<type>: truncating float-to-int conversion via
 * cvtts<s>2si.  On overflow/NaN the instruction produces the "integer
 * indefinite" value (sign_bit of the destination), which is treated as
 * failure and reported by returning false. */
94 #define gen_sse_to_int(fn, type, v, s) \
95 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
96 { \
97 int_default_t r; \
98 __asm__ (" \n\
99 "v"cvtts"#s"2si %1, %0 \n\
100 " : "=r"(r) : "m"(*op1)); \
101 if (unlikely(r == sign_bit(int_default_t))) \
102 return false; \
103 *res = r; \
104 return true; \
/* gen_sse_from_int(fn, type, v, s, z, p):
 * defines REAL_unary_<fn>_<type>: int-to-float conversion via cvtsi2s<s>;
 * "z" is an optional operand-size suffix for the integer source
 * (presumably "q" for 64-bit int_default_t -- confirm at instantiation). */
107 #define gen_sse_from_int(fn, type, v, s, z, p) \
108 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
110 __asm__ (" \n\
111 "v"cvtsi2s"#s""#z" %1, "p(%%xmm0)" \n\
112 "v"movs"#s" %%xmm0, %0 \n\
113 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/* gen_sse_is_exception(fn, type, v, s):
 * defines REAL_unary_<fn>_<type>: NaN test.  ucomis of a value with itself
 * sets the parity flag only when unordered, i.e. when *op1 is NaN; setp
 * writes the result byte directly to *res (pre-zeroed if wider). */
116 #define gen_sse_is_exception(fn, type, v, s) \
117 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, ajla_flat_option_t *res)\
119 if (sizeof(ajla_flat_option_t) != 1) \
120 *res = 0; \
121 __asm__ (" \
122 "v"movs"#s" %1, %%xmm0 \n\
123 "v"ucomis"#s" %%xmm0, %%xmm0 \n\
124 setp %0 \n\
125 " : "=m"(*res) : "m"(*op1) : "cc" X86_ASM_XMM0_CLOBC); \
/* gen_f16c_binary(fn, type, instr):
 * half-precision binary op using F16C: load the 16-bit halves with vpinsrw,
 * widen to single precision (vcvtph2ps), do the op in float32, then narrow
 * back with vcvtps2ph and extract the low word.  The $4 immediate of
 * vcvtps2ph presumably selects MXCSR-controlled rounding -- confirm against
 * the ISA reference.  Always returns true. */
128 #define gen_f16c_binary(fn, type, instr) \
129 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
131 __asm__ (" \n\
132 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
133 vpinsrw $0, %2, %%xmm7, %%xmm1 \n\
134 vcvtph2ps %%xmm0, %%xmm0 \n\
135 vcvtph2ps %%xmm1, %%xmm1 \n\
136 v"#instr"ss %%xmm1, %%xmm0, %%xmm0 \n\
137 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
138 vpextrw $0, %%xmm0, %0 \n\
139 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB X86_ASM_XMM1_CLOBC);\
140 return true; \
/* gen_f16c_sqrt(fn, type):
 * half-precision sqrt via F16C: widen to float32, vsqrtss, narrow back. */
143 #define gen_f16c_sqrt(fn, type) \
144 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
146 __asm__ (" \n\
147 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
148 vcvtph2ps %%xmm0, %%xmm0 \n\
149 vsqrtss %%xmm0, %%xmm0, %%xmm0 \n\
150 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
151 vpextrw $0, %%xmm0, %0 \n\
152 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB); \
153 return true; \
/* gen_f16c_logical(fn, type, instr):
 * half-precision comparison via F16C: widen both operands to float32 and
 * vucomiss them.  Same unordered-handling structure as gen_sse_logical:
 * asm-goto variant jumps out on parity (NaN), fallback captures PF with
 * setp; returns false on unordered. */
156 #ifdef INLINE_ASM_GCC_LABELS
157 #define gen_f16c_logical(fn, type, instr) \
158 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
160 if (sizeof(ajla_flat_option_t) != 1) \
161 *res = 0; \
162 __asm__ goto (" \n\
163 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
164 vpinsrw $0, %2, %%xmm7, %%xmm1 \n\
165 vcvtph2ps %%xmm0, %%xmm0 \n\
166 vcvtph2ps %%xmm1, %%xmm1 \n\
167 vucomiss %%xmm1, %%xmm0 \n\
168 jp %l[unordered] \n\
169 "#instr" %0 \n\
170 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC X86_ASM_XMM1_CLOBC : unordered);\
171 return true; \
172 unordered: \
173 return false; \
175 #else
176 #define gen_f16c_logical(fn, type, instr) \
177 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
179 unsigned char unordered, r; \
180 __asm__ (" \n\
181 vpinsrw $0, %2, %%xmm7, %%xmm0 \n\
182 vpinsrw $0, %3, %%xmm7, %%xmm1 \n\
183 vcvtph2ps %%xmm0, %%xmm0 \n\
184 vcvtph2ps %%xmm1, %%xmm1 \n\
185 vucomiss %%xmm1, %%xmm0 \n\
186 setp %1 \n\
187 "#instr" %0 \n\
188 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC);\
189 if (unlikely(unordered)) \
190 return false; \
191 *res = r; \
192 return true; \
194 #endif
/* gen_f16c_to_int(fn, type):
 * half->int: widen to float32, truncating vcvttss2si; the integer-
 * indefinite result (sign_bit) signals overflow/NaN -> false. */
196 #define gen_f16c_to_int(fn, type) \
197 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
199 int_default_t r; \
200 __asm__ (" \n\
201 vpinsrw $0, %1, %%xmm7, %%xmm0 \n\
202 vcvtph2ps %%xmm0, %%xmm0 \n\
203 vcvttss2si %%xmm0, %0 \n\
204 " : "=r"(r) : "m"(*op1) X86_ASM_XMM0_CLOB); \
205 if (unlikely(r == sign_bit(int_default_t))) \
206 return false; \
207 *res = r; \
208 return true; \
/* gen_f16c_from_int(fn, type, z):
 * int->half: vcvtsi2ss (with optional size suffix z) then narrow with
 * vcvtps2ph and extract the low 16-bit word. */
211 #define gen_f16c_from_int(fn, type, z) \
212 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
214 __asm__ (" \n\
215 vcvtsi2ss"#z" %1, %%xmm7, %%xmm0 \n\
216 vcvtps2ph $4, %%xmm0, %%xmm0 \n\
217 vpextrw $0, %%xmm0, %0 \n\
218 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/* gen_fp16_binary(fn, type, instr):
 * native half-precision binary op (AVX512-FP16 style v*sh/vmovsh
 * instructions), operating directly on 16-bit scalars without widening. */
221 #define gen_fp16_binary(fn, type, instr) \
222 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
224 __asm__ (" \n\
225 vmovsh %1, %%xmm0 \n\
226 v"#instr"sh %2, %%xmm0, %%xmm0 \n\
227 vmovsh %%xmm0, %0 \n\
228 " : "=m"(*res) : "m"(*op1), "m"(*op2) X86_ASM_XMM0_CLOB); \
229 return true; \
/* gen_fp16_sqrt(fn, type): native half-precision sqrt via vsqrtsh. */
232 #define gen_fp16_sqrt(fn, type) \
233 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
235 __asm__ (" \n\
236 vsqrtsh %1, %%xmm7, %%xmm0 \n\
237 vmovsh %%xmm0, %0 \n\
238 " : "=m"(*res) : "m"(*op1) X86_ASM_XMM0_CLOB); \
239 return true; \
/* gen_fp16_logical(fn, type, instr):
 * native half-precision comparison via vucomish; unordered (NaN) handling
 * mirrors gen_sse_logical: jp-to-label in the asm-goto variant, setp flag
 * in the fallback; returns false on unordered. */
242 #ifdef INLINE_ASM_GCC_LABELS
243 #define gen_fp16_logical(fn, type, instr) \
244 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
246 if (sizeof(ajla_flat_option_t) != 1) \
247 *res = 0; \
248 __asm__ goto (" \n\
249 vmovsh %1, %%xmm0 \n\
250 vucomish %2, %%xmm0 \n\
251 jp %l[unordered] \n\
252 "#instr" %0 \n\
253 " : : "m"(*res), "m"(*op1), "m"(*op2) : "memory", "cc" X86_ASM_XMM0_CLOBC : unordered);\
254 return true; \
255 unordered: \
256 return false; \
258 #else
259 #define gen_fp16_logical(fn, type, instr) \
260 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
262 unsigned char unordered, r; \
263 __asm__ (" \n\
264 vmovsh %2, %%xmm0 \n\
265 vucomish %3, %%xmm0 \n\
266 setp %1 \n\
267 "#instr" %0 \n\
268 " : "=r"(r), "=r"(unordered) : "m"(*op1), "m"(*op2) : "cc" X86_ASM_XMM0_CLOBC);\
269 if (unlikely(unordered)) \
270 return false; \
271 *res = r; \
272 return true; \
274 #endif
/* gen_fp16_to_int(fn, type):
 * native half->int via truncating vcvttsh2si; integer-indefinite result
 * (sign_bit) signals overflow/NaN -> false. */
276 #define gen_fp16_to_int(fn, type) \
277 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
279 int_default_t r; \
280 __asm__ (" \n\
281 vcvttsh2si %1, %0 \n\
282 " : "=r"(r) : "m"(*op1)); \
283 if (unlikely(r == sign_bit(int_default_t))) \
284 return false; \
285 *res = r; \
286 return true; \
/* gen_fp16_from_int(fn, type, z):
 * native int->half via vcvtsi2sh (optional size suffix z) + vmovsh. */
289 #define gen_fp16_from_int(fn, type, z) \
290 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
292 __asm__ (" \n\
293 vcvtsi2sh"#z" %1, %%xmm7, %%xmm0 \n\
294 vmovsh %%xmm0, %0 \n\
295 " : "=m"(*res) : "rm"(*op1) X86_ASM_XMM0_CLOB); \
/* gen_vfp_binary(fn, type, op, f, s):
 * ARM VFP binary op.  "op" is the mnemonic (e.g. "vadd"), "f" the data
 * type suffix (e.g. f32/f64), "s" the register-bank prefix string ("s" or
 * "d").  Loads both operands with vldr, applies op, stores with vstr.
 * Operands are passed by pointer ("r" constraints) with a "memory"
 * clobber, since the asm reads/writes through them. */
298 #define gen_vfp_binary(fn, type, op, f, s) \
299 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
301 __asm__ (ARM_ASM_PREFIX " \n\
302 vldr "s"0, [ %1 ] \n\
303 vldr "s"1, [ %2 ] \n\
304 "op"."f" "s"0, "s"0, "s"1 \n\
305 vstr "s"0, [ %0 ] \n\
306 " :: "r"(res), "r"(op1), "r"(op2) : s"0", s"1", "memory"); \
307 return true; \
/* gen_vfp_unary(fn, type, op, f, s):
 * ARM VFP unary op (vneg/vsqrt instantiations below); same operand
 * conventions as gen_vfp_binary. */
310 #define gen_vfp_unary(fn, type, op, f, s) \
311 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, type *res)\
313 __asm__ (ARM_ASM_PREFIX " \n\
314 vldr "s"0, [ %1 ] \n\
315 "op"."f" "s"0, "s"0 \n\
316 vstr "s"0, [ %0 ] \n\
317 " :: "r"(res), "r"(op1) : s"0", "memory"); \
318 return true; \
/* gen_vfp_logical(fn, type, cond, f, s):
 * ARM VFP comparison.  vcmp + "vmrs APSR_nzcv, fpscr" copies the FP
 * condition flags into the CPU flags; V set means unordered (NaN), in
 * which case the function returns false.  Otherwise the result is set
 * under the condition code "cond" (it/mov<cond>).
 * gen_vfp_to_int(fn, type, f, s):
 * float->int via vcvt.s32.  The add-0x80000000/add-1/cmp-#1 sequence
 * (and the equivalent C check in the fallback) rejects results of
 * 0x7fffffff and 0x80000000 -- presumably the saturated values produced
 * on overflow -- plus NaN input detected via vcmp; confirm the exact
 * rejected range against the upstream source.
 * The asm-goto variants branch to an "unordered" label; the fallbacks
 * accumulate an "unordered" flag register and test it in C afterwards. */
321 #ifdef INLINE_ASM_GCC_LABELS
322 #define gen_vfp_logical(fn, type, cond, f, s) \
323 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
325 __asm__ goto (ARM_ASM_PREFIX " \n\
326 vldr "s"0, [ %1 ] \n\
327 vldr "s"1, [ %2 ] \n\
328 mov r0, #0 \n\
329 vcmp."f" "s"0, "s"1 \n\
330 vmrs APSR_nzcv, fpscr \n\
331 bvs %l[unordered] \n\
332 it "#cond" \n\
333 mov"#cond" r0, #1 \n\
334 strb r0, [ %0 ] \n\
335 " : : "r"(res), "r"(op1), "r"(op2) : s"0", s"1", "r0", "memory", "cc" : unordered);\
336 return true; \
337 unordered: \
338 return false; \
340 #define gen_vfp_to_int(fn, type, f, s) \
341 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
343 __asm__ goto (ARM_ASM_PREFIX " \n\
344 vldr "s"0, [ %1 ] \n\
345 vcmp."f" "s"0, "s"0 \n\
346 vmrs APSR_nzcv, fpscr \n\
347 bvs %l[unordered] \n\
348 vcvt.s32."f" s1, "s"0 \n\
349 vmov r0, s1 \n\
350 add r0, r0, #0x80000000 \n\
351 add r0, r0, #0x00000001 \n\
352 cmp r0, #1 \n\
353 bls %l[unordered] \n\
354 vstr s1, [ %0 ] \n\
355 " : : "r"(res), "r"(op1) : s"0", s"1", "r0", "memory", "cc" : unordered);\
356 return true; \
357 unordered: \
358 return false; \
360 #else
361 #define gen_vfp_logical(fn, type, cond, f, s) \
362 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
364 unsigned unordered, r; \
365 __asm__ (ARM_ASM_PREFIX " \n\
366 vldr "s"0, [ %2 ] \n\
367 vldr "s"1, [ %3 ] \n\
368 mov %0, #0 \n\
369 mov %1, #0 \n\
370 vcmp."f" "s"0, "s"1 \n\
371 vmrs APSR_nzcv, fpscr \n\
372 it vs \n\
373 movvs %0, #1 \n\
374 it "#cond" \n\
375 mov"#cond" %1, #1 \n\
376 " : "=r"(unordered), "=r"(r) : "r"(op1), "r"(op2) : s"0", s"1", "r0", "memory", "cc");\
377 if (unlikely(unordered)) \
378 return false; \
379 *res = r; \
380 return true; \
382 #define gen_vfp_to_int(fn, type, f, s) \
383 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
385 unsigned unordered; \
386 int_default_t r; \
387 __asm__ (ARM_ASM_PREFIX " \n\
388 vldr "s"0, [ %2 ] \n\
389 mov %0, #0 \n\
390 vcmp."f" "s"0, "s"0 \n\
391 vmrs APSR_nzcv, fpscr \n\
392 it vs \n\
393 movvs %0, #1 \n\
394 vcvt.s32."f" s0, "s"0 \n\
395 vmov %1, s0 \n\
396 " : "=r"(unordered), "=r"(r) : "r"(op1) : s"0", s"1", "r0", "memory", "cc");\
397 if (unlikely(unordered) || (unlikely((unsigned)r + 0x80000001U < 1)))\
398 return false; \
399 *res = r; \
400 return true; \
402 #endif
/* gen_vfp_from_int(fn, type, f, s):
 * ARM VFP int->float: load the 32-bit integer into s0, vcvt.<f>.s32 into
 * the target-width register, store. */
404 #define gen_vfp_from_int(fn, type, f, s) \
405 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
407 __asm__ (" \n\
408 vldr s0, [ %1 ] \n\
409 vcvt."f".s32 "s"0, s0 \n\
410 vstr "s"0, [ %0 ] \n\
411 " : : "r"(res), "r"(op1) : "d0", "memory"); \
412 return true; \
/* gen_vfp_is_exception(fn, type, f, s):
 * ARM VFP NaN test: vcmp a value with itself, transfer FPSCR flags, and
 * set the result to 1 only when V (unordered) is set. */
415 #define gen_vfp_is_exception(fn, type, f, s) \
416 static ipret_inline void attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, ajla_flat_option_t *res)\
418 unsigned unordered; \
419 __asm__ (ARM_ASM_PREFIX " \n\
420 vldr "s"0, [ %1 ] \n\
421 mov %0, #0 \n\
422 vcmp."f" "s"0, "s"0 \n\
423 vmrs APSR_nzcv, fpscr \n\
424 it vs \n\
425 movvs %0, #1 \n\
426 " : "=r"(unordered) : "r"(op1) : s"0", s"1", "cc"); \
427 *res = unordered; \
/* gen_vfp_half_binary(fn, type, op):
 * ARM half-precision binary op: load the two 16-bit halves into lanes of
 * d0, widen with vcvtb.f32.f16, do the op in f32, narrow back with
 * vcvtb.f16.f32 and store the low 16-bit lane. */
430 #define gen_vfp_half_binary(fn, type, op) \
431 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, type *res)\
433 __asm__ (ARM_ASM_PREFIX " \n\
434 vld1.16 d0[0], [ %1 ] \n\
435 vld1.16 d0[2], [ %2 ] \n\
436 vcvtb.f32.f16 s0, s0 \n\
437 vcvtb.f32.f16 s1, s1 \n\
438 "op".f32 s0, s0, s1 \n\
439 vcvtb.f16.f32 s0, s0 \n\
440 vst1.16 d0[0], [ %0 ] \n\
441 " :: "r"(res), "r"(op1), "r"(op2) : "d0", "memory"); \
442 return true; \
/* gen_vfp_half_logical(fn, type, cond) / gen_vfp_half_to_int(fn, type):
 * ARM half-precision comparison and float->int, structured exactly like
 * the full-width gen_vfp_logical/gen_vfp_to_int above but with an extra
 * vcvtb.f32.f16 widening step after loading the 16-bit lanes.  Unordered
 * (NaN) and the saturated conversion results are rejected by returning
 * false; asm-goto variant branches to a label, fallback tests a flag. */
445 #ifdef INLINE_ASM_GCC_LABELS
446 #define gen_vfp_half_logical(fn, type, cond) \
447 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
449 __asm__ goto (ARM_ASM_PREFIX " \n\
450 vld1.16 d0[0], [ %1 ] \n\
451 vld1.16 d0[2], [ %2 ] \n\
452 mov r0, #0 \n\
453 vcvtb.f32.f16 s0, s0 \n\
454 vcvtb.f32.f16 s1, s1 \n\
455 vcmp.f32 s0, s1 \n\
456 vmrs APSR_nzcv, fpscr \n\
457 bvs %l[unordered] \n\
458 it "#cond" \n\
459 mov"#cond" r0, #1 \n\
460 strb r0, [ %0 ] \n\
461 " : : "r"(res), "r"(op1), "r"(op2) : "d0", "r0", "memory", "cc" : unordered);\
462 return true; \
463 unordered: \
464 return false; \
466 #define gen_vfp_half_to_int(fn, type) \
467 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
469 __asm__ goto (ARM_ASM_PREFIX " \n\
470 vld1.16 d0[0], [ %1 ] \n\
471 vcvtb.f32.f16 s0, s0 \n\
472 vcmp.f32 s0, s0 \n\
473 vmrs APSR_nzcv, fpscr \n\
474 bvs %l[unordered] \n\
475 vcvt.s32.f32 s1, s0 \n\
476 vmov r0, s1 \n\
477 add r0, r0, #0x80000000 \n\
478 add r0, r0, #0x00000001 \n\
479 cmp r0, #1 \n\
480 bls %l[unordered] \n\
481 vstr s1, [ %0 ] \n\
482 " : : "r"(res), "r"(op1) : "d0", "r0", "memory", "cc" : unordered);\
483 return true; \
484 unordered: \
485 return false; \
487 #else
488 #define gen_vfp_half_logical(fn, type, cond) \
489 static ipret_inline bool attr_unused cat4(REAL_binary_,fn,_,type)(const type *op1, const type *op2, ajla_flat_option_t *res)\
491 unsigned unordered, r; \
492 __asm__ (ARM_ASM_PREFIX " \n\
493 vld1.16 d0[0], [ %2 ] \n\
494 vld1.16 d0[2], [ %3 ] \n\
495 mov %0, #0 \n\
496 mov %1, #0 \n\
497 vcvtb.f32.f16 s0, s0 \n\
498 vcvtb.f32.f16 s1, s1 \n\
499 vcmp.f32 s0, s1 \n\
500 vmrs APSR_nzcv, fpscr \n\
501 it vs \n\
502 movvs %0, #1 \n\
503 it "#cond" \n\
504 mov"#cond" %1, #1 \n\
505 " : "=r"(unordered), "=r"(r) : "r"(op1), "r"(op2) : "d0", "memory", "cc");\
506 if (unlikely(unordered)) \
507 return false; \
508 *res = r; \
509 return true; \
511 #define gen_vfp_half_to_int(fn, type) \
512 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const type *op1, int_default_t *res)\
514 unsigned unordered; \
515 int_default_t r; \
516 __asm__ (ARM_ASM_PREFIX " \n\
517 vld1.16 d0[0], [ %2 ] \n\
518 mov %0, #0 \n\
519 vcvtb.f32.f16 s0, s0 \n\
520 vcmp.f32 s0, s0 \n\
521 vmrs APSR_nzcv, fpscr \n\
522 it vs \n\
523 movvs %0, #1 \n\
524 vcvt.s32.f32 s0, s0 \n\
525 vmov %1, s0 \n\
526 " : "=r"(unordered), "=r"(r) : "r"(op1) : "d0", "r0", "memory", "cc");\
527 if (unlikely(unordered) || (unlikely((unsigned)r + 0x80000001U < 1)))\
528 return false; \
529 *res = r; \
530 return true; \
532 #endif
/* gen_vfp_half_from_int(fn, type):
 * ARM int->half: convert to f32, narrow with vcvtb.f16.f32, store the
 * 16-bit lane. */
534 #define gen_vfp_half_from_int(fn, type) \
535 static ipret_inline bool attr_unused cat4(REAL_unary_,fn,_,type)(const int_default_t *op1, type *res)\
537 __asm__ (ARM_ASM_PREFIX " \n\
538 vldr s0, [ %1 ] \n\
539 vcvt.f32.s32 s0, s0 \n\
540 vcvtb.f16.f32 s0, s0 \n\
541 vst1.16 d0[0], [ %0 ] \n\
542 " : : "r"(res), "r"(op1) : "d0", "memory"); \
543 return true; \
/* Instantiation lists: each gen_*_ops macro expands the full set of
 * arithmetic, comparison, and conversion helpers for one instruction-set
 * variant.  "_alt1" names the baseline path (SSE / F16C / VFP), "_alt2"
 * the newer encoding (AVX / AVX512-FP16).  All are gated on
 * INT_DEFAULT_BITS being defined; otherwise they expand to nothing. */
547 #ifdef INT_DEFAULT_BITS
549 #define gen_sse_ops(type, s, z) \
550 gen_sse_binary(add_alt1, type, "", add, s, sse_one_param) \
551 gen_sse_binary(subtract_alt1, type, "", sub, s, sse_one_param) \
552 gen_sse_binary(multiply_alt1, type, "", mul, s, sse_one_param) \
553 gen_sse_binary(divide_alt1, type, "", div, s, sse_one_param) \
554 gen_sse_logical(equal_alt1, type, "", sete, s) \
555 gen_sse_logical(not_equal_alt1, type, "", setne, s) \
556 gen_sse_logical(less_alt1, type, "", setb, s) \
557 gen_sse_logical(less_equal_alt1, type, "", setbe, s) \
558 gen_sse_logical(greater_alt1, type, "", seta, s) \
559 gen_sse_logical(greater_equal_alt1, type, "", setae, s) \
560 gen_sse_neg(neg_alt1, type, "", s, sse_one_param) \
561 gen_sse_sqrt(sqrt_alt1, type, "", s, sse_one_param) \
562 gen_sse_to_int(to_int_alt1, type, "", s) \
563 gen_sse_from_int(from_int_alt1, type, "", s, z, sse_one_param) \
564 gen_sse_is_exception(is_exception_alt1, type, "", s)
566 #define gen_avx_ops(type, s, z) \
567 gen_sse_binary(add_alt2, type, "v", add, s, avx_two_params) \
568 gen_sse_binary(subtract_alt2, type, "v", sub, s, avx_two_params) \
569 gen_sse_binary(multiply_alt2, type, "v", mul, s, avx_two_params) \
570 gen_sse_binary(divide_alt2, type, "v", div, s, avx_two_params) \
571 gen_sse_logical(equal_alt2, type, "v", sete, s) \
572 gen_sse_logical(not_equal_alt2, type, "v", setne, s) \
573 gen_sse_logical(less_alt2, type, "v", setb, s) \
574 gen_sse_logical(less_equal_alt2, type, "v", setbe, s) \
575 gen_sse_logical(greater_alt2, type, "v", seta, s) \
576 gen_sse_logical(greater_equal_alt2, type, "v", setae, s) \
577 gen_sse_neg(neg_alt2, type, "v", s, avx_two_params) \
578 gen_sse_sqrt(sqrt_alt2, type, "v", s, avx_two_params) \
579 gen_sse_to_int(to_int_alt2, type, "v", s) \
580 gen_sse_from_int(from_int_alt2, type, "v", s, z, avx_two_params) \
581 gen_sse_is_exception(is_exception_alt2, type, "v", s)
583 #define gen_f16c_ops(z) \
584 gen_f16c_binary(add_alt1, real16_t, add) \
585 gen_f16c_binary(subtract_alt1, real16_t, sub) \
586 gen_f16c_binary(multiply_alt1, real16_t, mul) \
587 gen_f16c_binary(divide_alt1, real16_t, div) \
588 gen_f16c_sqrt(sqrt_alt1, real16_t) \
589 gen_f16c_logical(equal_alt1, real16_t, sete) \
590 gen_f16c_logical(not_equal_alt1, real16_t, setne) \
591 gen_f16c_logical(less_alt1, real16_t, setb) \
592 gen_f16c_logical(less_equal_alt1, real16_t, setbe) \
593 gen_f16c_logical(greater_alt1, real16_t, seta) \
594 gen_f16c_logical(greater_equal_alt1, real16_t, setae) \
595 gen_f16c_to_int(to_int_alt1, real16_t) \
596 gen_f16c_from_int(from_int_alt1, real16_t, z)
598 #define gen_fp16_ops(z) \
599 gen_fp16_binary(add_alt2, real16_t, add) \
600 gen_fp16_binary(subtract_alt2, real16_t, sub) \
601 gen_fp16_binary(multiply_alt2, real16_t, mul) \
602 gen_fp16_binary(divide_alt2, real16_t, div) \
603 gen_fp16_sqrt(sqrt_alt2, real16_t) \
604 gen_fp16_logical(equal_alt2, real16_t, sete) \
605 gen_fp16_logical(not_equal_alt2, real16_t, setne) \
606 gen_fp16_logical(less_alt2, real16_t, setb) \
607 gen_fp16_logical(less_equal_alt2, real16_t, setbe) \
608 gen_fp16_logical(greater_alt2, real16_t, seta) \
609 gen_fp16_logical(greater_equal_alt2, real16_t, setae) \
610 gen_fp16_to_int(to_int_alt2, real16_t) \
611 gen_fp16_from_int(from_int_alt2, real16_t, z)
613 #define gen_vfp_ops(type, f, s) \
614 gen_vfp_binary(add_alt1, type, "vadd", f, s) \
615 gen_vfp_binary(subtract_alt1, type, "vsub", f, s) \
616 gen_vfp_binary(multiply_alt1, type, "vmul", f, s) \
617 gen_vfp_binary(divide_alt1, type, "vdiv", f, s) \
618 gen_vfp_unary(neg_alt1, type, "vneg", f, s) \
619 gen_vfp_unary(sqrt_alt1, type, "vsqrt", f, s) \
620 gen_vfp_logical(equal_alt1, type, eq, f, s) \
621 gen_vfp_logical(not_equal_alt1, type, ne, f, s) \
622 gen_vfp_logical(less_alt1, type, mi, f, s) \
623 gen_vfp_logical(less_equal_alt1, type, ls, f, s) \
624 gen_vfp_logical(greater_alt1, type, gt, f, s) \
625 gen_vfp_logical(greater_equal_alt1, type, ge, f, s) \
626 gen_vfp_to_int(to_int_alt1, type, f, s) \
627 gen_vfp_from_int(from_int_alt1, type, f, s) \
628 gen_vfp_is_exception(is_exception_alt1, type, f, s)
630 #define gen_vfp_half_ops() \
631 gen_vfp_half_binary(add_alt1, real16_t, "vadd") \
632 gen_vfp_half_binary(subtract_alt1, real16_t, "vsub") \
633 gen_vfp_half_binary(multiply_alt1, real16_t, "vmul") \
634 gen_vfp_half_binary(divide_alt1, real16_t, "vdiv") \
635 gen_vfp_half_logical(equal_alt1, real16_t, eq) \
636 gen_vfp_half_logical(not_equal_alt1, real16_t, ne) \
637 gen_vfp_half_logical(less_alt1, real16_t, mi) \
638 gen_vfp_half_logical(less_equal_alt1, real16_t, ls) \
639 gen_vfp_half_logical(greater_alt1, real16_t, gt) \
640 gen_vfp_half_logical(greater_equal_alt1, real16_t, ge) \
641 gen_vfp_half_to_int(to_int_alt1, real16_t) \
642 gen_vfp_half_from_int(from_int_alt1, real16_t)
644 #else
646 #define gen_sse_ops(type, s, z)
647 #define gen_avx_ops(type, s, z)
648 #define gen_f16c_ops(z)
649 #define gen_fp16_ops(z)
650 #define gen_vfp_ops(type, f, s)
651 #define gen_vfp_half_ops()
653 #endif
/* Drive the X-macro expansion: for-real.inc re-includes arithm-r.inc once
 * per supported real type, instantiating the gen_*_ops families above. */
655 #define file_inc "arithm-r.inc"
656 #include "for-real.inc"
658 #endif