2 * Copyright 2023 Siemens
4 * The authors hereby grant permission to use, copy, modify, distribute,
5 * and license this software and its documentation for any purpose, provided
6 * that existing copyright notices are retained in all copies and that this
7 * notice is included verbatim in any distributions. No written agreement,
8 * license, or royalty fee is required for any of the authorized uses.
9 * Modifications to this software may be copyrighted by their authors
10 * and need not follow the licensing terms described here, provided that
11 * the new terms are clearly indicated on the first page of each file where
16 * ====================================================
17 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
19 * Developed at SunPro, a Sun Microsystems, Inc. business.
20 * Permission to use, copy, modify, and distribute this
21 * software is freely granted, provided that this notice
23 * ====================================================
26 /* Based on newlib/libm/mathfp/s_fmod.c in Newlib. */
28 #include "amdgcnmach.h"
/* fmod (x, y) for v64df vector operands, evaluated under per-lane condition
   masks (cond / cond2 / cond3).  Stages, in the order they appear below:
     1. split x and y into word pairs and record the sign of x;
     2. early returns: y == 0, x not finite or y NaN; |x| < |y|; |x| == |y|;
     3. ix = ilogb(x) and iy = ilogb(y), with separate subnormal handling;
     4. set up {hx,lx} and {hy,ly}, shifting subnormal inputs up;
     5. the main loop over cond, followed by one last step using hz/lz;
     6. renormalise, rebuild the double and restore the sign of x.
   NOTE(review): the embedded original line numbers skip (e.g. 42 -> 46,
   126 -> 135, 135 -> 139), so the declarations of hx/lx/hy/ly/hz/lz/ix/iy/n,
   the block delimiters, the loop update expressions and the computation of
   hz/lz are not visible in this listing.  The comments below describe only
   what is shown.  */
30 DEF_VD_MATH_FUNC (v64df
, fmod
, v64df x
, v64df y
)
/* EXTRACT_WORDS is defined elsewhere; presumably it yields the high (hx/hy)
   and low (lx/ly) 32-bit halves of each double -- confirm in amdgcnmach.h.  */
36 EXTRACT_WORDS (hx
, lx
, x
);
37 EXTRACT_WORDS (hy
, ly
, y
);
38 v64si sx
= hx
& 0x80000000; /* sign of x */
40 hy
&= 0x7fffffff; /* |y| */
/* 'zeroes' is the value returned wherever the result is "sign(x)*0"; the
   remaining VECTOR_MERGE arguments are not visible in this listing.  */
42 v64df zeroes
= VECTOR_MERGE (VECTOR_INIT (-0.0),
46 /* purge off exception values */
47 VECTOR_IF (((hy
| ly
) == 0) | (hx
>= 0x7ff00000)
48 | ((hy
| ((ly
| -ly
) >> 31)) > 0x7ff00000), cond
) // y=0, or x not finite or y is NaN
/* These lanes return (x*y)/(x*y); presumably that evaluates to NaN for the
   cases listed above -- TODO confirm.  */
49 VECTOR_RETURN ((x
* y
) / (x
* y
), cond
);
51 VECTOR_IF (hx
<= hy
, cond
) // |x|<|y| return x
52 VECTOR_IF2 ((hx
< hy
) | (lx
< ly
), cond2
, cond
)
/* NOTE(review): this return is predicated on cond, whereas the sibling
   return just below uses cond2.  If VECTOR_RETURN applies its mask argument
   as given, lanes with hx == hy and lx > ly would also return x here --
   confirm against the macro definition.  */
53 VECTOR_RETURN (x
, cond
);
/* Same high word and same low word: |x| == |y|, so return the signed zero.  */
55 VECTOR_IF2 (lx
== ly
, cond2
, cond
)
56 VECTOR_RETURN (zeroes
, cond2
);
60 /* determine ix = ilogb(x) */
62 VECTOR_IF (hx
< 0x00100000, cond
) // subnormal x
/* hx == 0: all significant bits are in lx.  Start at -1043 and decrement
   while the signed view of the word is still positive (the loop's update
   expression is not visible in this listing).  */
63 VECTOR_IF2 (hx
== 0, cond2
, cond
)
64 ix
= VECTOR_INIT (-1043);
65 for (v64si i
= __builtin_convertvector (lx
, v64si
);
66 !ALL_ZEROES_P (cond2
& (i
> 0));
68 VECTOR_COND_MOVE (ix
, ix
- 1, cond2
& (i
> 0));
/* hx != 0: same countdown, starting at -1022 and scanning hx << 11.  */
69 VECTOR_ELSE2 (cond2
, cond
)
70 ix
= VECTOR_INIT (-1022);
71 for (v64si i
= __builtin_convertvector (hx
<< 11, v64si
);
72 !ALL_ZEROES_P (cond2
& (i
> 0));
74 VECTOR_COND_MOVE (ix
, ix
- 1, cond2
& (i
> 0));
/* Otherwise the exponent comes from the biased exponent field of hx.  */
77 VECTOR_COND_MOVE (ix
, (hx
>> 20) - 1023, cond
);
80 /* determine iy = ilogb(y) */
82 VECTOR_IF (hy
< 0x00100000, cond
) // subnormal y
/* Mirrors the ix computation above, applied to {hy,ly}.  */
83 VECTOR_IF2 (hy
== 0, cond2
, cond
)
84 iy
= VECTOR_INIT (-1043);
85 for (v64si i
= __builtin_convertvector (ly
, v64si
);
86 !ALL_ZEROES_P (cond2
& (i
> 0));
88 VECTOR_COND_MOVE (iy
, iy
- 1, cond2
& (i
> 0));
89 VECTOR_ELSE2 (cond2
, cond
)
90 iy
= VECTOR_INIT (-1022);
91 for (v64si i
= __builtin_convertvector (hy
<< 11, v64si
);
92 !ALL_ZEROES_P (cond2
& (i
> 0));
94 VECTOR_COND_MOVE (iy
, iy
- 1, cond2
& (i
> 0));
97 VECTOR_COND_MOVE (iy
, (hy
>> 20) - 1023, cond
);
101 /* set up {hx,lx}, {hy,ly} and align y to x */
102 VECTOR_IF (ix
>= -1022, cond
)
/* Normal x: keep the low 20 bits of hx and set bit 20 (0x00100000).  */
103 VECTOR_COND_MOVE (hx
, 0x00100000 | (0x000fffff & hx
), cond
);
104 VECTOR_ELSE (cond
) // subnormal x, shift x to normal
/* n is the left-shift distance; n <= 31 shifts across both words, larger n
   moves lx into hx and clears lx.  */
106 v64si n
= -1022 - ix
;
107 VECTOR_IF2 (n
<= 31, cond2
, cond
)
108 VECTOR_COND_MOVE (hx
, (hx
<< n
) | (lx
>> (32 - n
)), cond2
);
109 VECTOR_COND_MOVE (lx
, lx
<< n
, cond2
);
110 VECTOR_ELSE2 (cond2
, cond
)
111 VECTOR_COND_MOVE (hx
, __builtin_convertvector (lx
<< (n
- 32), v64si
), cond2
);
112 VECTOR_COND_MOVE (lx
, VECTOR_INIT (0U), cond2
);
/* Same set-up for y, using iy and {hy,ly}.  */
116 VECTOR_IF (iy
>= -1022, cond
)
117 VECTOR_COND_MOVE (hy
, 0x00100000 | (0x000fffff & hy
), cond
);
118 VECTOR_ELSE (cond
) // subnormal y, shift y to normal
120 v64si n
= -1022 - iy
;
121 VECTOR_IF2 (n
<= 31, cond2
, cond
)
122 VECTOR_COND_MOVE (hy
, (hy
<< n
) | (ly
>> (32 - n
)), cond2
);
123 VECTOR_COND_MOVE (ly
, ly
<< n
, cond2
);
124 VECTOR_ELSE2 (cond2
, cond
)
125 VECTOR_COND_MOVE (hy
, __builtin_convertvector (ly
<< (n
- 32), v64si
), cond2
);
126 VECTOR_COND_MOVE (ly
, VECTOR_INIT (0U), cond2
);
/* Main loop: runs while any lane of cond is still set.  The initialisation
   of n and cond, and the assignments that produce hz/lz, are not visible in
   this listing.  */
135 while (!ALL_ZEROES_P (cond
))
/* Decrement hz on lanes where lx < ly (presumably the borrow from a low-word
   subtraction that is not shown -- confirm).  */
139 VECTOR_IF2 (lx
< ly
, cond2
, cond
)
140 VECTOR_COND_MOVE (hz
, hz
- 1, cond2
);
/* hz < 0: double {hx,lx}, carrying the top bit of lx into hx.  */
142 VECTOR_IF2 (hz
< 0, cond2
, cond
)
143 VECTOR_COND_MOVE (hx
, hx
+ hx
+ (__builtin_convertvector(lx
, v64usi
) >> 31), cond2
);
144 VECTOR_COND_MOVE (lx
, lx
+ lx
, cond2
);
145 VECTOR_ELSE2 (cond2
, cond
)
146 VECTOR_IF2 ((hz
| lz
) == 0, cond3
, cond2
) // return sign(x)*0
147 VECTOR_RETURN (zeroes
, cond3
);
/* Otherwise {hx,lx} becomes twice {hz,lz}, with the same carry handling.  */
149 VECTOR_COND_MOVE (hx
, hz
+ hz
+ (__builtin_convertvector(lz
, v64usi
) >> 31), cond2
);
150 VECTOR_COND_MOVE (lx
, lz
+ lz
, cond2
);
153 n
+= cond
; // Active lanes should be -1
/* Final step after the loop: apply the lx < ly decrement once more and take
   {hz,lz} as the new {hx,lx} on lanes where hz >= 0.  */
159 VECTOR_COND_MOVE (hz
, hz
- 1, lx
< ly
);
160 VECTOR_IF (hz
>= 0, cond
)
161 VECTOR_COND_MOVE (hx
, hz
, cond
);
162 VECTOR_COND_MOVE (lx
, lz
, cond
);
165 /* convert back to floating value and restore the sign */
166 VECTOR_RETURN (zeroes
, (hx
| lx
) == 0); // return sign(x)*0
/* Lanes whose hx is still below 0x00100000 are doubled, with iy adjusted by
   the mask, until none remain.  */
167 cond
= hx
< 0x00100000;
168 while (!ALL_ZEROES_P (cond
)) // normalize x
170 VECTOR_COND_MOVE (hx
, hx
+ hx
+ (lx
>> 31), cond
);
171 VECTOR_COND_MOVE (lx
, lx
+ lx
, cond
);
172 iy
+= cond
; // Active lanes should be -1
174 cond
&= (hx
< 0x00100000);
176 VECTOR_IF (iy
>= -1022, cond
) // normalize output
/* Replace bit 20 of hx with the biased exponent (iy + 1023) and reattach
   the sign of x when writing the words back.  */
177 VECTOR_COND_MOVE (hx
, (hx
- 0x00100000) | ((iy
+ 1023) << 20), cond
);
178 INSERT_WORDS (x
, hx
| sx
, lx
, cond
);
179 VECTOR_ELSE (cond
) // subnormal output
/* Right-shift {hx,lx} by n, in three cases: n <= 20, n <= 31, and larger.
   The assignment of n for this section is not visible in this listing.  */
181 VECTOR_IF2 (n
<= 20, cond2
, cond
)
182 VECTOR_COND_MOVE (lx
, (lx
>> n
) | (hx
<< (32 - n
)), cond2
);
183 VECTOR_COND_MOVE (hx
, hx
>> n
, cond2
);
184 VECTOR_ELSEIF2 (n
<= 31, cond2
, cond
)
185 VECTOR_COND_MOVE (lx
, __builtin_convertvector ((hx
<< (32 - n
)) | (lx
>> n
), v64usi
), cond2
);
186 VECTOR_COND_MOVE (hx
, sx
, cond2
);
187 VECTOR_ELSE2 (cond2
, cond
)
188 VECTOR_COND_MOVE (lx
, __builtin_convertvector (hx
>> (n
- 32), v64usi
), cond2
);
189 VECTOR_COND_MOVE (hx
, sx
, cond2
);
191 INSERT_WORDS (x
, hx
| sx
, lx
, cond
);
192 x
*= VECTOR_INIT (1.0); /* create necessary signal */
/* Lanes that did not return earlier return the rebuilt x.  */
195 VECTOR_RETURN (x
, NO_COND
); /* exact output */
/* Instantiate variants of fmod through DEF_VARIANTS2.  The macro is defined
   elsewhere (presumably amdgcnmach.h); the two 'df' arguments presumably name
   the element types of the two operands -- confirm there.  */
199 DEF_VARIANTS2 (fmod
, df
, df
)