/* multi_arith.h: multi-precision integer arithmetic functions, needed
   to do extended-precision floating point.

   (c) 1998 David Huggins-Daines.

   Somewhat based on arch/alpha/math-emu/ieee-math.c, which is
   (c) David Mosberger-Tang.

   You may copy, modify, and redistribute this file under the terms of
   the GNU General Public License, version 2, or any later version, at
   your convenience.

   These are not general multi-precision math routines.  Rather, they
   implement the subset of integer arithmetic that we need in order to
   multiply, divide, and normalize 128-bit unsigned mantissae. */

#ifndef MULTI_ARITH_H
#define MULTI_ARITH_H
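/* For reference, an illustrative sketch of the operand layout these
 * routines assume.  The real struct fp_ext and union fp_mant64/128
 * definitions live elsewhere in the emulator; the names below are
 * hypothetical stand-ins, shown only to document the layout these
 * routines rely on (unsigned long is 32-bit on m68k): m32[0] is the
 * most significant word, and the 8-bit lowmant extends the mantissa
 * downward to hold guard/sticky bits for rounding.
 */
union mp_sketch_mant64 {
	unsigned long long m64;		/* full 64-bit mantissa */
	unsigned long m32[2];		/* m32[0] high word, m32[1] low word */
};

union mp_sketch_mant128 {
	unsigned long long m64[2];	/* m64[0] is the high half */
	unsigned long m32[4];		/* m32[0] is most significant */
};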
/* Shift the mantissa right by cnt bits, adjusting the exponent to
   match and folding every shifted-out bit into lowmant so no rounding
   information is lost. */
static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
{
	reg->exp += cnt;

	switch (cnt) {
	case 0 ... 8:
		reg->lowmant = reg->mant.m32[1] << (8 - cnt);
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 9 ... 32:
		reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
				   (reg->mant.m32[0] << (32 - cnt));
		reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
		break;
	case 33 ... 39:
		asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
			: "m" (reg->mant.m32[0]), "d" (64 - cnt));
		if (reg->mant.m32[1] << (40 - cnt))
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	case 40 ... 71:
		reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
		if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
			reg->lowmant |= 1;
		reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
		reg->mant.m32[0] = 0;
		break;
	default:
		/* the entire mantissa is shifted out; only stickiness
		   remains */
		reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
		reg->mant.m32[0] = 0;
		reg->mant.m32[1] = 0;
		break;
	}
}
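/* A rough portable-C model of what fp_denormalize does to the mantissa
 * (the exponent adjustment is omitted).  mp_sketch_denormalize is a
 * hypothetical helper for illustration only: it shifts one bit at a
 * time where the real code above picks one of five precomputed shift
 * shapes, but the resulting mant/lowmant values agree.
 */
static inline void mp_sketch_denormalize(unsigned long long *mant,
					 unsigned char *lowmant,
					 unsigned int cnt)
{
	unsigned long long m = *mant;
	unsigned char low = 0;
	unsigned char sticky = 0;
	unsigned int i;

	for (i = 0; i < cnt; i++) {
		sticky |= low & 1;	/* bits falling off the end are sticky */
		low = (low >> 1) | ((m & 1) << 7);
		m >>= 1;
	}
	*mant = m;
	*lowmant = low | sticky;
}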
/* Shift the mantissa left until its high bit is set, returning the
   shift count so the caller can adjust the exponent.  bfffo is the
   68020+ "find first one" bit-field instruction. */
static inline int fp_overnormalize(struct fp_ext *reg)
{
	int shift;

	if (reg->mant.m32[0]) {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
		reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
		reg->mant.m32[1] = (reg->mant.m32[1] << shift);
	} else {
		asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
		reg->mant.m32[0] = (reg->mant.m32[1] << shift);
		reg->mant.m32[1] = 0;
		shift += 32;
	}

	return shift;
}
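/* Portable model of the same operation (mp_sketch_overnormalize is a
 * hypothetical helper, for illustration only): find the leading one
 * with a loop instead of bfffo.  Assumes a nonzero mantissa, which the
 * callers are expected to guarantee; GCC's __builtin_clzll would do
 * the counting in a single step.
 */
static inline int mp_sketch_overnormalize(unsigned long long *mant)
{
	int shift = 0;

	while (!(*mant & 0x8000000000000000ULL)) {
		*mant <<= 1;	/* zeros shift in from the right */
		shift++;
	}
	return shift;
}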
/* 72-bit add: dest += src across lowmant and both mantissa words,
   returning the carry out of the top bit. */
static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
{
	int carry;

	/* we assume here that gcc inserts only a move and a clr instr */
	asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src->lowmant), "0,0" (dest->lowmant));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
	asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
	asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));

	return carry;
}
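/* Portable model of fp_addmant: a 72-bit add (8-bit lowmant plus a
 * 64-bit mantissa) returning the carry out of the top.  The inline asm
 * above chains add.b/addx.l so the 68k X flag carries across the three
 * limbs; fp_submant below is the symmetric subtract.  Hypothetical
 * helper, for illustration only.
 */
static inline int mp_sketch_addmant(unsigned long long *dmant,
				    unsigned char *dlow,
				    unsigned long long smant,
				    unsigned char slow)
{
	unsigned int lowsum = (unsigned int)*dlow + slow;
	unsigned long long cin = lowsum >> 8;	/* carry out of the byte */
	unsigned long long sum = *dmant + smant;
	int carry = sum < smant;		/* 64-bit wrap on the add */

	sum += cin;
	carry |= (sum == 0 && cin);		/* wrap on the carry-in */

	*dlow = (unsigned char)lowsum;
	*dmant = sum;
	return carry;
}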
/* Handle a carry out of fp_addmant: bump the exponent and shift the
   mantissa right one bit, reinstating the leading one.  Returns 0 on
   exponent overflow, 1 otherwise. */
static inline int fp_addcarry(struct fp_ext *reg)
{
	if (++reg->exp == 0x7fff) {
		if (reg->mant.m64)
			fp_set_sr(FPSR_EXC_INEX2);
		reg->mant.m64 = 0;
		fp_set_sr(FPSR_EXC_OVFL);
		return 0;
	}
	reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
	reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
			   (reg->mant.m32[0] << 31);
	reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;

	return 1;
}
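/* Portable model of the renormalization step above (the non-overflow
 * path of fp_addcarry): shift the 72-bit mantissa right one bit and
 * set the implicit leading one that the carry-out represents, keeping
 * the "lowmant ? 1 : 0" sticky behaviour.  Hypothetical helper, for
 * illustration only.
 */
static inline void mp_sketch_addcarry_shift(unsigned long long *mant,
					    unsigned char *lowmant)
{
	unsigned char sticky = *lowmant ? 1 : 0;

	*lowmant = (unsigned char)((*mant << 7) | sticky);
	*mant = (*mant >> 1) | 0x8000000000000000ULL;
}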
/* 72-bit subtract: dest = src1 - src2, the borrow chained through the
   three limbs with subx. */
static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
			      struct fp_ext *src2)
{
	/* we assume here that gcc inserts only a move and a clr instr */
	asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
		: "g,d" (src2->lowmant), "0,0" (src1->lowmant));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
		: "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
	asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
		: "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
}
/* 32x32 -> 64 multiply (68020+ mulu.l). */
#define fp_mul64(desth, destl, src1, src2) ({				\
	asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth)		\
		: "dm" (src1), "0" (src2));				\
})
/* 64/32 -> 32 divide with remainder (68020+ divu.l). */
#define fp_div64(quot, rem, srch, srcl, div)				\
	asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem)		\
		: "dm" (div), "1" (srch), "0" (srcl))
#define fp_add64(dest1, dest2, src1, src2) ({				\
	asm ("add.l %1,%0" : "=d,dm" (dest2)				\
		: "dm,d" (src2), "0,0" (dest2));			\
	asm ("addx.l %1,%0" : "=d" (dest1)				\
		: "d" (src1), "0" (dest1));				\
})
/* add the 64-bit src into the low 96 bits of the 128-bit dest */
#define fp_addx96(dest, src) ({					\
	/* we assume here that gcc inserts only a move and a clr instr */ \
	asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2])		\
		: "g,d" (src.m32[1]), "0,0" (dest->m32[2]));		\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1])		\
		: "d" (src.m32[0]), "0" (dest->m32[1]));		\
	asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0])		\
		: "d" (0), "0" (dest->m32[0]));				\
})
#define fp_sub64(dest, src) ({					\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1])			\
		: "dm,d" (src.m32[1]), "0,0" (dest.m32[1]));		\
	asm ("subx.l %1,%0" : "=d" (dest.m32[0])			\
		: "d" (src.m32[0]), "0" (dest.m32[0]));		\
})
/* 96-bit subtract; evaluates to the carry (borrow) out of the top */
#define fp_sub96c(dest, srch, srcm, srcl) ({				\
	char carry;							\
	asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2])			\
		: "dm,d" (srcl), "0,0" (dest.m32[2]));			\
	asm ("subx.l %1,%0" : "=d" (dest.m32[1])			\
		: "d" (srcm), "0" (dest.m32[1]));			\
	asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0])	\
		: "d" (srch), "1" (dest.m32[0]));			\
	carry;								\
})
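/* Portable models of the two 68020+ primitives wrapped above,
 * assuming 32-bit unsigned long as on m68k.  Hypothetical helpers,
 * for illustration only: mulu.l produces the full 32x32 -> 64 product,
 * divu.l a 64/32 divide with remainder.  The sketch assumes a nonzero
 * divisor and a quotient that fits in 32 bits, which the callers in
 * this file arrange.
 */
static inline void mp_sketch_mul64(unsigned long *desth, unsigned long *destl,
				   unsigned long src1, unsigned long src2)
{
	unsigned long long prod = (unsigned long long)src1 * src2;

	*desth = (unsigned long)(prod >> 32);
	*destl = (unsigned long)prod;
}

static inline void mp_sketch_div64(unsigned long *quot, unsigned long *rem,
				   unsigned long srch, unsigned long srcl,
				   unsigned long div)
{
	unsigned long long n = ((unsigned long long)srch << 32) | srcl;

	*quot = (unsigned long)(n / div);
	*rem = (unsigned long)(n % div);
}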
/* 64x64 -> 128 multiply of two mantissae, built from four 32x32
   partial products, with the two cross terms folded in via fp_addx96. */
static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
				   struct fp_ext *src2)
{
	union fp_mant64 temp;

	fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
	fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
	fp_addx96(dest, temp);

	fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
	fp_addx96(dest, temp);
}
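/* The identity computed above, as a portable sketch (hypothetical
 * helper, illustration only): with a = a1*2^32 + a0 and b = b1*2^32 + b0,
 * a*b = a1*b1*2^64 + (a1*b0 + a0*b1)*2^32 + a0*b0, and every partial
 * product fits in 64 bits.
 */
static inline void mp_sketch_mul128(unsigned long long dest[2],
				    unsigned long long a,
				    unsigned long long b)
{
	unsigned long long a0 = a & 0xffffffffULL, a1 = a >> 32;
	unsigned long long b0 = b & 0xffffffffULL, b1 = b >> 32;
	unsigned long long lo = a0 * b0;
	unsigned long long mid1 = a1 * b0;
	unsigned long long mid2 = a0 * b1;
	unsigned long long hi = a1 * b1;

	/* fold the two middle terms into the 128-bit result */
	hi += (mid1 >> 32) + (mid2 >> 32);
	lo += mid1 << 32;
	hi += (lo < (mid1 << 32));	/* carry from the first fold */
	lo += mid2 << 32;
	hi += (lo < (mid2 << 32));	/* carry from the second fold */

	dest[0] = hi;			/* high 64 bits */
	dest[1] = lo;			/* low 64 bits */
}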
static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
				 struct fp_ext *div)
{
	union fp_mant128 tmp;
	union fp_mant64 tmp64;
	unsigned long *mantp = dest->m32;
	unsigned long fix, rem, first, dummy;
	int i;

	/* the algorithm below requires src to be smaller than div,
	   but both have the high bit set */
	if (src->mant.m64 >= div->mant.m64) {
		fp_sub64(src->mant, div->mant);
		*mantp = 1;
	} else
		*mantp = 0;
	mantp++;

	/* basic idea behind this algorithm: we can't divide two 64-bit
	   numbers (AB/CD) directly, but we can calculate AB/C0.  That
	   quotient is off by a factor of C0/CD, so we multiply the first
	   result to fix it up; after that we have nearly the correct
	   result and only a few corrections are needed.  (A bit-at-a-time
	   reference for the same quotient follows this function.) */

	/* C0/CD could be precalculated, but that is a 64-bit division
	   again.  We can make it easier by first dividing through C,
	   which gives 10/1D; now a single shift suffices and the value
	   fits into 32 bits. */
	fix = 0x80000000;
	dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
	dummy = (dummy >> 1) | fix;
	fp_div64(fix, dummy, fix, 0, dummy);
	fix--;

	for (i = 0; i < 3; i++, mantp++) {
		if (src->mant.m32[0] == div->mant.m32[0]) {
			fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
			*mantp += fix;
		} else {
			fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1],
				 div->mant.m32[0]);

			fp_mul64(*mantp, dummy, first, fix);
		}

		/* 96-bit remainder for the estimated digit q = *mantp:
		   tmp = src * 2^32 - q * div */
		fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
		fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
		tmp.m32[2] = 0;

		fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
		fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);

		src->mant.m32[0] = tmp.m32[1];
		src->mant.m32[1] = tmp.m32[2];

		/* correct the estimate while the remainder still holds
		   another whole divisor */
		while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
			src->mant.m32[0] = tmp.m32[1];
			src->mant.m32[1] = tmp.m32[2];
			*mantp += 1;
		}
	}
}
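/* For intuition, a straightforward (slow) reference for the quotient
 * digits fp_dividemant produces: one integer bit followed by three
 * 32-bit fraction digits of src/div, computed by bit-at-a-time
 * restoring division.  mp_sketch_dividemant is a hypothetical helper
 * for illustration only; the routine above reaches the same digits
 * with a handful of 32-bit divides plus corrections.
 */
static inline void mp_sketch_dividemant(unsigned long quot[4],
					unsigned long long src,
					unsigned long long div)
{
	int i, b;

	quot[0] = (src >= div);		/* integer bit */
	if (src >= div)
		src -= div;

	for (i = 1; i < 4; i++) {
		unsigned long digit = 0;

		for (b = 0; b < 32; b++) {
			/* shift the remainder left one bit; a bit
			   falling out of the top forces a subtract */
			int overflow = (int)(src >> 63);

			src <<= 1;
			digit <<= 1;
			if (overflow || src >= div) {
				src -= div;
				digit |= 1;
			}
		}
		quot[i] = digit;
	}
}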
/* Put a 128-bit product back into a destination register, shifting
   left by 0, 1, 31, or 32 bits (the only counts the callers need) and
   folding every bit below the guard byte into the sticky bit. */
static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
				 int shift)
{
	unsigned long tmp;

	switch (shift) {
	case 0:
		dest->mant.m64 = src->m64[0];
		dest->lowmant = src->m32[2] >> 24;
		if (src->m32[3] || (src->m32[2] << 8))
			dest->lowmant |= 1;
		break;
	case 1:
		asm volatile ("lsl.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[2]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
		asm volatile ("roxl.l #1,%0"
			: "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] || (tmp << 8))
			dest->lowmant |= 1;
		break;
	case 31:
		asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
			: "=d" (dest->mant.m32[0])
			: "d" (src->m32[0]), "0" (src->m32[1]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
		asm volatile ("roxr.l #1,%0"
			: "=d" (tmp) : "0" (src->m32[3]));
		dest->lowmant = tmp >> 24;
		if (src->m32[3] << 7)
			dest->lowmant |= 1;
		break;
	case 32:
		dest->mant.m32[0] = src->m32[1];
		dest->mant.m32[1] = src->m32[2];
		dest->lowmant = src->m32[3] >> 24;
		if (src->m32[3] << 8)
			dest->lowmant |= 1;
		break;
	}
}
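/* Portable model of fp_putmant128's job (hypothetical helper, for
 * illustration only, assuming 0 <= shift < 64): take the upper 72 bits
 * of the 128-bit value after shifting left by 'shift', and fold all
 * remaining low bits into the sticky bit.  The switch above handles
 * only the shift counts the callers actually use, with
 * rotate-through-X instructions.
 */
static inline void mp_sketch_putmant128(unsigned long long *mant,
					unsigned char *lowmant,
					const unsigned long long src[2],
					int shift)
{
	unsigned long long hi = src[0], lo = src[1];

	if (shift) {		/* 128-bit left shift by 'shift' bits */
		hi = (hi << shift) | (lo >> (64 - shift));
		lo <<= shift;
	}
	*mant = hi;
	*lowmant = (unsigned char)(lo >> 56);
	if (lo << 8)		/* anything left below the guard byte */
		*lowmant |= 1;
}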
#endif	/* MULTI_ARITH_H */