cybergraphics.library: speed up brightness operation in processpixelarray
[AROS.git] / workbench / libs / mathieeedoubbas / ieeedpmul.c
blob9bd3b5df3f5f2bb3e2f9a8c036bc1b3dbf4d709f
1 /*
2 Copyright © 1995-2003, The AROS Development Team. All rights reserved.
3 $Id$
4 */
5 #include <aros/debug.h>
6 #include <proto/utility.h>
7 #include "mathieeedoubbas_intern.h"
9 static void add128(ULONG *s, ULONG *d)
11 ULONG ovl = 0;
12 WORD i;
14 for (i = 3; i >= 0; i--) {
15 ULONG o = d[i];
16 d[i] += s[i] + ovl;
17 ovl = 0;
18 if (o > d[i])
19 ovl = 1;
23 /*****************************************************************************
25 NAME */
27 AROS_LHQUAD2(double, IEEEDPMul,
29 /* SYNOPSIS */
30 AROS_LHAQUAD(double, y, D0, D1),
31 AROS_LHAQUAD(double, z, D2, D3),
33 /* LOCATION */
34 struct MathIeeeDoubBasBase *, MathIeeeDoubBasBase, 13, MathIeeeDoubBas)
36 /* FUNCTION
37 Multiplies two IEEE double precision numbers
39 INPUTS
41 RESULT
42 +1 : y > z
43 0 : y = z
44 -1 : y < z
46 Flags:
47 zero : y = z
48 negative : y < z
49 overflow : 0
51 BUGS
53 INTERNALS
55 *****************************************************************************/
57 AROS_LIBFUNC_INIT
59 BOOL sign;
60 QUAD res;
61 QUAD Qtmp1, Qtmp2;
62 WORD exp1, exp2, exp;
63 double *Dres = (double *)&res;
65 QUAD * Qy = (QUAD *)&y;
66 QUAD * Qz = (QUAD *)&z;
68 D(bug("%08x %08x * %08x %08x\n",
69 (ULONG)Get_High32of64(*Qy), (ULONG)Get_Low32of64(*Qy),
70 (ULONG)Get_High32of64(*Qz), (ULONG)Get_Low32of64(*Qz)));
72 sign = ((Get_High32of64(*Qy) ^ Get_High32of64(*Qz)) & IEEEDPSign_Mask_Hi) != 0;
74 AND64C(Qtmp1, *Qy, IEEEDPExponent_Mask_Hi, IEEEDPExponent_Mask_Lo);
75 AND64C(Qtmp2, *Qz, IEEEDPExponent_Mask_Hi, IEEEDPExponent_Mask_Lo);
77 SHRU32(exp1, Qtmp1, 52);
78 SHRU32(exp2, Qtmp2, 52);
80 exp1 = (exp1 & 0x7ff) - 1023;
81 exp2 = (exp2 & 0x7ff) - 1023;
82 exp = exp1 + exp2;
84 D(bug("EXP %d + EXP %d = %d (%08x)\n", exp1, exp2, exp, (exp + 1023) << 20));
86 AND64C(Qtmp1, *Qy, IEEEDPMantisse_Mask_Hi, IEEEDPMantisse_Mask_Lo);
87 AND64C(Qtmp2, *Qz, IEEEDPMantisse_Mask_Hi, IEEEDPMantisse_Mask_Lo);
89 /* (53 bit) * (53 bit) multiplication */
91 ULONG x1, x2, y1, y2;
92 ULONG t1[4], t2[4];
94 x1 = Get_High32of64(Qtmp1);
95 x1 |= 0x00100000; /* Set "hidden" 53th mantissa bit */
96 x2 = Get_Low32of64(Qtmp1);
97 y1 = Get_High32of64(Qtmp2);
98 y1 |= 0x00100000; /* Set "hidden" 53th mantissa bit */
99 y2 = Get_Low32of64(Qtmp2);
101 D(bug("%08x %08x %08x %08x\n", x1, x2, y1, y2));
103 Qtmp1 = UMult64(x1, y1);
104 t1[0] = Get_High32of64(Qtmp1);
105 t1[1] = Get_Low32of64(Qtmp1);
107 Qtmp1 = UMult64(x2, y2);
108 t1[2] = Get_High32of64(Qtmp1);
109 t1[3] = Get_Low32of64(Qtmp1);
111 Qtmp1 = UMult64(x1, y2);
112 t2[3] = 0;
113 t2[2] = Get_High32of64(Qtmp1);
114 t2[1] = Get_Low32of64(Qtmp1);
115 t2[0] = 0;
117 add128(t2, t1);
119 Qtmp1 = UMult64(x2, y1);
120 t2[3] = 0;
121 t2[2] = Get_High32of64(Qtmp1);
122 t2[1] = Get_Low32of64(Qtmp1);
123 t2[0] = 0;
125 add128(t2, t1);
127 D(bug("%08x %08x %08x %08x\n", t1[0], t1[1], t1[2], t1[3]));
129 /* Normalize. Pobably could be more optimal.. */
130 if (t1[0] || t1[1] || t1[2] || t1[3]) {
131 while (t1[0] & 0xfffffe00) {
132 t1[3] >>= 1;
133 t1[3] |= (t1[2] & 1) ? 0x80000000 : 0;
134 t1[2] >>= 1;
135 t1[2] |= (t1[1] & 1) ? 0x80000000 : 0;
136 t1[1] >>= 1;
137 t1[1] |= (t1[0] & 1) ? 0x80000000 : 0;
138 t1[0] >>= 1;
139 exp++;
141 while (!(t1[0] & 0x00000100)) {
142 t1[0] <<= 1;
143 t1[0] |= (t1[1] & 0x80000000) ? 1 : 0;
144 t1[1] <<= 1;
145 t1[1] |= (t1[2] & 0x80000000) ? 1 : 0;
146 t1[2] <<= 1;
147 t1[2] |= (t1[3] & 0x80000000) ? 1 : 0;
148 t1[3] <<= 1;
149 exp--;
153 D(bug("%08x %08x %08x %08x\n", t1[0], t1[1], t1[2], t1[3]));
155 if (exp <= -1023) {
156 if (sign)
157 Set_Value64C(res, 0x0, 0x0);
158 else
159 Set_Value64C(res, IEEEDPSign_Mask_Hi, IEEEDPSign_Mask_Lo);
160 return *Dres;
163 if (exp >= 1023) {
164 if (sign)
165 Set_Value64C(res, IEEEDPPInfty_Hi, IEEEDPPInfty_Lo);
166 else
167 Set_Value64C(res, IEEEDPPInfty_Hi | IEEEDPSign_Mask_Hi, IEEEDPPInfty_Lo | IEEEDPSign_Mask_Lo);
168 return *Dres;
171 Set_Value64C(res, (t1[0] << 12) | (t1[1] >> 20), (t1[1] << 12) | (t1[2] >> 20));
172 AND64QC(res, IEEEDPMantisse_Mask_Hi, IEEEDPMantisse_Mask_Lo);
173 SHL64(Qtmp1, (QUAD)(exp + 1023), 52);
174 OR64Q(res, Qtmp1);
176 if (sign)
177 OR64(res, IEEEDPSign_Mask_Hi, IEEEDPSign_Mask_Lo);
179 return *Dres;
181 AROS_LIBFUNC_EXIT