4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
31 #define restrict _Restrict
36 /* float logf(float x)
40 * for x is negative, -Inf => QNaN + invalid;
41 * for x = 0 => -Inf + divide-by-zero;
42 * for x = +Inf => Inf;
43 * for x = NaN => QNaN.
44 * 2. Computes logarithm from:
45 * x = m * 2**n => log(x) = n * log(2) + log(m),
47 * Let m = m0 + dm, where m0 = 1 + k / 32,
50 * Then log(m) = log(m0 + dm) = log(m0) + log(1+y),
51 * where y = dm*(1/m0), y = [-1/66, 1/64].
53 * 1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32);
54 * log(m0) is looked up in a table of log(1), log(1+1/32),
56 * log(1+y) is computed using approximation:
57 * log(1+y) = ((a3*y + a2)*y + a1)*y*y + y.
59 * The maximum relative error for the approximating
60 * polynomial is 2**(-28.41). All calculations are of
62 * Maximum error observed: less than 0.545 ulp for the
63 * whole float type range.
66 static const double __TBL_logf
[] = {
67 /* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */
68 /* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */
69 0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02,
70 1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07,
71 8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01,
72 1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07,
73 1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01,
74 9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08,
75 2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01,
76 9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08,
77 3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01,
78 8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08,
79 3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01,
80 7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08,
81 4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01,
82 7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08,
83 5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01,
84 7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08,
85 5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01,
86 6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08,
87 6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01,
88 6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08,
89 6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01,
90 6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08
94 K3
= -2.49887584306188944706e-01,
95 K2
= 3.33368809981254554946e-01,
96 K1
= -5.00000008402474976565e-01;
101 } inf
= { 0x7f800000 };
106 iy##N = ival##N & 0x007fffff; \
107 ival##N = (iy##N + 0x20000) & 0xfffc0000; \
108 i##N = ival##N >> 17; \
109 iy##N = iy##N - ival##N; \
110 ty##N = LN2 * (double) exp##N + __TBL_logf[i##N]; \
111 yy##N = (double) iy##N * __TBL_logf[i##N + 1]; \
112 yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N; \
113 y[0] = (float)(yy##N + ty##N); \
116 #define PREPROCESS(N, index, label) \
117 ival##N = *(int*)x; \
120 exp##N = (ival##N >> 23) - 127; \
121 if ((ival##N & 0x7fffffff) >= 0x7f800000) /* X = NaN or Inf */ \
123 y[index] = value + INF; \
126 if (ival##N < 0x00800000) \
128 if (ival##N > 0) /* X = denormal */ \
130 value = (float) ival##N; \
131 ival##N = *(int*) &value; \
132 exp##N = (ival##N >> 23) - (127 + 149); \
137 y[index] = ((ival##N & 0x7fffffff) == 0) ? \
138 -1.0f / value : value / value; \
144 __vlogf(int n
, float * restrict x
, int stridex
, float * restrict y
,
147 double LN2
= __TBL_logf
[64]; /* log(2) = 0.6931471805599453094 */
148 double yy0
, yy1
, yy2
, yy3
, yy4
;
149 double ty0
, ty1
, ty2
, ty3
, ty4
;
151 int i0
, i1
, i2
, i3
, i4
;
152 int ival0
, ival1
, ival2
, ival3
, ival4
;
153 int exp0
, exp1
, exp2
, exp3
, exp4
;
154 int iy0
, iy1
, iy2
, iy3
, iy4
;
166 PREPROCESS(0, 0, begin
)
171 PREPROCESS(1, stridey
, process1
)
176 PREPROCESS(2, (stridey
<< 1), process2
)
181 PREPROCESS(3, (stridey
<< 1) + stridey
, process3
)
186 PREPROCESS(4, (stridey
<< 2), process4
)
188 iy0
= ival0
& 0x007fffff;
189 iy1
= ival1
& 0x007fffff;
190 iy2
= ival2
& 0x007fffff;
191 iy3
= ival3
& 0x007fffff;
192 iy4
= ival4
& 0x007fffff;
194 ival0
= (iy0
+ 0x20000) & 0xfffc0000;
195 ival1
= (iy1
+ 0x20000) & 0xfffc0000;
196 ival2
= (iy2
+ 0x20000) & 0xfffc0000;
197 ival3
= (iy3
+ 0x20000) & 0xfffc0000;
198 ival4
= (iy4
+ 0x20000) & 0xfffc0000;
212 ty0
= LN2
* (double) exp0
+ __TBL_logf
[i0
];
213 ty1
= LN2
* (double) exp1
+ __TBL_logf
[i1
];
214 ty2
= LN2
* (double) exp2
+ __TBL_logf
[i2
];
215 ty3
= LN2
* (double) exp3
+ __TBL_logf
[i3
];
216 ty4
= LN2
* (double) exp4
+ __TBL_logf
[i4
];
218 yy0
= (double) iy0
* __TBL_logf
[i0
+ 1];
219 yy1
= (double) iy1
* __TBL_logf
[i1
+ 1];
220 yy2
= (double) iy2
* __TBL_logf
[i2
+ 1];
221 yy3
= (double) iy3
* __TBL_logf
[i3
+ 1];
222 yy4
= (double) iy4
* __TBL_logf
[i4
+ 1];
224 yy0
= ((K3
* yy0
+ K2
) * yy0
+ K1
) * yy0
* yy0
+ yy0
;
225 yy1
= ((K3
* yy1
+ K2
) * yy1
+ K1
) * yy1
* yy1
+ yy1
;
226 yy2
= ((K3
* yy2
+ K2
) * yy2
+ K1
) * yy2
* yy2
+ yy2
;
227 yy3
= ((K3
* yy3
+ K2
) * yy3
+ K1
) * yy3
* yy3
+ yy3
;
228 yy4
= ((K3
* yy4
+ K2
) * yy4
+ K1
) * yy4
* yy4
+ yy4
;
230 y
[0] = (float)(yy0
+ ty0
);
232 y
[0] = (float)(yy1
+ ty1
);
234 y
[0] = (float)(yy2
+ ty2
);
236 y
[0] = (float)(yy3
+ ty3
);
238 y
[0] = (float)(yy4
+ ty4
);