4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
37 ! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
38 ! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
40 ! TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
41 ! TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
! 128 table rows, 16 bytes each: two doubles per row, high word first.
!   +0: tbl_div  (reciprocal entry, TBL[2*i]   in the formulas above)
!   +8: tbl_sqrt (1/sqrt entry,     TBL[2*i+1] in the formulas above)
! The code forms the row byte offset as si = (ax >> 13) & 0x7f0, so the
! row number is bits 17..23 of the float: the lowest exponent bit and
! the top six mantissa bits.
! Rows 0..63 (lowest exponent bit clear): tbl_sqrt runs from sqrt(2)
! down toward 1.
44 .word 0x3e800000, 0x00000000, 0x3ff6a09e, 0x667f3bcd,
45 .word 0x3e7f81f8, 0x1f81f820, 0x3ff673e3, 0x2ef63a03,
46 .word 0x3e7f07c1, 0xf07c1f08, 0x3ff6482d, 0x37a5a3d2,
47 .word 0x3e7e9131, 0xabf0b767, 0x3ff61d72, 0xb7978671,
48 .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3ff5f3aa, 0x673fa911,
49 .word 0x3e7dae60, 0x76b981db, 0x3ff5cacb, 0x7802f342,
50 .word 0x3e7d41d4, 0x1d41d41d, 0x3ff5a2cd, 0x8c69d61a,
51 .word 0x3e7cd856, 0x89039b0b, 0x3ff57ba8, 0xb0ee01b9,
52 .word 0x3e7c71c7, 0x1c71c71c, 0x3ff55555, 0x55555555,
53 .word 0x3e7c0e07, 0x0381c0e0, 0x3ff52fcc, 0x468d6b54,
54 .word 0x3e7bacf9, 0x14c1bad0, 0x3ff50b06, 0xa8fc6b70,
55 .word 0x3e7b4e81, 0xb4e81b4f, 0x3ff4e6fd, 0xf33cf032,
56 .word 0x3e7af286, 0xbca1af28, 0x3ff4c3ab, 0xe93bcf74,
57 .word 0x3e7a98ef, 0x606a63be, 0x3ff4a10a, 0x97af7b92,
58 .word 0x3e7a41a4, 0x1a41a41a, 0x3ff47f14, 0x4fe17f9f,
59 .word 0x3e79ec8e, 0x951033d9, 0x3ff45dc3, 0xa3c34fa3,
60 .word 0x3e799999, 0x9999999a, 0x3ff43d13, 0x6248490f,
61 .word 0x3e7948b0, 0xfcd6e9e0, 0x3ff41cfe, 0x93ff5199,
62 .word 0x3e78f9c1, 0x8f9c18fa, 0x3ff3fd80, 0x77e70577,
63 .word 0x3e78acb9, 0x0f6bf3aa, 0x3ff3de94, 0x8077db58,
64 .word 0x3e786186, 0x18618618, 0x3ff3c036, 0x50e00e03,
65 .word 0x3e781818, 0x18181818, 0x3ff3a261, 0xba6d7a37,
66 .word 0x3e77d05f, 0x417d05f4, 0x3ff38512, 0xba21f51e,
67 .word 0x3e778a4c, 0x8178a4c8, 0x3ff36845, 0x766eec92,
68 .word 0x3e7745d1, 0x745d1746, 0x3ff34bf6, 0x3d156826,
69 .word 0x3e7702e0, 0x5c0b8170, 0x3ff33021, 0x8127c0e0,
70 .word 0x3e76c16c, 0x16c16c17, 0x3ff314c3, 0xd92a9e91,
71 .word 0x3e768168, 0x16816817, 0x3ff2f9d9, 0xfd52fd50,
72 .word 0x3e7642c8, 0x590b2164, 0x3ff2df60, 0xc5df2c9e,
73 .word 0x3e760581, 0x60581606, 0x3ff2c555, 0x2988e428,
74 .word 0x3e75c988, 0x2b931057, 0x3ff2abb4, 0x3c0eb0f4,
75 .word 0x3e758ed2, 0x308158ed, 0x3ff2927b, 0x2cd320f5,
76 .word 0x3e755555, 0x55555555, 0x3ff279a7, 0x4590331c,
77 .word 0x3e751d07, 0xeae2f815, 0x3ff26135, 0xe91daf55,
78 .word 0x3e74e5e0, 0xa72f0539, 0x3ff24924, 0x92492492,
79 .word 0x3e74afd6, 0xa052bf5b, 0x3ff23170, 0xd2be638a,
80 .word 0x3e747ae1, 0x47ae147b, 0x3ff21a18, 0x51ff630a,
81 .word 0x3e7446f8, 0x6562d9fb, 0x3ff20318, 0xcc6a8f5d,
82 .word 0x3e741414, 0x14141414, 0x3ff1ec70, 0x124e98f9,
83 .word 0x3e73e22c, 0xbce4a902, 0x3ff1d61c, 0x070ae7d3,
84 .word 0x3e73b13b, 0x13b13b14, 0x3ff1c01a, 0xa03be896,
85 .word 0x3e738138, 0x13813814, 0x3ff1aa69, 0xe4f2777f,
86 .word 0x3e73521c, 0xfb2b78c1, 0x3ff19507, 0xecf5b9e9,
87 .word 0x3e7323e3, 0x4a2b10bf, 0x3ff17ff2, 0xe00ec3ee,
88 .word 0x3e72f684, 0xbda12f68, 0x3ff16b28, 0xf55d72d4,
89 .word 0x3e72c9fb, 0x4d812ca0, 0x3ff156a8, 0x72b5ef62,
90 .word 0x3e729e41, 0x29e4129e, 0x3ff1426f, 0xac0654db,
91 .word 0x3e727350, 0xb8812735, 0x3ff12e7d, 0x02c40253,
92 .word 0x3e724924, 0x92492492, 0x3ff11ace, 0xe560242a,
93 .word 0x3e721fb7, 0x8121fb78, 0x3ff10763, 0xcec30b26,
94 .word 0x3e71f704, 0x7dc11f70, 0x3ff0f43a, 0x45cdedad,
95 .word 0x3e71cf06, 0xada2811d, 0x3ff0e150, 0xdce2b60c,
96 .word 0x3e71a7b9, 0x611a7b96, 0x3ff0cea6, 0x317186dc,
97 .word 0x3e718118, 0x11811812, 0x3ff0bc38, 0xeb8ba412,
98 .word 0x3e715b1e, 0x5f75270d, 0x3ff0aa07, 0xbd7b7488,
99 .word 0x3e7135c8, 0x1135c811, 0x3ff09811, 0x63615499,
100 .word 0x3e711111, 0x11111111, 0x3ff08654, 0xa2d4f6db,
101 .word 0x3e70ecf5, 0x6be69c90, 0x3ff074d0, 0x4a8b1438,
102 .word 0x3e70c971, 0x4fbcda3b, 0x3ff06383, 0x31ff307a,
103 .word 0x3e70a681, 0x0a6810a7, 0x3ff0526c, 0x39213bfa,
104 .word 0x3e708421, 0x08421084, 0x3ff0418a, 0x4806de7d,
105 .word 0x3e70624d, 0xd2f1a9fc, 0x3ff030dc, 0x4ea03a72,
106 .word 0x3e704104, 0x10410410, 0x3ff02061, 0x446ffa9a,
107 .word 0x3e702040, 0x81020408, 0x3ff01018, 0x28467ee9,
! Rows 64..127 (lowest exponent bit set): the tbl_div column repeats the
! values of rows 0..63; tbl_sqrt runs from 1.0 down toward 1/sqrt(2).
108 .word 0x3e800000, 0x00000000, 0x3ff00000, 0x00000000,
109 .word 0x3e7f81f8, 0x1f81f820, 0x3fefc0bd, 0x88a0f1d9,
110 .word 0x3e7f07c1, 0xf07c1f08, 0x3fef82ec, 0x882c0f9b,
111 .word 0x3e7e9131, 0xabf0b767, 0x3fef467f, 0x2814b0cc,
112 .word 0x3e7e1e1e, 0x1e1e1e1e, 0x3fef0b68, 0x48d2af1c,
113 .word 0x3e7dae60, 0x76b981db, 0x3feed19b, 0x75e78957,
114 .word 0x3e7d41d4, 0x1d41d41d, 0x3fee990c, 0xdad55ed2,
115 .word 0x3e7cd856, 0x89039b0b, 0x3fee61b1, 0x38f18adc,
116 .word 0x3e7c71c7, 0x1c71c71c, 0x3fee2b7d, 0xddfefa66,
117 .word 0x3e7c0e07, 0x0381c0e0, 0x3fedf668, 0x9b7e6350,
118 .word 0x3e7bacf9, 0x14c1bad0, 0x3fedc267, 0xbea45549,
119 .word 0x3e7b4e81, 0xb4e81b4f, 0x3fed8f72, 0x08e6b82d,
120 .word 0x3e7af286, 0xbca1af28, 0x3fed5d7e, 0xa914b937,
121 .word 0x3e7a98ef, 0x606a63be, 0x3fed2c85, 0x34ed6d86,
122 .word 0x3e7a41a4, 0x1a41a41a, 0x3fecfc7d, 0xa32a9213,
123 .word 0x3e79ec8e, 0x951033d9, 0x3feccd60, 0x45f5d358,
124 .word 0x3e799999, 0x9999999a, 0x3fec9f25, 0xc5bfedd9,
125 .word 0x3e7948b0, 0xfcd6e9e0, 0x3fec71c7, 0x1c71c71c,
126 .word 0x3e78f9c1, 0x8f9c18fa, 0x3fec453d, 0x90f057a2,
127 .word 0x3e78acb9, 0x0f6bf3aa, 0x3fec1982, 0xb2ece47b,
128 .word 0x3e786186, 0x18618618, 0x3febee90, 0x56fb9c39,
129 .word 0x3e781818, 0x18181818, 0x3febc460, 0x92eb3118,
130 .word 0x3e77d05f, 0x417d05f4, 0x3feb9aed, 0xba588347,
131 .word 0x3e778a4c, 0x8178a4c8, 0x3feb7232, 0x5b79db11,
132 .word 0x3e7745d1, 0x745d1746, 0x3feb4a29, 0x3c1d9550,
133 .word 0x3e7702e0, 0x5c0b8170, 0x3feb22cd, 0x56d87d7e,
134 .word 0x3e76c16c, 0x16c16c17, 0x3feafc19, 0xd8606169,
135 .word 0x3e768168, 0x16816817, 0x3fead60a, 0x1d0fb394,
136 .word 0x3e7642c8, 0x590b2164, 0x3feab099, 0xae8f539a,
137 .word 0x3e760581, 0x60581606, 0x3fea8bc4, 0x41a3d02c,
138 .word 0x3e75c988, 0x2b931057, 0x3fea6785, 0xb41bacf7,
139 .word 0x3e758ed2, 0x308158ed, 0x3fea43da, 0x0adc6899,
140 .word 0x3e755555, 0x55555555, 0x3fea20bd, 0x700c2c3e,
141 .word 0x3e751d07, 0xeae2f815, 0x3fe9fe2c, 0x315637ee,
142 .word 0x3e74e5e0, 0xa72f0539, 0x3fe9dc22, 0xbe484458,
143 .word 0x3e74afd6, 0xa052bf5b, 0x3fe9ba9d, 0xa6c73588,
144 .word 0x3e747ae1, 0x47ae147b, 0x3fe99999, 0x9999999a,
145 .word 0x3e7446f8, 0x6562d9fb, 0x3fe97913, 0x63068b54,
146 .word 0x3e741414, 0x14141414, 0x3fe95907, 0xeb87ab44,
147 .word 0x3e73e22c, 0xbce4a902, 0x3fe93974, 0x368cfa31,
148 .word 0x3e73b13b, 0x13b13b14, 0x3fe91a55, 0x6151761c,
149 .word 0x3e738138, 0x13813814, 0x3fe8fba8, 0xa1bf6f96,
150 .word 0x3e73521c, 0xfb2b78c1, 0x3fe8dd6b, 0x4563a009,
151 .word 0x3e7323e3, 0x4a2b10bf, 0x3fe8bf9a, 0xb06e1af3,
152 .word 0x3e72f684, 0xbda12f68, 0x3fe8a234, 0x5cc04426,
153 .word 0x3e72c9fb, 0x4d812ca0, 0x3fe88535, 0xd90703c6,
154 .word 0x3e729e41, 0x29e4129e, 0x3fe8689c, 0xc7e07e7d,
155 .word 0x3e727350, 0xb8812735, 0x3fe84c66, 0xdf0ca4c2,
156 .word 0x3e724924, 0x92492492, 0x3fe83091, 0xe6a7f7e7,
157 .word 0x3e721fb7, 0x8121fb78, 0x3fe8151b, 0xb86fee1d,
158 .word 0x3e71f704, 0x7dc11f70, 0x3fe7fa02, 0x3f1068d1,
159 .word 0x3e71cf06, 0xada2811d, 0x3fe7df43, 0x7579b9b5,
160 .word 0x3e71a7b9, 0x611a7b96, 0x3fe7c4dd, 0x663ebb88,
161 .word 0x3e718118, 0x11811812, 0x3fe7aace, 0x2afa8b72,
162 .word 0x3e715b1e, 0x5f75270d, 0x3fe79113, 0xebbd7729,
163 .word 0x3e7135c8, 0x1135c811, 0x3fe777ac, 0xde80baea,
164 .word 0x3e711111, 0x11111111, 0x3fe75e97, 0x46a0b098,
165 .word 0x3e70ecf5, 0x6be69c90, 0x3fe745d1, 0x745d1746,
166 .word 0x3e70c971, 0x4fbcda3b, 0x3fe72d59, 0xc45f1fc5,
167 .word 0x3e70a681, 0x0a6810a7, 0x3fe7152e, 0x9f44f01f,
168 .word 0x3e708421, 0x08421084, 0x3fe6fd4e, 0x79325467,
169 .word 0x3e70624d, 0xd2f1a9fc, 0x3fe6e5b7, 0xd16657e1,
170 .word 0x3e704104, 0x10410410, 0x3fe6ce69, 0x31d5858d,
171 .word 0x3e702040, 0x81020408, 0x3fe6b761, 0x2ec892f6,
! Constants stored directly after the 128 16-byte table rows, i.e.
! starting 2048 bytes into the table.  K0..K3 are the coefficients of
! the cubic evaluated in Horner form as ((K3*xx + K2)*xx + K1)*xx + K0.
173 .word 0x3fefffff, 0xfee7f18f ! K0 = 9.99999997962321453275e-01
174 .word 0xbfdfffff, 0xfe07e52f ! K1 = -4.99999998166077580600e-01
175 .word 0x3fd80118, 0x0ca296d9 ! K2 = 3.75066768969515586277e-01
176 .word 0xbfd400fc, 0x0bbb8e78 ! K3 = -3.12560092408808548438e-01
177 .word 0x7ffe0000, 0x7ffe0000 ! DC0: per-lane mask (exponent + top 6 mantissa bits), loaded from TBL+2048+32
178 .word 0x3f800000, 0x40000000 ! FONE = 1.0f (high word), FTWO = 2.0f (low word); pair loaded from TBL+2048+40
! Integer registers holding the range-check bounds.  Both are filled
! with sethi in the prologue and compared against the raw float bits:
!   ax >= 0x7f800000  -> Inf/NaN path
!   ax <  0x00800000  -> zero / denormal / negative path
195 #define _0x00800000 %o2
196 #define _0x7f800000 %o4

! Stack temporaries, addressed as [%fp+tmpN].
! tmp0..tmp2 each receive a 64-bit value (lexp0 | lexp1) via stx and are
! read back with ldd as the fdx0 operand of fpadd32.
! tmp3: no use is visible in this part of the file -- TODO confirm.
198 #define tmp0 STACK_BIAS-0x30
199 #define tmp1 STACK_BIAS-0x28
200 #define tmp2 STACK_BIAS-0x20
201 #define tmp3 STACK_BIAS-0x18
! tmp_counter: element count saved on entry, reloaded into counter and
! then cleared; later paths store an updated counter back into it.
202 #define tmp_counter STACK_BIAS-0x10
! tmp_px: presumably the saved input pointer that pairs with
! tmp_counter; no use is visible in this part of the file -- TODO confirm.
203 #define tmp_px STACK_BIAS-0x08
205 ! sizeof temp storage - must be a multiple of 16 for V9
208 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
209 ! !!!!! algorithm !!!!!
210 ! ((float*)&ddx0)[0] = *px;
213 ! ((float*)&ddx0)[1] = *(px + stridex);
214 ! ax1 = *(int*)(px + stridex);
218 ! if ( ax0 >= 0x7f800000 )
220 ! RETURN ( FONE / ((float*)&dres0)[0] );
222 ! if ( ax0 < 0x00800000 )
224 ! float res = ((float*)&dres0)[0];
226 ! if ( (ax0 & 0x7fffffff) == 0 ) /* |X| = zero */
228 ! RETURN ( FONE / res )
230 ! else if ( ax0 >= 0 ) /* X = denormal */
232 ! double res0, xx0, tbl_div0, tbl_sqrt0;
234 ! int iax0, si0, iexp0;
240 ! iexp0 = 0x3f + 0x4b - iexp0;
241 ! iexp0 = iexp0 << 23;
243 ! si0 = (ax0 >> 13) & 0x7f0;
245 ! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
246 ! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
247 ! iax0 = ax0 & 0x7ffe0000;
249 ! xx0 = iax0 * tbl_div0;
250 ! res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0);
253 ! iexp0 += *(int*)&fres0;
254 ! RETURN(*(float*)&iexp0)
256 ! else /* X = negative */
258 ! RETURN ( sqrtf(res) )
261 ! if ( ax1 >= 0x7f800000 )
263 ! RETURN ( FONE / ((float*)&dres0)[1] )
265 ! if ( ax1 < 0x00800000 )
267 ! float res = ((float*)&dres0)[1];
268 ! if ( (ax1 & 0x7fffffff) == 0 ) /* |X| = zero */
270 ! RETURN ( FONE / res )
272 ! else if ( ax1 >= 0 ) /* X = denormal */
274 ! double res0, xx0, tbl_div0, tbl_sqrt0;
276 ! int iax1, si0, iexp0;
282 ! iexp0 = 0x3f + 0x4b - iexp0;
283 ! iexp0 = iexp0 << 23;
285 ! si0 = (ax1 >> 13) & 0x7f0;
287 ! tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
288 ! tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
289 ! iax1 = ax1 & 0x7ffe0000;
291 ! xx0 = iax1 * tbl_div0;
292 ! res0 = tbl_sqrt0 * (((K3 * xx0 + K2) * xx0 + K1) * xx0 + K0);
295 ! iexp0 += *(int*)&fres0;
296 ! RETURN(*(float*)&iexp0)
298 ! else /* X = negative */
300 ! RETURN ( sqrtf(res) )
306 ! iexp0 = 0x3f - iexp0;
307 ! iexp1 = 0x3f - iexp1;
309 ! lexp0 = iexp0 << 55;
310 ! lexp1 = iexp1 << 23;
314 ! fdx0 = *((double*)&lexp0);
321 ! addr0 = (char*)TBL + si0;
322 ! addr1 = (char*)TBL + si1;
323 ! tbl_div0 = ((double*)((char*)TBL + si0))[0];
324 ! tbl_div1 = ((double*)((char*)TBL + si1))[0];
325 ! tbl_sqrt0 = ((double*)addr0)[1];
326 ! tbl_sqrt1 = ((double*)addr1)[1];
327 ! dfx0 = vis_fand(ddx0,DC0);
328 ! dfx0 = vis_fpsub32(ddx0,dfx0);
329 ! dtmp0 = (double)(((int*)&dfx0)[0]);
330 ! dtmp1 = (double)(((int*)&dfx0)[1]);
331 ! xx0 = dtmp0 * tbl_div0;
332 ! xx1 = dtmp1 * tbl_div1;
345 ! res0 = tbl_sqrt0 * res0;
346 ! res1 = tbl_sqrt1 * res1;
347 ! ((float*)&dres0)[0] = (float)res0;
348 ! ((float*)&dres0)[1] = (float)res1;
349 ! dres0 = vis_fpadd32(dres0,fdx0);
350 ! *py = ((float*)&dres0)[0];
351 ! *(py + stridey) = ((float*)&dres0)[1];
354 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
357 save %sp,-SA(MINFRAME)-tmps,%sp
359 PIC_SET(l7,.CONST_TBL,l2)
361 st %i0,[%fp+tmp_counter]
372 sethi %hi(0x7f800000),_0x7f800000
373 sll stridex,1,stridex2
376 sethi %hi(0x00800000),_0x00800000
378 ldd [TBL+2048+32],DC0
381 ldd [TBL+2048+40],FONE
382 ! ld [TBL+2048+44],FTWO
384 ld [%fp+tmp_counter],counter
386 st %g0,[%fp+tmp_counter]
391 lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
393 lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
394 sethi %hi(0x7ffffc00),%o0
396 lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
397 add %l7,stridex2,%i1 ! px += stridex2
400 lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
401 fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
403 sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
404 add %i1,stridex2,%o5 ! px += stridex2
406 cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000
407 bge,pn %icc,.spec0 ! (4_1) if ( ax0 >= 0x7f800000 )
410 cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000
411 bl,pn %icc,.spec1 ! (4_1) if ( ax0 < 0x00800000 )
412 sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
414 and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
416 ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
417 sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
418 and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
419 fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
421 ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
422 sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
423 sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
425 and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff;
426 add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
428 sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
429 sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
430 fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
432 sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
433 fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
435 or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
437 stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
439 fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
441 lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px;
442 fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
444 lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
446 lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px;
448 lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex);
449 cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000
450 bge,pn %icc,.update0 ! (5_1) if ( ax1 >= 0x7f800000 )
451 fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0;
453 fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
454 cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000
455 bl,pn %icc,.update1 ! (5_1) if ( ax1 < 0x00800000 )
456 fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
458 sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
459 cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000
461 sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
462 and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
464 ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
465 sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
466 and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
467 fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
469 ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
470 sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
471 sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1;
472 faddd %f52,K2,%f62 ! (4_1) res0 += K2;
474 sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
475 bge,pn %icc,.update2 ! (0_0) if ( ax0 >= 0x7f800000 )
476 faddd %f50,K2,%f60 ! (5_1) res1 += K2;
478 cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000
479 and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff;
480 fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
482 sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
483 bl,pn %icc,.update3 ! (0_0) if ( ax0 < 0x00800000 )
484 fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
486 fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0;
487 sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55;
489 fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1;
490 or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1;
491 stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0);
493 fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0;
494 sll stridex,1,stridex2 ! stridex2 = stridex * 2;
496 lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px;
497 add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
498 fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
500 lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
501 add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0;
502 faddd %f30,K1,%f62 ! (4_1) res0 += K1;
504 lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px;
505 add %o5,stridex2,%l7 ! px += stridex2
506 faddd %f48,K1,%f42 ! (5_1) res1 += K1;
508 lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex);
509 cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000
510 bge,pn %icc,.update4 ! (1_0) if ( ax1 >= 0x7f800000 )
511 fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0;
513 fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
514 cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000
515 bl,pn %icc,.update5 ! (1_0) if ( ax1 < 0x00800000 )
516 fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
518 fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0;
519 sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
520 cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000
522 fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1;
523 sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
524 and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
526 ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
527 sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
528 and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
529 fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
531 ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
532 sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
533 sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1;
534 faddd %f52,K2,%f40 ! (0_0) res0 += K2;
536 ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
537 sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
538 and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff;
539 faddd %f50,K2,%f60 ! (1_0) res0 += K2;
541 ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
542 sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55;
543 add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
544 fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
546 sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
547 fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
549 fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0;
550 or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1;
551 faddd %f48,K0,%f62 ! (4_1) res0 += K0;
553 fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1;
554 add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
555 stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
556 faddd %f58,K0,%f60 ! (5_1) res1 += K0;
558 fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
559 bge,pn %icc,.update6 ! (2_0) if ( ax0 >= 0x7f800000 )
560 lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
562 cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000
563 bl,pn %icc,.update7 ! (2_0) if ( ax0 < 0x00800000 )
566 fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
568 lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
569 cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000
570 fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0;
571 faddd %f40,K1,%f46 ! (0_0) res0 += K1;
573 lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
574 add %l7,stridex2,%i1 ! px += stridex2
575 fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1;
576 faddd %f48,K1,%f62 ! (1_0) res1 += K1;
578 lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
579 add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0;
580 bge,pn %icc,.update8 ! (3_0) if ( ax1 >= 0x7f800000 )
581 fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0;
583 fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
584 cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000
585 bl,pn %icc,.update9 ! (3_0) if ( ax1 < 0x00800000 )
586 fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
588 fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0;
589 sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
590 add %i1,stridex2,%o5 ! px += stridex2
591 fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0;
593 fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1;
594 sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
595 and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
596 fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1;
598 ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
599 sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
600 and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
601 fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
603 ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
604 sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
605 sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
606 faddd %f52,K2,%f58 ! (2_0) res0 += K2;
608 ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
609 and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff;
610 add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
611 faddd %f50,K2,%f60 ! (3_0) res1 += K2;
613 ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
614 sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
615 sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
616 fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
618 ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0);
619 sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
620 fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
622 fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0;
623 or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
624 faddd %f48,K0,%f22 ! (0_0) res0 += K0;
626 fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1;
627 stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
628 faddd %f40,K0,%f26 ! (1_0) res1 += K0;
630 fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
631 fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
634 add stridey,stridey,stridey2
641 sub counter,6,counter ! counter
645 lda [%i1]0x82,%f18 ! (0_0) ((float*)&ddx0)[0] = *px;
646 cmp %g1,_0x7f800000 ! (4_1) ax0 ? 0x7f800000
647 bge,pn %icc,.update10 ! (4_1) if ( ax0 >= 0x7f800000 )
648 fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
650 lda [stridex+%i1]0x82,%f19 ! (1_0) ((float*)&ddx0)[1] = *(px + stridex);
651 cmp %g1,_0x00800000 ! (4_1) ax0 ? 0x00800000
652 fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0;
653 faddd %f62,K1,%f42 ! (2_1) res0 += K1;
655 lda [%i1]0x82,%g1 ! (0_0) ax0 = *(int*)px;
656 fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1;
657 bl,pn %icc,.update11 ! (4_1) if ( ax0 < 0x00800000 )
658 faddd %f58,K1,%f62 ! (3_1) res1 += K1;
660 lda [stridex+%i1]0x82,%i4 ! (1_0) ax1 = *(int*)(px + stridex);
661 cmp %g5,_0x7f800000 ! (5_1) ax1 ? 0x7f800000
662 bge,pn %icc,.update12 ! (5_1) if ( ax1 >= 0x7f800000 )
663 fmuld K3,%f40,%f52 ! (4_1) res0 = K3 * xx0;
665 fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
666 cmp %g5,_0x00800000 ! (5_1) ax1 ? 0x00800000
667 bl,pn %icc,.update13 ! (5_1) if ( ax1 < 0x00800000 )
668 fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
670 fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0;
671 sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
672 cmp %g1,_0x7f800000 ! (0_0) ax0 ? 0x7f800000
673 fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0;
675 fmuld %f62,%f24,%f58 ! (3_1) res1 *= xx1;
676 sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
677 and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
678 fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1;
680 ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
681 sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
682 and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
683 fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
685 ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
686 sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
687 sub %l0,%i1,%i1 ! (1_0) iexp1 = 0x3f - iexp1;
688 faddd %f52,K2,%f62 ! (4_1) res0 += K2;
690 ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
691 sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
692 bge,pn %icc,.update14 ! (0_0) if ( ax0 >= 0x7f800000 )
693 faddd %f50,K2,%f60 ! (5_1) res1 += K2;
695 ldd [%o1+8],%f28 ! (3_1) tbl_sqrt1 = ((double*)addr0)[1];
696 cmp %g1,_0x00800000 ! (0_0) ax0 ? 0x00800000
697 and %i1,511,%i0 ! (1_0) iexp1 = 0x1ff;
698 fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
700 ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0);
701 sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
702 bl,pn %icc,.update15 ! (0_0) if ( ax0 < 0x00800000 )
703 fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
705 fmuld %f62,%f40,%f30 ! (4_1) res0 *= xx0;
706 sllx %g5,55,%g5 ! (0_0) lexp0 = iexp0 << 55;
707 st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0];
708 faddd %f48,K0,%f62 ! (2_1) res0 += K0;
710 fmuld %f60,%f46,%f48 ! (5_1) res1 *= xx1;
711 or %g5,%i0,%g5 ! (0_0) lexp0 |= lexp1;
712 stx %g5,[%fp+tmp1] ! (0_0) fdx0 = *((double*)lexp0);
713 faddd %f58,K0,%f60 ! (3_1) res1 += K0;
715 fmuld %f56,%f54,%f26 ! (0_0) xx0 = dtmp0 * tbl_div0;
716 sll stridex,1,stridex2 ! stridex2 = stridex * 2;
717 st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1];
718 fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
720 lda [%o5]0x82,%f24 ! (2_0) ((float*)&ddx0)[0] = *px;
721 add %l7,stridey2,%i1 ! py += stridey2
722 add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
723 fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
725 lda [stridex+%o5]0x82,%f25 ! (3_0) ((float*)&ddx0)[1] = *(px + stridex);
726 add %l5,TBL,%l5 ! (4_1) addr0 = (char*)TBL + si0;
727 fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0;
728 faddd %f30,K1,%f62 ! (4_1) res0 += K1;
730 lda [%o5]0x82,%g1 ! (2_0) ax0 = *(int*)px;
731 add %o5,stridex2,%l7 ! px += stridex2
732 fmuld %f28,%f60,%f56 ! (3_1) res1 = tbl_sqrt1 * res1;
733 faddd %f48,K1,%f42 ! (5_1) res1 += K1;
735 lda [stridex+%o5]0x82,%o5 ! (3_0) ax1 = *(int*)(px + stridex);
736 cmp %i4,_0x7f800000 ! (1_0) ax1 ? 0x7f800000
737 bge,pn %icc,.update16 ! (1_0) if ( ax1 >= 0x7f800000 )
738 fmuld K3,%f26,%f52 ! (0_0) res0 = K3 * xx0;
740 fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
741 cmp %i4,_0x00800000 ! (1_0) ax1 ? 0x00800000
742 bl,pn %icc,.update17 ! (1_0) if ( ax1 < 0x00800000 )
743 fand %f24,DC0,%f54 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
745 fmuld %f62,%f40,%f48 ! (4_1) res0 *= xx0;
746 sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
747 cmp %g1,_0x7f800000 ! (2_0) ax0 ? 0x7f800000
748 fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0;
750 fmuld %f42,%f46,%f58 ! (5_1) res1 *= xx1;
751 sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
752 and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
753 fdtos %f56,%f21 ! (3_1) ((float*)&dres0)[0] = (float)res0;
755 ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
756 sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
757 and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
758 fpsub32 %f24,%f54,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
760 ldd [%o1+TBL],%f46 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
761 sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
762 sub %l0,%o3,%o3 ! (3_0) iexp1 = 0x3f - iexp1;
763 faddd %f52,K2,%f40 ! (0_0) res0 += K2;
765 ldd [%l5+8],%f42 ! (4_1) tbl_sqrt0 = ((double*)addr0)[1];
766 sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
767 and %o3,511,%i3 ! (3_0) iexp1 &= 0x1ff;
768 faddd %f50,K2,%f60 ! (1_0) res0 += K2;
770 ldd [%l6+8],%f28 ! (5_1) tbl_sqrt1 = ((double*)addr1)[1];
771 sllx %g5,55,%g5 ! (2_0) lexp0 = iexp0 << 55;
772 add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
773 fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
775 ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0);
776 sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
777 add %i1,stridey2,%o3 ! py += stridey2
778 fitod %f13,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
780 fmuld %f40,%f26,%f40 ! (0_0) res0 *= xx0;
781 or %g5,%i3,%g5 ! (2_0) lexp0 |= lexp1;
782 st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0];
783 faddd %f48,K0,%f62 ! (4_1) res0 += K0;
785 fmuld %f60,%f44,%f48 ! (1_0) res1 *= xx1;
786 add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
787 stx %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
788 faddd %f58,K0,%f60 ! (5_1) res1 += K0;
790 fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
791 bge,pn %icc,.update18 ! (2_0) if ( ax0 >= 0x7f800000 )
792 st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1];
793 fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
795 cmp %g1,_0x00800000 ! (2_0) ax0 ? 0x00800000
796 bl,pn %icc,.update19 ! (2_0) if ( ax0 < 0x00800000 )
797 lda [%l7]0x82,%f14 ! (4_0) ((float*)&ddx0)[0] = *px;
798 fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
800 lda [stridex+%l7]0x82,%f15 ! (5_0) ((float*)&ddx0)[1] = *(px + stridex);
801 cmp %o5,_0x7f800000 ! (3_0) ax1 ? 0x7f800000
802 fmuld %f42,%f62,%f58 ! (4_1) res0 = tbl_sqrt0 * res0;
803 faddd %f40,K1,%f46 ! (0_0) res0 += K1;
805 lda [%l7]0x82,%g1 ! (4_0) ax0 = *(int*)px;
806 add %l7,stridex2,%i1 ! px += stridex2
807 fmuld %f28,%f60,%f56 ! (5_1) res1 = tbl_sqrt1 * res1;
808 faddd %f48,K1,%f62 ! (1_0) res1 += K1;
810 lda [stridex+%l7]0x82,%g5 ! (5_0) ax1 = *(int*)(px + stridex);
811 add %o0,TBL,%o0 ! (0_0) addr0 = (char*)TBL + si0;
812 bge,pn %icc,.update20 ! (3_0) if ( ax1 >= 0x7f800000 )
813 fmuld K3,%f30,%f52 ! (2_0) res0 = K3 * xx0;
815 fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
816 cmp %o5,_0x00800000 ! (3_0) ax1 ? 0x00800000
817 bl,pn %icc,.update21 ! (3_0) if ( ax1 < 0x00800000 )
818 fand %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
820 fmuld %f46,%f26,%f48 ! (0_0) res0 *= xx0;
821 sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
822 add %i1,stridex2,%o5 ! px += stridex2
823 fdtos %f58,%f6 ! (4_1) ((float*)&dres0)[0] = (float)res0;
825 fmuld %f62,%f44,%f40 ! (1_0) res1 *= xx1;
826 sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
827 and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
828 fdtos %f56,%f7 ! (5_1) ((float*)&dres0)[1] = (float)res1;
830 ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
831 sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
832 and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
833 fpsub32 %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
835 ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
836 sra %g1,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
837 sub %l0,%l7,%l7 ! (5_0) iexp1 = 0x3f - iexp1;
838 faddd %f52,K2,%f58 ! (2_0) res0 += K2;
840 ldd [%o0+8],%f42 ! (0_0) tbl_sqrt0 = ((double*)addr0)[1];
841 and %l7,511,%l1 ! (5_0) iexp1 = 0x1ff;
842 add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
843 faddd %f50,K2,%f60 ! (3_0) res1 += K2;
845 ldd [%o7+8],%f28 ! (1_0) tbl_sqrt1 = ((double*)addr1)[1];
846 sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
847 sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
848 fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
850 ldd [%fp+tmp0],%f52 ! (4_1) fdx0 = *((double*)lexp0);
851 sllx %o0,55,%o0 ! (4_0) lexp0 = iexp0 << 55;
852 add %o3,stridey2,%l7 ! py += stridey2
853 fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
855 fmuld %f58,%f30,%f62 ! (2_0) res0 *= xx0;
856 or %o0,%l1,%o0 ! (4_0) lexp0 |= lexp1;
857 st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0];
858 faddd %f48,K0,%f22 ! (0_0) res0 += K0;
860 fmuld %f60,%f24,%f58 ! (3_0) res1 *= xx1;
861 subcc counter,6,counter ! counter -= 6;
862 stx %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
863 faddd %f40,K0,%f26 ! (1_0) res1 += K0;
865 fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
866 st %f1,[stridey+%o3] ! (3_1) *(py + stridey) = ((float*)&dres0)[1];
867 bpos,pt %icc,.main_loop
868 fpadd32 %f6,%f52,%f10 ! (4_1) dres0 = vis_fpadd32(dres0,fdx0);
870 add counter,6,counter
872 sll stridex,1,stridex2
873 subcc counter,1,counter
877 fmuld %f42,%f22,%f44 ! (0_1) res0 = tbl_sqrt0 * res0;
878 faddd %f62,K1,%f42 ! (2_1) res0 += K1;
880 fmuld %f28,%f26,%f60 ! (1_1) res1 = tbl_sqrt1 * res1;
882 fmuld %f42,%f30,%f48 ! (2_1) res0 *= xx0;
883 fdtos %f44,%f8 ! (0_1) ((float*)&dres0)[0] = (float)res0;
885 fdtos %f60,%f9 ! (1_1) ((float*)&dres0)[1] = (float)res1;
887 ldd [%i0+8],%f42 ! (2_1) tbl_sqrt0 = ((double*)addr0)[1];
889 ldd [%fp+tmp1],%f52 ! (0_1) fdx0 = *((double*)lexp0);
891 st %f10,[%l7] ! (4_2) *py = ((float*)&dres0)[0];
892 subcc counter,1,counter
896 faddd %f48,K0,%f62 ! (2_1) res0 += K0;
897 st %f11,[stridey+%l7] ! (5_2) *(py + stridey) = ((float*)&dres0)[1];
898 subcc counter,1,counter
901 fpadd32 %f8,%f52,%f10 ! (0_1) dres0 = vis_fpadd32(dres0,fdx0);
903 add %l7,stridey2,%i1 ! py += stridey2
905 fmuld %f42,%f62,%f58 ! (2_1) res0 = tbl_sqrt0 * res0;
907 fdtos %f58,%f20 ! (2_1) ((float*)&dres0)[0] = (float)res0;
909 ldd [%fp+tmp2],%f52 ! (2_1) fdx0 = *((double*)lexp0);
910 add %i1,stridey2,%o3 ! py += stridey2
912 st %f10,[%i1] ! (0_1) *py = ((float*)&dres0)[0];
913 subcc counter,1,counter
917 st %f11,[stridey+%i1] ! (1_1) *(py + stridey) = ((float*)&dres0)[1];
918 subcc counter,1,counter
921 fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
923 st %f0,[%o3] ! (2_1) *py = ((float*)&dres0)[0];
929 fdivs FONE,%f14,%f14 ! x0 = FONE / x0;
930 add %l7,stridex,%l7 ! px += stridex
931 st %f14,[%i2] ! *py = x0;
932 sub counter,1,counter
934 add %i2,stridey,%i2 ! py += stridey
940 fdivs FONE,%f14,%f14 ! x0 = DONE / x0;
944 fsqrts %f14,%f14 ! x0 = sqrtf(x0);
951 sethi %hi(0x4b000000),%o0
952 sra %g1,13,%l5 ! (4_0) si0 = ax0 >> 13;
953 fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
957 add %l7,stridex,%l7 ! px += stridex
958 sub counter,1,counter
959 st %f14,[%i2] ! *py = x0;
961 add %i2,stridey,%i2 ! py += stridey
972 sub counter,1,counter
973 st counter,[%fp+tmp_counter]
980 sethi %hi(0x7ffffc00),%o0
999 sethi %hi(0x4b000000),%o0
1002 fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
1004 sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
1006 sra %g5,24,%l7 ! (5_0) iexp1 = ax1 >> 24;
1007 and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
1009 fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1011 ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1012 sub %l0,%l7,%l1 ! (5_0) iexp1 = 0x3f - iexp1;
1014 sll %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
1015 add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
1016 st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0);
1017 fitod %f17,%f44 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
1019 fmuld %f44,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
1022 fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
1025 stx %o1,[%fp+tmp_px]
1027 sub counter,1,counter
1028 st counter,[%fp+tmp_counter]
1040 stx %o1,[%fp+tmp_px]
1042 sub counter,2,counter
1043 st counter,[%fp+tmp_counter]
1050 sethi %hi(0x7ffffc00),%o1
1066 fmuls %f18,FTWO,%f18
1069 sethi %hi(0x4b000000),%o1
1072 fand %f18,DC0,%f56 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
1073 sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
1075 and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
1077 ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1078 fpsub32 %f18,%f56,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1080 sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
1081 sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
1083 fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
1086 stx %o1,[%fp+tmp_px]
1088 sub counter,2,counter
1089 st counter,[%fp+tmp_counter]
1098 sub %l7,stridex2,%o1
1101 stx %o1,[%fp+tmp_px]
1103 sub counter,3,counter
1104 st counter,[%fp+tmp_counter]
1111 sethi %hi(0x7ffffc00),%o1
1119 sub %l7,stridex2,%o1
1123 sub %l7,stridex2,%o1
1127 fmuls %f19,FTWO,%f19
1130 sethi %hi(0x4b000000),%o1
1133 fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
1135 sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
1137 sra %i4,24,%i1 ! (1_0) iexp1 = ax1 >> 24;
1138 and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
1139 fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1141 ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1142 sub %l0,%i1,%i0 ! (1_0) iexp1 = 0x3f - iexp1;
1144 sll %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
1145 fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
1147 st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0);
1149 add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
1150 fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
1153 fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
1156 stx %o1,[%fp+tmp_px]
1158 sub counter,3,counter
1159 st counter,[%fp+tmp_counter]
1171 stx %o3,[%fp+tmp_px]
1173 sub counter,4,counter
1174 st counter,[%fp+tmp_counter]
1181 sethi %hi(0x7ffffc00),%o3
1197 fmuls %f24,FTWO,%f24
1200 sethi %hi(0x4b000000),%o3
1203 fands %f24,DC0,%f0 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
1204 sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
1206 and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
1208 ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1209 fpsub32s %f24,%f0,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1211 sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
1213 sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
1215 sll %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55;
1216 add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
1217 fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
1219 st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
1221 fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
1224 stx %o3,[%fp+tmp_px]
1226 sub counter,4,counter
1227 st counter,[%fp+tmp_counter]
1239 stx %o3,[%fp+tmp_px]
1241 sub counter,5,counter
1242 st counter,[%fp+tmp_counter]
1249 sethi %hi(0x7ffffc00),%o3
1269 sethi %hi(0x4b000000),%o3
1272 fands %f0,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
1274 sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
1276 sra %o5,24,%o3 ! (3_0) iexp1 = ax1 >> 24;
1277 and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
1278 fpsub32s %f0,%f8,%f0 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1280 ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1281 sub %l0,%o3,%i3 ! (3_0) iexp1 = 0x3f - iexp1;
1283 sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
1284 fitod %f0,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
1286 add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
1287 st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0);
1289 fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
1292 fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
1294 stx %i3,[%fp+tmp_px]
1296 sub counter,5,counter
1297 st counter,[%fp+tmp_counter]
1309 stx %o3,[%fp+tmp_px]
1311 st counter,[%fp+tmp_counter]
1318 sethi %hi(0x7ffffc00),%i4
1336 fmuls %f14,FTWO,%f14
1339 sethi %hi(0x4b000000),%o3
1342 fands %f14,DC0,%f16 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
1343 sra %i3,13,%l5 ! (4_0) si0 = ax0 >> 13;
1345 and %l5,2032,%l5 ! (4_0) si0 &= 0x7f0;
1347 ldd [%l5+TBL],%f54 ! (4_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1348 fpsub32s %f14,%f16,%f16 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1350 sra %i3,24,%i3 ! (4_0) iexp0 = ax0 >> 24;
1352 sub %l0,%i3,%o0 ! (4_0) iexp0 = 0x3f - iexp0;
1353 fitod %f16,%f56 ! (4_0) dtmp0 = (double)(((int*)dfx0)[0]);
1355 sllx %o0,23,%o0 ! (4_0) lexp0 = iexp0 << 55;
1357 st %o0,[%fp+tmp0] ! (4_0) fdx0 = *((double*)lexp0);
1360 fmuld %f56,%f54,%f40 ! (4_0) xx0 = dtmp0 * tbl_div0;
1362 stx %o3,[%fp+tmp_px]
1364 st counter,[%fp+tmp_counter]
1376 stx %i1,[%fp+tmp_px]
1378 sub counter,1,counter
1379 st counter,[%fp+tmp_counter]
1386 sethi %hi(0x7ffffc00),%o3
1401 fmuls %f15,FTWO,%f15
1404 sethi %hi(0x4b000000),%o3
1407 fands %f15,DC0,%f17 ! (4_0) dfx0 = vis_fand(ddx0,DC0);
1409 sra %g5,13,%l6 ! (5_0) si1 = ax1 >> 13;
1410 sra %g5,24,%o3 ! (5_0) iexp1 = ax1 >> 24;
1411 and %l6,2032,%l6 ! (5_0) si1 &= 0x7f0;
1412 fpsub32s %f15,%f17,%f17 ! (4_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1414 ldd [%l6+TBL],%f46 ! (5_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1415 sub %l0,%o3,%l1 ! (5_0) iexp1 = 0x3f - iexp1;
1417 add %l6,TBL,%l6 ! (5_0) addr1 = (char*)TBL + si1;
1419 sllx %l1,23,%l1 ! (5_0) lexp1 = iexp1 << 23;
1420 st %l1,[%fp+tmp0+4] ! (4_0) fdx0 = *((double*)lexp0);
1422 fitod %f17,%f0 ! (5_0) dtmp1 = (double)(((int*)dfx0)[1]);
1424 fmuld %f0,%f46,%f46 ! (5_1) xx1 = dtmp1 * tbl_div1;
1426 fmuld K3,%f46,%f50 ! (5_1) res1 = K3 * xx1;
1429 stx %i1,[%fp+tmp_px]
1431 sub counter,1,counter
1432 st counter,[%fp+tmp_counter]
1444 stx %o3,[%fp+tmp_px]
1446 sub counter,2,counter
1447 st counter,[%fp+tmp_counter]
1454 sethi %hi(0x7ffffc00),%i3
1471 fmuls %f18,FTWO,%f18
1474 sethi %hi(0x4b000000),%o3
1477 fands %f18,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
1478 sra %g1,13,%o0 ! (0_0) si0 = ax0 >> 13;
1479 and %o0,2032,%o0 ! (0_0) si0 &= 0x7f0;
1481 ldd [%o0+TBL],%f54 ! (0_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1482 fpsub32s %f18,%f0,%f30 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1484 sra %g1,24,%i3 ! (0_0) iexp0 = ax0 >> 24;
1486 sub %l0,%i3,%g5 ! (0_0) iexp0 = 0x3f - iexp0;
1489 fitod %f30,%f56 ! (0_0) dtmp0 = (double)(((int*)dfx0)[0]);
1491 stx %o3,[%fp+tmp_px]
1493 sub counter,2,counter
1494 st counter,[%fp+tmp_counter]
1503 sub %l7,stridex2,%o3
1506 stx %o3,[%fp+tmp_px]
1508 sub counter,3,counter
1509 st counter,[%fp+tmp_counter]
1516 sethi %hi(0x7ffffc00),%i3
1519 sub %l7,stridex2,%o3
1533 fmuls %f19,FTWO,%f19
1536 sethi %hi(0x4b000000),%o3
1539 fands %f19,DC0,%f0 ! (0_0) dfx0 = vis_fand(ddx0,DC0);
1541 sra %i4,13,%g5 ! (1_0) si1 = ax1 >> 13;
1543 sra %i4,24,%i0 ! (1_0) iexp1 = ax1 >> 24;
1544 and %g5,2032,%o7 ! (1_0) si1 &= 0x7f0;
1545 fpsub32s %f19,%f0,%f31 ! (0_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1547 ldd [%o7+TBL],%f44 ! (1_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1548 sub %l0,%i0,%i0 ! (1_0) iexp1 = 0x3f - iexp1;
1550 sllx %i0,23,%i0 ! (1_0) lexp1 = iexp1 << 23;
1551 fitod %f31,%f50 ! (1_0) dtmp0 = (double)(((int*)dfx0)[0]);
1553 st %i0,[%fp+tmp1+4] ! (0_0) fdx0 = *((double*)lexp0);
1555 add %o7,TBL,%o7 ! (1_0) addr0 = (char*)TBL + si0;
1556 fmuld %f50,%f44,%f44 ! (1_0) xx0 = dtmp0 * tbl_div0;
1559 fmuld K3,%f44,%f50 ! (1_0) res1 = K3 * xx1;
1561 stx %o3,[%fp+tmp_px]
1563 sub counter,3,counter
1564 st counter,[%fp+tmp_counter]
1573 fpadd32 %f20,%f52,%f0 ! (2_1) dres0 = vis_fpadd32(dres0,fdx0);
1575 sub %l7,stridex2,%i3
1576 stx %i3,[%fp+tmp_px]
1578 sub counter,4,counter
1579 st counter,[%fp+tmp_counter]
1586 sethi %hi(0x7ffffc00),%i3
1589 fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
1603 fmuls %f24,FTWO,%f24
1606 sethi %hi(0x4b000000),%i3
1609 fands %f24,DC0,%f8 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
1610 sra %g1,13,%i0 ! (2_0) si0 = ax0 >> 13;
1612 and %i0,2032,%i0 ! (2_0) si0 &= 0x7f0;
1614 ldd [%i0+TBL],%f30 ! (2_0) tbl_div0 = ((double*)((char*)TBL + si0))[0];
1615 fpsub32s %f24,%f8,%f12 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1617 sra %g1,24,%i3 ! (2_0) iexp0 = ax0 >> 24;
1619 sub %l0,%i3,%g5 ! (2_0) iexp0 = 0x3f - iexp0;
1621 sllx %g5,23,%g5 ! (2_0) lexp0 = iexp0 << 55;
1622 add %i0,TBL,%i0 ! (2_0) addr0 = (char*)TBL + si0;
1623 fitod %f12,%f56 ! (2_0) dtmp0 = (double)(((int*)dfx0)[0]);
1625 st %g5,[%fp+tmp2] ! (2_0) fdx0 = *((double*)lexp0);
1626 fmuld %f56,%f30,%f30 ! (2_0) xx0 = dtmp0 * tbl_div0;
1629 fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
1631 sub %l7,stridex2,%i3
1632 stx %i3,[%fp+tmp_px]
1634 sub counter,4,counter
1635 st counter,[%fp+tmp_counter]
1639 fmuld %f50,%f46,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
1648 stx %i3,[%fp+tmp_px]
1650 sub counter,5,counter
1651 st counter,[%fp+tmp_counter]
1658 sethi %hi(0x7ffffc00),%i3
1679 sethi %hi(0x4b000000),%i3
1682 fands %f8,DC0,%f24 ! (2_0) dfx0 = vis_fand(ddx0,DC0);
1684 sra %o5,13,%o1 ! (3_0) si1 = ax1 >> 13;
1686 sra %o5,24,%i3 ! (3_0) iexp1 = ax1 >> 24;
1687 and %o1,2032,%o1 ! (3_0) si1 &= 0x7f0;
1688 fpsub32s %f8,%f24,%f24 ! (2_0) dfx0 = vis_fpsub32(ddx0,dfx0);
1690 ldd [%o1+TBL],%f8 ! (3_0) tbl_div1 = ((double*)((char*)TBL + si1))[0];
1691 sub %l0,%i3,%i3 ! (3_0) iexp1 = 0x3f - iexp1;
1693 sllx %i3,23,%i3 ! (3_0) lexp1 = iexp1 << 23;
1694 fitod %f24,%f50 ! (3_0) dtmp1 = (double)(((int*)dfx0)[1]);
1696 add %o1,TBL,%o1 ! (3_0) addr1 = (char*)TBL + si1;
1697 st %i3,[%fp+tmp2+4] ! (2_0) fdx0 = *((double*)lexp0);
1699 fmuld %f50,%f8,%f24 ! (3_0) xx1 = dtmp1 * tbl_div1;
1702 fmuld K3,%f24,%f50 ! (3_0) res1 = K3 * xx1;
1705 stx %i3,[%fp+tmp_px]
1707 sub counter,5,counter
1708 st counter,[%fp+tmp_counter]