4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
! Constant table of 64-bit values, each emitted as two 32-bit words (high word first).
! Presumably this is the data addressed through .CONST_TBL by PIC_SET - the label is not visible here.
! K1..K6 are the coefficients of the polynomial in xx that the code evaluates as
! (((((K6*xx + K5)*xx + K4)*xx + K3)*xx + K2)*xx + K1)*xx.
37 .word 0xbfe00000, 0x0000002f ! K1 =-5.00000000000005209867e-01;
38 .word 0x3fd80000, 0x00000058 ! K2 = 3.75000000000004884257e-01;
39 .word 0xbfd3ffff, 0xff444bc8 ! K3 =-3.12499999317136886551e-01;
40 .word 0x3fd17fff, 0xff5006fe ! K4 = 2.73437499359815081532e-01;
41 .word 0xbfcf80bb, 0xb33ef574 ! K5 =-2.46116125605037803130e-01;
42 .word 0x3fcce0af, 0xf8156949 ! K6 = 2.25606914648617522896e-01;
44 .word 0x001fffff, 0xffffffff ! DC0: mask keeping bits 52..0; used as res = vis_fand(res,DC0)
45 .word 0x3fe00000, 0x00000000 ! DC1: bit pattern of the double 0.5; used as res = vis_for(res,DC1)
46 .word 0x00002000, 0x00000000 ! DC2: only bit 45 set; used as res_c = vis_fpadd32(res,DC2)
47 .word 0x7fffc000, 0x00000000 ! DC3: mask keeping bits 62..46; used as res_c = vis_fand(res_c,DC3)
48 .word 0x0007ffff, 0xffffffff ! DC4: mask keeping bits 50..0; used as res = vis_fand(res,DC4) in the algorithm comment
50 .word 0x43200000, 0x00000000 ! D2ON51 = pow(2,51)
51 .word 0x3ff00000, 0x00000000 ! DONE = 1.0
! Integer registers that hold the two bounds hx is compared against; the
! routine prologue loads them with sethi %hi(0x7ff00000),%o0 and
! sethi %hi(0x00100000),%o1.
57 #define _0x7ff00000 %o0
58 #define _0x00100000 %o1
! Global register that counter is copied from (mov tmp_counter,counter).
72 #define tmp_counter %g5
! Eight scratch slots, 8 bytes apart, expressed as offsets from STACK_BIAS
! and addressed as [%fp+tmpN].  In the visible code tmp0..tmp6 each hold one
! in-flight dlexp bit pattern: stored with stx, reloaded with ldd.
! No use of tmp7 appears in the visible part of the routine.
! Comments are kept on their own lines: a trailing comment on a #define
! line would become part of the macro expansion.
75 #define tmp0 STACK_BIAS-0x40
76 #define tmp1 STACK_BIAS-0x38
77 #define tmp2 STACK_BIAS-0x30
78 #define tmp3 STACK_BIAS-0x28
79 #define tmp4 STACK_BIAS-0x20
80 #define tmp5 STACK_BIAS-0x18
81 #define tmp6 STACK_BIAS-0x10
82 #define tmp7 STACK_BIAS-0x08
84 ! sizeof temp storage - must be a multiple of 16 for V9
87 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
88 ! !!!!! algorithm !!!!!
89 ! ((float*)&res)[0] = ((float*)px)[0];
90 ! ((float*)&res)[1] = ((float*)px)[1];
92 ! if ( hx >= 0x7ff00000 )
95 ! ((float*)py)[0] = ((float*)&res)[0];
96 ! ((float*)py)[1] = ((float*)&res)[1];
101 ! if ( hx < 0x00100000 )
103 ! ax = hx & 0x7fffffff;
104 ! lx = ((int*)px)[1];
106 ! if ( (ax | lx) == 0 )
109 ! ((float*)py)[0] = ((float*)&res)[0];
110 ! ((float*)py)[1] = ((float*)&res)[1];
115 ! else if ( hx >= 0 )
117 ! if ( hx < 0x00080000 )
119 ! res = *(long long*)&res;
120 ! hx = *(int*)&res - (537 << 21);
124 ! res = vis_fand(res,DC4);
125 ! res = *(long long*)&res;
127 ! hx = *(int*)&res - (537 << 21);
133 ! ((float*)py)[0] = ((float*)&res)[0];
134 ! ((float*)py)[1] = ((float*)&res)[1];
145 ! dlexp = *(double*)&lexp;
151 ! res = vis_fand(res,DC0);
152 ! res = vis_for(res,DC1);
153 ! res_c = vis_fpadd32(res,DC2);
154 ! res_c = vis_fand(res_c,DC3);
156 ! addr = (char*)arr + hx;
157 ! dexp_hi = ((double*)addr)[0];
158 ! dexp_lo = ((double*)addr)[1];
159 ! dtmp0 = dexp_hi * dexp_hi;
173 ! res = dexp_hi * res;
179 ! ((float*)py)[0] = ((float*)&res)[0];
180 ! ((float*)py)[1] = ((float*)&res)[1];
182 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
185 save %sp,-SA(MINFRAME)-tmps,%sp
187 PIC_SET(l7,.CONST_TBL,o3)
188 PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
192 sethi %hi(0x7ff00000),%o0
196 sethi %hi(0x00100000),%o1
214 mov tmp_counter,counter
222 lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
223 sethi %hi(0x7ffffc00),%i0
225 lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
228 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
230 lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
231 sethi %hi(0x00080000),%i4
234 add %i1,stridex,%l6 ! px += stridex
236 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
237 lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
238 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
240 lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
241 sra %g1,10,%o2 ! (6_1) hx >>= 10;
244 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
245 bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 )
246 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
248 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
249 bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 )
250 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
252 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
254 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
256 add %o2,8,%l4 ! (6_1) hx += 8;
258 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
260 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
261 sllx %o7,52,%o7 ! (6_1) iexp << 52;
262 and %l4,-16,%l4 ! (6_1) hx = -16;
264 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
265 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
267 add %l6,stridex,%l6 ! px += stridex
268 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
270 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
271 lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
272 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
274 sra %g1,10,%o2 ! (0_0) hx >>= 10;
275 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
276 lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
278 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
279 bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
280 fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3);
282 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
283 fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
285 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
286 bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
287 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
289 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
291 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
293 add %o2,8,%l2 ! (0_0) hx += 8;
294 fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c;
296 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
297 sllx %o7,52,%o7 ! (0_0) iexp << 52;
298 and %l2,-16,%l2 ! (0_0) hx = -16;
300 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
301 add %l6,stridex,%l6 ! px += stridex
302 stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
304 fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0;
305 ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0];
307 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
308 lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
309 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
311 sra %g1,10,%o2 ! (1_0) hx >>= 10;
312 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
313 bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
314 lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
316 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
318 fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
319 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
320 bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
321 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
323 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
324 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
326 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
327 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
329 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
330 add %o2,8,%i2 ! (1_0) hx += 8;
331 fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c;
333 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
334 sllx %o7,52,%o7 ! (1_0) iexp << 52;
335 and %i2,-16,%i2 ! (1_0) hx = -16;
337 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
338 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
340 fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0;
341 add %l6,stridex,%l6 ! px += stridex
342 ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0];
343 faddd %f62,K5,%f62 ! (6_1) res += K5;
345 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
346 lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
347 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
349 sra %g1,10,%o2 ! (2_0) hx >>= 10;
350 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
351 bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
352 lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
354 fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
355 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
357 fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
358 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
359 bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
360 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
362 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
363 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
365 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
366 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
368 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
369 add %o2,8,%i4 ! (2_0) hx += 8;
370 fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c;
372 faddd %f40,K4,%f40 ! (6_1) res += K4;
374 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
375 sllx %o7,52,%o7 ! (2_0) iexp << 52;
376 and %i4,-16,%i4 ! (2_0) hx = -16;
378 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
379 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
381 fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0;
382 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
383 faddd %f62,K5,%f62 ! (0_0) res += K5;
385 fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
386 add %l6,stridex,%l6 ! px += stridex
388 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
389 lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
390 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
392 sra %g1,10,%o2 ! (3_0) hx >>= 10;
393 cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
394 bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
395 lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
397 fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
398 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
399 fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3);
401 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
402 bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
403 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
404 faddd %f34,K3,%f6 ! (6_1) res += K3;
406 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
407 fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
409 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
410 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
412 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
413 add %o2,8,%i5 ! (3_0) hx += 8;
414 fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c;
416 fmuld %f6,%f26,%f22 ! (6_1) res *= xx;
417 faddd %f60,K4,%f60 ! (0_0) res += K4;
419 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
420 sllx %o7,52,%o7 ! (3_0) iexp << 52;
421 and %i5,-16,%i5 ! (3_0) hx = -16;
423 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
424 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
426 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
427 add %l6,stridex,%i0 ! px += stridex
428 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
429 faddd %f62,K5,%f62 ! (1_0) res += K5;
431 faddd %f22,K2,%f10 ! (6_1) res += K2;
432 fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
434 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
435 lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
436 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
438 sra %g1,10,%o2 ! (4_0) hx >>= 10;
439 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
440 bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
441 lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
443 fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3);
444 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
446 fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
447 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
448 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
449 faddd %f34,K3,%f60 ! (0_0) res += K3;
451 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
452 bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
453 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
454 fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
456 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
457 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
459 fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx;
460 add %o2,8,%l1 ! (4_0) hx += 8;
461 fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c;
463 fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
464 faddd %f62,K4,%f6 ! (1_0) res += K4;
466 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
467 sllx %o7,52,%o7 ! (4_0) iexp << 52;
468 and %l1,-16,%l1 ! (4_0) hx = -16;
469 faddd %f58,K1,%f58 ! (6_1) res += K1;
471 add %i0,stridex,%i1 ! px += stridex
472 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
473 stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
475 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
476 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
477 faddd %f10,K5,%f62 ! (2_0) res += K5;
479 fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
480 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
482 faddd %f60,K2,%f60 ! (0_0) res += K2;
484 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
485 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
486 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
487 fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
489 sra %g1,10,%o2 ! (5_0) hx >>= 10;
490 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
491 bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 )
492 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
494 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
495 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
497 fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
498 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
499 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
500 faddd %f34,K3,%f34 ! (1_0) res += K3;
502 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
503 bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 )
505 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
507 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
508 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
509 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
511 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
512 add %o2,8,%i3 ! (5_0) hx += 8;
513 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
515 fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
520 faddd %f62,K4,%f34 ! (2_0) res += K4;
523 sub counter,7,counter ! counter
527 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
528 and %i3,-16,%i3 ! (5_1) hx = -16;
529 lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
530 faddd %f58,K1,%f58 ! (0_1) res += K1;
532 add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx;
533 sllx %o7,52,%o7 ! (5_1) iexp << 52;
534 stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp;
535 faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
537 faddd %f22,K5,%f62 ! (3_1) res += K5;
538 add %i1,stridex,%l6 ! px += stridex
539 ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0];
540 fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
542 faddd %f24,K2,%f26 ! (1_1) res += K2;
543 add %i0,stridey,%i1 ! px += stridey
544 ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
545 fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
547 fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
548 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
549 lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
550 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
552 lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
553 sra %g1,10,%o2 ! (6_1) hx >>= 10;
554 fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi;
555 faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
557 fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
558 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
559 ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
560 fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3);
562 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
563 bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 )
564 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
565 faddd %f34,K3,%f34 ! (2_1) res += K3;
567 fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
568 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
569 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
570 fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
572 fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
573 bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 )
574 ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
575 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
577 fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
578 add %o2,8,%l4 ! (6_1) hx += 8;
579 st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
580 fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c;
582 fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
583 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
584 st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
585 faddd %f32,K4,%f32 ! (3_1) res += K4;
587 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
588 sllx %o7,52,%o7 ! (6_1) iexp << 52;
589 and %l4,-16,%l4 ! (6_1) hx = -16;
590 faddd %f26,K1,%f26 ! (1_1) res += K1;
592 add %i1,stridey,%i0 ! px += stridey
593 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
594 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
595 faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
597 fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0;
598 add %l6,stridex,%l6 ! px += stridex
599 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
600 faddd %f62,K5,%f62 ! (4_1) res += K5;
602 fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
603 sra %g1,10,%o2 ! (0_0) hx >>= 10;
604 ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0];
605 faddd %f28,K2,%f32 ! (2_1) res += K2;
607 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
608 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
609 lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
610 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
612 fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
613 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
614 lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
615 faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
617 fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
618 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
619 ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
620 fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3);
622 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
623 bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 )
624 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
625 faddd %f34,K3,%f34 ! (3_1) res += K3;
627 fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res;
628 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
629 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
630 fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
632 fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
633 bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 )
634 ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
635 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
637 fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx;
638 add %o2,8,%l2 ! (0_0) hx += 8;
639 st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
640 fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c;
642 fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
644 st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
645 faddd %f38,K4,%f38 ! (4_1) res += K4;
647 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
648 sllx %o7,52,%o7 ! (0_0) iexp << 52;
649 and %l2,-16,%l2 ! (0_0) hx = -16;
650 faddd %f32,K1,%f32 ! (2_1) res += K1;
652 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
653 add %l6,stridex,%l6 ! px += stridex
654 stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
655 faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
657 fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0;
658 add %i0,stridey,%i1 ! px += stridey
659 ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0];
660 faddd %f62,K5,%f62 ! (5_1) res += K5;
662 fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
663 sra %g1,10,%o2 ! (1_0) hx >>= 10;
664 ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
665 faddd %f44,K2,%f38 ! (3_1) res += K2;
667 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
668 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
669 lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
670 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
672 fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
673 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
674 lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
675 faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi;
677 fmuld %f62,%f58,%f36 ! (5_1) res *= xx;
678 bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 )
679 ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
680 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
682 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
683 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
684 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
685 faddd %f34,K3,%f34 ! (4_1) res += K3;
687 fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
688 bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 )
689 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
690 fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
692 fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp;
693 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
694 ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1];
695 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
697 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
698 add %o2,8,%i2 ! (1_0) hx += 8;
699 st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
700 fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c;
702 fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
704 st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
705 faddd %f36,K4,%f36 ! (5_1) res += K4;
707 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
708 sllx %o7,52,%o7 ! (1_0) iexp << 52;
709 and %i2,-16,%i2 ! (1_0) hx = -16;
710 faddd %f38,K1,%f38 ! (3_1) res += K1;
712 add %i1,stridey,%i0 ! px += stridey
713 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
714 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
715 faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo;
717 fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0;
718 add %l6,stridex,%l6 ! px += stridex
719 ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0];
720 faddd %f62,K5,%f62 ! (6_1) res += K5;
722 fmuld %f36,%f58,%f34 ! (5_1) res *= xx;
723 sra %g1,10,%o2 ! (2_0) hx >>= 10;
724 ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0];
725 faddd %f28,K2,%f36 ! (4_1) res += K2;
727 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
728 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
729 lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
730 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
732 fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
733 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
734 lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
735 faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
737 fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
738 bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 )
739 ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
740 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
742 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
743 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
744 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
745 faddd %f34,K3,%f34 ! (5_1) res += K3;
747 fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res;
748 bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 )
749 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
750 fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
752 fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
753 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
754 ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
755 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
757 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
758 add %o2,8,%i4 ! (2_0) hx += 8;
759 st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
760 fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c;
762 fmuld %f34,%f58,%f44 ! (5_1) res *= xx;
764 st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
765 faddd %f40,K4,%f40 ! (6_1) res += K4;
767 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
768 sllx %o7,52,%o7 ! (2_0) iexp << 52;
769 and %i4,-16,%i4 ! (2_0) hx = -16;
770 faddd %f36,K1,%f36 ! (4_1) res += K1;
772 add %l6,stridex,%l6 ! px += stridex
773 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
774 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
775 faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
777 fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0;
778 add %i0,stridey,%i1 ! px += stridey
779 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
780 faddd %f62,K5,%f62 ! (0_0) res += K5;
782 fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
783 sra %g1,10,%o2 ! (3_0) hx >>= 10;
784 ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
785 faddd %f44,K2,%f40 ! (5_1) res += K2;
787 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
788 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
789 lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
790 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
792 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
793 cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
794 lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
795 faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi;
797 fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
798 bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 )
799 ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
800 fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
802 fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
803 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
804 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
805 faddd %f34,K3,%f10 ! (6_1) res += K3;
807 fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
808 bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 )
809 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
810 fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
812 fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
813 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
814 ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
815 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
817 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
818 add %o2,8,%i5 ! (3_0) hx += 8;
819 st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
820 fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c;
822 fmuld %f10,%f26,%f4 ! (6_1) res *= xx;
824 st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
825 faddd %f60,K4,%f60 ! (0_0) res += K4;
827 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
828 sllx %o7,52,%o7 ! (3_0) iexp << 52;
829 and %i5,-16,%i5 ! (3_0) hx = -16;
830 faddd %f40,K1,%f40 ! (5_1) res += K1;
832 add %l6,stridex,%i0 ! px += stridex
833 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
834 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
835 faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
837 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
838 add %i1,stridey,%l6 ! px += stridey
839 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
840 faddd %f62,K5,%f62 ! (1_0) res += K5;
842 faddd %f4,K2,%f10 ! (6_1) res += K2;
843 sra %g1,10,%o2 ! (4_0) hx >>= 10;
845 fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
847 fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
848 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
849 lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
850 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
852 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
853 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
854 lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
855 faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
857 fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3);
858 bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 )
859 ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
860 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
862 fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
863 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
864 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
865 faddd %f34,K3,%f60 ! (0_0) res += K3;
867 fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res;
868 bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 )
869 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
870 fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
872 fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
873 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
874 ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1];
875 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
877 fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx;
878 add %o2,8,%l1 ! (4_0) hx += 8;
879 st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
880 fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c;
882 fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
883 sllx %o7,52,%o7 ! (4_0) iexp << 52;
884 st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
885 faddd %f62,K4,%f6 ! (1_0) res += K4;
887 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
888 add %i0,stridex,%i1 ! px += stridex
889 and %l1,-16,%l1 ! (4_0) hx = -16;
890 faddd %f58,K1,%f58 ! (6_1) res += K1;
892 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
893 add %l6,stridey,%i0 ! px += stridey
894 stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
895 faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo;
897 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
899 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
900 faddd %f30,K5,%f62 ! (2_0) res += K5;
902 fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
903 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
904 ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0];
905 faddd %f60,K2,%f60 ! (0_0) res += K2;
907 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
908 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
909 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
910 fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
912 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
913 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
914 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
915 faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi;
917 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
918 bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 )
919 ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp;
920 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
922 fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
923 sra %g1,10,%o2 ! (5_0) hx >>= 10;
924 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
925 faddd %f34,K3,%f34 ! (1_0) res += K3;
927 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
928 bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 )
929 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
930 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
932 fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp;
933 subcc counter,7,counter ! counter -= 7;
934 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
935 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
937 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
938 add %o2,8,%i3 ! (5_0) hx += 8;
939 st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0];
940 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
942 fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
943 st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1];
944 bpos,pt %icc,.main_loop
945 faddd %f62,K4,%f34 ! (2_0) res += K4;
947 add counter,7,counter
949 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
950 subcc counter,1,counter
954 faddd %f58,K1,%f58 ! (0_1) res += K1;
956 faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
958 faddd %f22,K5,%f62 ! (3_1) res += K5;
959 fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
961 faddd %f24,K2,%f26 ! (1_1) res += K2;
962 add %i1,stridex,%l6 ! px += stridex
963 ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
964 fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
966 fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
968 add %i0,stridey,%i1 ! px += stridey
969 faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
971 fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
972 ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
974 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
975 faddd %f34,K3,%f34 ! (2_1) res += K3;
977 fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
979 fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
980 ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
982 fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
983 st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
985 fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
986 st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
987 faddd %f32,K4,%f32 ! (3_1) res += K4;
989 subcc counter,1,counter
993 faddd %f26,K1,%f26 ! (1_1) res += K1;
995 faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
997 add %l6,stridex,%l6 ! px += stridex
998 faddd %f62,K5,%f62 ! (4_1) res += K5;
1000 fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
1001 add %i1,stridey,%i0 ! px += stridey
1002 ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0];
1003 faddd %f28,K2,%f32 ! (2_1) res += K2;
1005 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
1007 faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
1009 fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
1010 ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
1012 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
1013 faddd %f34,K3,%f34 ! (3_1) res += K3;
1015 fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res;
1017 fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
1018 ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
1020 st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
1022 fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
1023 st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
1024 faddd %f38,K4,%f38 ! (4_1) res += K4;
1026 subcc counter,1,counter
1030 faddd %f32,K1,%f32 ! (2_1) res += K1;
1032 add %l6,stridex,%l6 ! px += stridex
1033 faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
1035 add %i0,stridey,%i1 ! py += stridey
1037 fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
1038 ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
1039 faddd %f44,K2,%f38 ! (3_1) res += K2;
1041 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
1043 faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi;
1045 ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
1047 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
1048 faddd %f34,K3,%f34 ! (4_1) res += K3;
1050 fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
1052 fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp;
1053 ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1];
1055 st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
1057 fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
1058 st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
1060 subcc counter,1,counter
1064 faddd %f38,K1,%f38 ! (3_1) res += K1;
1066 faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo;
1068 add %l6,stridex,%l6 ! px += stridex
1070 add %i1,stridey,%i0 ! py += stridey
1071 ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0];
1072 faddd %f28,K2,%f36 ! (4_1) res += K2;
1074 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
1076 faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
1078 ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
1080 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
1082 fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res;
1084 fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
1085 ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
1087 st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
1089 st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
1091 subcc counter,1,counter
1095 faddd %f36,K1,%f36 ! (4_1) res += K1;
1097 faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
1099 add %i0,stridey,%i1 ! py += stridey
1101 add %l6,stridex,%l6 ! px += stridex
1102 ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
1104 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
1106 faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi;
1108 ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
1110 fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
1112 fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
1113 ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
1115 st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
1117 st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
1119 subcc counter,1,counter
1123 faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
1125 add %l6,stridex,%i0 ! px += stridex
1127 add %i1,stridey,%l6 ! py += stridey
1129 faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
1131 ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
1133 fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
1135 st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
1137 st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
1144 fdivd DONE,%f0,%f0 ! res = DONE / res;
1145 add %i1,stridex,%i1 ! px += stridex
1146 st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
1147 st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
1148 add %o4,stridey,%o4 ! py += stridey
1150 sub counter,1,counter
1156 fdivd DONE,%f0,%f0 ! res = DONE / res;
1160 fsqrtd %f0,%f0 ! res = sqrt(res);
1166 fxtod %f0,%f0 ! res = *(long long*)&res;
1169 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
1172 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1173 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1175 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1178 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1180 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1183 fand %f0,%f18,%f0 ! res = vis_fand(res,DC4);
1186 fxtod %f0,%f0 ! res = *(long long*)&res;
1188 faddd %f0,%f28,%f0 ! res += D2ON51;
1191 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
1194 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1195 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1197 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1200 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1202 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1205 add %i1,stridex,%i1 ! px += stridex
1206 st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
1207 st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
1208 add %o4,stridey,%o4 ! py += stridey
1210 sub counter,1,counter
1218 sub %l6,stridex,tmp_px
1219 sub counter,1,tmp_counter
1236 sethi %hi(0x00080000),%i3
1242 fxtod %f8,%f8 ! res = *(long long*)&res;
1245 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
1248 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1249 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1250 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1254 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1256 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1258 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1261 fxtod %f8,%f8 ! res = *(long long*)&res;
1266 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
1269 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1270 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1271 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1275 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1277 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1279 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1281 sub %l6,stridex,tmp_px
1282 sub counter,1,tmp_counter
1293 sub %l6,stridex,tmp_px
1294 sub counter,2,tmp_counter
1311 sethi %hi(0x00080000),%i3
1317 fxtod %f0,%f0 ! res = *(long long*)&res;
1320 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
1323 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1324 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1326 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1329 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1332 fxtod %f0,%f0 ! res = *(long long*)&res;
1337 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
1340 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1341 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1343 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1346 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1348 sub %l6,stridex,tmp_px
1349 sub counter,2,tmp_counter
1360 sub %l6,stridex,tmp_px
1361 sub counter,3,tmp_counter
1378 sethi %hi(0x00080000),%i4
1384 fxtod %f6,%f6 ! res = *(long long*)&res;
1387 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
1390 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1391 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1394 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1396 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
1399 fxtod %f6,%f6 ! res = *(long long*)&res;
1404 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
1407 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1408 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1411 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1413 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
1415 sub %l6,stridex,tmp_px
1416 sub counter,3,tmp_counter
1427 sub %l6,stridex,tmp_px
1428 sub counter,4,tmp_counter
1438 faddd %f34,K3,%f6 ! (6_1) res += K3;
1446 sethi %hi(0x00080000),%i5
1452 fxtod %f0,%f0 ! res = *(long long*)&res;
1455 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
1458 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1459 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1462 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1464 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
1467 fxtod %f0,%f0 ! res = *(long long*)&res;
1472 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
1475 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1476 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1479 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1481 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
1483 sub %l6,stridex,tmp_px
1484 sub counter,4,tmp_counter
1496 sub counter,5,tmp_counter
1506 fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
1513 sethi %hi(0x00080000),%i1
1519 fxtod %f8,%f8 ! res = *(long long*)&res;
1522 fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
1525 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
1526 sra %g1,10,%o2 ! (4_0) hx >>= 10;
1530 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
1531 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
1533 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
1536 fxtod %f8,%f8 ! res = *(long long*)&res;
1541 fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
1544 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
1545 sra %g1,10,%o2 ! (4_0) hx >>= 10;
1549 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
1550 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
1552 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
1555 sub counter,5,tmp_counter
1567 sub counter,6,tmp_counter
1577 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
1584 sethi %hi(0x00080000),%i3
1590 fxtod %f0,%f0 ! res = *(long long*)&res;
1593 fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
1596 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
1597 sra %g1,10,%o2 ! (5_0) hx >>= 10;
1601 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
1603 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
1605 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
1608 fxtod %f0,%f0 ! res = *(long long*)&res;
1613 fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
1616 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
1617 sra %g1,10,%o2 ! (5_0) hx >>= 10;
1621 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
1623 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
1625 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
1628 sub counter,6,tmp_counter
1637 faddd %f34,K3,%f34 ! (2_1) res += K3;
1639 sub %l6,stridex,tmp_px
1640 sub counter,0,tmp_counter
1650 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
1658 sethi %hi(0x00080000),%l4
1664 fxtod %f6,%f6 ! res = *(long long*)&res;
1667 fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
1670 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1671 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1674 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1675 for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1677 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1679 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
1682 fxtod %f6,%f6 ! res = *(long long*)&res;
1687 fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
1690 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1691 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1692 for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1696 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1697 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1699 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
1701 sub %l6,stridex,tmp_px
1702 sub counter,0,tmp_counter
1711 faddd %f34,K3,%f34 ! (3_1) res += K3;
1713 sub %l6,stridex,tmp_px
1714 sub counter,1,tmp_counter
1724 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
1732 sethi %hi(0x00080000),%l2
1738 fxtod %f0,%f0 ! res = *(long long*)&res;
1741 fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
1744 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1745 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1748 for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1750 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1752 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1753 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1755 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
1758 fxtod %f0,%f0 ! res = *(long long*)&res;
1763 fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
1766 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1767 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1768 for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1772 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1774 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1775 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1777 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
1779 sub %l6,stridex,tmp_px
1780 sub counter,1,tmp_counter
1789 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
1791 sub %l6,stridex,tmp_px
1792 sub counter,2,tmp_counter
1802 fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
1810 sethi %hi(0x00080000),%i2
1816 fxtod %f6,%f6 ! res = *(long long*)&res;
1819 fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
1822 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1823 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1827 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1828 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
1830 for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1833 fxtod %f6,%f6 ! res = *(long long*)&res;
1838 fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
1841 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1842 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1846 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1847 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
1849 for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1851 sub %l6,stridex,tmp_px
1852 sub counter,2,tmp_counter
1861 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
1863 sub %l6,stridex,tmp_px
1864 sub counter,3,tmp_counter
1874 fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
1882 sethi %hi(0x00080000),%i4
1888 fxtod %f0,%f0 ! res = *(long long*)&res;
1891 fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
1894 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1896 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1899 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1900 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
1902 for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
1905 fxtod %f0,%f0 ! res = *(long long*)&res;
1910 fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
1913 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1915 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1918 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1919 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
1921 for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
1923 sub %l6,stridex,tmp_px
1924 sub counter,3,tmp_counter
1933 fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
1935 sub %l6,stridex,tmp_px
1936 sub counter,4,tmp_counter
1946 fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
1954 sethi %hi(0x00080000),%i5
1960 fxtod %f6,%f6 ! res = *(long long*)&res;
1963 fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
1966 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1967 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1970 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1972 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
1974 for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
1977 fxtod %f6,%f6 ! res = *(long long*)&res;
1982 fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
1985 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1986 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1989 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1991 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
1993 for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
1995 sub %l6,stridex,tmp_px
1996 sub counter,4,tmp_counter
2005 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
2007 sub %i0,stridex,tmp_px
2008 sub counter,5,tmp_counter
2018 fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
2026 sethi %hi(0x00080000),%l1
2032 fxtod %f0,%f0 ! res = *(long long*)&res;
2035 fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
2038 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
2040 sra %g1,10,%o2 ! (4_0) hx >>= 10;
2043 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
2044 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
2046 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
2049 fxtod %f0,%f0 ! res = *(long long*)&res;
2054 fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
2057 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
2059 sra %g1,10,%o2 ! (4_0) hx >>= 10;
2062 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
2063 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
2065 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
2067 sub %i0,stridex,tmp_px
2068 sub counter,5,tmp_counter
2077 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
2079 sub %i1,stridex,tmp_px
2080 sub counter,6,tmp_counter
2090 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
2104 sethi %hi(0x00080000),%i3
2110 fxtod %f10,%f10 ! res = *(long long*)&res;
2113 fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
2116 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
2118 sra %g1,10,%o2 ! (5_0) hx >>= 10;
2121 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
2122 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
2125 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
2128 fxtod %f10,%f10 ! res = *(long long*)&res;
2130 faddd %f10,%f60,%f10
2133 fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
2136 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
2138 sra %g1,10,%o2 ! (5_0) hx >>= 10;
2141 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
2142 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
2145 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
2147 sub %i1,stridex,tmp_px
2148 sub counter,6,tmp_counter