4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
35 ! following is the C version of the ATAN algorithm
38 ! double jkatan(double *x)
40 ! double f, z, ans, ansu, ansl, tmp, poly, conup, conlo, dummy;
41 ! int index, sign, intf, intz;
42 ! extern const double __vlibm_TBL_atan1[];
43 ! long *pf = (long *) &f, *pz = (long *) &z;
45 ! /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
46 ! * Error = -3.08254E-18 On the interval |x| < 1/64 */
48 ! /* define dummy names for readability. Use parray to help compiler optimize loads */
49 ! #define p3 parray[0]
50 ! #define p2 parray[1]
51 ! #define p1 parray[2]
54 ! static const double parray[] = {
55 ! -1.428029046844299722E-01, /* p[3] */
56 ! 1.999999917247000615E-01, /* p[2] */
57 ! -3.333333333329292858E-01, /* p[1] */
58 ! 1.0, /* not used for p[0], though */
59 ! -1.0, /* used to flip sign of answer */
62 ! f = *x; /* fetch argument */
63 ! intf = pf[0]; /* grab upper half */
64 ! sign = intf & 0x80000000; /* sign of argument */
65 ! intf ^= sign; /* abs(upper argument) */
66 ! sign = (unsigned) sign >> 31; /* sign bit = 0 or 1 */
69 ! if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
71 ! if( (intf > 0x7ff00000) ||
72 ! ((intf == 0x7ff00000) && (pf[1] !=0)) ) return (*x-*x);/* return NaN if x=NaN*/
73 ! if( intf < 0x3e300000 ) /* avoid underflow for small arg */
79 ! if( intf > 0x43600000 ) /* avoid underflow for big arg */
82 ! f = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
83 ! f = parray[soffset + sign] * f; /* put sign bit on ans */
88 ! index = 0; /* points to 0,0 in table */
89 ! if (intf > 0x40500000) /* if(|x| > 64 */
91 ! index = 2; /* point to pi/2 upper, lower */
93 ! else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
95 ! intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
96 ! pz[0] = intz; /* store as a double (z) */
97 ! pz[1] = 0; /* ...lower */
98 ! f = (f - z)/(1.0 + f*z); /* get reduced argument */
99 ! index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */
100 ! index += 4; /* skip over 0,0,pi/2,pi/2 */
102 ! conup = __vlibm_TBL_atan1[index]; /* upper table */
103 ! conlo = __vlibm_TBL_atan1[index+1]; /* lower table */
105 ! poly = (f*tmp)*((p3*tmp + p2)*tmp + p1);
106 ! ansu = conup + f; /* compute atan(f) upper */
107 ! ansl = (((conup - ansu) + f) + poly) + conlo;
109 ! ans = parray[soffset + sign] * ans;
113 /* 8 bytes = 1 double f.p. word */
116 .align 32 !align with full D-cache line
118 .double 0r-1.428029046844299722E-01 !p[3]
119 .double 0r1.999999917247000615E-01 !p[2]
120 .double 0r-3.333333333329292858E-01 !p[1]
121 .double 0r-1.0, !constant -1.0
122 .word 0x00008000,0x0 !for fp rounding of reduced arg
123 .word 0x7fff0000,0x0 !for fp truncation
124 .word 0x47900000,0 !a number close to 1.0E37
125 .word 0x80000000,0x0 !mask for fp sign bit
126 .word 0x3f800000,0x0 !1.0/128.0 dummy "safe" argument
127 .type .COEFFS,#object
130 save %sp,-SA(MINFRAME)-16,%sp
132 PIC_SET(g5,__vlibm_TBL_atan1,o4)
133 PIC_SET(g5,.COEFFS,o0)
135 __vatan(int n, double *x, int stridex, double *y, stridey)
136 computes y(i) = atan( x(i) ), for 1=1,n. Stridex, stridey
137 are the distance between x and y elements
146 ble,pn %icc,.RETURN !....then do nothing
147 sll %i2,3,%i2 !convert stride to byte count
148 sll %i4,3,%i4 !convert stride to byte count
150 /* pre-load constants before beginning main loop */
152 ldd [%o0],%f58 !load p[3]
153 mov 2,%i5 !argcount = 3
155 ldd [%o0+WSIZE],%f60 !load p[2]
156 add %fp,STACK_BIAS-8,%l1 !yaddr1 = &dummy
157 fzero %f18 !ansu1 = 0
159 ldd [%o0+2*WSIZE],%f62 !load p[1]
160 add %fp,STACK_BIAS-8,%l2 !yaddr2 = &dummy
161 fzero %f12 !(poly1) = 0
163 ldd [%o0+3*WSIZE],%f56 !-1.0
166 ldd [%o0+4*WSIZE],%f52 !load rounding mask
167 fzero %f16 !conup1 = 0
169 ldd [%o0+5*WSIZE],%f54 !load truncation mask
172 ldd [%o0+6*WSIZE],%f50 !1.0e37
175 ldd [%o0+7*WSIZE],%f32 !mask for sign bit
177 ldd [%o4+2*WSIZE],%f46 !pi/2 upper
178 ldd [%o4+(2*WSIZE+8)],%f48 !pi/2 lower
179 sethi %hi(0x40500000),%l6 !64.0
180 sethi %hi(0x3f900000),%l7 !1/64.0
181 mov 0,%l4 !index1 = 0
182 mov 0,%l5 !index2 = 0
186 /*--------------------------------------------------------------------------*/
187 /*--------------------------------------------------------------------------*/
188 /*--------------------------------------------------------------------------*/
193 mov %i1,%o5 !xuse = x (delay slot)
198 PIC_SET(g5,.COEFFS+8*WSIZE,o5)
201 sethi %hi(0x80000000),%o7 !mask for sign bit
202 /*2 */ sethi %hi(0x43600000),%o1 !big = 0x43600000,0
203 ld [%o5],%o0 !intf = pf[0] = f upper
204 ldd [%o4+%l5],%f26 !conup2 = __vlibm_TBL_atan1[index2]
206 sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
207 /*4 */ andn %o0,%o7,%o0 !intf = fabs(intf)
208 ldd [%o5],%f34 !f = *x into f34
210 sub %o1,%o0,%o1 !(-) if intf > big
211 /*6 */ sub %o0,%o2,%o2 !(-) if intf < small
212 fand %f34,%f32,%f40 !sign0 = sign bit
213 fmuld %f38,%f38,%f24 !tmp2= f2*f2
215 /*7 */ orcc %o1,%o2,%g0 !(-) if either true
216 bneg,pn %icc,.SPECIAL0 !if (-) goto special cases below
217 fabsd %f34,%f34 !abs(f) (delay slot)
218 !----------------------
221 sethi %hi(0x8000),%o7 !rounding bit
222 /*8 */ fpadd32 %f34,%f52,%f0 !intf + 0x00008000 (again)
223 faddd %f26,%f38,%f28 !ansu2 = conup2 + f2
225 add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
226 /*9*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
227 fmuld %f58,%f24,%f22 !p[3]*tmp2
229 /*10 */ sethi %hi(0x7fff0000),%o7 !mask for rounding argument
230 fmuld %f34,%f0,%f10 !f*z
231 fsubd %f34,%f0,%f20 !f - z
232 add %o4,%l4,%l4 !base addr + index1
233 fmuld %f14,%f12,%f12 !poly1 = (f1*tmp1)*((p3*tmp1 + p2)*tmp1 + p1)
234 faddd %f16,%f36,%f16 !(conup1 - ansu1) + f1
236 /*12 */ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
237 faddd %f22,%f60,%f22 !p[3]*tmp2 + p[2]
238 ldd [%l4+WSIZE],%f14 !conlo1 = __vlibm_TBL_atan1[index+1]
240 /*13 */ sub %o0,%l7,%o2 !intz - 0x3f900000
241 fsubd %f10,%f56,%f10 !(f*z - (-1.0))
242 faddd %f16,%f12,%f12 !((conup1 - ansu1) + f1) + poly1
244 cmp %o0,%l6 !(|f| > 64)
245 ble .ELSE0 !if(|f| > 64) then
246 /*15 */ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
247 mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
249 /*16 */ fdivd %f56,%f34,%f34 !f = -1.0/f (delay slot)
250 .ELSE0: !else f( |x| >= (1/64))
251 cmp %o0,%l7 !if intf >= 1/64
252 bl .ENDIF0 !if( |x| >= (1/64) ) then...
253 mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
254 add %o3,4,%o1 !index = index + 4
255 /*16 */ fdivd %f20,%f10,%f34 !f = (f - z)/(1.0 + f*z), reduced argument
258 /*17*/ sll %o1,3,%l3 !index0 = index
259 mov %i3,%l0 !yaddr0 = address of y
260 faddd %f12,%f14,%f12 !ansl1 = (((conup1 - ansu)1 + f1) + poly1) + conlo1
261 fmuld %f22,%f24,%f22 !(p3*tmp2 + p2)*tmp2
262 fsubd %f26,%f28,%f26 !conup2 - ansu2
264 /*20*/ add %i1,%i2,%i1 !x += stridex
265 add %i3,%i4,%i3 !y += stridey
266 faddd %f18,%f12,%f36 !ans1 = ansu1 + ansl1
267 fmuld %f38,%f24,%f24 !f*tmp2
268 faddd %f22,%f62,%f22 !(p3*tmp2 + p2)*tmp2 + p1
270 /*23*/ for %f36,%f42,%f36 !sign(ans1) = sign of argument
271 std %f36,[%l1] !*yaddr1 = ans1
272 add %o4,%l5,%l5 !base addr + index2
273 fmuld %f24,%f22,%f22 !poly2 = (f2*tmp2)*((p3*tmp2 + p2)*tmp2 + p1)
274 faddd %f26,%f38,%f26 !(conup2 - ansu2) + f2
275 cmp %i5,0 !if argcount =0, we are done
279 /*--------------------------------------------------------------------------*/
280 /*--------------------------------------------------------------------------*/
281 /*--------------------------------------------------------------------------*/
284 /*25*/ deccc %i0 !--n
286 mov %i1,%o5 !xuse = x (delay slot)
290 PIC_SET(g5,.COEFFS+8*WSIZE,o5)
294 /*26*/ sethi %hi(0x80000000),%o7 !mask for sign bit
295 sethi %hi(0x43600000),%o1 !big = 0x43600000,0
296 ld [%o5],%o0 !intf = pf[0] = f upper
298 /*28*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
299 andn %o0,%o7,%o0 !intf = fabs(intf)
300 ldd [%o5],%f36 !f = *x into f36
302 /*30*/ sub %o1,%o0,%o1 !(-) if intf > big
303 sub %o0,%o2,%o2 !(-) if intf < small
304 fand %f36,%f32,%f42 !sign1 = sign bit
306 /*31*/ orcc %o1,%o2,%g0 !(-) if either true
307 bneg,pn %icc,.SPECIAL1 !if (-) goto special cases below
308 fabsd %f36,%f36 !abs(f) (delay slot)
309 !----------------------
311 /*32*/ fpadd32 %f36,%f52,%f0 !intf + 0x00008000 (again)
312 ldd [%l5+WSIZE],%f24 !conlo2 = __vlibm_TBL_atan1[index2+1]
314 /*33*/ fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
315 sethi %hi(0x8000),%o7 !rounding bit
316 faddd %f26,%f22,%f22 !((conup2 - ansu2) + f2) + poly2
318 /*34*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
319 sethi %hi(0x7fff0000),%o7 !mask for rounding argument
320 fmuld %f36,%f0,%f10 !f*z
321 fsubd %f36,%f0,%f20 !f - z
323 /*35*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
324 faddd %f22,%f24,%f22 !ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2
326 /*37*/ sub %o0,%l7,%o2 !intz - 0x3f900000
327 fsubd %f10,%f56,%f10 !(f*z - (-1.0))
328 ldd [%o4+%l3],%f6 !conup0 = __vlibm_TBL_atan1[index0]
330 cmp %o0,%l6 !(|f| > 64)
331 ble .ELSE1 !if(|f| > 64) then
332 /*38*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
333 mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
335 /*40*/ fdivd %f56,%f36,%f36 !f = -1.0/f (delay slot)
336 .ELSE1: !else f( |x| >= (1/64))
337 cmp %o0,%l7 !if intf >= 1/64
338 bl .ENDIF1 !if( |x| >= (1/64) ) then...
339 mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
340 add %o3,4,%o1 !index = index + 4
341 /*40*/ fdivd %f20,%f10,%f36 !f = (f - z)/(1.0 + f*z), reduced argument
344 /*41*/sll %o1,3,%l4 !index1 = index
345 mov %i3,%l1 !yaddr1 = address of y
346 fmuld %f34,%f34,%f4 !tmp0= f0*f0
347 faddd %f28,%f22,%f38 !ans2 = ansu2 + ansl2
349 /*44*/add %i1,%i2,%i1 !x += stridex
350 add %i3,%i4,%i3 !y += stridey
351 fmuld %f58,%f4,%f2 !p[3]*tmp0
352 faddd %f6,%f34,%f8 !ansu0 = conup0 + f0
353 for %f38,%f44,%f38 !sign(ans2) = sign of argument
354 std %f38,[%l2] !*yaddr2 = ans2
355 cmp %i5,0 !if argcount =0, we are done
359 /*--------------------------------------------------------------------------*/
360 /*--------------------------------------------------------------------------*/
361 /*--------------------------------------------------------------------------*/
364 /*46*/ deccc %i0 !--n
366 mov %i1,%o5 !xuse = x (delay slot)
370 PIC_SET(g5,.COEFFS+8*WSIZE,o5)
374 /*47*/ sethi %hi(0x80000000),%o7 !mask for sign bit
375 sethi %hi(0x43600000),%o1 !big = 0x43600000,0
376 ld [%o5],%o0 !intf = pf[0] = f upper
378 /*49*/ sethi %hi(0x3e300000),%o2 !small = 0x3e300000,0
379 andn %o0,%o7,%o0 !intf = fabs(intf)
380 ldd [%o5],%f38 !f = *x into f38
382 /*51*/ sub %o1,%o0,%o1 !(-) if intf > big
383 sub %o0,%o2,%o2 !(-) if intf < small
384 fand %f38,%f32,%f44 !sign2 = sign bit
386 /*52*/ orcc %o1,%o2,%g0 !(-) if either true
387 bneg,pn %icc,.SPECIAL2 !if (-) goto special cases below
388 fabsd %f38,%f38 !abs(f) (delay slot)
389 !----------------------
391 /*53*/ fpadd32 %f38,%f52,%f0 !intf + 0x00008000 (again)
392 faddd %f2,%f60,%f2 !p[3]*tmp0 + p[2]
394 /*54*/ sethi %hi(0x8000),%o7 !rounding bit
395 fand %f0,%f54,%f0 !pz[0] = intz = (intf + 0x00008000) & 0x7fff0000 (again)
397 /*55*/ add %o0,%o7,%o0 !intf + 0x00008000 (delay slot)
398 sethi %hi(0x7fff0000),%o7 !mask for rounding argument
399 fmuld %f38,%f0,%f10 !f*z
400 fsubd %f38,%f0,%f20 !f - z
402 /*56*/ and %o0,%o7,%o0 !intz = (intf + 0x00008000) & 0x7fff0000
403 fmuld %f2,%f4,%f2 !(p3*tmp0 + p2)*tmp0
404 fsubd %f6,%f8,%f6 !conup0 - ansu0
406 /*58*/ sub %o0,%l7,%o2 !intz - 0x3f900000
407 fsubd %f10,%f56,%f10 !(f*z - (-1.0))
408 ldd [%o4+%l4],%f16 !conup1 = __vlibm_TBL_atan1[index1]
410 cmp %o0,%l6 !(|f| > 64)
411 ble .ELSE2 !if(|f| > 64) then
412 /*60*/ sra %o2,15,%o3 !index = (intz - 0x3f900000) >> 15
413 mov 2,%o1 !index == 2, point to conup, conlo = pi/2 upper, lower
415 /*61*/ fdivd %f56,%f38,%f38 !f = -1.0/f (delay slot)
416 .ELSE2: !else f( |x| >= (1/64))
417 cmp %o0,%l7 !if intf >= 1/64
418 bl .ENDIF2 !if( |x| >= (1/64) ) then...
419 mov 0,%o1 !index == 0 , point to conup,conlo = 0,0
420 add %o3,4,%o1 !index = index + 4
421 /*61*/ fdivd %f20,%f10,%f38 !f = (f - z)/(1.0 + f*z), reduced argument
425 /*62*/ sll %o1,3,%l5 !index2 = index
426 mov %i3,%l2 !yaddr2 = address of y
427 fmuld %f34,%f4,%f4 !f0*tmp0
428 faddd %f2,%f62,%f2 !(p3*tmp0 + p2)*tmp0 + p1
429 fmuld %f36,%f36,%f14 !tmp1= f1*f1
431 /*65*/add %o4,%l3,%l3 !base addr + index0
432 fmuld %f4,%f2,%f2 !poly0 = (f0*tmp0)*((p3*tmp0 + p2)*tmp0 + p1)
433 faddd %f6,%f34,%f6 !(conup0 - ansu0) + f0
434 fmuld %f58,%f14,%f12 !p[3]*tmp1
435 faddd %f16,%f36,%f18 !ansu1 = conup1 + f1
436 ldd [%l3+WSIZE],%f4 !conlo0 = __vlibm_TBL_atan1[index0+1]
438 /*68*/ add %i1,%i2,%i1 !x += stridex
439 add %i3,%i4,%i3 !y += stridey
440 faddd %f6,%f2,%f2 !((conup0 - ansu0) + f0) + poly0
441 faddd %f12,%f60,%f12 !p[3]*tmp1 + p[2]
443 /*71*/faddd %f2,%f4,%f2 !ansl0 = (((conup0 - ansu)0 + f0) + poly0) + conlo0
444 fmuld %f12,%f14,%f12 !(p3*tmp1 + p2)*tmp1
445 fsubd %f16,%f18,%f16 !conup1 - ansu1
447 /*74*/faddd %f8,%f2,%f34 !ans0 = ansu0 + ansl0
448 fmuld %f36,%f14,%f14 !f1*tmp1
449 faddd %f12,%f62,%f12 !(p3*tmp1 + p2)*tmp1 + p1
451 /*77*/ for %f34,%f40,%f34 !sign(ans0) = sign of argument
452 std %f34,[%l0] !*yaddr0 = ans, always gets stored (delay slot)
453 cmp %i5,0 !if argcount =0, we are done
457 /*--------------------------------------------------------------------------*/
458 /*--------------------------------------------------------------------------*/
459 /*--------------------------------------------------------------------------*/
465 /*--------------------------------------------------------------------------*/
466 /*------------SPECIAL CASE HANDLING FOR LOOP0 ------------------------------*/
467 /*--------------------------------------------------------------------------*/
472 %o2 intf - 0x3e300000
474 %f40,42,44 sign0,sign1,sign2
477 .align 32 !align on I-cache boundary
479 orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
480 bpos 1f !if >=...continue
481 sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
483 faddd %f34,%f50,%f30 !dummy op just to generate exception (delay slot)
485 ld [%o5+4],%o5 !load x lower word
486 sllx %o0,32,%o0 !left justify intf
487 sllx %g1,32,%g1 !left justify Inf
488 or %o0,%o5,%o0 !merge in lower intf
489 cmp %o0,%g1 !if intf > 0x7ff00000 00000000
490 ble,pt %xcc,2f !pass thru if NaN
492 fmuld %f34,%f34,%f34 !...... (x*x) trigger invalid exception
496 faddd %f46,%f48,%f34 !ans = pi/2 upper + pi/2 lower
498 add %i1,%i2,%i1 !x += stridex
499 for %f34,%f40,%f34 !sign(ans) = sign of argument
500 std %f34,[%i3] !*y = ans
501 ba .LOOP0 !keep looping
502 add %i3,%i4,%i3 !y += stridey (delay slot)
504 /*--------------------------------------------------------------------------*/
505 /*-----------SPECIAL CASE HANDLING FOR LOOP1 -------------------------------*/
506 /*--------------------------------------------------------------------------*/
508 .align 32 !align on I-cache boundary
510 orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
511 bpos 1f !if >=...continue
512 sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
514 faddd %f36,%f50,%f30 !dummy op just to generate exception (delay slot)
516 ld [%o5+4],%o5 !load x lower word
517 sllx %o0,32,%o0 !left justify intf
518 sllx %g1,32,%g1 !left justify Inf
519 or %o0,%o5,%o0 !merge in lower intf
520 cmp %o0,%g1 !if intf > 0x7ff00000 00000000
521 ble,pt %xcc,2f !pass thru if NaN
523 fmuld %f36,%f36,%f36 !...... (x*x) trigger invalid exception
527 faddd %f46,%f48,%f36 !ans = pi/2 upper + pi/2 lower
529 add %i1,%i2,%i1 !x += stridex
530 for %f36,%f42,%f36 !sign(ans) = sign of argument
531 std %f36,[%i3] !*y = ans
532 ba .LOOP1 !keep looping
533 add %i3,%i4,%i3 !y += stridey (delay slot)
535 /*--------------------------------------------------------------------------*/
536 /*------------SPECIAL CASE HANDLING FOR LOOP2 ------------------------------*/
537 /*--------------------------------------------------------------------------*/
539 .align 32 !align on I-cache boundary
541 orcc %o2,%g0,%g0 !(-) if intf < 0x3e300000
542 bpos 1f !if >=...continue
543 sethi %hi(0x7ff00000),%g1 !upper word of Inf (we use 64-bit wide int for this)
545 faddd %f38,%f50,%f30 !dummy op just to generate exception (delay slot)
547 ld [%o5+4],%o5 !load x lower word
548 sllx %o0,32,%o0 !left justify intf
549 sllx %g1,32,%g1 !left justify Inf
550 or %o0,%o5,%o0 !merge in lower intf
551 cmp %o0,%g1 !if intf > 0x7ff00000 00000000
552 ble,pt %xcc,2f !pass thru if NaN
554 fmuld %f38,%f38,%f38 !...... (x*x) trigger invalid exception
558 faddd %f46,%f48,%f38 !ans = pi/2 upper + pi/2 lower
560 add %i1,%i2,%i1 !x += stridex
561 for %f38,%f44,%f38 !sign(ans) = sign of argument
562 std %f38,[%i3] !*y = ans
563 ba .LOOP2 !keep looping
564 add %i3,%i4,%i3 !y += stridey
566 /*--------------------------------------------------------------------------*/
567 /*--------------------------------------------------------------------------*/
568 /*--------------------------------------------------------------------------*/
572 ! .ident "03-20-96 Sparc V9 3-way-unrolled version"