Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/gregkh/driver...
[linux-2.6/verdex.git] / arch / m68k / fpsp040 / satan.S
blob20dae222d51e48e06637825067d9b28776564f5c
2 |       satan.sa 3.3 12/19/90
4 |       The entry point satan computes the arctangent of an
5 |       input value. satand does the same except the input value is a
6 |       denormalized number.
8 |       Input: Double-extended value in memory location pointed to by address
9 |               register a0.
11 |       Output: Arctan(X) returned in floating-point register Fp0.
13 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
14 |               64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
15 |               result is subsequently rounded to double precision. The
16 |               result is provably monotonic in double precision.
18 |       Speed: The program satan takes approximately 160 cycles for input
19 |               argument X such that 1/16 < |X| < 16. For the other arguments,
20 |               the program will run no worse than 10% slower.
22 |       Algorithm:
23 |       Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
25 |       Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
26 |               Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
27 |               of X with a bit-1 attached at the 6-th bit position. Define u
28 |               to be u = (X-F) / (1 + X*F).
30 |       Step 3. Approximate arctan(u) by a polynomial poly.
32 |       Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
33 |               calculated beforehand. Exit.
35 |       Step 5. If |X| >= 16, go to Step 7.
37 |       Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
39 |       Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
40 |               Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
43 |               Copyright (C) Motorola, Inc. 1990
44 |                       All Rights Reserved
46 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
47 |       The copyright notice above does not evidence any
48 |       actual or intended publication of such source code.
50 |satan  idnt    2,1 | Motorola 040 Floating Point Software Package
52         |section        8
54 #include "fpsp.h"
|--CONSTANT DATA FOR SATAN.
|--BOUNDS1 HOLDS THE SAME TWO COMPACT-FORM LIMITS (1/16 AND JUST UNDER 16)
|--THAT THE ENTRY CODE COMPARES AGAINST AS IMMEDIATES; THE LABEL ITSELF IS
|--NOT REFERENCED BY NAME IN THIS FILE.
56 BOUNDS1:        .long 0x3FFB8000,0x4002FFFF
|--SINGLE-PRECISION 1.0. THE CODE USES THE SAME BIT PATTERN AS AN IMMEDIATE
|--(FADDS #0x3F800000); THE LABEL IS NOT REFERENCED BY NAME IN THIS FILE.
58 ONE:    .long 0x3F800000
|--UNLABELLED ZERO LONGWORD. NOTE(REVIEW): PRESUMABLY PADDING SO THAT THE
|--8-BYTE CONSTANTS BELOW START ON AN 8-BYTE BOUNDARY -- CONFIRM.
60         .long 0x00000000
|--ATANA1..A3: DOUBLE-PRECISION COEFFICIENTS (TWO LONGWORDS EACH) FOR THE
|--SHORT POLYNOMIAL U + A1*U*V*(A2 + V*(A3 + V)) USED IN ATANMAIN.
62 ATANA3: .long 0xBFF6687E,0x314987D8
63 ATANA2: .long 0x4002AC69,0x34A26DB3
65 ATANA1: .long 0xBFC2476F,0x4E1DA28E
|--ATANB1..B6: DOUBLE-PRECISION COEFFICIENTS FOR THE ODD POLYNOMIAL IN X
|--USED BY ATANSM (|X| < 1/16).
66 ATANB6: .long 0x3FB34444,0x7F876989
68 ATANB5: .long 0xBFB744EE,0x7FAF45DB
69 ATANB4: .long 0x3FBC71C6,0x46940220
71 ATANB3: .long 0xBFC24924,0x921872F9
72 ATANB2: .long 0x3FC99999,0x99998FA9
74 ATANB1: .long 0xBFD55555,0x55555555
|--ATANC1..C5: DOUBLE-PRECISION COEFFICIENTS FOR THE ODD POLYNOMIAL IN
|--X' = -1/X USED BY ATANBIG (|X| >= 16).
75 ATANC5: .long 0xBFB70BF3,0x98539E6A
77 ATANC4: .long 0x3FBC7187,0x962D1D7D
78 ATANC3: .long 0xBFC24924,0x827107B8
80 ATANC2: .long 0x3FC99999,0x9996263E
81 ATANC1: .long 0xBFD55555,0x55555536
|--EXTENDED-PRECISION CONSTANTS, EACH STORED AS FOUR LONGWORDS:
|--SIGN/EXPONENT WORD + ZERO WORD, 64-BIT MANTISSA, AND A TRAILING ZERO LONG.
|--PPIBY2/NPIBY2 ARE +PI/2 AND -PI/2 (ADDED IN ATANBIG, LOADED IN ATANHUGE).
83 PPIBY2: .long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
84 NPIBY2: .long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
|--PTINY/NTINY HAVE BIASED EXPONENT 1 AND MANTISSA 0x8000...0, I.E. A VERY
|--SMALL POSITIVE/NEGATIVE VALUE; ATANHUGE SUBTRACTS THEM FROM +-PI/2.
85 PTINY:  .long 0x00010000,0x80000000,0x00000000,0x00000000
86 NTINY:  .long 0x80010000,0x80000000,0x00000000,0x00000000
|--TABLE OF ATAN(|F|) FOR THE 128 POSSIBLE VALUES OF |F| = 2^K * 1.BBBB1,
|--K = -4, ..., 3. EACH ENTRY IS FOUR LONGWORDS (16 BYTES): SIGN/EXPONENT
|--WORD + ZERO WORD, 64-BIT MANTISSA, AND A TRAILING ZERO LONGWORD.
|--ATANMAIN COMPUTES A BYTE OFFSET OF 16 * INDEX, WHERE THE 7-BIT INDEX IS
|--(K+4) IN ITS UPPER 3 BITS AND THE 4 FRACTION BITS BBBB IN ITS LOWER 4 BITS,
|--AND COPIES ONLY THE FIRST THREE LONGWORDS OF THE ENTRY INTO ATANF.
88 ATANTBL:
89         .long   0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
90         .long   0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
91         .long   0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
92         .long   0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
93         .long   0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
94         .long   0x3FFB0000,0xAB98E943,0x62765619,0x00000000
95         .long   0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
96         .long   0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
97         .long   0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
98         .long   0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
99         .long   0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
100         .long   0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
101         .long   0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
102         .long   0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
103         .long   0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
104         .long   0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
105         .long   0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
106         .long   0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
107         .long   0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
108         .long   0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
109         .long   0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
110         .long   0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
111         .long   0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
112         .long   0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
113         .long   0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
114         .long   0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
115         .long   0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
116         .long   0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
117         .long   0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
118         .long   0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
119         .long   0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
120         .long   0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
121         .long   0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
122         .long   0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
123         .long   0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
124         .long   0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
125         .long   0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
126         .long   0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
127         .long   0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
128         .long   0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
129         .long   0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
130         .long   0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
131         .long   0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
132         .long   0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
133         .long   0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
134         .long   0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
135         .long   0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
136         .long   0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
137         .long   0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
138         .long   0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
139         .long   0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
140         .long   0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
141         .long   0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
142         .long   0x3FFE0000,0x97731420,0x365E538C,0x00000000
143         .long   0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
144         .long   0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
145         .long   0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
146         .long   0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
147         .long   0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
148         .long   0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
149         .long   0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
150         .long   0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
151         .long   0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
152         .long   0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
153         .long   0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
154         .long   0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
155         .long   0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
156         .long   0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
157         .long   0x3FFE0000,0xE8771129,0xC4353259,0x00000000
158         .long   0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
159         .long   0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
160         .long   0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
161         .long   0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
162         .long   0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
163         .long   0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
164         .long   0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
165         .long   0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
166         .long   0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
167         .long   0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
168         .long   0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
169         .long   0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
170         .long   0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
171         .long   0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
172         .long   0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
173         .long   0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
174         .long   0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
175         .long   0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
176         .long   0x3FFF0000,0x9F100575,0x006CC571,0x00000000
177         .long   0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
178         .long   0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
179         .long   0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
180         .long   0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
181         .long   0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
182         .long   0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
183         .long   0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
184         .long   0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
185         .long   0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
186         .long   0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
187         .long   0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
188         .long   0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
189         .long   0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
190         .long   0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
191         .long   0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
192         .long   0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
193         .long   0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
194         .long   0x3FFF0000,0xB525529D,0x562246BD,0x00000000
195         .long   0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
196         .long   0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
197         .long   0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
198         .long   0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
199         .long   0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
200         .long   0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
201         .long   0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
202         .long   0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
203         .long   0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
204         .long   0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
205         .long   0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
206         .long   0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
207         .long   0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
208         .long   0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
209         .long   0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
210         .long   0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
211         .long   0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
212         .long   0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
213         .long   0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
214         .long   0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
215         .long   0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
216         .long   0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
|--FRAME-RELATIVE SCRATCH SLOTS, ADDRESSED AS offset(%a6) BY THE CODE BELOW.
|--FP_SCR1 AND FP_SCR2 ARE NOT DEFINED IN THIS FILE (PRESUMABLY THEY COME
|--FROM fpsp.h -- CONFIRM).
|--X HOLDS A COPY OF THE INPUT IN EXTENDED FORMAT: THE SIGN/EXPONENT WORD AT
|--X, A WORD AT X+2 THAT THE CODE CLEARS BEFORE USING X AS AN OPERAND, THE
|--UPPER MANTISSA LONGWORD AT X+4 AND THE LOWER MANTISSA LONGWORD AT X+8.
218         .set    X,FP_SCR1
219         .set    XDCARE,X+2
220         .set    XFRAC,X+4
221         .set    XFRACLO,X+8
|--ATANF RECEIVES THE THREE LONGWORDS OF THE SELECTED ATANTBL ENTRY
|--(SIGN/EXPONENT AT ATANF, MANTISSA AT ATANFHI AND ATANFLO).
223         .set    ATANF,FP_SCR2
224         .set    ATANFHI,ATANF+4
225         .set    ATANFLO,ATANF+8
|--EXTERNAL ROUTINES THAT THIS FILE BRANCHES TO (DEFINED ELSEWHERE):
228         | xref  t_frcinx
229         |xref   t_extdnrm
231         .global satand
232 satand:
233 |--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
|--NO COMPUTATION IS DONE HERE: CONTROL GOES STRAIGHT TO THE EXTERNAL
|--ROUTINE t_extdnrm, WHICH IS NOT DEFINED IN THIS FILE.
|--NOTE(REVIEW): PRESUMABLY IT RETURNS THE DENORMALIZED INPUT ITSELF AS THE
|--RESULT (ATAN(X) ~ X FOR TINY X) -- CONFIRM AGAINST ITS DEFINITION.
235         bra             t_extdnrm
237         .global satan
238 satan:
239 |--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
|--INPUT:  a0 POINTS TO THE EXTENDED-PRECISION ARGUMENT X.
|--OUTPUT: FP0 (SET BY WHICHEVER PATH IS TAKEN BELOW).
|--THIS PART BUILDS |X| IN "COMPACT FORM" IN d0 (BIASED EXPONENT IN THE UPPER
|--WORD, TOP 16 MANTISSA BITS IN THE LOWER WORD, SIGN CLEARED), SAVES X IN
|--THE SCRATCH SLOT X(%a6), AND DISPATCHES ON THE MAGNITUDE:
|--     |X| < 1/16          -> ATANSM
|--     1/16 <= |X| < 16    -> ATANMAIN
|--     |X| >= 16           -> ATANBIG
241         fmovex          (%a0),%fp0      | ...LOAD INPUT
243         movel           (%a0),%d0       | ...UPPER WORD OF d0 = SIGN AND EXPONENT
244         movew           4(%a0),%d0      | ...LOWER WORD OF d0 = TOP 16 MANTISSA BITS
245         fmovex          %fp0,X(%a6)     | ...KEEP A COPY OF X IN THE SCRATCH SLOT
246         andil           #0x7FFFFFFF,%d0 | ...DROP THE SIGN: d0 IS NOW |X| IN COMPACT FORM
248         cmpil           #0x3FFB8000,%d0         | ...|X| >= 1/16?
249         bges            ATANOK1
250         bra             ATANSM
252 ATANOK1:
253         cmpil           #0x4002FFFF,%d0         | ...|X| < 16 ?
254         bles            ATANMAIN
255         bra             ATANBIG
258 |--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
259 |--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
260 |--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
261 |--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
262 |--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
263 |--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
264 |--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
265 |--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
266 |--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
267 |--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
268 |--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
269 |--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
270 |--WILL INVOLVE A VERY LONG POLYNOMIAL.
272 |--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
273 |--WE CHOOSE F TO BE +-2^K * 1.BBBB1
274 |--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
275 |--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
276 |--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
277 |-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
|--ON ENTRY: FP0 = X, X(%a6) = COPY OF X, d0 = |X| IN COMPACT FORM
|--(ALL SET UP BY THE CODE AT satan).
279 ATANMAIN:
|--TURN THE SAVED COPY OF X INTO F IN PLACE: KEEP SIGN, EXPONENT AND THE TOP
|--5 MANTISSA BITS, FORCE THE 6TH BIT TO 1, ZERO EVERYTHING BELOW.
281         movew           #0x0000,XDCARE(%a6)     | ...CLEAN UP X JUST IN CASE
282         andil           #0xF8000000,XFRAC(%a6)  | ...FIRST 5 BITS
283         oril            #0x04000000,XFRAC(%a6)  | ...SET 6-TH BIT TO 1
284         movel           #0x00000000,XFRACLO(%a6)        | ...LOCATION OF X IS NOW F
286         fmovex          %fp0,%fp1                       | ...FP1 IS X
287         fmulx           X(%a6),%fp1             | ...FP1 IS X*F, NOTE THAT X*F > 0
288         fsubx           X(%a6),%fp0             | ...FP0 IS X-F
289         fadds           #0x3F800000,%fp1                | ...FP1 IS 1 + X*F
290         fdivx           %fp1,%fp0                       | ...FP0 IS U = (X-F)/(1+X*F)
292 |--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
293 |--CREATE ATAN(F) AND STORE IT IN ATANF.
294 |--(NO FP REGISTER IS SAVED HERE; ONLY d2 IS PUSHED AND POPPED. FP2 IS USED
|--AS SCRATCH BELOW. NOTE(REVIEW): PRESUMABLY THE CALLER PRESERVES IT -- CONFIRM.)
|--
|--INDEX COMPUTATION: THE MASK 0x7800 KEEPS THE FOUR MANTISSA BITS BELOW THE
|--LEADING 1 (d0 BITS 14..11). (EXPONENT - 0x3FFB) = K+4 SITS IN d2 BITS
|--18..16 AND IS SHIFTED DOWN TO BITS 17..15, SO THE SUM IS A 7-BIT INDEX IN
|--BITS 17..11. SHIFTING RIGHT BY 7 LEAVES 16 * INDEX, THE BYTE OFFSET OF A
|--16-BYTE ATANTBL ENTRY.
296         movel           %d2,-(%a7)      | ...SAVE d2 TEMPORARILY
297         movel           %d0,%d2         | ...THE EXPO AND 16 BITS OF X
298         andil           #0x00007800,%d0 | ...4 VARYING BITS OF F'S FRACTION
299         andil           #0x7FFF0000,%d2 | ...EXPONENT OF F
300         subil           #0x3FFB0000,%d2 | ...K+4
301         asrl            #1,%d2
302         addl            %d2,%d0         | ...THE 7 BITS IDENTIFYING F
303         asrl            #7,%d0          | ...INDEX INTO TBL OF ATAN(|F|)
304         lea             ATANTBL,%a1
305         addal           %d0,%a1         | ...ADDRESS OF ATAN(|F|)
306         movel           (%a1)+,ATANF(%a6)
307         movel           (%a1)+,ATANFHI(%a6)
308         movel           (%a1)+,ATANFLO(%a6)     | ...ATANF IS NOW ATAN(|F|)
309         movel           X(%a6),%d0              | ...LOAD SIGN AND EXPO. AGAIN
310         andil           #0x80000000,%d0 | ...SIGN(F)
311         orl             %d0,ATANF(%a6)  | ...ATANF IS NOW SIGN(F)*ATAN(|F|)
312         movel           (%a7)+,%d2      | ...RESTORE d2
314 |--THAT'S ALL I HAVE TO DO FOR NOW,
315 |--BUT ALAS, THE DIVIDE IS STILL CRANKING!
317 |--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
318 |--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
319 |--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
320 |--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
321 |--WHAT WE HAVE HERE IS MERELY  A1 = A3, A2 = A1/A3, A3 = A2/A3.
322 |--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
323 |--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
326         fmovex          %fp0,%fp1
327         fmulx           %fp1,%fp1               | ...FP1 IS V = U*U
328         fmoved          ATANA3,%fp2
329         faddx           %fp1,%fp2               | ...A3+V
330         fmulx           %fp1,%fp2               | ...V*(A3+V)
331         fmulx           %fp0,%fp1               | ...U*V
332         faddd           ATANA2,%fp2     | ...A2+V*(A3+V)
333         fmuld           ATANA1,%fp1     | ...A1*U*V
334         fmulx           %fp2,%fp1               | ...A1*U*V*(A2+V*(A3+V))
336         faddx           %fp1,%fp0               | ...ATAN(U), FP1 RELEASED
|--d1 IS NOT WRITTEN ANYWHERE IN THIS FILE. NOTE(REVIEW): PRESUMABLY THE
|--CALLER PASSES THE USER'S FPCR IN d1 -- CONFIRM. IT IS LOADED JUST BEFORE
|--THE FINAL ADD, SO ONLY THAT LAST OPERATION RUNS UNDER THE RESTORED FPCR.
337         fmovel          %d1,%FPCR               |restore users exceptions
338         faddx           ATANF(%a6),%fp0 | ...ATAN(X)
339         bra             t_frcinx
|--NOTE(REVIEW): NO BRANCH IN THIS FILE TARGETS ATANBORS, AND THE PRECEDING
|--INSTRUCTION IS AN UNCONDITIONAL BRANCH, SO THIS LABEL APPEARS UNREACHABLE
|--FROM THE CODE HERE (satan DISPATCHES DIRECTLY TO ATANSM/ATANBIG).
|--CONFIRM NOTHING ELSE RELIES ON IT BEFORE CHANGING IT.
341 ATANBORS:
342 |--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
343 |--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
|--0x3FFF8000 IS 1.0 IN COMPACT FORM. GIVEN THE PRECONDITION ABOVE, ANY |X|
|--GREATER THAN 1 MUST BE >= 16; OTHERWISE EXECUTION FALLS THROUGH TO ATANSM.
344         cmpil           #0x3FFF8000,%d0
345         bgt             ATANBIG | ...I.E. |X| >= 16
347 ATANSM:
348 |--|X| < 1/16
349 |--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
350 |--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
351 |--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
352 |--WHERE Y = X*X, AND Z = Y*Y.
|--ON ENTRY: FP0 = X, X(%a6) = COPY OF X, d0 = |X| IN COMPACT FORM.
|--FP1, FP2 AND FP3 ARE USED AS SCRATCH. THE POLYNOMIAL IS SPLIT INTO TWO
|--INDEPENDENT CHAINS (FP2: EVEN-NUMBERED B'S, FP3/FP1: ODD-NUMBERED B'S)
|--WHOSE INSTRUCTIONS ARE INTERLEAVED.
354         cmpil           #0x3FD78000,%d0         | ...|X| < 2^(-40)?
355         blt             ATANTINY
356 |--COMPUTE POLYNOMIAL
357         fmulx           %fp0,%fp0       | ...FP0 IS Y = X*X
360         movew           #0x0000,XDCARE(%a6)     | ...CLEAR THE UNUSED WORD OF SAVED X
362         fmovex          %fp0,%fp1
363         fmulx           %fp1,%fp1               | ...FP1 IS Z = Y*Y
365         fmoved          ATANB6,%fp2
366         fmoved          ATANB5,%fp3
368         fmulx           %fp1,%fp2               | ...Z*B6
369         fmulx           %fp1,%fp3               | ...Z*B5
371         faddd           ATANB4,%fp2     | ...B4+Z*B6
372         faddd           ATANB3,%fp3     | ...B3+Z*B5
374         fmulx           %fp1,%fp2               | ...Z*(B4+Z*B6)
375         fmulx           %fp3,%fp1               | ...Z*(B3+Z*B5)
377         faddd           ATANB2,%fp2     | ...B2+Z*(B4+Z*B6)
378         faddd           ATANB1,%fp1     | ...B1+Z*(B3+Z*B5)
380         fmulx           %fp0,%fp2               | ...Y*(B2+Z*(B4+Z*B6))
381         fmulx           X(%a6),%fp0             | ...X*Y
383         faddx           %fp2,%fp1               | ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
386         fmulx           %fp1,%fp0       | ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
|--THE FPCR IS LOADED FROM d1 JUST BEFORE THE FINAL ADD, SO ONLY THAT LAST
|--OPERATION RUNS UNDER THE RESTORED CONTROL REGISTER.
388         fmovel          %d1,%FPCR               |restore users exceptions
389         faddx           X(%a6),%fp0             | ...X + X*Y*(...) = ATAN(X)
391         bra             t_frcinx
393 ATANTINY:
394 |--|X| < 2^(-40), ATAN(X) = X
|--THE SAVED COPY OF X IS RELOADED INTO FP0 AFTER THE FPCR HAS BEEN LOADED
|--FROM d1, SO THE LOAD ITSELF IS THE LAST FP OPERATION PERFORMED UNDER THE
|--RESTORED CONTROL REGISTER.
395         movew           #0x0000,XDCARE(%a6)     | ...CLEAR THE UNUSED WORD OF SAVED X
397         fmovel          %d1,%FPCR               |restore users exceptions
398         fmovex          X(%a6),%fp0     |last inst - possible exception set
400         bra             t_frcinx
402 ATANBIG:
403 |--IF |X| > 2^(100), RETURN     SIGN(X)*(PI/2 - TINY). OTHERWISE,
404 |--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
|--ON ENTRY: FP0 = X, d0 = |X| IN COMPACT FORM, a0 STILL POINTS TO THE
|--ORIGINAL INPUT (ITS SIGN BIT IS TESTED AT THE END).
405         cmpil           #0x40638000,%d0         | ...|X| > 2^(100)?
406         bgt             ATANHUGE
408 |--APPROXIMATE ATAN(-1/X) BY
409 |--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
410 |--THIS CAN BE RE-WRITTEN AS
411 |--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
413         fmoves          #0xBF800000,%fp1        | ...LOAD -1
414         fdivx           %fp0,%fp1               | ...FP1 IS -1/X
417 |--DIVIDE IS STILL CRANKING
419         fmovex          %fp1,%fp0               | ...FP0 IS X'
420         fmulx           %fp0,%fp0               | ...FP0 IS Y = X'*X'
421         fmovex          %fp1,X(%a6)             | ...X IS REALLY X'
423         fmovex          %fp0,%fp1
424         fmulx           %fp1,%fp1               | ...FP1 IS Z = Y*Y
426         fmoved          ATANC5,%fp3
427         fmoved          ATANC4,%fp2
429         fmulx           %fp1,%fp3               | ...Z*C5
430         fmulx           %fp1,%fp2               | ...Z*C4
432         faddd           ATANC3,%fp3     | ...C3+Z*C5
433         faddd           ATANC2,%fp2     | ...C2+Z*C4
435         fmulx           %fp3,%fp1               | ...Z*(C3+Z*C5), FP3 RELEASED
436         fmulx           %fp0,%fp2               | ...Y*(C2+Z*C4)
438         faddd           ATANC1,%fp1     | ...C1+Z*(C3+Z*C5)
439         fmulx           X(%a6),%fp0             | ...X'*Y
441         faddx           %fp2,%fp1               | ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
444         fmulx           %fp1,%fp0               | ...X'*Y*([C1+Z*(C3+Z*C5)]
445 |                                       ...     +[Y*(C2+Z*C4)])
446         faddx           X(%a6),%fp0             | ...X' + X'*Y*(...) = ATAN(X')
448         fmovel          %d1,%FPCR               |restore users exceptions
|--BIT 7 OF THE FIRST BYTE OF THE ORIGINAL INPUT IS THE SIGN OF X; IT PICKS
|--WHICH OF +PI/2 OR -PI/2 IS ADDED TO ATAN(X').
450         btstb           #7,(%a0)
451         beqs            pos_big
453 neg_big:
454         faddx           NPIBY2,%fp0             | ...-PI/2 + ATAN(X')
455         bra             t_frcinx
457 pos_big:
458         faddx           PPIBY2,%fp0             | ...+PI/2 + ATAN(X')
459         bra             t_frcinx
461 ATANHUGE:
462 |--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
|--REACHED FROM ATANBIG WHEN |X| > 2^(100). BIT 7 OF THE FIRST BYTE OF THE
|--ORIGINAL INPUT (STILL ADDRESSED BY a0) IS THE SIGN OF X.
|--THE CONSTANT IS LOADED FIRST, THEN THE FPCR IS LOADED FROM d1, SO THE
|--SUBTRACTION OF TINY IS THE ONLY OPERATION THAT RUNS UNDER THE RESTORED
|--CONTROL REGISTER.
463         btstb           #7,(%a0)
464         beqs            pos_huge
466 neg_huge:
467         fmovex          NPIBY2,%fp0             | ...-PI/2
468         fmovel          %d1,%fpcr               | ...RESTORE FPCR FROM d1
469         fsubx           NTINY,%fp0              | ...-PI/2 - (-TINY)
470         bra             t_frcinx
472 pos_huge:
473         fmovex          PPIBY2,%fp0             | ...+PI/2
474         fmovel          %d1,%fpcr               | ...RESTORE FPCR FROM d1
475         fsubx           PTINY,%fp0              | ...+PI/2 - TINY
476         bra             t_frcinx
478         |end