* better
[mascara-docs.git] / i386 / linux-2.3.21 / arch / m68k / fpsp040 / stwotox.S
blobda06e31c1945078dd2fc080a9e05d2fe3df9949c
2 |       stwotox.sa 3.1 12/10/90
4 |       stwotox  --- 2**X
5 |       stwotoxd --- 2**X for denormalized X
6 |       stentox  --- 10**X
7 |       stentoxd --- 10**X for denormalized X
9 |       Input: Double-extended number X in location pointed to
10 |               by address register a0.
12 |       Output: The function values are returned in Fp0.
14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |               result is subsequently rounded to double precision. The
17 |               result is provably monotonic in double precision.
19 |       Speed: The program stwotox takes approximately 190 cycles and the
20 |               program stentox takes approximately 200 cycles.
22 |       Algorithm:
24 |       twotox
25 |       1. If |X| > 16480, go to ExpBig.
27 |       2. If |X| < 2**(-70), go to ExpSm.
29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30 |               decompose N as
31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
33 |       4. Overwrite r := r * log2. Then
34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35 |               Go to expr to compute that expression.
37 |       tentox
38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
40 |       2. If |X| < 2**(-70), go to ExpSm.
42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43 |               N := round-to-int(y). Decompose N as
44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
46 |       4. Define r as
47 |               r := ((X - N*L1)-N*L2) * L10
48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
49 |               and L10 is the natural log of 10. Then
50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51 |               Go to expr to compute that expression.
53 |       expr
54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
56 |       2. Overwrite Fact1 and Fact2 by
57 |               Fact1 := 2**(M) * Fact1
58 |               Fact2 := 2**(M) * Fact2
59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
61 |       3. Calculate P where 1 + P approximates exp(r):
62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
64 |       4. Let AdjFact := 2**(M'). Return
65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66 |               Exit.
68 |       ExpBig
69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70 |               underflow by Tiny * Tiny.
72 |       ExpSm
73 |       1. Return 1 + X.
76 |               Copyright (C) Motorola, Inc. 1990
77 |                       All Rights Reserved
79 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA 
80 |       The copyright notice above does not evidence any  
81 |       actual or intended publication of such source code.
83 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
85         |section        8
87         .include "fpsp.h"
89 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
90 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
92 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
93 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
95 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
97 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
99 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
101 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
102 EXPA4:  .long 0x3F811112,0x302C712C
103 EXPA3:  .long 0x3FA55555,0x55554CC1
104 EXPA2:  .long 0x3FC55555,0x55554A54
105 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
107 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
110 EXPTBL:
111         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
112         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
113         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
114         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
115         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
116         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
117         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
118         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
119         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
120         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
121         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
122         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
123         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
124         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
125         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
126         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
127         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
128         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
129         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
130         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
131         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
132         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
133         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
134         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
135         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
136         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
137         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
138         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
139         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
140         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
141         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
142         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
143         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
144         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
145         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
146         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
147         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
148         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
149         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
150         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
151         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
152         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
153         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
154         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
155         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
156         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
157         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
158         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
159         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
160         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
161         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
162         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
163         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
164         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
165         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
166         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
167         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
168         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
169         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
170         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
171         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
172         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
173         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
174         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
176         .set    N,L_SCR1
178         .set    X,FP_SCR1
179         .set    XDCARE,X+2
180         .set    XFRAC,X+4
182         .set    ADJFACT,FP_SCR2
184         .set    FACT1,FP_SCR3
185         .set    FACT1HI,FACT1+4
186         .set    FACT1LOW,FACT1+8
188         .set    FACT2,FP_SCR4
189         .set    FACT2HI,FACT2+4
190         .set    FACT2LOW,FACT2+8
192         | xref  t_unfl
193         |xref   t_ovfl
194         |xref   t_frcinx
196         .global stwotoxd
197 stwotoxd:
198 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
200         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
201         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
202         movel           (%a0),%d0
203         orl             #0x00800001,%d0
204         fadds           %d0,%fp0
205         bra             t_frcinx
207         .global stwotox
208 stwotox:
209 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
210         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
212         movel           (%a0),%d0
213         movew           4(%a0),%d0
214         fmovex          %fp0,X(%a6)
215         andil           #0x7FFFFFFF,%d0
217         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
218         bges            TWOOK1
219         bra             EXPBORS
221 TWOOK1:
222         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
223         bles            TWOMAIN
224         bra             EXPBORS
225         
227 TWOMAIN:
228 |--USUAL CASE, 2^(-70) <= |X| <= 16480
230         fmovex          %fp0,%fp1
231         fmuls           #0x42800000,%fp1  | ...64 * X
232         
233         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
234         movel           %d2,-(%sp)
235         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
236         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
237         movel           N(%a6),%d0
238         movel           %d0,%d2
239         andil           #0x3F,%d0               | ...D0 IS J
240         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
241         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
242         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
243         movel           %d2,%d0
244         asrl            #1,%d0          | ...D0 IS M
245         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
246         addil           #0x3FFF,%d2
247         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
248         movel           (%sp)+,%d2
249 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
250 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
251 |--ADJFACT = 2^(M').
252 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
254         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
255         movel           (%a1)+,FACT1(%a6)
256         movel           (%a1)+,FACT1HI(%a6)
257         movel           (%a1)+,FACT1LOW(%a6)
258         movew           (%a1)+,FACT2(%a6)
259         clrw            FACT2+2(%a6)
261         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
263         movew           (%a1)+,FACT2HI(%a6)
264         clrw            FACT2HI+2(%a6)
265         clrl            FACT2LOW(%a6)
266         addw            %d0,FACT1(%a6)
267         
268         fmulx           LOG2,%fp0       | ...FP0 IS R
269         addw            %d0,FACT2(%a6)
271         bra             expr
273 EXPBORS:
274 |--FPCR, D0 SAVED
275         cmpil           #0x3FFF8000,%d0
276         bgts            EXPBIG
278 EXPSM:
279 |--|X| IS SMALL, RETURN 1 + X
281         fmovel          %d1,%FPCR               |restore users exceptions
282         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
284         bra             t_frcinx
286 EXPBIG:
287 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
288 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
289         movel           X(%a6),%d0
290         cmpil           #0,%d0
291         blts            EXPNEG
293         bclrb           #7,(%a0)                |t_ovfl expects positive value
294         bra             t_ovfl
296 EXPNEG:
297         bclrb           #7,(%a0)                |t_unfl expects positive value
298         bra             t_unfl
300         .global stentoxd
301 stentoxd:
302 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
304         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
305         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
306         movel           (%a0),%d0
307         orl             #0x00800001,%d0
308         fadds           %d0,%fp0
309         bra             t_frcinx
311         .global stentox
312 stentox:
313 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
314         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
316         movel           (%a0),%d0
317         movew           4(%a0),%d0
318         fmovex          %fp0,X(%a6)
319         andil           #0x7FFFFFFF,%d0
321         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
322         bges            TENOK1
323         bra             EXPBORS
325 TENOK1:
326         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
327         bles            TENMAIN
328         bra             EXPBORS
330 TENMAIN:
331 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
333         fmovex          %fp0,%fp1
334         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
335         
336         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
337         movel           %d2,-(%sp)
338         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
339         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
340         movel           N(%a6),%d0
341         movel           %d0,%d2
342         andil           #0x3F,%d0               | ...D0 IS J
343         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
344         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
345         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
346         movel           %d2,%d0
347         asrl            #1,%d0          | ...D0 IS M
348         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
349         addil           #0x3FFF,%d2
350         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
351         movel           (%sp)+,%d2
353 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
354 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
355 |--ADJFACT = 2^(M').
356 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
358         fmovex          %fp1,%fp2
360         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
361         movel           (%a1)+,FACT1(%a6)
363         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
365         movel           (%a1)+,FACT1HI(%a6)
366         movel           (%a1)+,FACT1LOW(%a6)
367         fsubx           %fp1,%fp0               | ...X - N L_LEAD
368         movew           (%a1)+,FACT2(%a6)
370         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
372         clrw            FACT2+2(%a6)
373         movew           (%a1)+,FACT2HI(%a6)
374         clrw            FACT2HI+2(%a6)
375         clrl            FACT2LOW(%a6)
377         fmulx           LOG10,%fp0      | ...FP0 IS R
378         
379         addw            %d0,FACT1(%a6)
380         addw            %d0,FACT2(%a6)
382 expr:
383 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
384 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
385 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
386 |--     2**(M'+M) * 2**(J/64) * EXP(R)
388         fmovex          %fp0,%fp1
389         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
391         fmoved          EXPA5,%fp2      | ...FP2 IS A5
392         fmoved          EXPA4,%fp3      | ...FP3 IS A4
394         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
395         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
397         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
398         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
400         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
401         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
403         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
404         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
406         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
407         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
408         
409         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
410         
412 |--FINAL RECONSTRUCTION PROCESS
413 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
415         fmulx           FACT1(%a6),%fp0
416         faddx           FACT2(%a6),%fp0
417         faddx           FACT1(%a6),%fp0
419         fmovel          %d1,%FPCR               |restore users exceptions
420         clrw            ADJFACT+2(%a6)
421         movel           #0x80000000,ADJFACT+4(%a6)
422         clrl            ADJFACT+8(%a6)
423         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
425         bra             t_frcinx
427         |end