mm-only debug patch...
[mmotm.git] / arch / m68k / fpsp040 / stwotox.S
blob0d5e6a1436a638c59f0fc123974f43c418169d69
2 |       stwotox.sa 3.1 12/10/90
4 |       stwotox  --- 2**X
5 |       stwotoxd --- 2**X for denormalized X
6 |       stentox  --- 10**X
7 |       stentoxd --- 10**X for denormalized X
9 |       Input: Double-extended number X in location pointed to
10 |               by address register a0.
12 |       Output: The function values are returned in Fp0.
14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |               result is subsequently rounded to double precision. The
17 |               result is provably monotonic in double precision.
19 |       Speed: The program stwotox takes approximately 190 cycles and the
20 |               program stentox takes approximately 200 cycles.
22 |       Algorithm:
24 |       twotox
25 |       1. If |X| > 16480, go to ExpBig.
27 |       2. If |X| < 2**(-70), go to ExpSm.
29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30 |               decompose N as
31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
33 |       4. Overwrite r := r * log2. Then
34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35 |               Go to expr to compute that expression.
37 |       tentox
38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
40 |       2. If |X| < 2**(-70), go to ExpSm.
42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43 |               N := round-to-int(y). Decompose N as
44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
46 |       4. Define r as
47 |               r := ((X - N*L1)-N*L2) * L10
48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
49 |               and L10 is the natural log of 10. Then
50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51 |               Go to expr to compute that expression.
53 |       expr
54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
56 |       2. Overwrite Fact1 and Fact2 by
57 |               Fact1 := 2**(M) * Fact1
58 |               Fact2 := 2**(M) * Fact2
59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
61 |       3. Calculate P where 1 + P approximates exp(r):
62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
64 |       4. Let AdjFact := 2**(M'). Return
65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66 |               Exit.
68 |       ExpBig
69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70 |               underflow by Tiny * Tiny.
72 |       ExpSm
73 |       1. Return 1 + X.
76 |               Copyright (C) Motorola, Inc. 1990
77 |                       All Rights Reserved
79 |       For details on the license for this file, please see the
80 |       file, README, in this same directory.
82 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
84         |section        8
86 #include "fpsp.h"
88 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
89 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
91 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
92 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
94 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
96 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
98 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
100 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
101 EXPA4:  .long 0x3F811112,0x302C712C
102 EXPA3:  .long 0x3FA55555,0x55554CC1
103 EXPA2:  .long 0x3FC55555,0x55554A54
104 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
106 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
107 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
109 EXPTBL:
110         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
111         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
112         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
113         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
114         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
115         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
116         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
117         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
118         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
119         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
120         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
121         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
122         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
123         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
124         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
125         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
126         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
127         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
128         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
129         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
130         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
131         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
132         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
133         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
134         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
135         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
136         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
137         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
138         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
139         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
140         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
141         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
142         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
143         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
144         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
145         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
146         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
147         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
148         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
149         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
150         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
151         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
152         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
153         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
154         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
155         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
156         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
157         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
158         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
159         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
160         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
161         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
162         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
163         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
164         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
165         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
166         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
167         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
168         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
169         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
170         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
171         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
172         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
173         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
175         .set    N,L_SCR1
177         .set    X,FP_SCR1
178         .set    XDCARE,X+2
179         .set    XFRAC,X+4
181         .set    ADJFACT,FP_SCR2
183         .set    FACT1,FP_SCR3
184         .set    FACT1HI,FACT1+4
185         .set    FACT1LOW,FACT1+8
187         .set    FACT2,FP_SCR4
188         .set    FACT2HI,FACT2+4
189         .set    FACT2LOW,FACT2+8
191         | xref  t_unfl
192         |xref   t_ovfl
193         |xref   t_frcinx
195         .global stwotoxd
196 stwotoxd:
197 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
199         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
200         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
201         movel           (%a0),%d0
202         orl             #0x00800001,%d0
203         fadds           %d0,%fp0
204         bra             t_frcinx
206         .global stwotox
207 stwotox:
208 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
209         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
211         movel           (%a0),%d0
212         movew           4(%a0),%d0
213         fmovex          %fp0,X(%a6)
214         andil           #0x7FFFFFFF,%d0
216         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
217         bges            TWOOK1
218         bra             EXPBORS
220 TWOOK1:
221         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
222         bles            TWOMAIN
223         bra             EXPBORS
226 TWOMAIN:
227 |--USUAL CASE, 2^(-70) <= |X| <= 16480
229         fmovex          %fp0,%fp1
230         fmuls           #0x42800000,%fp1  | ...64 * X
232         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
233         movel           %d2,-(%sp)
234         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
235         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
236         movel           N(%a6),%d0
237         movel           %d0,%d2
238         andil           #0x3F,%d0               | ...D0 IS J
239         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
240         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
241         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
242         movel           %d2,%d0
243         asrl            #1,%d0          | ...D0 IS M
244         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
245         addil           #0x3FFF,%d2
246         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
247         movel           (%sp)+,%d2
248 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
249 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
250 |--ADJFACT = 2^(M').
251 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
253         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
254         movel           (%a1)+,FACT1(%a6)
255         movel           (%a1)+,FACT1HI(%a6)
256         movel           (%a1)+,FACT1LOW(%a6)
257         movew           (%a1)+,FACT2(%a6)
258         clrw            FACT2+2(%a6)
260         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
262         movew           (%a1)+,FACT2HI(%a6)
263         clrw            FACT2HI+2(%a6)
264         clrl            FACT2LOW(%a6)
265         addw            %d0,FACT1(%a6)
267         fmulx           LOG2,%fp0       | ...FP0 IS R
268         addw            %d0,FACT2(%a6)
270         bra             expr
272 EXPBORS:
273 |--FPCR, D0 SAVED
274         cmpil           #0x3FFF8000,%d0
275         bgts            EXPBIG
277 EXPSM:
278 |--|X| IS SMALL, RETURN 1 + X
280         fmovel          %d1,%FPCR               |restore users exceptions
281         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
283         bra             t_frcinx
285 EXPBIG:
286 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
287 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
288         movel           X(%a6),%d0
289         cmpil           #0,%d0
290         blts            EXPNEG
292         bclrb           #7,(%a0)                |t_ovfl expects positive value
293         bra             t_ovfl
295 EXPNEG:
296         bclrb           #7,(%a0)                |t_unfl expects positive value
297         bra             t_unfl
299         .global stentoxd
300 stentoxd:
301 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
303         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
304         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
305         movel           (%a0),%d0
306         orl             #0x00800001,%d0
307         fadds           %d0,%fp0
308         bra             t_frcinx
310         .global stentox
311 stentox:
312 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
313         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
315         movel           (%a0),%d0
316         movew           4(%a0),%d0
317         fmovex          %fp0,X(%a6)
318         andil           #0x7FFFFFFF,%d0
320         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
321         bges            TENOK1
322         bra             EXPBORS
324 TENOK1:
325         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
326         bles            TENMAIN
327         bra             EXPBORS
329 TENMAIN:
330 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
332         fmovex          %fp0,%fp1
333         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
335         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
336         movel           %d2,-(%sp)
337         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
338         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
339         movel           N(%a6),%d0
340         movel           %d0,%d2
341         andil           #0x3F,%d0               | ...D0 IS J
342         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
343         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
344         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
345         movel           %d2,%d0
346         asrl            #1,%d0          | ...D0 IS M
347         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
348         addil           #0x3FFF,%d2
349         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
350         movel           (%sp)+,%d2
352 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
353 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
354 |--ADJFACT = 2^(M').
355 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
357         fmovex          %fp1,%fp2
359         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
360         movel           (%a1)+,FACT1(%a6)
362         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
364         movel           (%a1)+,FACT1HI(%a6)
365         movel           (%a1)+,FACT1LOW(%a6)
366         fsubx           %fp1,%fp0               | ...X - N L_LEAD
367         movew           (%a1)+,FACT2(%a6)
369         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
371         clrw            FACT2+2(%a6)
372         movew           (%a1)+,FACT2HI(%a6)
373         clrw            FACT2HI+2(%a6)
374         clrl            FACT2LOW(%a6)
376         fmulx           LOG10,%fp0      | ...FP0 IS R
378         addw            %d0,FACT1(%a6)
379         addw            %d0,FACT2(%a6)
381 expr:
382 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
383 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
384 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
385 |--     2**(M'+M) * 2**(J/64) * EXP(R)
387         fmovex          %fp0,%fp1
388         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
390         fmoved          EXPA5,%fp2      | ...FP2 IS A5
391         fmoved          EXPA4,%fp3      | ...FP3 IS A4
393         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
394         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
396         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
397         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
399         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
400         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
402         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
403         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
405         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
406         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
408         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
411 |--FINAL RECONSTRUCTION PROCESS
412 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
414         fmulx           FACT1(%a6),%fp0
415         faddx           FACT2(%a6),%fp0
416         faddx           FACT1(%a6),%fp0
418         fmovel          %d1,%FPCR               |restore users exceptions
419         clrw            ADJFACT+2(%a6)
420         movel           #0x80000000,ADJFACT+4(%a6)
421         clrl            ADJFACT+8(%a6)
422         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
424         bra             t_frcinx
426         |end