Linux-2.6.12-rc2
[linux-2.6/next.git] / arch / m68k / fpsp040 / slogn.S
blob2aaa0725c035d98696e962f2d662359b54bd4ccc
2 |       slogn.sa 3.1 12/10/90
4 |       slogn computes the natural logarithm of an
5 |       input value. slognd does the same except the input value is a
6 |       denormalized number. slognp1 computes log(1+X), and slognp1d
7 |       computes log(1+X) for denormalized X.
9 |       Input: Double-extended value in memory location pointed to by address
10 |               register a0.
12 |       Output: log(X) or log(1+X) returned in floating-point register Fp0.
14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |               result is subsequently rounded to double precision. The
17 |               result is provably monotonic in double precision.
19 |       Speed: The program slogn takes approximately 190 cycles for input
20 |               argument X such that |X-1| >= 1/16, which is the usual
21 |               situation. For those arguments, slognp1 takes approximately
22 |                210 cycles. For the less common arguments, the program will
23 |                run no worse than 10% slower.
25 |       Algorithm:
26 |       LOGN:
27 |       Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
28 |               u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
30 |       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
31 |               significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
32 |               2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
34 |       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
35 |               log(1+u) = poly.
37 |       Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
38 |               by k*log(2) + (log(F) + poly). The values of log(F) are calculated
39 |               beforehand and stored in the program.
41 |       lognp1:
42 |       Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
43 |               u where u = 2X/(2+X). Otherwise, move on to Step 2.
45 |       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
46 |               of the algorithm for LOGN and compute log(1+X) as
47 |               k*log(2) + log(F) + poly where poly approximates log(1+u),
48 |               u = (Y-F)/F.
50 |       Implementation Notes:
51 |       Note 1. There are 64 different possible values for F, thus 64 log(F)'s
52 |               need to be tabulated. Moreover, the values of 1/F are also
53 |               tabulated so that the division in (Y-F)/F can be performed by a
54 |               multiplication.
56 |       Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value
57 |               Y-F has to be calculated carefully when 1/2 <= X < 3/2.
59 |       Note 3. To fully exploit the pipeline, polynomials are usually separated
60 |               into two parts evaluated independently before being added up.
63 |               Copyright (C) Motorola, Inc. 1990
64 |                       All Rights Reserved
66 |       THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
67 |       The copyright notice above does not evidence any
68 |       actual or intended publication of such source code.
70 |slogn  idnt    2,1 | Motorola 040 Floating Point Software Package
72         |section        8
74 #include "fpsp.h"
76 BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841
77 BOUNDS2:  .long 0x3FFE8000,0x3FFFC000
79 LOGOF2: .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
81 one:    .long 0x3F800000
82 zero:   .long 0x00000000
83 infty:  .long 0x7F800000
84 negone: .long 0xBF800000
86 LOGA6:  .long 0x3FC2499A,0xB5E4040B
87 LOGA5:  .long 0xBFC555B5,0x848CB7DB
89 LOGA4:  .long 0x3FC99999,0x987D8730
90 LOGA3:  .long 0xBFCFFFFF,0xFF6F7E97
92 LOGA2:  .long 0x3FD55555,0x555555a4
93 LOGA1:  .long 0xBFE00000,0x00000008
95 LOGB5:  .long 0x3F175496,0xADD7DAD6
96 LOGB4:  .long 0x3F3C71C2,0xFE80C7E0
98 LOGB3:  .long 0x3F624924,0x928BCCFF
99 LOGB2:  .long 0x3F899999,0x999995EC
101 LOGB1:  .long 0x3FB55555,0x55555555
102 TWO:    .long 0x40000000,0x00000000
104 LTHOLD: .long 0x3f990000,0x80000000,0x00000000,0x00000000
106 LOGTBL:
107         .long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
108         .long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
109         .long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
110         .long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
111         .long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
112         .long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
113         .long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
114         .long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
115         .long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
116         .long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
117         .long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
118         .long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
119         .long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
120         .long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
121         .long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
122         .long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
123         .long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
124         .long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
125         .long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
126         .long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
127         .long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
128         .long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
129         .long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
130         .long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
131         .long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
132         .long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
133         .long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
134         .long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
135         .long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
136         .long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
137         .long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
138         .long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
139         .long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
140         .long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
141         .long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
142         .long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
143         .long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
144         .long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
145         .long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
146         .long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
147         .long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
148         .long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
149         .long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
150         .long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
151         .long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
152         .long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
153         .long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
154         .long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
155         .long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
156         .long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
157         .long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
158         .long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
159         .long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
160         .long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
161         .long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
162         .long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
163         .long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
164         .long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
165         .long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
166         .long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
167         .long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
168         .long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
169         .long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
170         .long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
171         .long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
172         .long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
173         .long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
174         .long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
175         .long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
176         .long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
177         .long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
178         .long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
179         .long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
180         .long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
181         .long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
182         .long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
183         .long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
184         .long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
185         .long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
186         .long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
187         .long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
188         .long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
189         .long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
190         .long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
191         .long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
192         .long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
193         .long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
194         .long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
195         .long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
196         .long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
197         .long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
198         .long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
199         .long  0x3FFE0000,0x94458094,0x45809446,0x00000000
200         .long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
201         .long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
202         .long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
203         .long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
204         .long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
205         .long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
206         .long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
207         .long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
208         .long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
209         .long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
210         .long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
211         .long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
212         .long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
213         .long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
214         .long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
215         .long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
216         .long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
217         .long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
218         .long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
219         .long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
220         .long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
221         .long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
222         .long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
223         .long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
224         .long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
225         .long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
226         .long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
227         .long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
228         .long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
229         .long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
230         .long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
231         .long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
232         .long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
233         .long  0x3FFE0000,0x80808080,0x80808081,0x00000000
234         .long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
236         .set    ADJK,L_SCR1
238         .set    X,FP_SCR1
239         .set    XDCARE,X+2
240         .set    XFRAC,X+4
242         .set    F,FP_SCR2
243         .set    FFRAC,F+4
245         .set    KLOG2,FP_SCR3
247         .set    SAVEU,FP_SCR4
249         | xref  t_frcinx
250         |xref   t_extdnrm
251         |xref   t_operr
252         |xref   t_dz
254         .global slognd
255 slognd:
256 |--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
258         movel           #-100,ADJK(%a6) | ...INPUT = 2^(ADJK) * FP0
260 |----normalize the input value by left shifting k bits (k to be determined
261 |----below), adjusting exponent and storing -k to  ADJK
262 |----the value TWOTO100 is no longer needed.
263 |----Note that this code assumes the denormalized input is NON-ZERO.
265      moveml     %d2-%d7,-(%a7)          | ...save some registers
266      movel      #0x00000000,%d3         | ...D3 is exponent of smallest norm. #
267      movel      4(%a0),%d4
268      movel      8(%a0),%d5              | ...(D4,D5) is (Hi_X,Lo_X)
269      clrl       %d2                     | ...D2 used for holding K
271      tstl       %d4
272      bnes       HiX_not0
274 HiX_0:
275      movel      %d5,%d4
276      clrl       %d5
277      movel      #32,%d2
278      clrl       %d6
279      bfffo      %d4{#0:#32},%d6
280      lsll      %d6,%d4
281      addl       %d6,%d2                 | ...(D3,D4,D5) is normalized
283      movel      %d3,X(%a6)
284      movel      %d4,XFRAC(%a6)
285      movel      %d5,XFRAC+4(%a6)
286      negl       %d2
287      movel      %d2,ADJK(%a6)
288      fmovex     X(%a6),%fp0
289      moveml     (%a7)+,%d2-%d7          | ...restore registers
290      lea        X(%a6),%a0
291      bras       LOGBGN                  | ...begin regular log(X)
294 HiX_not0:
295      clrl       %d6
296      bfffo      %d4{#0:#32},%d6         | ...find first 1
297      movel      %d6,%d2                 | ...get k
298      lsll       %d6,%d4
299      movel      %d5,%d7                 | ...a copy of D5
300      lsll       %d6,%d5
301      negl       %d6
302      addil      #32,%d6
303      lsrl       %d6,%d7
304      orl        %d7,%d4                 | ...(D3,D4,D5) normalized
306      movel      %d3,X(%a6)
307      movel      %d4,XFRAC(%a6)
308      movel      %d5,XFRAC+4(%a6)
309      negl       %d2
310      movel      %d2,ADJK(%a6)
311      fmovex     X(%a6),%fp0
312      moveml     (%a7)+,%d2-%d7          | ...restore registers
313      lea        X(%a6),%a0
314      bras       LOGBGN                  | ...begin regular log(X)
317         .global slogn
318 slogn:
319 |--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
321         fmovex          (%a0),%fp0      | ...LOAD INPUT
322         movel           #0x00000000,ADJK(%a6)
324 LOGBGN:
325 |--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
326 |--A FINITE, NON-ZERO, NORMALIZED NUMBER.
328         movel   (%a0),%d0
329         movew   4(%a0),%d0
331         movel   (%a0),X(%a6)
332         movel   4(%a0),X+4(%a6)
333         movel   8(%a0),X+8(%a6)
335         cmpil   #0,%d0          | ...CHECK IF X IS NEGATIVE
336         blt     LOGNEG          | ...LOG OF NEGATIVE ARGUMENT IS INVALID
337         cmp2l   BOUNDS1,%d0     | ...X IS POSITIVE, CHECK IF X IS NEAR 1
338         bcc     LOGNEAR1        | ...BOUNDS IS ROUGHLY [15/16, 17/16]
340 LOGMAIN:
341 |--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
343 |--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
344 |--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
345 |--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
346 |--                      = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
347 |--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
348 |--LOG(1+U) CAN BE VERY EFFICIENT.
349 |--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
350 |--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
352 |--GET K, Y, F, AND ADDRESS OF 1/F.
353         asrl    #8,%d0
354         asrl    #8,%d0          | ...SHIFTED 16 BITS, BIASED EXPO. OF X
355         subil   #0x3FFF,%d0     | ...THIS IS K
356         addl    ADJK(%a6),%d0   | ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
357         lea     LOGTBL,%a0      | ...BASE ADDRESS OF 1/F AND LOG(F)
358         fmovel  %d0,%fp1                | ...CONVERT K TO FLOATING-POINT FORMAT
360 |--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
361         movel   #0x3FFF0000,X(%a6)      | ...X IS NOW Y, I.E. 2^(-K)*X
362         movel   XFRAC(%a6),FFRAC(%a6)
363         andil   #0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
364         oril    #0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
365         movel   FFRAC(%a6),%d0  | ...READY TO GET ADDRESS OF 1/F
366         andil   #0x7E000000,%d0
367         asrl    #8,%d0
368         asrl    #8,%d0
369         asrl    #4,%d0          | ...SHIFTED 20, D0 IS THE DISPLACEMENT
370         addal   %d0,%a0         | ...A0 IS THE ADDRESS FOR 1/F
372         fmovex  X(%a6),%fp0
373         movel   #0x3fff0000,F(%a6)
374         clrl    F+8(%a6)
375         fsubx   F(%a6),%fp0             | ...Y-F
376         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2 WHILE FP0 IS NOT READY
377 |--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
378 |--REGISTERS SAVED: FPCR, FP1, FP2
380 LP1CONT1:
381 |--AN RE-ENTRY POINT FOR LOGNP1
382         fmulx   (%a0),%fp0      | ...FP0 IS U = (Y-F)/F
383         fmulx   LOGOF2,%fp1     | ...GET K*LOG2 WHILE FP0 IS NOT READY
384         fmovex  %fp0,%fp2
385         fmulx   %fp2,%fp2               | ...FP2 IS V=U*U
386         fmovex  %fp1,KLOG2(%a6) | ...PUT K*LOG2 IN MEMORY, FREE FP1
388 |--LOG(1+U) IS APPROXIMATED BY
389 |--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
390 |--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
392         fmovex  %fp2,%fp3
393         fmovex  %fp2,%fp1
395         fmuld   LOGA6,%fp1      | ...V*A6
396         fmuld   LOGA5,%fp2      | ...V*A5
398         faddd   LOGA4,%fp1      | ...A4+V*A6
399         faddd   LOGA3,%fp2      | ...A3+V*A5
401         fmulx   %fp3,%fp1               | ...V*(A4+V*A6)
402         fmulx   %fp3,%fp2               | ...V*(A3+V*A5)
404         faddd   LOGA2,%fp1      | ...A2+V*(A4+V*A6)
405         faddd   LOGA1,%fp2      | ...A1+V*(A3+V*A5)
407         fmulx   %fp3,%fp1               | ...V*(A2+V*(A4+V*A6))
408         addal   #16,%a0         | ...ADDRESS OF LOG(F)
409         fmulx   %fp3,%fp2               | ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
411         fmulx   %fp0,%fp1               | ...U*V*(A2+V*(A4+V*A6))
412         faddx   %fp2,%fp0               | ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
414         faddx   (%a0),%fp1      | ...LOG(F)+U*V*(A2+V*(A4+V*A6))
415         fmovemx  (%sp)+,%fp2-%fp2/%fp3  | ...RESTORE FP2
416         faddx   %fp1,%fp0               | ...FP0 IS LOG(F) + LOG(1+U)
418         fmovel  %d1,%fpcr
419         faddx   KLOG2(%a6),%fp0 | ...FINAL ADD
420         bra     t_frcinx
423 LOGNEAR1:
424 |--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
425         fmovex  %fp0,%fp1
426         fsubs   one,%fp1                | ...FP1 IS X-1
427         fadds   one,%fp0                | ...FP0 IS X+1
428         faddx   %fp1,%fp1               | ...FP1 IS 2(X-1)
429 |--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
430 |--IN U, U = 2(X-1)/(X+1) = FP1/FP0
432 LP1CONT2:
433 |--THIS IS AN RE-ENTRY POINT FOR LOGNP1
434         fdivx   %fp0,%fp1               | ...FP1 IS U
435         fmovemx %fp2-%fp2/%fp3,-(%sp)    | ...SAVE FP2
436 |--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
437 |--LET V=U*U, W=V*V, CALCULATE
438 |--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
439 |--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
440         fmovex  %fp1,%fp0
441         fmulx   %fp0,%fp0       | ...FP0 IS V
442         fmovex  %fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
443         fmovex  %fp0,%fp1
444         fmulx   %fp1,%fp1       | ...FP1 IS W
446         fmoved  LOGB5,%fp3
447         fmoved  LOGB4,%fp2
449         fmulx   %fp1,%fp3       | ...W*B5
450         fmulx   %fp1,%fp2       | ...W*B4
452         faddd   LOGB3,%fp3 | ...B3+W*B5
453         faddd   LOGB2,%fp2 | ...B2+W*B4
455         fmulx   %fp3,%fp1       | ...W*(B3+W*B5), FP3 RELEASED
457         fmulx   %fp0,%fp2       | ...V*(B2+W*B4)
459         faddd   LOGB1,%fp1 | ...B1+W*(B3+W*B5)
460         fmulx   SAVEU(%a6),%fp0 | ...FP0 IS U*V
462         faddx   %fp2,%fp1       | ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
463         fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...FP2 RESTORED
465         fmulx   %fp1,%fp0       | ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
467         fmovel  %d1,%fpcr
468         faddx   SAVEU(%a6),%fp0
469         bra     t_frcinx
470         rts
472 LOGNEG:
473 |--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
474         bra     t_operr
476         .global slognp1d
477 slognp1d:
478 |--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
479 | Simply return the denorm
481         bra     t_extdnrm
483         .global slognp1
484 slognp1:
485 |--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
487         fmovex  (%a0),%fp0      | ...LOAD INPUT
488         fabsx   %fp0            |test magnitude
489         fcmpx   LTHOLD,%fp0     |compare with min threshold
490         fbgt    LP1REAL         |if greater, continue
491         fmovel  #0,%fpsr                |clr N flag from compare
492         fmovel  %d1,%fpcr
493         fmovex  (%a0),%fp0      |return signed argument
494         bra     t_frcinx
496 LP1REAL:
497         fmovex  (%a0),%fp0      | ...LOAD INPUT
498         movel   #0x00000000,ADJK(%a6)
499         fmovex  %fp0,%fp1       | ...FP1 IS INPUT Z
500         fadds   one,%fp0        | ...X := ROUND(1+Z)
501         fmovex  %fp0,X(%a6)
502         movew   XFRAC(%a6),XDCARE(%a6)
503         movel   X(%a6),%d0
504         cmpil   #0,%d0
505         ble     LP1NEG0 | ...LOG OF ZERO OR -VE
506         cmp2l   BOUNDS2,%d0
507         bcs     LOGMAIN | ...BOUNDS2 IS [1/2,3/2]
508 |--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
509 |--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
510 |--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
512 LP1NEAR1:
513 |--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
514         cmp2l   BOUNDS1,%d0
515         bcss    LP1CARE
517 LP1ONE16:
518 |--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
519 |--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
520         faddx   %fp1,%fp1       | ...FP1 IS 2Z
521         fadds   one,%fp0        | ...FP0 IS 1+X
522 |--U = FP1/FP0
523         bra     LP1CONT2
525 LP1CARE:
526 |--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
527 |--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
528 |--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
529 |--THERE ARE ONLY TWO CASES.
530 |--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
531 |--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
532 |--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
533 |--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
535         movel   XFRAC(%a6),FFRAC(%a6)
536         andil   #0xFE000000,FFRAC(%a6)
537         oril    #0x01000000,FFRAC(%a6)  | ...F OBTAINED
538         cmpil   #0x3FFF8000,%d0 | ...SEE IF 1+Z > 1
539         bges    KISZERO
541 KISNEG1:
542         fmoves  TWO,%fp0
543         movel   #0x3fff0000,F(%a6)
544         clrl    F+8(%a6)
545         fsubx   F(%a6),%fp0     | ...2-F
546         movel   FFRAC(%a6),%d0
547         andil   #0x7E000000,%d0
548         asrl    #8,%d0
549         asrl    #8,%d0
550         asrl    #4,%d0          | ...D0 CONTAINS DISPLACEMENT FOR 1/F
551         faddx   %fp1,%fp1               | ...GET 2Z
552         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...SAVE FP2
553         faddx   %fp1,%fp0               | ...FP0 IS Y-F = (2-F)+2Z
554         lea     LOGTBL,%a0      | ...A0 IS ADDRESS OF 1/F
555         addal   %d0,%a0
556         fmoves  negone,%fp1     | ...FP1 IS K = -1
557         bra     LP1CONT1
559 KISZERO:
560         fmoves  one,%fp0
561         movel   #0x3fff0000,F(%a6)
562         clrl    F+8(%a6)
563         fsubx   F(%a6),%fp0             | ...1-F
564         movel   FFRAC(%a6),%d0
565         andil   #0x7E000000,%d0
566         asrl    #8,%d0
567         asrl    #8,%d0
568         asrl    #4,%d0
569         faddx   %fp1,%fp0               | ...FP0 IS Y-F
570         fmovemx %fp2-%fp2/%fp3,-(%sp)   | ...FP2 SAVED
571         lea     LOGTBL,%a0
572         addal   %d0,%a0         | ...A0 IS ADDRESS OF 1/F
573         fmoves  zero,%fp1       | ...FP1 IS K = 0
574         bra     LP1CONT1
576 LP1NEG0:
577 |--FPCR SAVED. D0 IS X IN COMPACT FORM.
578         cmpil   #0,%d0
579         blts    LP1NEG
580 LP1ZERO:
581         fmoves  negone,%fp0
583         fmovel  %d1,%fpcr
584         bra t_dz
586 LP1NEG:
587         fmoves  zero,%fp0
589         fmovel  %d1,%fpcr
590         bra     t_operr
592         |end