tools/adflib: build only host variant which is used by Sam440 target
[AROS.git] / arch / m68k-all / m680x0 / 060sp / dist / fpsp.s
blob14ca74db12d03b2ee3c66a8878e5db6fa4f648c4
2 # $NetBSD: fpsp.s,v 1.5 2005/12/11 12:17:52 christos Exp $
5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 # M68000 Hi-Performance Microprocessor Division
8 # M68060 Software Package Production Release
9 #
10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 # All rights reserved.
13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 # To the maximum extent permitted by applicable law,
15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 # and any accompanying written materials.
21 # To the maximum extent permitted by applicable law,
22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27 # Motorola assumes no responsibility for the maintenance and support
28 # of the SOFTWARE.
30 # You are hereby granted a copyright license to use, modify, and distribute the
31 # SOFTWARE so long as this entire notice is retained without alteration
32 # in any modified and/or redistributed versions, and that such modified
33 # versions are clearly identified as such.
34 # No licenses are granted by implication, estoppel or otherwise under any
35 # patents or trademarks of Motorola, Inc.
36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39 # freal.s:
40 # This file is appended to the top of the 060FPSP package
41 # and contains the entry points into the package. The user, in
42 # effect, branches to one of the branch table entries located
43 # after _060FPSP_TABLE.
44 # Also, subroutine stubs exist in this file (_fpsp_done for
45 # example) that are referenced by the FPSP package itself in order
46 # to call a given routine. The stub routine actually performs the
47 # callout. The FPSP code does a "bsr" to the stub routine. This
48 # extra layer of hierarchy adds a slight performance penalty but
49 # it makes the FPSP code easier to read and more maintainable.
# Byte offsets of the callout slots. Each stub in this file loads the
# longword stored at _060FPSP_TABLE-0x80+_off_xxx and then uses that value
# as an offset from _060FPSP_TABLE-0x80 to reach the target routine.
# 0x00-0x30 are the exception/exit callouts, 0x40-0x68 the memory-access
# callouts; offsets 0x34-0x3c are not assigned here.
52 set _off_bsun, 0x00
53 set _off_snan, 0x04
54 set _off_operr, 0x08
55 set _off_ovfl, 0x0c
56 set _off_unfl, 0x10
57 set _off_dz, 0x14
58 set _off_inex, 0x18
59 set _off_fline, 0x1c
60 set _off_fpu_dis, 0x20
61 set _off_trap, 0x24
62 set _off_trace, 0x28
63 set _off_access, 0x2c
64 set _off_done, 0x30
# memory-access callout slots
66 set _off_imr, 0x40
67 set _off_dmr, 0x44
68 set _off_dmw, 0x48
69 set _off_irw, 0x4c
70 set _off_irl, 0x50
71 set _off_drb, 0x54
72 set _off_drw, 0x58
73 set _off_drl, 0x5c
74 set _off_dwb, 0x60
75 set _off_dww, 0x64
76 set _off_dwl, 0x68
# _060FPSP_TABLE: start of the package. The stubs in this file address the
# callout slots relative to this label (at _060FPSP_TABLE-0x80+offset).
78 _060FPSP_TABLE:
80 ###############################################################
82 # Here's the table of ENTRY POINTS for those linking the package.
# Nine entries, in this order: snan, operr, ovfl, unfl, dz, inex, fline,
# unsupp, effadd. Each entry is a long branch followed by a zero word.
# NOTE(review): with a 6-byte bra.l plus the 2-byte pad each entry is
# 8 bytes, so 9 entries + the 56 reserved bytes = 0x80 bytes — confirm
# against the assembler output.
83 bra.l _fpsp_snan
84 short 0x0000
85 bra.l _fpsp_operr
86 short 0x0000
87 bra.l _fpsp_ovfl
88 short 0x0000
89 bra.l _fpsp_unfl
90 short 0x0000
91 bra.l _fpsp_dz
92 short 0x0000
93 bra.l _fpsp_inex
94 short 0x0000
95 bra.l _fpsp_fline
96 short 0x0000
97 bra.l _fpsp_unsupp
98 short 0x0000
99 bra.l _fpsp_effadd
100 short 0x0000
102 space 56 # reserve 56 bytes after the last entry
104 ###############################################################
# _fpsp_done: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_done (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
105 global _fpsp_done
106 _fpsp_done:
107 mov.l %d0,-(%sp) # save d0; it is used as scratch below
108 mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = stored offset of target
109 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
110 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
111 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_ovfl: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_ovfl (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
113 global _real_ovfl
114 _real_ovfl:
115 mov.l %d0,-(%sp) # save d0; it is used as scratch below
116 mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = stored offset of target
117 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
118 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
119 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_unfl: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_unfl (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
121 global _real_unfl
122 _real_unfl:
123 mov.l %d0,-(%sp) # save d0; it is used as scratch below
124 mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = stored offset of target
125 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
126 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
127 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_inex: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_inex (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
129 global _real_inex
130 _real_inex:
131 mov.l %d0,-(%sp) # save d0; it is used as scratch below
132 mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = stored offset of target
133 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
134 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
135 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_bsun: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_bsun (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
137 global _real_bsun
138 _real_bsun:
139 mov.l %d0,-(%sp) # save d0; it is used as scratch below
140 mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = stored offset of target
141 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
142 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
143 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_operr: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_operr (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
145 global _real_operr
146 _real_operr:
147 mov.l %d0,-(%sp) # save d0; it is used as scratch below
148 mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = stored offset of target
149 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
150 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
151 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_snan: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_snan (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
153 global _real_snan
154 _real_snan:
155 mov.l %d0,-(%sp) # save d0; it is used as scratch below
156 mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = stored offset of target
157 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
158 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
159 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_dz: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_dz (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
161 global _real_dz
162 _real_dz:
163 mov.l %d0,-(%sp) # save d0; it is used as scratch below
164 mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = stored offset of target
165 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
166 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
167 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_fline: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_fline (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
169 global _real_fline
170 _real_fline:
171 mov.l %d0,-(%sp) # save d0; it is used as scratch below
172 mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = stored offset of target
173 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
174 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
175 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_fpu_disabled: callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_fpu_dis (offset is
# relative to _060FPSP_TABLE-0x80). d0 and sp hold their entry values when
# the target starts; the mov instructions alter the condition codes.
177 global _real_fpu_disabled
178 _real_fpu_disabled:
179 mov.l %d0,-(%sp) # save d0; it is used as scratch below
180 mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = stored offset of target
181 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
182 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
183 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_trap: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_trap (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
185 global _real_trap
186 _real_trap:
187 mov.l %d0,-(%sp) # save d0; it is used as scratch below
188 mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = stored offset of target
189 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
190 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
191 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_trace: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_trace (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
193 global _real_trace
194 _real_trace:
195 mov.l %d0,-(%sp) # save d0; it is used as scratch below
196 mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = stored offset of target
197 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
198 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
199 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _real_access: callout stub. Jumps to the routine whose offset is stored in
# the longword at _060FPSP_TABLE-0x80+_off_access (offset is relative to
# _060FPSP_TABLE-0x80). d0 and sp hold their entry values when the target
# starts; the mov instructions alter the condition codes.
201 global _real_access
202 _real_access:
203 mov.l %d0,-(%sp) # save d0; it is used as scratch below
204 mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = stored offset of target
205 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
206 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
207 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
209 #######################################
# _imem_read: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_imr (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
211 global _imem_read
212 _imem_read:
213 mov.l %d0,-(%sp) # save d0; it is used as scratch below
214 mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = stored offset of target
215 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
216 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
217 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_read: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_dmr (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
219 global _dmem_read
220 _dmem_read:
221 mov.l %d0,-(%sp) # save d0; it is used as scratch below
222 mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = stored offset of target
223 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
224 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
225 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_write: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_dmw (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
227 global _dmem_write
228 _dmem_write:
229 mov.l %d0,-(%sp) # save d0; it is used as scratch below
230 mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = stored offset of target
231 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
232 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
233 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _imem_read_word: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_irw (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
235 global _imem_read_word
236 _imem_read_word:
237 mov.l %d0,-(%sp) # save d0; it is used as scratch below
238 mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = stored offset of target
239 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
240 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
241 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _imem_read_long: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_irl (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
# Callers in this file (e.g. _fpsp_ovfl) load the instruction address into
# a0 before the bsr and take the fetched longword from d0 afterwards.
243 global _imem_read_long
244 _imem_read_long:
245 mov.l %d0,-(%sp) # save d0; it is used as scratch below
246 mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = stored offset of target
247 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
248 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
249 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_read_byte: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_drb (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
251 global _dmem_read_byte
252 _dmem_read_byte:
253 mov.l %d0,-(%sp) # save d0; it is used as scratch below
254 mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = stored offset of target
255 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
256 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
257 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_read_word: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_drw (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
259 global _dmem_read_word
260 _dmem_read_word:
261 mov.l %d0,-(%sp) # save d0; it is used as scratch below
262 mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = stored offset of target
263 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
264 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
265 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_read_long: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_drl (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
267 global _dmem_read_long
268 _dmem_read_long:
269 mov.l %d0,-(%sp) # save d0; it is used as scratch below
270 mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = stored offset of target
271 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
272 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
273 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_write_byte: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_dwb (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
275 global _dmem_write_byte
276 _dmem_write_byte:
277 mov.l %d0,-(%sp) # save d0; it is used as scratch below
278 mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = stored offset of target
279 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
280 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
281 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_write_word: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_dww (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
283 global _dmem_write_word
284 _dmem_write_word:
285 mov.l %d0,-(%sp) # save d0; it is used as scratch below
286 mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = stored offset of target
287 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
288 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
289 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
# _dmem_write_long: memory callout stub. Jumps to the routine whose offset is
# stored in the longword at _060FPSP_TABLE-0x80+_off_dwl (offset is relative
# to _060FPSP_TABLE-0x80). All registers and sp hold their entry values when
# the target starts, so the target returns straight to this stub's caller.
291 global _dmem_write_long
292 _dmem_write_long:
293 mov.l %d0,-(%sp) # save d0; it is used as scratch below
294 mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = stored offset of target
295 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target addr = (table-0x80)+d0
296 mov.l 0x4(%sp),%d0 # reload saved d0 (now at 4(sp))
297 rtd &0x4 # pop target into PC, then drop the saved-d0 slot
300 # This file contains a set of define statements for constants
301 # in order to promote readability within the corecode itself.
# Everything in this group is a displacement from %a6 after the handlers'
# "link.w %a6,&-LOCAL_SIZE": positive values reach the exception stack
# frame above the saved a6, negative values (and LV+n) reach the locals.
304 set LOCAL_SIZE, 192 # stack frame size(bytes)
305 set LV, -LOCAL_SIZE # stack offset
307 set EXC_SR, 0x4 # stack status register
308 set EXC_PC, 0x6 # stack pc
309 set EXC_VOFF, 0xa # stacked vector offset
310 set EXC_EA, 0xc # stacked <ea>
312 set EXC_FP, 0x0 # frame pointer
# register save areas: 8 longwords each for a0-a7 and d0-d7, 3*12 bytes for fp0-fp2
314 set EXC_AREGS, -68 # offset of all address regs
315 set EXC_DREGS, -100 # offset of all data regs
316 set EXC_FPREGS, -36 # offset of all fp regs
318 set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
# NOTE(review): OLD_A7 uses slot 6, i.e. the same offset as EXC_A6 below,
# not EXC_A7's slot 7 — presumably intentional aliasing; confirm.
319 set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
320 set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
321 set EXC_A5, EXC_AREGS+(5*4)
322 set EXC_A4, EXC_AREGS+(4*4)
323 set EXC_A3, EXC_AREGS+(3*4)
324 set EXC_A2, EXC_AREGS+(2*4)
325 set EXC_A1, EXC_AREGS+(1*4)
326 set EXC_A0, EXC_AREGS+(0*4)
327 set EXC_D7, EXC_DREGS+(7*4)
328 set EXC_D6, EXC_DREGS+(6*4)
329 set EXC_D5, EXC_DREGS+(5*4)
330 set EXC_D4, EXC_DREGS+(4*4)
331 set EXC_D3, EXC_DREGS+(3*4)
332 set EXC_D2, EXC_DREGS+(2*4)
333 set EXC_D1, EXC_DREGS+(1*4)
334 set EXC_D0, EXC_DREGS+(0*4)
336 set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
337 set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
338 set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
# 12-byte operand slots; each has _EX at +0, _SGN at +2, _HI at +4, _LO at +8
340 set FP_SCR1, LV+80 # fp scratch 1
341 set FP_SCR1_EX, FP_SCR1+0
342 set FP_SCR1_SGN, FP_SCR1+2
343 set FP_SCR1_HI, FP_SCR1+4
344 set FP_SCR1_LO, FP_SCR1+8
346 set FP_SCR0, LV+68 # fp scratch 0
347 set FP_SCR0_EX, FP_SCR0+0
348 set FP_SCR0_SGN, FP_SCR0+2
349 set FP_SCR0_HI, FP_SCR0+4
350 set FP_SCR0_LO, FP_SCR0+8
352 set FP_DST, LV+56 # fp destination operand
353 set FP_DST_EX, FP_DST+0
354 set FP_DST_SGN, FP_DST+2
355 set FP_DST_HI, FP_DST+4
356 set FP_DST_LO, FP_DST+8
358 set FP_SRC, LV+44 # fp source operand
359 set FP_SRC_EX, FP_SRC+0
360 set FP_SRC_SGN, FP_SRC+2
361 set FP_SRC_HI, FP_SRC+4
362 set FP_SRC_LO, FP_SRC+8
# USER_FPCR, USER_FPSR, USER_FPIAR are consecutive longwords (LV+32/36/40),
# matching the "fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6)" save in the handlers
364 set USER_FPIAR, LV+40 # FP instr address register
366 set USER_FPSR, LV+36 # FP status register
367 set FPSR_CC, USER_FPSR+0 # FPSR condition codes
368 set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
369 set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
370 set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
372 set USER_FPCR, LV+32 # FP control register
373 set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
374 set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
376 set L_SCR3, LV+28 # integer scratch 3
377 set L_SCR2, LV+24 # integer scratch 2
378 set L_SCR1, LV+20 # integer scratch 1
380 set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
# NOTE(review): EXC_TEMP2 is LV+24, the same offset as L_SCR2 above —
# the two names share storage; confirm no user needs both at once.
382 set EXC_TEMP2, LV+24 # temporary space
383 set EXC_TEMP, LV+16 # temporary space
385 set DTAG, LV+15 # destination operand type
386 set STAG, LV+14 # source operand type
388 set SPCOND_FLG, LV+10 # flag: special case (see below)
390 set EXC_CC, LV+8 # saved condition codes
391 set EXC_EXTWPTR, LV+4 # saved current PC (active)
# EXC_EXTWORD and EXC_CMDREG are two names for the same word (LV+2); the
# handlers store one fetched longword at EXC_OPWORD, which fills both
# the operation word (LV+0) and this extension word (LV+2)
392 set EXC_EXTWORD, LV+2 # saved extension word
393 set EXC_CMDREG, LV+2 # saved extension word
394 set EXC_OPWORD, LV+0 # saved operation word
396 ################################
398 # Helpful macros
# (These are "set" constants rather than assembler macros.)
# FTEMP_*/LOCAL_*/DST_*/SRC_* are byte offsets from the start of one
# extended-precision operand held in memory, for use with a base register.
400 set FTEMP, 0 # offsets within an
401 set FTEMP_EX, 0 # extended precision
402 set FTEMP_SGN, 2 # value saved in memory.
403 set FTEMP_HI, 4
404 set FTEMP_LO, 8
405 set FTEMP_GRS, 12
407 set LOCAL, 0 # offsets within an
408 set LOCAL_EX, 0 # extended precision
409 set LOCAL_SGN, 2 # value saved in memory.
410 set LOCAL_HI, 4
411 set LOCAL_LO, 8
412 set LOCAL_GRS, 12
414 set DST, 0 # offsets within an
415 set DST_EX, 0 # extended precision
416 set DST_HI, 4 # value saved in memory.
417 set DST_LO, 8
419 set SRC, 0 # offsets within an
420 set SRC_EX, 0 # extended precision
421 set SRC_HI, 4 # value saved in memory.
422 set SRC_LO, 8
# Exponent limits expressed as extended-precision biased exponents:
# SGL_LO = EXT_BIAS-SGL_BIAS+1, SGL_HI = EXT_BIAS+SGL_BIAS,
# DBL_LO = EXT_BIAS-DBL_BIAS+1, DBL_HI = EXT_BIAS+DBL_BIAS.
424 set SGL_LO, 0x3f81 # min sgl prec exponent
425 set SGL_HI, 0x407e # max sgl prec exponent
426 set DBL_LO, 0x3c01 # min dbl prec exponent
427 set DBL_HI, 0x43fe # max dbl prec exponent
428 set EXT_LO, 0x0 # min ext prec exponent
429 set EXT_HI, 0x7ffe # max ext prec exponent
431 set EXT_BIAS, 0x3fff # extended precision bias
432 set SGL_BIAS, 0x007f # single precision bias
433 set DBL_BIAS, 0x03ff # double precision bias
# Operand type tags; these are the byte values stored in STAG/DTAG and
# compared against (e.g. "cmpi.b %d0,&UNNORM" after set_tag_x).
435 set NORM, 0x00 # operand type for STAG/DTAG
436 set ZERO, 0x01 # operand type for STAG/DTAG
437 set INF, 0x02 # operand type for STAG/DTAG
438 set QNAN, 0x03 # operand type for STAG/DTAG
439 set DENORM, 0x04 # operand type for STAG/DTAG
440 set SNAN, 0x05 # operand type for STAG/DTAG
441 set UNNORM, 0x06 # operand type for STAG/DTAG
443 ##################
444 # FPSR/FPCR bits #
445 ##################
# These are bit NUMBERS (for btst/bset/bclr style use), each counted within
# its own byte of the register; the corresponding masks are defined
# separately (e.g. neg_bit 3 <-> neg_bmask 0x08 <-> neg_mask 0x08000000).
# condition code byte
446 set neg_bit, 0x3 # negative result
447 set z_bit, 0x2 # zero result
448 set inf_bit, 0x1 # infinite result
449 set nan_bit, 0x0 # NAN result
451 set q_sn_bit, 0x7 # sign bit of quotient byte
# exception status byte; the same numbers are tested against the FPCR
# enable byte, e.g. "btst &ovfl_bit,FPCR_ENABLE(%a6)"
453 set bsun_bit, 7 # branch on unordered
454 set snan_bit, 6 # signalling NAN
455 set operr_bit, 5 # operand error
456 set ovfl_bit, 4 # overflow
457 set unfl_bit, 3 # underflow
458 set dz_bit, 2 # divide by zero
459 set inex2_bit, 1 # inexact result 2
460 set inex1_bit, 0 # inexact result 1
# accrued exception byte
462 set aiop_bit, 7 # accrued invalid operation bit
463 set aovfl_bit, 6 # accrued overflow bit
464 set aunfl_bit, 5 # accrued underflow bit
465 set adz_bit, 4 # accrued dz bit
466 set ainex_bit, 3 # accrued inexact bit
468 #############################
469 # FPSR individual bit masks #
470 #############################
# "(lw)" masks address the bit inside the full 32-bit FPSR image;
# "(byte)" masks address the same bit inside the condition-code byte alone.
471 set neg_mask, 0x08000000 # negative bit mask (lw)
472 set inf_mask, 0x02000000 # infinity bit mask (lw)
473 set z_mask, 0x04000000 # zero bit mask (lw)
474 set nan_mask, 0x01000000 # nan bit mask (lw)
476 set neg_bmask, 0x08 # negative bit mask (byte)
477 set inf_bmask, 0x02 # infinity bit mask (byte)
478 set z_bmask, 0x04 # zero bit mask (byte)
479 set nan_bmask, 0x01 # nan bit mask (byte)
# exception status bits occupy bits 15-8 of the longword
481 set bsun_mask, 0x00008000 # bsun exception mask
482 set snan_mask, 0x00004000 # snan exception mask
483 set operr_mask, 0x00002000 # operr exception mask
484 set ovfl_mask, 0x00001000 # overflow exception mask
485 set unfl_mask, 0x00000800 # underflow exception mask
486 set dz_mask, 0x00000400 # dz exception mask
487 set inex2_mask, 0x00000200 # inex2 exception mask
488 set inex1_mask, 0x00000100 # inex1 exception mask
# accrued exception bits occupy bits 7-3 of the longword
490 set aiop_mask, 0x00000080 # accrued illegal operation
491 set aovfl_mask, 0x00000040 # accrued overflow
492 set aunfl_mask, 0x00000020 # accrued underflow
493 set adz_mask, 0x00000010 # accrued divide by zero
494 set ainex_mask, 0x00000008 # accrued inexact
496 ######################################
497 # FPSR combinations used in the FPSP #
498 ######################################
# Sums of the single-bit masks above; the bits are disjoint, so "+" acts as OR.
# Similar names differ in which bits they include:
#   unfinx_mask   = unfl + inex2 + aunfl + ainex
#   unf2inx_mask  = unfl + inex2 + ainex          (no aunfl)
#   unfl_inx_mask = unfl + aunfl + ainex          (no inex2)
#   ovfinx_mask   = ovfl + inex2 + aovfl + ainex
#   ovfl_inx_mask = ovfl + aovfl + ainex          (no inex2)
499 set dzinf_mask, inf_mask+dz_mask+adz_mask
500 set opnan_mask, nan_mask+operr_mask+aiop_mask
501 set nzi_mask, 0x01ffffff #clears N, Z, and I
502 set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
503 set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
504 set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
505 set inx1a_mask, inex1_mask+ainex_mask
506 set inx2a_mask, inex2_mask+ainex_mask
507 set snaniop_mask, nan_mask+snan_mask+aiop_mask
508 set snaniop2_mask, snan_mask+aiop_mask
509 set naniop_mask, nan_mask+aiop_mask
510 set neginf_mask, neg_mask+inf_mask
511 set infaiop_mask, inf_mask+aiop_mask
512 set negz_mask, neg_mask+z_mask
513 set opaop_mask, operr_mask+aiop_mask
514 set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
515 set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
517 #########
518 # misc. #
519 #########
520 set rnd_stky_bit, 29 # stky bit pos in longword
522 set sign_bit, 0x7 # sign bit
523 set signan_bit, 0x6 # signalling nan bit
# same values as SGL_LO / DBL_LO
525 set sgl_thresh, 0x3f81 # minimum sgl exponent
526 set dbl_thresh, 0x3c01 # minimum dbl exponent
# Precision codes use bits 3:2 and rounding codes bits 1:0, so one of each
# can be added into a single value.
# NOTE(review): presumably this is the "rnd prec/mode" byte the handlers
# pass in d0 from FPCR_MODE — confirm against the emulation routines.
528 set x_mode, 0x0 # extended precision
529 set s_mode, 0x4 # single precision
530 set d_mode, 0x8 # double precision
532 set rn_mode, 0x0 # round-to-nearest
533 set rz_mode, 0x1 # round-to-zero
534 set rm_mode, 0x2 # round-to-minus-infinity
535 set rp_mode, 0x3 # round-to-plus-infinity
537 set mantissalen, 64 # length of mantissa in bits
539 set BYTE, 1 # len(byte) == 1 byte
540 set WORD, 2 # len(word) == 2 bytes
541 set LONG, 4 # len(longword) == 4 bytes
# FP exception vector offsets, one longword apart (0xc0..0xd8)
543 set BSUN_VEC, 0xc0 # bsun vector offset
544 set INEX_VEC, 0xc4 # inexact vector offset
545 set DZ_VEC, 0xc8 # dz vector offset
546 set UNFL_VEC, 0xcc # unfl vector offset
547 set OPERR_VEC, 0xd0 # operr vector offset
548 set OVFL_VEC, 0xd4 # ovfl vector offset
549 set SNAN_VEC, 0xd8 # snan vector offset
551 ###########################
552 # SPecial CONDition FLaGs #
553 ###########################
# *_flg values are byte masks, *_bit values the matching bit numbers
# (flag == 1<<bit). fmovm_flg (0x40) has no matching *_bit define here.
554 set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
555 set fbsun_flg, 0x02 # flag bit: bsun exception
556 set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
557 set mda7_flg, 0x08 # flag bit: -(a7) <ea>
558 set fmovm_flg, 0x40 # flag bit: fmovm instruction
559 set immed_flg, 0x80 # flag bit: &<data> <ea>
561 set ftrapcc_bit, 0x0
562 set fbsun_bit, 0x1
563 set mia7_bit, 0x2
564 set mda7_bit, 0x3
565 set immed_bit, 0x7
567 ##################################
568 # TRANSCENDENTAL "LAST-OP" FLAGS #
569 ##################################
# small enumeration (0..3), not bit masks
570 set FMUL_OP, 0x0 # fmul instr performed last
571 set FDIV_OP, 0x1 # fdiv performed last
572 set FADD_OP, 0x2 # fadd performed last
573 set FMOV_OP, 0x3 # fmov performed last
575 #############
576 # CONSTANTS #
577 #############
# Read-only data emitted in line with the code.
# T1/T2 are two longwords each (lead and trail parts of 16381*log2).
578 T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
579 T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
# PI and PIBY2 are four longwords each and share the same 64-bit mantissa
# (0xC90FDAA2_2168C235); only the exponent word differs (0x4000 vs 0x3FFF),
# i.e. PIBY2 is PI with the exponent lowered by one. The 4th longword is zero.
581 PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
582 PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
# NOTE(review): TWOBYPI is only two longwords — presumably 2/pi as a
# double-precision value; confirm at the point of use.
584 TWOBYPI:
585 long 0x3FE45F30,0x6DC9C883
587 #########################################################################
588 # XDEF **************************************************************** #
589 # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
591 # This handler should be the first code executed upon taking the #
592 # FP Overflow exception in an operating system. #
594 # XREF **************************************************************** #
595 # _imem_read_long() - read instruction longword #
596 # fix_skewed_ops() - adjust src operand in fsave frame #
597 # set_tag_x() - determine optype of src/dst operands #
598 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
599 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
600 # load_fpn2() - load dst operand from FP regfile #
601 # fout() - emulate an opclass 3 instruction #
602 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
603 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
604 # _real_ovfl() - "callout" for Overflow exception enabled code #
605 # _real_inex() - "callout" for Inexact exception enabled code #
606 # _real_trace() - "callout" for Trace exception code #
608 # INPUT *************************************************************** #
609 # - The system stack contains the FP Ovfl exception stack frame #
610 # - The fsave frame contains the source operand #
612 # OUTPUT ************************************************************** #
613 # Overflow Exception enabled: #
614 # - The system stack is unchanged #
615 # - The fsave frame contains the adjusted src op for opclass 0,2 #
616 # Overflow Exception disabled: #
617 # - The system stack is unchanged #
618 # - The "exception present" flag in the fsave frame is cleared #
620 # ALGORITHM *********************************************************** #
621 # On the 060, if an FP overflow is present as the result of any #
622 # instruction, the 060 will take an overflow exception whether the #
623 # exception is enabled or disabled in the FPCR. For the disabled case, #
624 # This handler emulates the instruction to determine what the correct #
625 # default result should be for the operation. This default result is #
626 # then stored in either the FP regfile, data regfile, or memory. #
627 # Finally, the handler exits through the "callout" _fpsp_done() #
628 # denoting that no exceptional conditions exist within the machine. #
629 # If the exception is enabled, then this handler must create the #
630 # exceptional operand and place it in the fsave state frame, and store #
631 # the default result (only if the instruction is opclass 3). For #
632 # exceptions enabled, this handler must exit through the "callout" #
633 # _real_ovfl() so that the operating system enabled overflow handler #
634 # can handle this case. #
635 # Two other conditions exist. First, if overflow was disabled #
636 # but the inexact exception was enabled, this handler must exit #
637 # through the "callout" _real_inex() regardless of whether the result #
638 # was inexact. #
639 # Also, in the case of an opclass three instruction where #
640 # overflow was disabled and the trace exception was enabled, this #
641 # handler must exit through the "callout" _real_trace(). #
643 #########################################################################
645 global _fpsp_ovfl
646 _fpsp_ovfl:
648 #$# sub.l &24,%sp # make room for src/dst
650 link.w %a6,&-LOCAL_SIZE # init stack frame
652 fsave FP_SRC(%a6) # grab the "busy" frame
654 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
655 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
656 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
658 # the FPIAR holds the "current PC" of the faulting instruction
659 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
660 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
661 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
662 bsr.l _imem_read_long # fetch the instruction words
# one longword store fills both EXC_OPWORD (LV+0) and EXC_CMDREG (LV+2)
663 mov.l %d0,EXC_OPWORD(%a6)
665 ##############################################################################
# bit 5 of the HIGH byte of the extension word selects the fmove-out path
667 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
668 bne.w fovfl_out
671 lea FP_SRC(%a6),%a0 # pass: ptr to src op
672 bsr.l fix_skewed_ops # fix src op
674 # since, I believe, only NORMs and DENORMs can come through here,
675 # maybe we can avoid the subroutine call.
676 lea FP_SRC(%a6),%a0 # pass: ptr to src op
677 bsr.l set_tag_x # tag the operand type
678 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
680 # bit five of the fp extension word separates the monadic and dyadic operations
681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
682 # will never take this exception.
683 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
684 beq.b fovfl_extract # monadic
# 3-bit field at bit offset 6 of the extension word = dst register number
686 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
687 bsr.l load_fpn2 # load dst into FP_DST
689 lea FP_DST(%a6),%a0 # pass: ptr to dst op
690 bsr.l set_tag_x # tag the operand type
691 cmpi.b %d0,&UNNORM # is operand an UNNORM?
692 bne.b fovfl_op2_done # no
693 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
694 fovfl_op2_done:
695 mov.b %d0,DTAG(%a6) # save dst optype tag
697 fovfl_extract:
699 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
700 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
701 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
702 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
703 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
704 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
706 clr.l %d0
707 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
709 mov.b 1+EXC_CMDREG(%a6),%d1
710 andi.w &0x007f,%d1 # extract extension
# mask 0x00ff01ff: clears the cc byte and all exception bits except inex1;
# the quotient byte and the accrued byte are kept
712 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero cc and exc bits (not inex1); keep accrued
714 fmov.l &0x0,%fpcr # zero current control regs
715 fmov.l &0x0,%fpsr
717 lea FP_SRC(%a6),%a0
718 lea FP_DST(%a6),%a1
720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
# d1 (7-bit extension) indexes longword entries of tbl_unsupp; the fetched
# entry is then used as an offset from tbl_unsupp for the jsr
721 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
722 jsr (tbl_unsupp.l,%pc,%d1.l*1)
724 # the operation has been emulated. the result is in fp0.
725 # the EXOP, if an exception occurred, is in fp1.
726 # we must save the default result regardless of whether
727 # traps are enabled or disabled.
728 bfextu EXC_CMDREG(%a6){&6:&3},%d0
729 bsr.l store_fpreg
731 # the exceptional possibilities we have left ourselves with are ONLY overflow
732 # and inexact. and, the inexact is such that overflow occurred and was disabled
733 # but inexact was enabled.
734 btst &ovfl_bit,FPCR_ENABLE(%a6)
735 bne.b fovfl_ovfl_on
737 btst &inex2_bit,FPCR_ENABLE(%a6)
738 bne.b fovfl_inex_on
# neither overflow nor inexact enabled: restore state and exit quietly
740 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
741 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
742 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
744 unlk %a6
745 #$# add.l &24,%sp
746 bra.l _fpsp_done
748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
749 # in fp1. now, simply jump to _real_ovfl()!
750 fovfl_ovfl_on:
751 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
# overwrites the word at offset 2 of the frame that frestore reloads below
753 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
755 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
756 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
757 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
759 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
761 unlk %a6
763 bra.l _real_ovfl
765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
766 # we must jump to real_inex().
767 fovfl_inex_on:
769 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
# 0xc4 is INEX_VEC: rewrite the low byte of the stacked vector offset
771 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
772 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
774 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
775 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
776 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
778 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
780 unlk %a6
782 bra.l _real_inex
784 ########################################################################
785 fovfl_out:
788 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
789 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
790 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
792 # the src operand is definitely a NORM(!), so tag it as such
793 mov.b &NORM,STAG(%a6) # set src optype tag
795 clr.l %d0
796 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
# mask 0xffff00ff: clears only the exception status byte; the cc,
# quotient and accrued bytes are kept (differs from the mask used above)
798 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception status byte only
800 fmov.l &0x0,%fpcr # zero current control regs
801 fmov.l &0x0,%fpsr
803 lea FP_SRC(%a6),%a0 # pass ptr to src operand
805 bsr.l fout
# same enable checks as the opclass 0,2 path; .w branches reach the shared exits
807 btst &ovfl_bit,FPCR_ENABLE(%a6)
808 bne.w fovfl_ovfl_on
810 btst &inex2_bit,FPCR_ENABLE(%a6)
811 bne.w fovfl_inex_on
813 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
814 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
815 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
817 unlk %a6
818 #$# add.l &24,%sp
# after unlk, (sp) is the first byte of the exception frame (high byte of
# the stacked SR, cf. EXC_SR = 4 from a6); bit 7 of that byte is tested
820 btst &0x7,(%sp) # is trace on?
821 beq.l _fpsp_done # no
823 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
824 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
825 bra.l _real_trace
827 #########################################################################
828 # XDEF **************************************************************** #
829 # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
831 # This handler should be the first code executed upon taking the #
832 # FP Underflow exception in an operating system. #
834 # XREF **************************************************************** #
835 # _imem_read_long() - read instruction longword #
836 # fix_skewed_ops() - adjust src operand in fsave frame #
837 # set_tag_x() - determine optype of src/dst operands #
838 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
839 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
840 # load_fpn2() - load dst operand from FP regfile #
841 # fout() - emulate an opclass 3 instruction #
842 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
843 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
844 # _real_unfl() - "callout" for Underflow exception enabled code #
845 # _real_inex() - "callout" for Inexact exception enabled code #
846 # _real_trace() - "callout" for Trace exception code #
848 # INPUT *************************************************************** #
849 # - The system stack contains the FP Unfl exception stack frame #
850 # - The fsave frame contains the source operand #
852 # OUTPUT ************************************************************** #
853 # Underflow Exception enabled: #
854 # - The system stack is unchanged #
855 # - The fsave frame contains the adjusted src op for opclass 0,2 #
856 # Underflow Exception disabled: #
857 # - The system stack is unchanged #
858 # - The "exception present" flag in the fsave frame is cleared #
860 # ALGORITHM *********************************************************** #
861 # On the 060, if an FP underflow is present as the result of any #
862 # instruction, the 060 will take an underflow exception whether the #
863 # exception is enabled or disabled in the FPCR. For the disabled case, #
864 # This handler emulates the instruction to determine what the correct #
865 # default result should be for the operation. This default result is #
866 # then stored in either the FP regfile, data regfile, or memory. #
867 # Finally, the handler exits through the "callout" _fpsp_done() #
868 # denoting that no exceptional conditions exist within the machine. #
869 # If the exception is enabled, then this handler must create the #
870 # exceptional operand and place it in the fsave state frame, and store #
871 # the default result (only if the instruction is opclass 3). For #
872 # exceptions enabled, this handler must exit through the "callout" #
873 # _real_unfl() so that the operating system enabled underflow handler #
874 # can handle this case. #
875 # Two other conditions exist. First, if underflow was disabled #
876 # but the inexact exception was enabled and the result was #
877 # inexact, this handler must exit through the "callout" #
878 # _real_inex(). #
879 # Also, in the case of an opclass three instruction where #
880 # underflow was disabled and the trace exception was enabled, this #
881 # handler must exit through the "callout" _real_trace(). #
883 #########################################################################
# _fpsp_unfl: entry point and opclass 0/2 path of the FP underflow handler.
# Builds the local frame, saves d0-d1/a0-a1, the FP control registers and
# fp0-fp1, and fetches the faulting instruction through the saved FPIAR.
# If bit 5 of EXC_CMDREG is set the instruction is an fmove out and control
# goes to funfl_out. Otherwise the operands are tagged, the operation is
# re-run through the routine selected from tbl_unsupp, and the result is
# stored with store_fpreg. The exit is through _fpsp_done unless the UNFL or
# INEX2 enable bits in FPCR_ENABLE divert to funfl_unfl_on / funfl_inex_on.
885 global _fpsp_unfl
886 _fpsp_unfl:
888 #$# sub.l &24,%sp # make room for src/dst
890 link.w %a6,&-LOCAL_SIZE # init stack frame
892 fsave FP_SRC(%a6) # grab the "busy" frame
894 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
895 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
896 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
898 # the FPIAR holds the "current PC" of the faulting instruction
899 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
900 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
901 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
902 bsr.l _imem_read_long # fetch the instruction words
903 mov.l %d0,EXC_OPWORD(%a6) # keep the fetched longword in the frame
905 ##############################################################################
907 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
908 bne.w funfl_out
911 lea FP_SRC(%a6),%a0 # pass: ptr to src op
912 bsr.l fix_skewed_ops # fix src op
914 lea FP_SRC(%a6),%a0 # pass: ptr to src op
915 bsr.l set_tag_x # tag the operand type
916 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
918 # bit five of the fp ext word separates the monadic and dyadic operations
919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
920 # will never take this exception.
921 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
922 beq.b funfl_extract # monadic
924 # now, what's left that's not dyadic is fsincos. we can distinguish it
925 # from all dyadics by the '0110xxx' pattern
926 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
927 bne.b funfl_extract # yes
929 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
930 bsr.l load_fpn2 # load dst into FP_DST
932 lea FP_DST(%a6),%a0 # pass: ptr to dst op
933 bsr.l set_tag_x # tag the operand type
934 cmpi.b %d0,&UNNORM # is operand an UNNORM?
935 bne.b funfl_op2_done # no
936 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
937 funfl_op2_done:
938 mov.b %d0,DTAG(%a6) # save dst optype tag
940 funfl_extract:
942 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
943 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
944 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
945 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
946 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
947 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
949 clr.l %d0
950 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
952 mov.b 1+EXC_CMDREG(%a6),%d1
953 andi.w &0x007f,%d1 # extract extension
# the mask below clears bits 31-24 and bits 15-9 of the saved FPSR image
955 andi.l &0x00ff01ff,USER_FPSR(%a6)
957 fmov.l &0x0,%fpcr # zero current control regs
958 fmov.l &0x0,%fpsr
960 lea FP_SRC(%a6),%a0
961 lea FP_DST(%a6),%a1
963 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
# NOTE(review): "OVFL" above looks carried over from the overflow handler;
# this is the underflow path - confirm the intended wording.
964 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
965 jsr (tbl_unsupp.l,%pc,%d1.l*1)
# store the emulated result into the destination register named by the
# command word
967 bfextu EXC_CMDREG(%a6){&6:&3},%d0
968 bsr.l store_fpreg
970 # The `060 FPU multiplier hardware is such that if the result of a
971 # multiply operation is the smallest possible normalized number
972 # (0x00000000_80000000_00000000), then the machine will take an
973 # underflow exception. Since this is incorrect, we need to check
974 # if our emulation, after re-doing the operation, decided that
975 # no underflow was called for. We do these checks only in
976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
977 # special case will simply exit gracefully with the correct result.
979 # the exceptional possibilities we have left ourselves with are ONLY underflow
980 # and inexact. and, the inexact is such that underflow occurred and was disabled
981 # but inexact was enabled.
982 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
983 bne.b funfl_unfl_on # yes
985 funfl_chkinex:
986 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
987 bne.b funfl_inex_on # yes
989 funfl_exit:
990 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
991 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
992 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
994 unlk %a6
995 #$# add.l &24,%sp
996 bra.l _fpsp_done
998 # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
999 # in fp1 (don't forget to save fp0). what to do now?
1000 # well, we simply have to get to go to _real_unfl()!
# funfl_unfl_on first re-checks the emulated FPSR; funfl_unfl_on2 is the
# unconditional entry that is also branched to from funfl_out.
1001 funfl_unfl_on:
1003 # The `060 FPU multiplier hardware is such that if the result of a
1004 # multiply operation is the smallest possible normalized number
1005 # (0x00000000_80000000_00000000), then the machine will take an
1006 # underflow exception. Since this is incorrect, we check here to see
1007 # if our emulation, after re-doing the operation, decided that
1008 # no underflow was called for.
1009 btst &unfl_bit,FPSR_EXCEPT(%a6) # did the emulation flag underflow?
1010 beq.w funfl_chkinex # no; go check inexact instead
1012 funfl_unfl_on2:
1013 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
1015 mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1017 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1018 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1021 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1023 unlk %a6
1025 bra.l _real_unfl
1027 # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028 # we must jump to real_inex().
# funfl_inex_on first re-checks the emulated FPSR; funfl_inex_on2 is the
# unconditional entry that is also branched to from funfl_out.
1029 funfl_inex_on:
1031 # The `060 FPU multiplier hardware is such that if the result of a
1032 # multiply operation is the smallest possible normalized number
1033 # (0x00000000_80000000_00000000), then the machine will take an
1034 # underflow exception.
1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
1036 # then we have to branch to real_inex.
1038 btst &inex2_bit,FPSR_EXCEPT(%a6) # did the emulation flag inexact?
1039 beq.w funfl_exit # no; plain exit
1041 funfl_inex_on2:
1043 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1045 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
1046 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1048 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1049 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1052 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1054 unlk %a6
1056 bra.l _real_inex
1058 #######################################################################
# funfl_out: underflow taken by an fmove out (opclass three).
# Tags the source as NORM, calls fout to perform the move, then dispatches on
# the saved enable bits: UNFL enabled -> funfl_unfl_on2, INEX2 enabled ->
# funfl_inex_on2. With neither enabled it restores state and exits through
# _fpsp_done, or through _real_trace when bit 7 of the byte at (sp) is set.
1059 funfl_out:
1062 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1066 # the src operand is definitely a NORM(!), so tag it as such
1067 mov.b &NORM,STAG(%a6) # set src optype tag
1069 clr.l %d0
1070 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1072 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1074 fmov.l &0x0,%fpcr # zero current control regs
1075 fmov.l &0x0,%fpsr
1077 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1079 bsr.l fout
1081 btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1082 bne.w funfl_unfl_on2 # yes
1084 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1085 bne.w funfl_inex_on2 # yes
1087 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1088 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1091 unlk %a6
1092 #$# add.l &24,%sp
1094 btst &0x7,(%sp) # is trace on?
1095 beq.l _fpsp_done # no
# trace is on: rewrite the frame in place as a trace frame
1097 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1098 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1099 bra.l _real_trace
1101 #########################################################################
1102 # XDEF **************************************************************** #
1103 # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1104 # Data Type" exception. #
1106 # This handler should be the first code executed upon taking the #
1107 # FP Unimplemented Data Type exception in an operating system. #
1109 # XREF **************************************************************** #
1110 # _imem_read_{word,long}() - read instruction word/longword #
1111 # fix_skewed_ops() - adjust src operand in fsave frame #
1112 # set_tag_x() - determine optype of src/dst operands #
1113 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1114 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1115 # load_fpn2() - load dst operand from FP regfile #
1116 # load_fpn1() - load src operand from FP regfile #
1117 # fout() - emulate an opclass 3 instruction #
1118 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1119 # _real_inex() - "callout" to operating system inexact handler #
1120 # _fpsp_done() - "callout" for exit; work all done #
1121 # _real_trace() - "callout" for Trace enabled exception #
1122 # funimp_skew() - adjust fsave src ops to "incorrect" value #
1123 # _real_snan() - "callout" for SNAN exception #
1124 # _real_operr() - "callout" for OPERR exception #
1125 # _real_ovfl() - "callout" for OVFL exception #
1126 # _real_unfl() - "callout" for UNFL exception #
1127 # get_packed() - fetch packed operand from memory #
1129 # INPUT *************************************************************** #
1130 # - The system stack contains the "Unimp Data Type" stk frame #
1131 # - The fsave frame contains the src op (for UNNORM/DENORM) #
1133 # OUTPUT ************************************************************** #
1134 # If Inexact exception (opclass 3): #
1135 # - The system stack is changed to an Inexact exception stk frame #
1136 # If SNAN exception (opclass 3): #
1137 # - The system stack is changed to an SNAN exception stk frame #
1138 # If OPERR exception (opclass 3): #
1139 # - The system stack is changed to an OPERR exception stk frame #
1140 # If OVFL exception (opclass 3): #
1141 # - The system stack is changed to an OVFL exception stk frame #
1142 # If UNFL exception (opclass 3): #
1143 # - The system stack is changed to an UNFL exception stack frame #
1144 # If Trace exception enabled: #
1145 # - The system stack is changed to a Trace exception stack frame #
1146 # Else: (normal case) #
1147 # - Correct result has been stored as appropriate #
1149 # ALGORITHM *********************************************************** #
1150 # Two main instruction types can enter here: (1) DENORM or UNNORM #
1151 # unimplemented data types. These can be either opclass 0,2 or 3 #
1152 # instructions, and (2) PACKED unimplemented data format instructions #
1153 # also of opclasses 0,2, or 3. #
1154 # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1155 # operand from the fsave state frame and the dst operand (if dyadic) #
1156 # from the FP register file. The instruction is then emulated by #
1157 # choosing an emulation routine from a table of routines indexed by #
1158 # instruction type. Once the instruction has been emulated and result #
1159 # saved, then we check to see if any enabled exceptions resulted from #
1160 # instruction emulation. If none, then we exit through the "callout" #
1161 # _fpsp_done(). If there is an enabled FP exception, then we insert #
1162 # this exception into the FPU in the fsave state frame and then exit #
1163 # through _fpsp_done(). #
1164 # PACKED opclass 0 and 2 is similar in how the instruction is #
1165 # emulated and exceptions handled. The differences occur in how the #
1166 # handler loads the packed op (by calling get_packed() routine) and #
1167 # by the fact that a Trace exception could be pending for PACKED ops. #
1168 # If a Trace exception is pending, then the current exception stack #
1169 # frame is changed to a Trace exception stack frame and an exit is #
1170 # made through _real_trace(). #
1171 # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1172 # performed by calling the routine fout(). If no exception should occur #
1173 # as the result of emulation, then an exit either occurs through #
1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1175 # (a Trace stack frame must be created here, too). If an FP exception #
1176 # should occur, then we must create an exception stack frame of that #
1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1179 # emulation is performed in a similar manner. #
1181 #########################################################################
1184 # (1) DENORM and UNNORM (unimplemented) data types:
1186 # post-instruction
1187 # *****************
1188 # * EA *
1189 # pre-instruction * *
1190 # ***************** *****************
1191 # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1192 # ***************** *****************
1193 # * Next * * Next *
1194 # * PC * * PC *
1195 # ***************** *****************
1196 # * SR * * SR *
1197 # ***************** *****************
1199 # (2) PACKED format (unsupported) opclasses two and three:
1200 # *****************
1201 # * EA *
1202 # * *
1203 # *****************
1204 # * 0x2 * 0x0dc *
1205 # *****************
1206 # * Next *
1207 # * PC *
1208 # *****************
1209 # * SR *
1210 # *****************
# _fpsp_unsupp: entry point and UNNORM/DENORM opclass 0/2 path of the
# "Unimplemented Data Type" handler.
# Builds the local frame, saves d0-d1/a0-a1, the FP control registers and
# fp0-fp1, records the pre-exception a7 in EXC_A7 and fetches the faulting
# instruction through the saved FPIAR. fmove out goes to fu_out and packed
# opclass two goes to fu_in_pack. Everything else is tagged, emulated through
# the routine selected from tbl_unsupp, and (unless fu_in_ena finds an
# enabled exception) the result is stored and the exit is via _fpsp_done.
1212 global _fpsp_unsupp
1213 _fpsp_unsupp:
1215 link.w %a6,&-LOCAL_SIZE # init stack frame
1217 fsave FP_SRC(%a6) # save fp state
1219 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1220 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1223 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1224 bne.b fu_s
1225 fu_u:
1226 mov.l %usp,%a0 # fetch user stack pointer
1227 mov.l %a0,EXC_A7(%a6) # save on stack
1228 bra.b fu_cont
1229 # if the exception is an opclass zero or two unimplemented data type
1230 # exception, then the a7' calculated here is wrong since it doesn't
1231 # stack an ea. however, we don't need an a7' for this case anyways.
1232 fu_s:
1233 lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1234 mov.l %a0,EXC_A7(%a6) # save on stack
1236 fu_cont:
1238 # the FPIAR holds the "current PC" of the faulting instruction
1239 # the FPIAR should be set correctly for ALL exceptions passing through
1240 # this point.
1241 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1243 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1244 bsr.l _imem_read_long # fetch the instruction words
1245 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1247 ############################
1249 clr.b SPCOND_FLG(%a6) # clear special condition flag
1251 # Separate opclass three (fpn-to-mem) ops since they have a different
1252 # stack frame and protocol.
1253 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1254 bne.w fu_out # yes
1256 # Separate packed opclass two instructions.
1257 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1258 cmpi.b %d0,&0x13 # is it the packed opclass two pattern?
1259 beq.w fu_in_pack # yes
1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
1263 # so, since the emulation routines re-create them anyways, zero exception field
1264 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1266 fmov.l &0x0,%fpcr # zero current control regs
1267 fmov.l &0x0,%fpsr
1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270 # precision format if the src format was single or double and the
1271 # source data type was an INF, NAN, DENORM, or UNNORM
1272 lea FP_SRC(%a6),%a0 # pass ptr to input
1273 bsr.l fix_skewed_ops
1275 # we don't know whether the src operand or the dst operand (or both) is the
1276 # UNNORM or DENORM. call the function that tags the operand type. if the
1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1279 bsr.l set_tag_x # tag the operand type
1280 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1281 bne.b fu_op2 # no
1282 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1284 fu_op2:
1285 mov.b %d0,STAG(%a6) # save src optype tag
# d0 is loaded with the dst register number before the monadic test; it is
# only consumed by load_fpn2 on the dyadic path.
1287 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1289 # bit five of the fp extension word separates the monadic and dyadic operations
1290 # at this point
1291 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1292 beq.b fu_extract # monadic
1293 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1294 beq.b fu_extract # yes, so it's monadic, too
1296 bsr.l load_fpn2 # load dst into FP_DST
1298 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1299 bsr.l set_tag_x # tag the operand type
1300 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1301 bne.b fu_op2_done # no
1302 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1303 fu_op2_done:
1304 mov.b %d0,DTAG(%a6) # save dst optype tag
1306 fu_extract:
1307 clr.l %d0
1308 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1310 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1312 lea FP_SRC(%a6),%a0
1313 lea FP_DST(%a6),%a1
1315 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1319 # Exceptions in order of precedence:
1320 # BSUN : none
1321 # SNAN : all dyadic ops
1322 # OPERR : fsqrt(-NORM)
1323 # OVFL : all except ftst,fcmp
1324 # UNFL : all except ftst,fcmp
1325 # DZ : fdiv
1326 # INEX2 : all except ftst,fcmp
1327 # INEX1 : none (packed doesn't go through here)
1330 # we determine the highest priority exception(if any) set by the
1331 # emulation routine that has also been enabled by the user.
1332 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
1333 bne.b fu_in_ena # some are enabled
1335 fu_in_cont:
1336 # fcmp and ftst do not store any result.
1337 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1338 andi.b &0x38,%d0 # extract bits 3-5
1339 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1340 beq.b fu_in_exit # yes
1342 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1343 bsr.l store_fpreg # store the result
1345 fu_in_exit:
1347 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1348 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1351 unlk %a6
1353 bra.l _fpsp_done
# fu_in_ena: at least one exception is enabled (d0 = FPCR_ENABLE byte).
# Masks the enables with the exceptions the emulation raised and dispatches
# to fu_in_exc when any enabled exception occurred.
1355 fu_in_ena:
1356 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1357 bfffo %d0{&24:&8},%d0 # find highest priority exception
1358 bne.b fu_in_exc # there is at least one set
1361 # No exceptions occurred that were also enabled. Now:
1363 # if (OVFL && ovfl_disabled && inexact_enabled) {
1364 # branch to _real_inex() (even if the result was exact!);
1365 # } else {
1366 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1367 # return;
# }
# note: in the code below the first case branches to fu_in_exc_ovfl, which
# selects tbl_except index 3 (status 0xe005) and leaves through
# fu_in_exc_exit / _fpsp_done rather than jumping to _real_inex directly.
1370 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371 beq.b fu_in_cont # no
1373 fu_in_ovflchk:
1374 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375 beq.b fu_in_cont # no
1376 bra.w fu_in_exc_ovfl # go insert overflow frame
1379 # An exception occurred and that exception was enabled:
1381 # shift enabled exception field into lo byte of d0;
1382 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384 # /*
1385 # * this is the case where we must call _real_inex() now or else
1386 # * there will be no other way to pass it the exceptional operand
1387 # */
1388 # call _real_inex();
1389 # } else {
1390 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
# }
# on entry d0 holds the bfffo result for bit field {24:8}, i.e. 24-31.
1393 fu_in_exc:
1394 subi.l &24,%d0 # fix offset to be 0-7
1395 cmpi.b %d0,&0x6 # is exception INEX? (6)
1396 bne.b fu_in_exc_exit # no
1398 # the enabled exception was inexact
1399 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400 bne.w fu_in_exc_unfl # yes
1401 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402 bne.w fu_in_exc_ovfl # yes
1404 # here, we insert the correct fsave status value into the fsave frame for the
1405 # corresponding exception. the operand in the fsave frame should be the original
1406 # src operand.
1407 fu_in_exc_exit:
1408 mov.l %d0,-(%sp) # save d0
1409 bsr.l funimp_skew # skew sgl or dbl inputs
1410 mov.l (%sp)+,%d0 # restore d0
1412 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1414 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1415 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1418 frestore FP_SRC(%a6) # restore src op
1420 unlk %a6
1422 bra.l _fpsp_done
# fsave status words indexed by the 0-7 exception number computed above.
# fu_in_exc_unfl selects index 4 (0xe003) and fu_in_exc_ovfl index 3 (0xe005).
1424 tbl_except:
1425 short 0xe000,0xe006,0xe004,0xe005
1426 short 0xe003,0xe002,0xe001,0xe001
# enabled inexact with a disabled underflow: use the underflow status entry
1428 fu_in_exc_unfl:
1429 mov.w &0x4,%d0
1430 bra.b fu_in_exc_exit
# enabled inexact with a disabled overflow: use the overflow status entry
1431 fu_in_exc_ovfl:
1432 mov.w &0x03,%d0
1433 bra.b fu_in_exc_exit
# If the input operand to this operation was opclass two and a single
# or double precision denorm, inf, or nan, the operand needs to be
# "corrected" in order to have the proper equivalent extended precision
# number.
#
# fix_skewed_ops():
#   In:   a0 = ptr to the extended precision operand (accessed through the
#              LOCAL_EX / LOCAL_HI / LOCAL_LO offsets)
#         a6 = exception frame; EXC_CMDREG(a6) supplies opclass/src format
#   Out:  the operand at (a0) is rewritten in place when it is skewed;
#         otherwise it is left untouched
#   Uses: d0 as scratch. norm() is called for the denorm cases and is
#         relied upon to preserve a0 and return the shift count in d0.
#
# Every path ends in its own rts. Without them a corrected denorm would
# fall through fso_zero/fso_infnan and have its new exponent wiped and
# forced to $7fff, and the double denorm path would run off the end of
# the routine.
	global		fix_skewed_ops
fix_skewed_ops:
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
	beq.b		fso_sgl			# yes
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
	beq.b		fso_dbl			# yes
	rts					# no

# single precision source: $3f80 = $3fff-$7f, $407f = $3fff+$80
fso_sgl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
	beq.b		fso_sgl_dnrm_zero	# yes
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
	beq.b		fso_infnan		# yes
	rts					# no

fso_sgl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	beq.b		fso_zero		# it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3f81,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts					# done; must not fall into fso_zero

fso_zero:
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
	rts					# done; must not fall into fso_infnan

fso_infnan:
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
	rts					# done; must not fall into fso_dbl

# double precision source: $3c00 = $3fff-$3ff, $43ff = $3fff+$400
fso_dbl:
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
	andi.w		&0x7fff,%d0		# strip sign
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
	beq.b		fso_dbl_dnrm_zero	# yes
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
	beq.b		fso_infnan		# yes
	rts					# no

fso_dbl_dnrm_zero:
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
	tst.l		LOCAL_LO(%a0)		# is it a zero?
	beq.b		fso_zero		# yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
	bsr.l		norm			# normalize mantissa
	neg.w		%d0			# -shft amt
	addi.w		&0x3c01,%d0		# adjust new exponent
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
	rts					# done; return to caller

1501 #################################################################
1503 # fmove out took an unimplemented data type exception.
1504 # the src operand is in FP_SRC. Call fout() to write out the result and
1505 # to determine which exceptions, if any, to take.
# Packed destinations are handed to fu_out_pack. Otherwise the source is
# tagged (DENORM, or whatever unnorm_fix returns), fout is called, and the
# exit is via fu_out_ena when any exception is enabled, else fu_out_done.
1506 fu_out:
1508 # Separate packed move outs from the UNNORM and DENORM move outs.
1509 bfextu EXC_CMDREG(%a6){&3:&3},%d0 # extract 3-bit format field
1510 cmpi.b %d0,&0x3 # packed (format 3)?
1511 beq.w fu_out_pack # yes
1512 cmpi.b %d0,&0x7 # packed (format 7)?
1513 beq.w fu_out_pack # yes
1516 # I'm not sure at this point what FPSR bits are valid for this instruction.
1517 # so, since the emulation routines re-create them anyways, zero exception field.
1518 # fmove out doesn't affect ccodes.
1519 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1521 fmov.l &0x0,%fpcr # zero current control regs
1522 fmov.l &0x0,%fpsr
1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525 # call here. just figure out what it is...
1526 mov.w FP_SRC_EX(%a6),%d0 # get exponent
1527 andi.w &0x7fff,%d0 # strip sign
1528 beq.b fu_out_denorm # it's a DENORM
1530 lea FP_SRC(%a6),%a0
1531 bsr.l unnorm_fix # yes; fix it
1533 mov.b %d0,STAG(%a6) # save the tag returned by unnorm_fix
1535 bra.b fu_out_cont
1536 fu_out_denorm:
1537 mov.b &DENORM,STAG(%a6)
1538 fu_out_cont:
1540 clr.l %d0
1541 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1543 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1545 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1546 bsr.l fout # call fmove out routine
1548 # Exceptions in order of precedence:
1549 # BSUN : none
1550 # SNAN : none
1551 # OPERR : fmove.{b,w,l} out of large UNNORM
1552 # OVFL : fmove.{s,d}
1553 # UNFL : fmove.{s,d,x}
1554 # DZ : none
1555 # INEX2 : all
1556 # INEX1 : none (packed doesn't travel through here)
1558 # determine the highest priority exception(if any) set by the
1559 # emulation routine that has also been enabled by the user.
1560 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1561 bne.w fu_out_ena # some are enabled
1563 fu_out_done:
1565 mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1567 # on extended precision opclass three instructions using pre-decrement or
1568 # post-increment addressing mode, the address register is not updated. if the
1569 # address register was the stack pointer used from user mode, then let's update
1570 # it here. if it was used from supervisor mode, then we have to handle this
1571 # as a special case.
1572 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1573 bne.b fu_out_done_s # supervisor
1575 mov.l EXC_A7(%a6),%a0 # restore a7
1576 mov.l %a0,%usp
1578 fu_out_done_cont:
1579 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1580 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1583 unlk %a6
1585 btst &0x7,(%sp) # is trace on?
1586 bne.b fu_out_trace # yes
1588 bra.l _fpsp_done
1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591 # ("fmov.x fpm,-(a7)") if so, the 12-byte result has to land where the
# exception frame currently sits: SR and PC are copied 0xc bytes lower and
# the result is then written over the old frame location.
1592 fu_out_done_s:
1593 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
1594 bne.b fu_out_done_cont # no; normal exit
1596 # the extended precision result is still in fp0. but, we need to save it
1597 # somewhere on the stack until we can copy it to its final resting place.
1598 # here, we're counting on the top of the stack to be the old place-holders
1599 # for fp0/fp1 which have already been restored. that way, we can write
1600 # over those destinations with the shifted stack frame.
1601 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1603 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1604 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
# a6 is reloaded without unlk, so sp still points at the bottom of the local
# frame; the frame fields below are therefore addressed as LOCAL_SIZE+off(sp).
1607 mov.l (%a6),%a6 # restore frame pointer
1609 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1612 # now, copy the result to the proper place on the stack
1613 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1617 add.l &LOCAL_SIZE-0x8,%sp # drop the local frame; sp -> shifted frame
1619 btst &0x7,(%sp) # is trace on?
1620 bne.b fu_out_trace # yes
1622 bra.l _fpsp_done
# fu_out_ena: at least one exception is enabled (d0 = FPCR_ENABLE byte).
# Dispatches to fu_out_exc when an enabled exception was raised by fout.
1624 fu_out_ena:
1625 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1626 bfffo %d0{&24:&8},%d0 # find highest priority exception
1627 bne.b fu_out_exc # there is at least one set
1629 # no exceptions were set.
1630 # if a disabled overflow occurred and inexact was enabled but the result
1631 # was exact, then a branch to _real_inex() is made (through fu_inex).
1632 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633 beq.w fu_out_done # no
1635 fu_out_ovflchk:
1636 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637 beq.w fu_out_done # no
1638 bra.w fu_inex # yes
1641 # The fp move out that took the "Unimplemented Data Type" exception was
1642 # being traced. Since the stack frames are similar, get the "current" PC
1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1645 # UNSUPP FRAME TRACE FRAME
1646 # ***************** *****************
1647 # * EA * * Current *
1648 # * * * PC *
1649 # ***************** *****************
1650 # * 0x3 * 0x0dc * * 0x2 * 0x024 *
1651 # ***************** *****************
1652 # * Next * * Next *
1653 # * PC * * PC *
1654 # ***************** *****************
1655 # * SR * * SR *
1656 # ***************** *****************
1658 fu_out_trace:
1659 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1660 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1661 bra.l _real_trace
1663 # an exception occurred and that exception was enabled.
# on entry d0 holds the bfffo result for bit field {24:8}, i.e. 24-31.
1664 fu_out_exc:
1665 subi.l &24,%d0 # fix offset to be 0-7
1667 # we don't mess with the existing fsave frame. just re-insert it and
1668 # jump to the "_real_{}()" handler...
1669 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1670 jmp (tbl_fu_out.b,%pc,%d0.w*1)
# 16-bit offsets relative to tbl_fu_out, one per exception number 0-7.
# the "can't happen" slots hold offset zero, i.e. they point back at
# tbl_fu_out itself.
1672 swbeg &0x8
1673 tbl_fu_out:
1674 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1675 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1676 short fu_operr - tbl_fu_out # OPERR
1677 short fu_ovfl - tbl_fu_out # OVFL
1678 short fu_unfl - tbl_fu_out # UNFL
1679 short tbl_fu_out - tbl_fu_out # DZ can't happen
1680 short fu_inex - tbl_fu_out # INEX2
1681 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684 # frestore it.
# fu_snan: turn the frame into an SNAN exception frame and exit through
# _real_snan. NOTE(review): the SNAN slot of tbl_fu_out does not point
# here, so no reference to this label is visible in this part of the file.
1685 fu_snan:
1686 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1687 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1690 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1691 mov.w &0xe006,2+FP_SRC(%a6) # set exc status in fsave frame
1693 frestore FP_SRC(%a6)
1695 unlk %a6
1698 bra.l _real_snan
# fu_operr: enabled OPERR on fmove out. The source operand is still in
# FP_SRC, so only the vector offset and fsave status word are rewritten
# before the frame is frestored and control passes to _real_operr.
1700 fu_operr:
1701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1705 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1706 mov.w &0xe004,2+FP_SRC(%a6) # set exc status in fsave frame
1708 frestore FP_SRC(%a6)
1710 unlk %a6
1713 bra.l _real_operr
# fu_ovfl: enabled OVFL on fmove out. fp1 (the EXOP) is written into the
# FP_SRC frame before fp0/fp1 are restored, then the frame is marked with
# the overflow status/vector and handed to _real_ovfl.
1715 fu_ovfl:
1716 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1718 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1719 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1722 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1723 mov.w &0xe005,2+FP_SRC(%a6) # set exc status in fsave frame
1725 frestore FP_SRC(%a6) # restore EXOP
1727 unlk %a6
1729 bra.l _real_ovfl
1731 # underflow can happen for extended precision. extended precision opclass
1732 # three instruction exceptions don't update the stack pointer. so, if the
1733 # exception occurred from user mode, then simply update a7 and exit normally.
1734 # if the exception occurred from supervisor mode, check if the <ea> mode was
# -(sp); that case needs the frame shifted and is handled in fu_unfl_s.
1735 fu_unfl:
1736 mov.l EXC_A6(%a6),(%a6) # restore a6
1738 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1739 bne.w fu_unfl_s # supervisor
1741 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1742 mov.l %a0,%usp # to or not...
1744 fu_unfl_cont:
1745 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1747 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1748 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1751 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1752 mov.w &0xe003,2+FP_SRC(%a6) # set exc status in fsave frame
1754 frestore FP_SRC(%a6) # restore EXOP
1756 unlk %a6
1758 bra.l _real_unfl
# fu_unfl_s: enabled UNFL on fmove out taken from supervisor mode.
# Unless the <ea> mode was -(sp) this rejoins fu_unfl_cont. Otherwise the
# result (fp0) is parked in FP_SRC and the EXOP (fp1) in FP_DST, the EXOP
# frame is frestored, the exception frame is copied 0xc bytes lower and the
# result is written where the frame used to be before jumping to _real_unfl.
1760 fu_unfl_s:
1761 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762 bne.b fu_unfl_cont
1764 # the extended precision result is still in fp0. but, we need to save it
1765 # somewhere on the stack until we can copy it to its final resting place
1766 # (where the exc frame is currently). make sure it's not at the top of the
1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
1768 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1769 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1771 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1772 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1775 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1776 mov.w &0xe003,2+FP_DST(%a6) # set exc status in the EXOP frame
1778 frestore FP_DST(%a6) # restore EXOP
# a6 is reloaded without unlk, so sp still points at the bottom of the local
# frame; the frame fields below are therefore addressed as LOCAL_SIZE+off(sp).
1780 mov.l (%a6),%a6 # restore frame pointer
1782 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1786 # now, copy the result to the proper place on the stack
1787 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1791 add.l &LOCAL_SIZE-0x8,%sp # drop the local frame; sp -> shifted frame
1793 bra.l _real_unfl
1795 # fmove in and out enter here.
# fu_inex: fp1 (the EXOP) is written into the FP_SRC frame, saved state is
# restored, the frame is marked with the inexact status/vector and control
# passes to _real_inex.
1796 fu_inex:
1797 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1799 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1800 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1801 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1803 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1804 mov.w &0xe001,2+FP_SRC(%a6) # set exc status in fsave frame
1806 frestore FP_SRC(%a6) # restore EXOP
1808 unlk %a6
1811 bra.l _real_inex
1813 #########################################################################
1814 #########################################################################
# fu_in_pack: packed-source opclass two path of _fpsp_unsupp.
# Fetches the packed operand with get_packed, tags source (and destination
# for dyadic ops), runs the routine selected from tbl_unsupp and, unless
# fu_in_ena_p takes over for an enabled exception, stores the result. On the
# way out the user a7 is written back from EXC_A7 (user mode), or the
# (a7)+ special case is checked (supervisor mode), and a pending trace is
# diverted to fu_trace_p.
1815 fu_in_pack:
1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
1819 # so, since the emulation routines re-create them anyways, zero exception field
# (the literal below is the same value as 0x00ff00ff)
1820 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1822 fmov.l &0x0,%fpcr # zero current control regs
1823 fmov.l &0x0,%fpsr
1825 bsr.l get_packed # fetch packed src operand
1827 lea FP_SRC(%a6),%a0 # pass ptr to src
1828 bsr.l set_tag_x # set src optype tag
1830 mov.b %d0,STAG(%a6) # save src optype tag
1832 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1834 # bit five of the fp extension word separates the monadic and dyadic operations
1835 # at this point
1836 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1837 beq.b fu_extract_p # monadic
1838 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1839 beq.b fu_extract_p # yes, so it's monadic, too
1841 bsr.l load_fpn2 # load dst into FP_DST
1843 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1844 bsr.l set_tag_x # tag the operand type
1845 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1846 bne.b fu_op2_done_p # no
1847 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1848 fu_op2_done_p:
1849 mov.b %d0,DTAG(%a6) # save dst optype tag
1851 fu_extract_p:
1852 clr.l %d0
1853 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1855 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1857 lea FP_SRC(%a6),%a0
1858 lea FP_DST(%a6),%a1
1860 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1864 # Exceptions in order of precedence:
1865 # BSUN : none
1866 # SNAN : all dyadic ops
1867 # OPERR : fsqrt(-NORM)
1868 # OVFL : all except ftst,fcmp
1869 # UNFL : all except ftst,fcmp
1870 # DZ : fdiv
1871 # INEX2 : all except ftst,fcmp
1872 # INEX1 : all
1875 # we determine the highest priority exception(if any) set by the
1876 # emulation routine that has also been enabled by the user.
1877 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1878 bne.w fu_in_ena_p # some are enabled
1880 fu_in_cont_p:
1881 # fcmp and ftst do not store any result.
1882 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1883 andi.b &0x38,%d0 # extract bits 3-5
1884 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1885 beq.b fu_in_exit_p # yes
1887 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888 bsr.l store_fpreg # store the result
1890 fu_in_exit_p:
1892 btst &0x5,EXC_SR(%a6) # user or supervisor?
1893 bne.w fu_in_exit_s_p # supervisor
1895 mov.l EXC_A7(%a6),%a0 # update user a7
1896 mov.l %a0,%usp
1898 fu_in_exit_cont_p:
1899 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1900 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1903 unlk %a6 # unravel stack frame
1905 btst &0x7,(%sp) # is trace on?
1906 bne.w fu_trace_p # yes
1908 bra.l _fpsp_done # exit to os
# fu_in_exit_s_p: supervisor-mode exit for the packed-source path when no
# exception frame has to be restored.
1910 # the exception occurred in supervisor mode. check to see if the
1911 # addressing mode was (a7)+. if so, we'll need to shift the
1912 # stack frame "up".
1913 fu_in_exit_s_p:
1914 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915 beq.b fu_in_exit_cont_p # no
1917 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1918 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1921 unlk %a6 # unravel stack frame
1923 # shift the stack frame "up". we don't really care about the <ea> field.
# the first two longwords of the frame are copied 0xc bytes higher and the
# stack pointer is advanced by the same 0xc bytes.
1924 mov.l 0x4(%sp),0x10(%sp)
1925 mov.l 0x0(%sp),0xc(%sp)
1926 add.l &0xc,%sp
1928 btst &0x7,(%sp) # is trace on?
1929 bne.w fu_trace_p # yes
1931 bra.l _fpsp_done # exit to os
# fu_in_ena_p: at least one exception is enabled in the FPCR (d0 = enable byte).
# Mask it with the exceptions actually raised and look for the highest
# priority one; if one is found continue at fu_in_exc_p.
1933 fu_in_ena_p:
1934 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1935 bfffo %d0{&24:&8},%d0 # find highest priority exception
1936 bne.b fu_in_exc_p # at least one was set
1939 # No exceptions occurred that were also enabled. Now:
1941 # if (OVFL && ovfl_disabled && inexact_enabled) {
1942 # branch to _real_inex() (even if the result was exact!);
1943 # } else {
1944 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1945 # return;
# }
1948 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1949 beq.w fu_in_cont_p # no
1951 fu_in_ovflchk_p:
1952 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953 beq.w fu_in_cont_p # no
1954 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1957 # An exception occurred and that exception was enabled:
1959 # shift enabled exception field into lo byte of d0;
1960 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962 # /*
1963 # * this is the case where we must call _real_inex() now or else
1964 # * there will be no other way to pass it the exceptional operand
1965 # */
1966 # call _real_inex();
1967 # } else {
1968 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
# fu_in_exc_p: an enabled exception was raised by the emulation routine.
# On entry d0 holds the bfffo result for the 8-bit exception field; it is
# converted to an index into tbl_except_p. An enabled inexact that coincides
# with a (disabled) underflow or overflow is redirected to the unfl/ovfl
# stubs; everything else falls into fu_in_exc_exit_p.
1971 fu_in_exc_p:
1972 subi.l &24,%d0 # fix offset to be 0-7
1973 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1974 blt.b fu_in_exc_exit_p # no
1976 # the enabled exception was inexact
1977 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978 bne.w fu_in_exc_unfl_p # yes
1979 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980 bne.w fu_in_exc_ovfl_p # yes
1982 # here, we insert the correct fsave status value into the fsave frame for the
1983 # corresponding exception. the operand in the fsave frame should be the original
1984 # src operand.
1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1988 fu_in_exc_exit_p:
1989 btst &0x5,EXC_SR(%a6) # user or supervisor?
1990 bne.w fu_in_exc_exit_s_p # supervisor
1992 mov.l EXC_A7(%a6),%a0 # update user a7
1993 mov.l %a0,%usp
1995 fu_in_exc_exit_cont_p:
1996 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status from table
1998 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1999 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2002 frestore FP_SRC(%a6) # restore src op
2004 unlk %a6 # unravel stack frame
2006 btst &0x7,(%sp) # is trace enabled?
2007 bne.w fu_trace_p # yes
2009 bra.l _fpsp_done
# tbl_except_p: fsave status words, indexed by the exception's position in the
# precedence order (0 = highest). The selected word is written to 2+FP_SRC
# before the frestore. Going by the values used elsewhere in this file the
# order is BSUN, SNAN, OPERR, OVFL, UNFL, DZ, INEX2, INEX1.
2011 tbl_except_p:
2012 short 0xe000,0xe006,0xe004,0xe005
2013 short 0xe003,0xe002,0xe001,0xe001
# fu_in_exc_ovfl_p / fu_in_exc_unfl_p: force the tbl_except_p index in d0 to
# the entry at position 3 (0xe005) or 4 (0xe003) respectively, then rejoin
# the common exception exit.
2015 fu_in_exc_ovfl_p:
2016 mov.w &0x3,%d0 # select tbl_except_p[3]
2017 bra.w fu_in_exc_exit_p
2019 fu_in_exc_unfl_p:
2020 mov.w &0x4,%d0 # select tbl_except_p[4]
2021 bra.w fu_in_exc_exit_p
# fu_in_exc_exit_s_p: supervisor-mode variant of the exception exit.
# If the mia7 condition flag is not set it rejoins fu_in_exc_exit_cont_p.
# Otherwise it restores the exceptional state itself and then shifts the
# stack frame "up" by 0xc bytes before leaving.
2023 fu_in_exc_exit_s_p:
2024 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
2025 beq.b fu_in_exc_exit_cont_p # no
2027 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status from table
2029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2033 frestore FP_SRC(%a6) # restore src op
2035 unlk %a6 # unravel stack frame
2037 # shift stack frame "up". who cares about <ea> field.
2038 mov.l 0x4(%sp),0x10(%sp)
2039 mov.l 0x0(%sp),0xc(%sp)
2040 add.l &0xc,%sp
2042 btst &0x7,(%sp) # is trace on?
2043 bne.b fu_trace_p # yes
2045 bra.l _fpsp_done # exit to os
2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2050 # trace stack frame then jump to _real_trace().
2052 # UNSUPP FRAME TRACE FRAME
2053 # ***************** *****************
2054 # * EA * * Current *
2055 # * * * PC *
2056 # ***************** *****************
2057 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2058 # ***************** *****************
2059 # * Next * * Next *
2060 # * PC * * PC *
2061 # ***************** *****************
2062 # * SR * * SR *
2063 # ***************** *****************
# fu_trace_p: convert the current stack frame into a trace frame in place.
# The format/vector-offset word becomes 0x2024 and the longword at 0x8(%sp)
# is overwritten with the FPIAR, then control goes to _real_trace.
2064 fu_trace_p:
2065 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2066 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2068 bra.l _real_trace
2070 #########################################################
2071 #########################################################
# fu_out_pack: emulate a move out to memory in packed format.
# Loads the source FP register into FP_SRC, tags it (fixing UNNORMs), calls
# fout to perform the move, then either dispatches to fu_out_ena_p when any
# exception is enabled or exits through _fpsp_done / fu_trace_p.
# The saved a6 slot at (%a6) is copied to EXC_A6 before the call and copied
# back afterwards ("in case a6 changes").
2072 fu_out_pack:
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyways, zero exception field.
2077 # fmove out doesn't affect ccodes.
2078 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2080 fmov.l &0x0,%fpcr # zero current control regs
2081 fmov.l &0x0,%fpsr
2083 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract 3-bit register field
2084 bsr.l load_fpn1 # presumably loads that FP reg into FP_SRC -- confirm in load_fpn1
2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2088 lea FP_SRC(%a6),%a0
2089 bsr.l set_tag_x # tag the operand type
2090 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2091 bne.b fu_op2_p # no
2092 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2094 fu_op2_p:
2095 mov.b %d0,STAG(%a6) # save src optype tag
2097 clr.l %d0
2098 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2100 lea FP_SRC(%a6),%a0 # pass ptr to src operand
2102 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2103 bsr.l fout # call fmove out routine
2105 # Exceptions in order of precedence:
2106 # BSUN : no
2107 # SNAN : yes
2108 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109 # OVFL : no
2110 # UNFL : no
2111 # DZ : no
2112 # INEX2 : yes
2113 # INEX1 : no
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2118 bne.w fu_out_ena_p # some are enabled
2120 fu_out_exit_p:
2121 mov.l EXC_A6(%a6),(%a6) # restore a6
2123 btst &0x5,EXC_SR(%a6) # user or supervisor?
2124 bne.b fu_out_exit_s_p # supervisor
2126 mov.l EXC_A7(%a6),%a0 # update user a7
2127 mov.l %a0,%usp
2129 fu_out_exit_cont_p:
2130 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2131 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2132 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2134 unlk %a6 # unravel stack frame
2136 btst &0x7,(%sp) # is trace on?
2137 bne.w fu_trace_p # yes
2139 bra.l _fpsp_done # exit to os
# fu_out_exit_s_p: supervisor-mode exit for the packed move out.
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2144 fu_out_exit_s_p:
2145 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2146 beq.b fu_out_exit_cont_p # no
2148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2152 mov.l (%a6),%a6 # restore frame pointer
# %sp has not been unlinked yet, so the frame is addressed as LOCAL_SIZE+...(%sp).
# copy the SR and PC longwords 0xc bytes lower.
2154 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2155 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2157 # now, copy the result to the proper place on the stack
2158 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2159 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2160 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2162 add.l &LOCAL_SIZE-0x8,%sp # drop the locals; %sp now addresses the moved frame
2164 btst &0x7,(%sp) # is trace on?
2165 bne.w fu_trace_p # yes
2167 bra.l _fpsp_done # exit to os
# fu_out_ena_p: some exception is enabled (d0 = FPCR enable byte).
# Keep only those that were also raised; if none, take the normal exit.
# Otherwise d0 is the raw bfffo offset and is compared against 0x1a:
# greater goes to the inexact handler, equal to the operand-error handler,
# and less falls into the SNAN handler.
2169 fu_out_ena_p:
2170 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2171 bfffo %d0{&24:&8},%d0 # find highest priority exception
2172 beq.w fu_out_exit_p # none enabled and set
2174 mov.l EXC_A6(%a6),(%a6) # restore a6
2176 # an exception occurred and that exception was enabled.
2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2178 fu_out_exc_p:
2179 cmpi.b %d0,&0x1a
2180 bgt.w fu_inex_p2
2181 beq.w fu_operr_p
2183 fu_snan_p:
2184 btst &0x5,EXC_SR(%a6) # user or supervisor?
2185 bne.b fu_snan_s_p # supervisor
2187 mov.l EXC_A7(%a6),%a0 # update user a7
2188 mov.l %a0,%usp
2189 bra.w fu_snan
# fu_snan_s_p: enabled SNAN on a packed move out executed in supervisor mode.
# Unless the special-condition flag equals mda7_flg, the generic fu_snan
# path is used.
2191 fu_snan_s_p:
2192 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2193 bne.w fu_snan
2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
2197 # can store the default result where the exception frame was.
2198 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2199 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2200 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2202 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2203 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2205 frestore FP_SRC(%a6) # restore src operand
2207 mov.l (%a6),%a6 # restore frame pointer
# move SR, PC/voff and EA longwords of the exception frame 0xc bytes lower
2209 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2210 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2211 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2213 # now, we copy the default result to its proper location
2214 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2215 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2216 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2218 add.l &LOCAL_SIZE-0x8,%sp # drop the locals; %sp now addresses the moved frame
2221 bra.l _real_snan
# fu_operr_p: enabled operand error on a packed move out.
# User mode: update the usp from EXC_A7 and use the generic fu_operr path.
# Supervisor mode: use fu_operr unless the special-condition flag equals
# mda7_flg, in which case the frame is moved and the result stored here.
2223 fu_operr_p:
2224 btst &0x5,EXC_SR(%a6) # user or supervisor?
2225 bne.w fu_operr_p_s # supervisor
2227 mov.l EXC_A7(%a6),%a0 # update user a7
2228 mov.l %a0,%usp
2229 bra.w fu_operr
2231 fu_operr_p_s:
2232 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2233 bne.w fu_operr
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2239 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2240 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2242 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2243 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2245 frestore FP_SRC(%a6) # restore src operand
2247 mov.l (%a6),%a6 # restore frame pointer
# move SR, PC/voff and EA longwords of the exception frame 0xc bytes lower
2249 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2250 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2251 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2253 # now, we copy the default result to its proper location
2254 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2255 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2256 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2258 add.l &LOCAL_SIZE-0x8,%sp # drop the locals; %sp now addresses the moved frame
2261 bra.l _real_operr
# fu_inex_p2: enabled inexact on a packed move out.
# User mode: update the usp from EXC_A7 and use the generic fu_inex path.
# Supervisor mode: use fu_inex unless the special-condition flag equals
# mda7_flg, in which case the frame is moved and the result stored here.
2263 fu_inex_p2:
2264 btst &0x5,EXC_SR(%a6) # user or supervisor?
2265 bne.w fu_inex_s_p2 # supervisor
2267 mov.l EXC_A7(%a6),%a0 # update user a7
2268 mov.l %a0,%usp
2269 bra.w fu_inex
2271 fu_inex_s_p2:
2272 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2273 bne.w fu_inex
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2279 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2280 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2282 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2283 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2285 frestore FP_SRC(%a6) # restore src operand
2287 mov.l (%a6),%a6 # restore frame pointer
# move SR, PC/voff and EA longwords of the exception frame 0xc bytes lower
2289 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2290 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2291 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2293 # now, we copy the default result to its proper location
2294 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2295 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2296 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2298 add.l &LOCAL_SIZE-0x8,%sp # drop the locals; %sp now addresses the moved frame
2301 bra.l _real_inex
2303 #########################################################################
# funimp_skew(): re-skew a single/double precision source operand held in
# FP_SRC before it is stuffed back into an fsave frame.
#
# In:    a6 = frame pointer; EXC_EXTWORD(%a6) holds the extension word and
#        FP_SRC(%a6) the extended-precision image of the source operand.
# Out:   FP_SRC(%a6) rewritten in place when the operand is a sgl/dbl denorm.
# Uses:  d0, d1 (and a0 plus whatever dnrm_lp clobbers on the dbl path).
#
# Each of the three paths ends in its own rts. Without them the dispatcher
# would run into the sgl code for every source format, the sgl path would run
# into the dbl path, and the dbl path would run into _mem_write2.
2306 # if we're stuffing a source operand back into an fsave frame then we
2307 # have to make sure that for single or double source operands that the
2308 # format stuffed is as weird as the hardware usually makes it.
2310 global funimp_skew
2311 funimp_skew:
2312 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313 cmpi.b %d0,&0x1 # was src sgl?
2314 beq.b funimp_skew_sgl # yes
2315 cmpi.b %d0,&0x5 # was src dbl?
2316 beq.b funimp_skew_dbl # yes
2317 rts # neither sgl nor dbl; nothing to skew
2319 funimp_skew_sgl:
2320 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2321 andi.w &0x7fff,%d0 # strip sign
2322 beq.b funimp_skew_sgl_not # exponent is zero; leave operand alone
2323 cmpi.w %d0,&0x3f80
2324 bgt.b funimp_skew_sgl_not # exponent above 0x3f80; leave operand alone
2325 neg.w %d0 # make exponent negative
2326 addi.w &0x3f81,%d0 # find amt to shift
2327 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2328 lsr.l %d0,%d1 # shift it
2329 bset &31,%d1 # set j-bit
2330 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2331 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2332 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2333 funimp_skew_sgl_not:
2334 rts
2336 funimp_skew_dbl:
2337 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2338 andi.w &0x7fff,%d0 # strip sign
2339 beq.b funimp_skew_dbl_not # exponent is zero; leave operand alone
2340 cmpi.w %d0,&0x3c00
2341 bgt.b funimp_skew_dbl_not # exponent above 0x3c00; leave operand alone
2343 tst.b FP_SRC_EX(%a6) # make "internal format"
2344 smi.b 0x2+FP_SRC(%a6) # keep the sign as a byte flag at offset 2
2345 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2346 clr.l %d0 # clear g,r,s
2347 lea FP_SRC(%a6),%a0 # pass ptr to src op
2348 mov.w &0x3c01,%d1 # pass denorm threshold
2349 bsr.l dnrm_lp # denorm it
2350 mov.w &0x3c00,%d0 # new exponent
2351 tst.b 0x2+FP_SRC(%a6) # is sign set?
2352 beq.b fss_dbl_denorm_done # no
2353 bset &15,%d0 # set sign
2354 fss_dbl_denorm_done:
2355 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2356 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2357 funimp_skew_dbl_not:
2358 rts
2360 #########################################################################
# _mem_write2(): write a 12-byte operand for the current instruction.
#
# In:    a0 = ptr to the 12 source bytes; a6 = frame pointer.
# User mode (S bit clear in the stacked SR): tail-branches to _dmem_write,
#        which takes over entirely.
# Supervisor mode: the 12 bytes are copied into FP_DST(%a6) instead and d1 is
#        cleared (presumably the "no error" status expected by callers --
#        confirm against _dmem_write's return convention).
#
# The final rts returns to the caller on the supervisor path; without it
# execution would run on into the _fpsp_effadd entry point.
2361 global _mem_write2
2362 _mem_write2:
2363 btst &0x5,EXC_SR(%a6) # user or supervisor?
2364 beq.l _dmem_write # user; let _dmem_write do the store
2365 mov.l 0x0(%a0),FP_DST_EX(%a6) # supervisor; stash operand in FP_DST
2366 mov.l 0x4(%a0),FP_DST_HI(%a6)
2367 mov.l 0x8(%a0),FP_DST_LO(%a6)
2368 clr.l %d1 # d1 = 0
2369 rts
2371 #########################################################################
2372 # XDEF **************************************************************** #
2373 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2374 # effective address" exception. #
2376 # This handler should be the first code executed upon taking the #
2377 # FP Unimplemented Effective Address exception in an operating #
2378 # system. #
2380 # XREF **************************************************************** #
2381 # _imem_read_long() - read instruction longword #
2382 # fix_skewed_ops() - adjust src operand in fsave frame #
2383 # set_tag_x() - determine optype of src/dst operands #
2384 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2385 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2386 # load_fpn2() - load dst operand from FP regfile #
2387 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2388 # decbin() - convert packed data to FP binary data #
2389 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2390 # _real_access() - "callout" for access error exception #
2391 # _mem_read() - read extended immediate operand from memory #
2392 # _fpsp_done() - "callout" for exit; work all done #
2393 # _real_trace() - "callout" for Trace enabled exception #
2394 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2395 # fmovm_ctrl() - emulate fmovm control instruction #
2397 # INPUT *************************************************************** #
2398 # - The system stack contains the "Unimplemented <ea>" stk frame #
2400 # OUTPUT ************************************************************** #
2401 # If access error: #
2402 # - The system stack is changed to an access error stack frame #
2403 # If FPU disabled: #
2404 # - The system stack is changed to an FPU disabled stack frame #
2405 # If Trace exception enabled: #
2406 # - The system stack is changed to a Trace exception stack frame #
2407 # Else: (normal case) #
2408 # - None (correct result has been stored as appropriate) #
2410 # ALGORITHM *********************************************************** #
2411 # This exception handles 3 types of operations: #
2412 # (1) FP Instructions using extended precision or packed immediate #
2413 # addressing mode. #
2414 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2417 # For immediate data operations, the data is read in w/ a #
2418 # _mem_read() "callout", converted to FP binary (if packed), and used #
2419 # as the source operand to the instruction specified by the instruction #
2420 # word. If no FP exception should be reported as a result of the #
2421 # emulation, then the result is stored to the destination register and #
2422 # the handler exits through _fpsp_done(). If an enabled exc has been #
2423 # signalled as a result of emulation, then an fsave state frame #
2424 # corresponding to the FP exception type must be entered into the 060 #
2425 # FPU before exiting. In either the enabled or disabled cases, we #
2426 # must also check if a Trace exception is pending, in which case, we #
2427 # must create a Trace exception stack frame from the current exception #
2428 # stack frame. If no Trace is pending, we simply exit through #
2429 # _fpsp_done(). #
2430 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2431 # decode and emulate the instruction. No FP exceptions can be pending #
2432 # as a result of this operation emulation. A Trace exception can be #
2433 # pending, though, which means the current stack frame must be changed #
2434 # to a Trace stack frame and an exit made through _real_trace(). #
2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2436 # was executed from supervisor mode, this handler must store the FP #
2437 # register file values to the system stack by itself since #
2438 # fmovm_dynamic() can't handle this. A normal exit is made through #
2439 # _fpsp_done(). #
2440 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2441 # Again, a Trace exception may be pending and an exit made through #
2442 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2444 # Before any of the above is attempted, it must be checked to #
2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2446 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2447 # has higher priority, we check the disabled bit in the PCR. If set, #
2448 # then we must create an 8 word "FPU disabled" exception stack frame #
2449 # from the current 4 word exception stack frame. This includes #
2450 # reproducing the effective address of the instruction to put on the #
2451 # new stack frame. #
2453 # In the process of all emulation work, if a _mem_read() #
2454 # "callout" returns a failing result indicating an access error, then #
2455 # we must create an access error stack frame from the current stack #
2456 # frame. This information includes a faulting address and a fault- #
2457 # status-longword. These are created within this handler. #
2459 #########################################################################
# _fpsp_effadd: entry point for the FP "Unimplemented effective address"
# exception. Checks the PCR for a disabled FPU first, then builds the local
# stack frame, saves d0-d1/a0-a1, the FP control registers and fp0-fp1,
# fetches the instruction longword and splits off the fmovem case.
2461 global _fpsp_effadd
2462 _fpsp_effadd:
2464 # This exception type takes priority over the "Line F Emulator"
2465 # exception. Therefore, the FPU could be disabled when entering here.
2466 # So, we must check to see if it's disabled and handle that case separately.
2467 mov.l %d0,-(%sp) # save d0
2468 movc %pcr,%d0 # load proc cr
2469 btst &0x1,%d0 # is FPU disabled?
2470 bne.w iea_disabled # yes
2471 mov.l (%sp)+,%d0 # restore d0
2473 link %a6,&-LOCAL_SIZE # init stack frame
2475 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2476 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2477 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2479 # PC of instruction that took the exception is the PC in the frame
2480 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2482 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2483 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2484 bsr.l _imem_read_long # fetch the instruction words
2485 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2487 #########################################################################
2489 tst.w %d0 # is operation fmovem?
2490 bmi.w iea_fmovm # yes
2493 # here, we will have:
2494 # fabs fdabs fsabs facos fmod
2495 # fadd fdadd fsadd fasin frem
2496 # fcmp fatan fscale
2497 # fdiv fddiv fsdiv fatanh fsin
2498 # fint fcos fsincos
2499 # fintrz fcosh fsinh
2500 # fmove fdmove fsmove fetox ftan
2501 # fmul fdmul fsmul fetoxm1 ftanh
2502 # fneg fdneg fsneg fgetexp ftentox
2503 # fsgldiv fgetman ftwotox
2504 # fsglmul flog10
2505 # fsqrt flog2
2506 # fsub fdsub fssub flogn
2507 # ftst flognp1
2508 # which can all use f<op>.{x,p}
2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
# iea_op: immediate-operand instruction (extended or packed source).
# Clears the saved FPSR fields selected by the mask below, then tests bit
# 0xa of the instruction longword in d0 to choose between the extended path
# (here) and the packed path (iea_op_pack). The extended path reads the 12
# immediate bytes into FP_SRC.
2511 iea_op:
2512 andi.l &0x00ff00ff,USER_FPSR(%a6)
2514 btst &0xa,%d0 # is src fmt x or p?
2515 bne.b iea_op_pack # packed
2518 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2519 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2520 mov.l &0xc,%d0 # pass: 12 bytes
2521 bsr.l _imem_read # read extended immediate
2523 tst.l %d1 # did ifetch fail?
2524 bne.w iea_iacc # yes
2526 bra.b iea_op_setsrc
# iea_op_pack: read a 12-byte packed immediate into FP_SRC.
# INF/NAN (exponent field all ones) and ZERO operands are passed on as they
# are; anything else is converted to extended precision by decbin and the
# result (fp0) written back to FP_SRC. Falls through into iea_op_setsrc.
2528 iea_op_pack:
2530 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2531 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2532 mov.l &0xc,%d0 # pass: 12 bytes
2533 bsr.l _imem_read # read packed operand
2535 tst.l %d1 # did ifetch fail?
2536 bne.w iea_iacc # yes
2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
2539 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2540 cmpi.w %d0,&0x7fff # INF or NAN?
2541 beq.b iea_op_setsrc # operand is an INF or NAN
2543 # The packed operand is a zero if the mantissa is all zero, else it's
2544 # a normal packed op.
2545 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2546 andi.b &0x0f,%d0 # clear all but last nybble
2547 bne.b iea_op_gp_not_spec # not a zero
2548 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2549 bne.b iea_op_gp_not_spec # not a zero
2550 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2551 beq.b iea_op_setsrc # operand is a ZERO
2552 iea_op_gp_not_spec:
2553 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2554 bsr.l decbin # convert to extended
2555 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
# iea_op_setsrc: the source operand is now in FP_SRC.
# Advances the extension word pointer past the 12 immediate bytes, tags the
# source (fixing UNNORMs), then decides from the extension byte at
# 1+EXC_CMDREG whether a destination operand must be loaded and whether the
# operation stores a result (STORE_FLG is set non-zero for ftst/fcmp).
# Every path ends up at iea_op_extract.
2557 iea_op_setsrc:
2558 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2560 # FP_SRC now holds the src operand.
2561 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2562 bsr.l set_tag_x # tag the operand type
2563 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2564 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2565 bne.b iea_op_getdst # no
2566 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2567 mov.b %d0,STAG(%a6) # set new optype tag
2568 iea_op_getdst:
2569 clr.b STORE_FLG(%a6) # clear "store result" boolean
2571 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2572 beq.b iea_op_extract # monadic
2573 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2574 bne.b iea_op_spec # yes
2576 iea_op_loaddst:
2577 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2578 bsr.l load_fpn2 # load dst operand
2580 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2581 bsr.l set_tag_x # tag the operand type
2582 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2583 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2584 bne.b iea_op_extract # no
2585 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2586 mov.b %d0,DTAG(%a6) # set new optype tag
2587 bra.b iea_op_extract
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2590 iea_op_spec:
2591 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2592 beq.b iea_op_extract # yes
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
2595 st STORE_FLG(%a6) # don't store a final result
2596 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2597 beq.b iea_op_loaddst # yes
# iea_op_extract: dispatch to the emulation routine and take the normal exit.
# d0 = rounding mode/precision, d1 = 7-bit extension used to index
# tbl_unsupp, a0/a1 = ptrs to the source/destination operands. After the
# call, any enabled exception diverts to iea_op_ena; otherwise the result is
# stored (unless STORE_FLG is set) and the handler exits with the stacked PC
# advanced to the next instruction.
2599 iea_op_extract:
2600 clr.l %d0
2601 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2603 mov.b 1+EXC_CMDREG(%a6),%d1
2604 andi.w &0x007f,%d1 # extract extension
2606 fmov.l &0x0,%fpcr # zero current control regs
2607 fmov.l &0x0,%fpsr
2609 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2610 lea FP_DST(%a6),%a1 # pass: ptr to dst op
2612 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2613 jsr (tbl_unsupp.l,%pc,%d1.l*1) # table entry is an offset from tbl_unsupp
2616 # Exceptions in order of precedence:
2617 # BSUN : none
2618 # SNAN : all operations
2619 # OPERR : all reg-reg or mem-reg operations that can normally operr
2620 # OVFL : same as OPERR
2621 # UNFL : same as OPERR
2622 # DZ : same as OPERR
2623 # INEX2 : same as OPERR
2624 # INEX1 : all packed immediate operations
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2630 bne.b iea_op_ena # some are enabled
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2634 iea_op_save:
2635 tst.b STORE_FLG(%a6) # does this op store a result?
2636 bne.b iea_op_exit1 # exit with no frestore
2638 iea_op_store:
2639 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640 bsr.l store_fpreg # store the result
2642 iea_op_exit1:
2643 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2646 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2647 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2650 unlk %a6 # unravel the frame
2652 btst &0x7,(%sp) # is trace on?
2653 bne.w iea_op_trace # yes
2655 bra.l _fpsp_done # exit to os
# iea_op_ena: some exception is enabled (d0 = FPCR enable byte).
# Keep only those that were also raised. If one is found go to iea_op_exc.
# Otherwise check for a disabled overflow with inexact enabled, which must
# still be reported through an overflow frame.
2657 iea_op_ena:
2658 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2659 bfffo %d0{&24:&8},%d0 # find highest priority exception
2660 bne.b iea_op_exc # at least one was set
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665 beq.b iea_op_save # no
2667 iea_op_ovfl:
2668 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669 beq.b iea_op_store # no
2670 bra.b iea_op_exc_ovfl # yes
# iea_op_exc .. iea_op_exit2: exit with an exceptional fsave frame.
# d0 (bfffo result) is converted to a table index, the matching fsave status
# word is written to 2+FP_SRC, and the frame is frestore'd on the way out.
2672 # an enabled exception occurred. we have to insert the exception type back into
2673 # the machine.
2674 iea_op_exc:
2675 subi.l &24,%d0 # fix offset to be 0-7
2676 cmpi.b %d0,&0x6 # is exception INEX?
2677 bne.b iea_op_exc_force # no
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2681 # underflow frame.
2682 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683 bne.b iea_op_exc_ovfl # yes
2684 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685 bne.b iea_op_exc_unfl # yes
2687 iea_op_exc_force:
2688 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status from table
2689 bra.b iea_op_exit2 # exit with frestore
# fsave status words indexed by exception precedence position (0 = highest)
2691 tbl_iea_except:
2692 short 0xe002, 0xe006, 0xe004, 0xe005
2693 short 0xe003, 0xe002, 0xe001, 0xe001
2695 iea_op_exc_ovfl:
2696 mov.w &0xe005,2+FP_SRC(%a6) # force overflow fsave status
2697 bra.b iea_op_exit2
2699 iea_op_exc_unfl:
2700 mov.w &0xe003,2+FP_SRC(%a6) # force underflow fsave status
2702 iea_op_exit2:
2703 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2706 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2707 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2710 frestore FP_SRC(%a6) # restore exceptional state
2712 unlk %a6 # unravel the frame
2714 btst &0x7,(%sp) # is trace on?
2715 bne.b iea_op_trace # yes
2717 bra.l _fpsp_done # exit to os
2720 # The opclass two instruction that took an "Unimplemented Effective Address"
2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
2722 # the trace stack frame then jump to _real_trace().
2724 # UNIMP EA FRAME TRACE FRAME
2725 # ***************** *****************
2726 # * 0x0 * 0x0f0 * * Current *
2727 # ***************** * PC *
2728 # * Current * *****************
2729 # * PC * * 0x2 * 0x024 *
2730 # ***************** *****************
2731 # * SR * * Next *
2732 # ***************** * PC *
2733 # *****************
2734 # * SR *
2735 # *****************
# iea_op_trace: turn the 4-word "Unimplemented <ea>" frame into a 6-word
# trace frame. The frame grows downward by one longword: the top longword is
# re-pushed, the word at 0x8(%sp) is copied to 0x4(%sp), the format/vector
# word is set to 0x2024 and the FPIAR is stored at 0x8(%sp).
2736 iea_op_trace:
2737 mov.l (%sp),-(%sp) # shift stack frame "down"
2738 mov.w 0x8(%sp),0x4(%sp)
2739 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2740 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2742 bra.l _real_trace
2744 #########################################################################
# iea_fmovm: fmovem emulation. Bit 14 of the instruction longword in d0
# selects between the control-register form (iea_fmovm_ctrl) and the data
# register form. In user mode the current usp is handed to fmovm_dynamic
# through EXC_A7 and written back afterwards.
2745 iea_fmovm:
2746 btst &14,%d0 # ctrl or data reg
2747 beq.w iea_fmovm_ctrl
2749 iea_fmovm_data:
2751 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2752 bne.b iea_fmovm_data_s # supervisor
2754 iea_fmovm_data_u:
2755 mov.l %usp,%a0
2756 mov.l %a0,EXC_A7(%a6) # store current a7
2757 bsr.l fmovm_dynamic # do dynamic fmovm
2758 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2759 mov.l %a0,%usp # update usp
2760 bra.w iea_fmovm_exit
# iea_fmovm_data_s: supervisor-mode dynamic fmovm.
# EXC_A7 is set to the address just past the frame's vector-offset word
# (presumably the supervisor stack pointer as it was before the exception).
# After fmovm_dynamic returns, SPCOND_FLG tells whether the addressing mode
# was -(a7) or (a7)+, both of which need extra stack fix-up.
2762 iea_fmovm_data_s:
2763 clr.b SPCOND_FLG(%a6) # clear special-condition flag
2764 lea 0x2+EXC_VOFF(%a6),%a0 # address just above the exception frame
2765 mov.l %a0,EXC_A7(%a6)
2766 bsr.l fmovm_dynamic # do dynamic fmovm
2768 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2769 beq.w iea_fmovm_data_predec # yes
2770 cmpi.b SPCOND_FLG(%a6),&mia7_flg # was ea mode (a7)+?
2771 bne.w iea_fmovm_exit # no; normal exit
# iea_fmovm_data_postinc: supervisor (a7)+ fix-up.
# A fresh 4-word frame (SR, next PC, format/voff 0x00f0) is written d0 bytes
# above the current one, and the address of that new frame is parked in the
# EXC_SR slot so that, after unlk, "mov.l (%sp)+,%sp" loads it as the new
# stack pointer (this relies on EXC_SR being the first slot above the saved
# a6 -- confirm against the frame equates).
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2776 # do it here.
2777 iea_fmovm_data_postinc:
2778 btst &0x7,EXC_SR(%a6) # is trace on?
2779 bne.b iea_fmovm_data_pi_trace # yes
2781 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR to new frame
2782 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # "Next PC" into new frame
2783 mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2785 lea (EXC_SR,%a6,%d0),%a0 # address of new frame
2786 mov.l %a0,EXC_SR(%a6) # park it where it can be popped into sp
2788 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2789 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2792 unlk %a6
2793 mov.l (%sp)+,%sp # switch to the relocated frame
2794 bra.l _fpsp_done
# iea_fmovm_data_pi_trace: same as the (a7)+ case above, but the instruction
# was being traced. A 6-word trace frame (format 0x2, voff 0x024) is built
# four bytes lower than the plain relocated frame would be, with
# EXC_EXTWPTR as the stacked PC and the original EXC_PC in the address field.
2796 iea_fmovm_data_pi_trace:
2797 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # copy SR into trace frame
2798 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # "Next PC"
2799 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2800 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # "Current PC"
2802 lea (EXC_SR-0x4,%a6,%d0),%a0 # a0 = address of trace frame
2803 mov.l %a0,EXC_SR(%a6) # leave it where sp points after unlk
2805 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2806 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2809 unlk %a6
2810 mov.l (%sp)+,%sp # switch to the trace frame
2811 bra.l _real_trace
2813 # right now, d0 = size and d1 = the strg (register select mask).
# supervisor "fmovm -(a7)" case. the exception frame (or a trace frame, if
# trace is on) is rebuilt "size" bytes lower in memory and the selected fp
# registers are then stored in the space between the new frame and the old
# one. strg and size are parked in the two bytes of EXC_VOFF while the
# user's registers are restored.
2814 iea_fmovm_data_predec:
2815 mov.b %d1,EXC_VOFF(%a6) # store strg
2816 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2818 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2822 mov.l (%a6),-(%sp) # make a copy of a6
2823 mov.l %d0,-(%sp) # save d0
2824 mov.l %d1,-(%sp) # save d1
2825 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2827 clr.l %d0
2828 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2829 neg.l %d0 # get negative of size
2831 btst &0x7,EXC_SR(%a6) # is trace enabled?
2832 beq.b iea_fmovm_data_p2 # no
# trace on: build a 6-word trace frame below the data area.
2834 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # copy SR
2835 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # "Current PC"
2836 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) # "Next PC" (pushed above)
2837 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
2839 pea (%a6,%d0) # create final sp
2840 bra.b iea_fmovm_data_p3
# trace off: build a 4-word frame below the data area.
2842 iea_fmovm_data_p2:
2843 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR
2844 mov.l (%sp)+,(EXC_PC,%a6,%d0) # Next PC (pushed above)
2845 mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
2847 pea (0x4,%a6,%d0) # create final sp
# walk the strg mask from bit 7 down to bit 0; each set bit stores one
# 12-byte extended register and advances d0 to the next slot.
2849 iea_fmovm_data_p3:
2850 clr.l %d1
2851 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2853 tst.b %d1 # mask bit 7 set?
2854 bpl.b fm_1 # no
2855 fmovm.x &0x80,(0x4+0x8,%a6,%d0) # store register selected by 0x80
2856 addi.l &0xc,%d0 # advance to next 12-byte slot
2857 fm_1:
2858 lsl.b &0x1,%d1 # next mask bit into bit 7
2859 bpl.b fm_2
2860 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2861 addi.l &0xc,%d0
2862 fm_2:
2863 lsl.b &0x1,%d1
2864 bpl.b fm_3
2865 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2866 addi.l &0xc,%d0
2867 fm_3:
2868 lsl.b &0x1,%d1
2869 bpl.b fm_4
2870 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2871 addi.l &0xc,%d0
2872 fm_4:
2873 lsl.b &0x1,%d1
2874 bpl.b fm_5
2875 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2876 addi.l &0xc,%d0
2877 fm_5:
2878 lsl.b &0x1,%d1
2879 bpl.b fm_6
2880 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2881 addi.l &0xc,%d0
2882 fm_6:
2883 lsl.b &0x1,%d1
2884 bpl.b fm_7
2885 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2886 addi.l &0xc,%d0
2887 fm_7:
2888 lsl.b &0x1,%d1
2889 bpl.b fm_end
2890 fmovm.x &0x01,(0x4+0x8,%a6,%d0) # last register; no advance needed
# the stack now holds: final sp, saved d1, saved d0, saved a6.
2891 fm_end:
2892 mov.l 0x4(%sp),%d1 # restore d1
2893 mov.l 0x8(%sp),%d0 # restore d0
2894 mov.l 0xc(%sp),%a6 # restore a6
2895 mov.l (%sp)+,%sp # switch to the rebuilt frame
2897 btst &0x7,(%sp) # is trace enabled?
2898 beq.l _fpsp_done # no
2899 bra.l _real_trace # yes
2901 #########################################################################
# iea_fmovm_ctrl: emulate the control-register fmovm by calling fmovm_ctrl,
# then fall into the common exit.
2902 iea_fmovm_ctrl:
2904 bsr.l fmovm_ctrl # load ctrl regs
# iea_fmovm_exit: common exit for the fmovm paths that did not have to move
# the exception frame. Restores the saved registers, then either advances
# the stacked PC to the next instruction and leaves through _fpsp_done, or
# (if trace is on) goes to iea_fmovm_trace.
2906 iea_fmovm_exit:
2907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2911 btst &0x7,EXC_SR(%a6) # is trace on?
2912 bne.b iea_fmovm_trace # yes
2914 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2916 unlk %a6 # unravel the frame
2918 bra.l _fpsp_done # exit to os
2921 # The control reg instruction that took an "Unimplemented Effective Address"
2922 # exception was being traced. The "Current PC" for the trace frame is the
2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924 # After fixing the stack frame, jump to _real_trace().
2926 # UNIMP EA FRAME TRACE FRAME
2927 # ***************** *****************
2928 # * 0x0 * 0x0f0 * * Current *
2929 # ***************** * PC *
2930 # * Current * *****************
2931 # * PC * * 0x2 * 0x024 *
2932 # ***************** *****************
2933 # * SR * * Next *
2934 # ***************** * PC *
2935 # *****************
2936 # * SR *
2937 # *****************
2938 # this ain't a pretty solution, but it works:
2939 # -restore a6 (not with unlk)
2940 # -shift stack frame down over where old a6 used to be
2941 # -add LOCAL_SIZE to stack pointer
# iea_fmovm_trace: build the trace frame in place. Once a6 has been reloaded,
# every access is sp-relative; LOCAL_SIZE(%sp) is the slot that held the
# saved a6 and becomes the first word (SR) of the 6-word trace frame.
2942 iea_fmovm_trace:
2943 mov.l (%a6),%a6 # restore frame pointer
2944 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR moves down over old a6
2945 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # "Current PC"
2946 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # "Next PC"
2947 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948 add.l &LOCAL_SIZE,%sp # clear stack frame
2950 bra.l _real_trace
2952 #########################################################################
2953 # The FPU is disabled and so we should really have taken the "Line
2954 # F Emulator" exception. So, here we create an 8-word stack frame
2955 # from our 4-word stack frame. This means we must calculate the length
2956 # of the faulting instruction to get the "next PC". This is trivial for
2957 # immediate operands but requires some extra work for fmovm dynamic
2958 # which can use most addressing modes.
# iea_disabled: work out the length of the faulting instruction, park it in
# the frame's VOFF slot, then expand the 4-word frame into an 8-word frame
# (format 0x4, voff 0x02c) and exit through _real_fpu_disabled().
2959 iea_disabled:
2960 mov.l (%sp)+,%d0 # restore d0
2962 link %a6,&-LOCAL_SIZE # init stack frame
2964 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2966 # PC of instruction that took the exception is the PC in the frame
2967 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2969 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2970 bsr.l _imem_read_long # fetch the instruction words
2971 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2973 tst.w %d0 # is instr fmovm?
2974 bmi.b iea_dis_fmovm # yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2977 iea_dis_immed:
2978 mov.l &0x10,%d0 # 16 bytes of instruction
2979 bra.b iea_dis_cont
2980 iea_dis_fmovm:
2981 btst &0xe,%d0 # is instr fmovm ctrl
2982 bne.b iea_dis_fmovm_data # no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984 bfextu %d0{&19:&3},%d1 # extract 3-bit register select field
2985 mov.l &0xc,%d0 # assume 12 bytes of instruction
2986 cmpi.b %d1,&0x7 # move all regs?
2987 bne.b iea_dis_cont # no; length stays at 12
2988 addq.l &0x4,%d0 # yes; one more longword makes 16
2989 bra.b iea_dis_cont
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2994 iea_dis_fmovm_data:
2995 clr.l %d0
2996 bsr.l fmovm_calc_ea
2997 mov.l EXC_EXTWPTR(%a6),%d0 # instruction ptr after ea calc
2998 sub.l EXC_PC(%a6),%d0 # minus start = instruction length
2999 iea_dis_cont:
3000 mov.w %d0,EXC_VOFF(%a6) # park instruction length in VOFF slot
3002 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3004 unlk %a6
3006 # here, we actually create the 8-word frame from the 4-word frame,
3007 # with the "next PC" as additional info.
3008 # the <ea> field is left as undefined.
3009 subq.l &0x8,%sp # make room for new stack
3010 mov.l %d0,-(%sp) # save d0
3011 mov.w 0xc(%sp),0x4(%sp) # move SR
3012 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3013 clr.l %d0
3014 mov.w 0x12(%sp),%d0 # fetch instruction length parked in VOFF
3015 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3016 add.l %d0,0x6(%sp) # make Next PC
3017 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3018 mov.l (%sp)+,%d0 # restore d0
3020 bra.l _real_fpu_disabled
3022 ##########
# iea_iacc: build an 8-word access error frame (format 0x4, voff 0x008) from
# the current 4-word frame and exit through _real_access(). The stacked PC
# is reused as the fault address and a fixed fslw value is stored. The FP
# register restores are skipped when bit 1 of the PCR is set.
# NOTE(review): bit 1 of the 68060 PCR is the FPU-disable bit per the
# processor manual -- confirm.
3024 iea_iacc:
3025 movc %pcr,%d0 # read processor configuration register
3026 btst &0x1,%d0 # PCR bit 1 set?
3027 bne.b iea_iacc_cont # yes; skip the FP restores
3028 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3030 iea_iacc_cont:
3031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3033 unlk %a6
3035 subq.w &0x8,%sp # make stack frame bigger
3036 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3037 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3038 mov.w &0x4008,0x6(%sp) # store voff
3039 mov.l 0x2(%sp),0x8(%sp) # store ea
3040 mov.l &0x09428001,0xc(%sp) # store fslw
# iea_acc_done: shared tail (also reached from iea_dacc). If the stacked SR
# shows supervisor mode, set bit 2 in the second byte of the fslw.
3042 iea_acc_done:
3043 btst &0x5,(%sp) # user or supervisor mode?
3044 beq.b iea_acc_done2 # user
3045 bset &0x2,0xd(%sp) # set supervisor TM bit
3047 iea_acc_done2:
3048 bra.l _real_access
# iea_dacc: build an 8-word access error frame (format 0x4, voff 0x008) from
# the current frame. a0 is stored as the fault address and d0 as the upper
# word of the fslw; the lower fslw word is 0x0001.
# NOTE(review): a0/d0 are presumably set up by the failing memory access
# path before branching here -- confirm against the callers.
3050 iea_dacc:
3051 lea -LOCAL_SIZE(%a6),%sp # reset sp to the base of the local frame
3053 movc %pcr,%d1 # read processor configuration register
3054 btst &0x1,%d1 # PCR bit 1 set?
3055 bne.b iea_dacc_cont # yes; skip the FP restores
3056 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3057 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058 iea_dacc_cont:
3059 mov.l (%a6),%a6 # restore frame pointer
# from here on everything is sp-relative; the new frame starts 8 bytes
# below the old one.
3061 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # move SR,hi(PC)
3062 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # move lo(PC)
3063 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff
3064 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea
3065 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store upper word of fslw
3066 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store lower word of fslw
3068 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069 add.w &LOCAL_SIZE-0x4,%sp # sp = start of new frame
3071 bra.b iea_acc_done
3073 #########################################################################
3074 # XDEF **************************************************************** #
3075 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3077 # This handler should be the first code executed upon taking the #
3078 # FP Operand Error exception in an operating system. #
3080 # XREF **************************************************************** #
3081 # _imem_read_long() - read instruction longword #
3082 # fix_skewed_ops() - adjust src operand in fsave frame #
3083 # _real_operr() - "callout" to operating system operr handler #
3084 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3085 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3086 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3088 # INPUT *************************************************************** #
3089 # - The system stack contains the FP Operr exception frame #
3090 # - The fsave frame contains the source operand #
3092 # OUTPUT ************************************************************** #
3093 # No access error: #
3094 # - The system stack is unchanged #
3095 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3097 # ALGORITHM *********************************************************** #
3098 # In a system where the FP Operr exception is enabled, the goal #
3099 # is to get to the handler specified at _real_operr(). But, on the 060, #
3100 # for opclass zero and two instruction taking this exception, the #
3101 # input operand in the fsave frame may be incorrect for some cases #
3102 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3103 # do just this and then exits through _real_operr(). #
3104 # For opclass 3 instructions, the 060 doesn't store the default #
3105 # operr result out to memory or data register file as it should. #
3106 # This code must emulate the move out before finally exiting through #
3107 # _real_operr(). The move out, if to memory, is performed using #
3108 # _mem_write() "callout" routines that may return a failing result. #
3109 # In this special case, the handler must exit through facc_out() #
3110 # which creates an access error stack frame from the current operr #
3111 # stack frame. #
3113 #########################################################################
3115 global _fpsp_operr
# _fpsp_operr: FP Operand Error entry point. Saves the fsave frame and the
# scratch registers, fetches the faulting instruction via the FPIAR, and
# either emulates the move-out (foperr_out) or fixes up the source operand
# before exiting through _real_operr().
3116 _fpsp_operr:
3118 link.w %a6,&-LOCAL_SIZE # init stack frame
3120 fsave FP_SRC(%a6) # grab the "busy" frame
3122 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3123 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3129 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3130 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3131 bsr.l _imem_read_long # fetch the instruction words
3132 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3134 ##############################################################################
3136 btst &13,%d0 # is instr an fmove out?
3137 bne.b foperr_out # fmove out
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3145 bsr.l fix_skewed_ops # fix src op
# common exit: restore everything saved on entry, reload the fsave frame,
# and pass control to the operating system's operr handler.
3147 foperr_exit:
3148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3152 frestore FP_SRC(%a6)
3154 unlk %a6
3155 bra.l _real_operr
3157 ########################################################################
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
# foperr_out: choose the default result for an fmove-out that took an
# operand error and leave it in L_SCR1(%a6):
#   - source is a NAN (max exponent, non-zero mantissa apart from the top
#     bit of the upper longword): the upper mantissa longword
#   - anything else, positive: 0x7fffffff
#   - anything else, negative: 0x80000000
# then dispatch on the destination format with d1 = <ea> mode,reg byte.
3174 foperr_out:
3176 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3177 andi.w &0x7fff,%d1 # strip sign
3178 cmpi.w %d1,&0x7fff # max exponent?
3179 bne.b foperr_out_not_qnan # no
3180 # the operand is either an infinity or a QNAN.
3181 tst.l FP_SRC_LO(%a6) # any bits in lower mantissa?
3182 bne.b foperr_out_qnan # yes; it is a NAN
3183 mov.l FP_SRC_HI(%a6),%d1
3184 andi.l &0x7fffffff,%d1 # ignore top mantissa bit
3185 beq.b foperr_out_not_qnan # rest is zero; infinity
3186 foperr_out_qnan:
3187 mov.l FP_SRC_HI(%a6),L_SCR1(%a6) # default result = upper mantissa
3188 bra.b foperr_out_jmp
3190 foperr_out_not_qnan:
3191 mov.l &0x7fffffff,%d1 # default result for a positive source
3192 tst.b FP_SRC_EX(%a6) # is source negative?
3193 bpl.b foperr_out_not_qnan2 # no
3194 addq.l &0x1,%d1 # yes; result becomes 0x80000000
3195 foperr_out_not_qnan2:
3196 mov.l %d1,L_SCR1(%a6) # save default result
3198 foperr_out_jmp:
3199 bfextu %d0{&19:&3},%d0 # extract dst format field
3200 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3201 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 # fetch 16-bit offset from table
3202 jmp (tbl_operr.b,%pc,%a0) # jump to tbl_operr + offset
# offsets are relative to tbl_operr; an offset of zero (tbl_operr - tbl_operr)
# marks a format that is not expected to reach this table.
3204 tbl_operr:
3205 short foperr_out_l - tbl_operr # long word integer
3206 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3207 short tbl_operr - tbl_operr # ext prec shouldn't happen
3208 short foperr_exit - tbl_operr # packed won't enter here
3209 short foperr_out_w - tbl_operr # word integer
3210 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3211 short foperr_out_b - tbl_operr # byte integer
3212 short tbl_operr - tbl_operr # packed won't enter here
# foperr_out_b: store the byte-sized default result (first byte of L_SCR1)
# to a data register or to memory at EXC_EA. d1 = <ea> mode,reg byte on
# entry. A failed memory write leaves through facc_out_b.
3214 foperr_out_b:
3215 mov.b L_SCR1(%a6),%d0 # load default result
3216 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3217 ble.b foperr_out_b_save_dn # yes
3218 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3219 bsr.l _dmem_write_byte # write the default result
3221 tst.l %d1 # did dstore fail?
3222 bne.l facc_out_b # yes
3224 bra.w foperr_exit
3225 foperr_out_b_save_dn:
3226 andi.w &0x0007,%d1 # keep register number only
3227 bsr.l store_dreg_b # store result to regfile
3228 bra.w foperr_exit
# foperr_out_w: store the word-sized default result (first word of L_SCR1)
# to a data register or to memory at EXC_EA. d1 = <ea> mode,reg byte on
# entry. A failed memory write leaves through facc_out_w.
3230 foperr_out_w:
3231 mov.w L_SCR1(%a6),%d0 # load default result
3232 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3233 ble.b foperr_out_w_save_dn # yes
3234 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3235 bsr.l _dmem_write_word # write the default result
3237 tst.l %d1 # did dstore fail?
3238 bne.l facc_out_w # yes
3240 bra.w foperr_exit
3241 foperr_out_w_save_dn:
3242 andi.w &0x0007,%d1 # keep register number only
3243 bsr.l store_dreg_w # store result to regfile
3244 bra.w foperr_exit
# foperr_out_l: store the longword default result (all of L_SCR1) to a data
# register or to memory at EXC_EA. d1 = <ea> mode,reg byte on entry. A
# failed memory write leaves through facc_out_l.
3246 foperr_out_l:
3247 mov.l L_SCR1(%a6),%d0 # load default result
3248 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3249 ble.b foperr_out_l_save_dn # yes
3250 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3251 bsr.l _dmem_write_long # write the default result
3253 tst.l %d1 # did dstore fail?
3254 bne.l facc_out_l # yes
3256 bra.w foperr_exit
3257 foperr_out_l_save_dn:
3258 andi.w &0x0007,%d1 # keep register number only
3259 bsr.l store_dreg_l # store result to regfile
3260 bra.w foperr_exit
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3266 # This handler should be the first code executed upon taking the #
3267 # FP Signalling NAN exception in an operating system. #
3269 # XREF **************************************************************** #
3270 # _imem_read_long() - read instruction longword #
3271 # fix_skewed_ops() - adjust src operand in fsave frame #
3272 # _real_snan() - "callout" to operating system SNAN handler #
3273 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3274 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3275 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3276 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3278 # INPUT *************************************************************** #
3279 # - The system stack contains the FP SNAN exception frame #
3280 # - The fsave frame contains the source operand #
3282 # OUTPUT ************************************************************** #
3283 # No access error: #
3284 # - The system stack is unchanged #
3285 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3287 # ALGORITHM *********************************************************** #
3288 # In a system where the FP SNAN exception is enabled, the goal #
3289 # is to get to the handler specified at _real_snan(). But, on the 060, #
3290 # for opclass zero and two instructions taking this exception, the #
3291 # input operand in the fsave frame may be incorrect for some cases #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3293 # do just this and then exits through _real_snan(). #
3294 # For opclass 3 instructions, the 060 doesn't store the default #
3295 # SNAN result out to memory or data register file as it should. #
3296 # This code must emulate the move out before finally exiting through #
3297 # _real_snan(). The move out, if to memory, is performed using #
3298 # _mem_write() "callout" routines that may return a failing result. #
3299 # In this special case, the handler must exit through facc_out() #
3300 # which creates an access error stack frame from the current SNAN #
3301 # stack frame. #
3302 # For the case of an extended precision opclass 3 instruction, #
3303 # if the effective addressing mode was -() or ()+, then the address #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3305 # was -(a7) from supervisor mode, then the exception frame currently #
3306 # on the system stack must be carefully moved "down" to make room #
3307 # for the operand being moved. #
3309 #########################################################################
3311 global _fpsp_snan
# _fpsp_snan: FP Signalling NAN entry point. Saves the fsave frame and the
# scratch registers, fetches the faulting instruction via the FPIAR, and
# either emulates the move-out (fsnan_out) or fixes up the source operand
# before exiting through _real_snan().
3312 _fpsp_snan:
3314 link.w %a6,&-LOCAL_SIZE # init stack frame
3316 fsave FP_SRC(%a6) # grab the "busy" frame
3318 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3319 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3325 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3326 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3327 bsr.l _imem_read_long # fetch the instruction words
3328 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3330 ##############################################################################
3332 btst &13,%d0 # is instr an fmove out?
3333 bne.w fsnan_out # fmove out
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339 # fixed here.
3340 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3341 bsr.l fix_skewed_ops # fix src op
# common exit: restore everything saved on entry, reload the fsave frame,
# and pass control to the operating system's SNAN handler.
3343 fsnan_exit:
3344 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3348 frestore FP_SRC(%a6)
3350 unlk %a6
3351 bra.l _real_snan
3353 ########################################################################
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3360 # byte, word, long, and packed destination format operations can pass
3361 # through here. since packed format operations already were handled by
3362 # fpsp_unsupp(), then we need to do nothing else for them here.
3363 # for byte, word, and long, we take the upper byte, word, or longword of
3364 # the SNAN mantissa, set the SNAN bit, and store it to the data register or
3365 # to the stacked effective address. sgl, dbl, and ext have their own routines.
# fsnan_out: dispatch an fmove-out that took an SNAN exception on its
# destination format field, with d1 = <ea> mode,reg byte for the handler.
3367 fsnan_out:
3369 bfextu %d0{&19:&3},%d0 # extract dst format field
3370 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3371 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch 16-bit offset from table
3372 jmp (tbl_snan.b,%pc,%a0) # jump to tbl_snan + offset
# offsets are relative to tbl_snan; an offset of zero (tbl_snan - tbl_snan)
# marks a format that is not expected to reach this table.
3374 tbl_snan:
3375 short fsnan_out_l - tbl_snan # long word integer
3376 short fsnan_out_s - tbl_snan # single precision
3377 short fsnan_out_x - tbl_snan # extended precision
3378 short tbl_snan - tbl_snan # packed needs no help
3379 short fsnan_out_w - tbl_snan # word integer
3380 short fsnan_out_d - tbl_snan # double precision
3381 short fsnan_out_b - tbl_snan # byte integer
3382 short tbl_snan - tbl_snan # packed needs no help
# fsnan_out_b: default byte result = first byte of the SNAN's upper mantissa
# longword with bit 6 set. Stored to a data register or to memory at EXC_EA.
# d1 = <ea> mode,reg byte on entry; a failed write leaves via facc_out_b.
3384 fsnan_out_b:
3385 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3386 bset &6,%d0 # set SNAN bit
3387 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3388 ble.b fsnan_out_b_dn # yes
3389 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3390 bsr.l _dmem_write_byte # write the default result
3392 tst.l %d1 # did dstore fail?
3393 bne.l facc_out_b # yes
3395 bra.w fsnan_exit
3396 fsnan_out_b_dn:
3397 andi.w &0x0007,%d1 # keep register number only
3398 bsr.l store_dreg_b # store result to regfile
3399 bra.w fsnan_exit
# fsnan_out_w: default word result = first word of the SNAN's upper mantissa
# longword with bit 14 set. Stored to a data register or to memory at EXC_EA.
# d1 = <ea> mode,reg byte on entry; a failed write leaves via facc_out_w.
3401 fsnan_out_w:
3402 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3403 bset &14,%d0 # set SNAN bit
3404 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3405 ble.b fsnan_out_w_dn # yes
3406 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3407 bsr.l _dmem_write_word # write the default result
3409 tst.l %d1 # did dstore fail?
3410 bne.l facc_out_w # yes
3412 bra.w fsnan_exit
3413 fsnan_out_w_dn:
3414 andi.w &0x0007,%d1 # keep register number only
3415 bsr.l store_dreg_w # store result to regfile
3416 bra.w fsnan_exit
# fsnan_out_l: default longword result = the SNAN's upper mantissa longword
# with bit 30 set. Stored to a data register or to memory at EXC_EA.
# d1 = <ea> mode,reg byte on entry; a failed write leaves via facc_out_l.
3418 fsnan_out_l:
3419 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3420 bset &30,%d0 # set SNAN bit
3421 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3422 ble.b fsnan_out_l_dn # yes
3423 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3424 bsr.l _dmem_write_long # write the default result
3426 tst.l %d1 # did dstore fail?
3427 bne.l facc_out_l # yes
3429 bra.w fsnan_exit
3430 fsnan_out_l_dn:
3431 andi.w &0x0007,%d1 # keep register number only
3432 bsr.l store_dreg_l # store result to regfile
3433 bra.w fsnan_exit
# fsnan_out_s: build the single precision default result
#   sign | 0x7fc00000 | (upper mantissa longword >> 8)
# and store it to memory at EXC_EA or to a data register. d1 = <ea> mode,reg
# byte on entry. Note that the data register path is labelled
# fsnan_out_d_dn although it handles the single precision case.
3435 fsnan_out_s:
3436 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3437 ble.b fsnan_out_d_dn # yes
3438 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3439 andi.l &0x80000000,%d0 # keep sign
3440 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3441 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3442 lsr.l &0x8,%d1 # shift mantissa for sgl
3443 or.l %d1,%d0 # create sgl SNAN
3444 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3445 bsr.l _dmem_write_long # write the default result
3447 tst.l %d1 # did dstore fail?
3448 bne.l facc_out_l # yes
3450 bra.w fsnan_exit
# data register destination: same result, but d1 (the <ea> byte) has to
# survive the mantissa shift, so it is parked on the stack meanwhile.
3451 fsnan_out_d_dn:
3452 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3453 andi.l &0x80000000,%d0 # keep sign
3454 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3455 mov.l %d1,-(%sp) # save <ea> mode,reg
3456 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3457 lsr.l &0x8,%d1 # shift mantissa for sgl
3458 or.l %d1,%d0 # create sgl SNAN
3459 mov.l (%sp)+,%d1 # restore <ea> mode,reg
3460 andi.w &0x0007,%d1 # keep register number only
3461 bsr.l store_dreg_l # store result to regfile
3462 bra.w fsnan_exit
# fsnan_out_d: build the double precision default result in FP_SCR0 and
# write its 8 bytes to memory at EXC_EA:
#   hi longword = sign | 0x7ff80000 | (upper mantissa >> 11)
#   lo longword = (low 11 bits of upper mantissa, moved to the top)
#                 | (lower mantissa >> 11)
# A failed write leaves through facc_out_d.
3464 fsnan_out_d:
3465 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3466 andi.l &0x80000000,%d0 # keep sign
3467 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3468 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3469 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3470 mov.l &11,%d0 # load shift amt
3471 lsr.l %d0,%d1 # upper mantissa >> 11
3472 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3473 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3474 andi.l &0x000007ff,%d1 # keep the 11 bits shifted out above
3475 ror.l %d0,%d1 # rotate them into the top 11 bits
3476 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3477 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3478 lsr.l %d0,%d1 # lower mantissa >> 11
3479 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3480 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3481 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3482 movq.l &0x8,%d0 # pass: size of 8 bytes
3483 bsr.l _dmem_write # write the default result
3485 tst.l %d1 # did dstore fail?
3486 bne.l facc_out_d # yes
3488 bra.w fsnan_exit
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
# fsnan_out_x: build the extended precision default result in FP_SCR0 (the
# source SNAN with bit 30 of the upper mantissa longword set), recompute the
# <ea> with _calc_ea_fout, and write the 12 bytes out. For "-(a7)" from
# supervisor mode the exception frame itself is moved down 12 bytes and the
# operand is placed in the gap.
3493 fsnan_out_x:
3494 clr.b SPCOND_FLG(%a6) # clear special case flag
3496 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent
3497 clr.w 2+FP_SCR0(%a6) # zero the word after the exponent
3498 mov.l FP_SRC_HI(%a6),%d0 # load upper mantissa
3499 bset &30,%d0 # set SNAN bit
3500 mov.l %d0,FP_SCR0_HI(%a6)
3501 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lower mantissa
3503 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3504 bne.b fsnan_out_x_s # yes
3506 mov.l %usp,%a0 # fetch user stack pointer
3507 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3508 mov.l (%a6),EXC_A6(%a6) # copy saved a6 to EXC_A6 for calc_ea()
3510 bsr.l _calc_ea_fout # find the correct ea,update An
3511 mov.l %a0,%a1 # pass: dst addr
3512 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3514 mov.l EXC_A7(%a6),%a0
3515 mov.l %a0,%usp # restore user stack pointer
3516 mov.l EXC_A6(%a6),(%a6) # write back possibly updated a6
3518 fsnan_out_x_save:
3519 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3520 movq.l &0xc,%d0 # pass: size of extended
3521 bsr.l _dmem_write # write the default result
3523 tst.l %d1 # did dstore fail?
3524 bne.l facc_out_x # yes
3526 bra.w fsnan_exit
3528 fsnan_out_x_s:
3529 mov.l (%a6),EXC_A6(%a6) # copy saved a6 to EXC_A6 for calc_ea()
3531 bsr.l _calc_ea_fout # find the correct ea,update An
3532 mov.l %a0,%a1 # pass: dst addr
3533 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3535 mov.l EXC_A6(%a6),(%a6) # write back possibly updated a6
3537 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538 bne.b fsnan_out_x_save # no
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3541 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3542 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3545 frestore FP_SRC(%a6)
3547 mov.l EXC_A6(%a6),%a6 # restore frame pointer
# everything below is sp-relative. first move the three longwords of the
# exception frame down by 12 bytes ...
3549 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
# ... then drop the 12-byte operand into the space the frame just vacated.
3553 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3557 add.l &LOCAL_SIZE-0x8,%sp # sp = start of the moved frame
3559 bra.l _real_snan
3561 #########################################################################
3562 # XDEF **************************************************************** #
3563 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3565 # This handler should be the first code executed upon taking the #
3566 # FP Inexact exception in an operating system. #
3568 # XREF **************************************************************** #
3569 # _imem_read_long() - read instruction longword #
3570 # fix_skewed_ops() - adjust src operand in fsave frame #
3571 # set_tag_x() - determine optype of src/dst operands #
3572 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3573 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3574 # load_fpn2() - load dst operand from FP regfile #
3575 # smovcr() - emulate an "fmovcr" instruction #
3576 # fout() - emulate an opclass 3 instruction #
3577 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3578 # _real_inex() - "callout" to operating system inexact handler #
3580 # INPUT *************************************************************** #
3581 # - The system stack contains the FP Inexact exception frame #
3582 # - The fsave frame contains the source operand #
3584 # OUTPUT ************************************************************** #
3585 # - The system stack is unchanged #
3586 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3588 # ALGORITHM *********************************************************** #
3589 # In a system where the FP Inexact exception is enabled, the goal #
3590 # is to get to the handler specified at _real_inex(). But, on the 060, #
3591 # for opclass zero and two instruction taking this exception, the #
3592 # hardware doesn't store the correct result to the destination FP #
3593 # register as did the '040 and '881/2. This handler must emulate the #
3594 # instruction in order to get this value and then store it to the #
3595 # correct register before calling _real_inex(). #
3596 # For opclass 3 instructions, the 060 doesn't store the default #
3597 # inexact result out to memory or data register file as it should. #
3598 # This code must emulate the move out by calling fout() before finally #
3599 # exiting through _real_inex(). #
3601 #########################################################################
3603 global _fpsp_inex
# _fpsp_inex: FP Inexact entry point. Saves the fsave frame and the scratch
# registers and fetches the faulting instruction via the FPIAR. A move-out
# goes to finex_out; anything else is emulated through tbl_unsupp (or
# smovcr for fmovecr), the result in fp0 is stored to the destination FP
# register, and the handler exits through _real_inex().
3604 _fpsp_inex:
3606 link.w %a6,&-LOCAL_SIZE # init stack frame
3608 fsave FP_SRC(%a6) # grab the "busy" frame
3610 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3611 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3614 # the FPIAR holds the "current PC" of the faulting instruction
3615 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3617 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3618 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3619 bsr.l _imem_read_long # fetch the instruction words
3620 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3622 ##############################################################################
3624 btst &13,%d0 # is instr an fmove out?
3625 bne.w finex_out # fmove out
3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629 # longword integer directly into the upper longword of the mantissa along
3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3631 bfextu %d0{&19:&3},%d0 # fetch instr size
3632 bne.b finex_cont # instr size is not long
3633 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3634 bne.b finex_cont # no
3635 fmov.l &0x0,%fpcr # zero the fpcr for the conversion
3636 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3637 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3638 mov.w &0xe001,0x2+FP_SRC(%a6)
3640 finex_cont:
3641 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3642 bsr.l fix_skewed_ops # fix src op
3644 # Here, we zero the ccode and exception byte field since we're going to
3645 # emulate the whole instruction. Notice, though, that we don't kill the
3646 # INEX1 bit. This is because a packed op has long since been converted
3647 # to extended before arriving here. Therefore, we need to retain the
3648 # INEX1 bit from when the operand was first converted.
3649 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3651 fmov.l &0x0,%fpcr # zero current control regs
3652 fmov.l &0x0,%fpsr
3654 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655 cmpi.b %d1,&0x17 # is op an fmovecr?
3656 beq.w finex_fmovcr # yes
3658 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3659 bsr.l set_tag_x # tag the operand type
3660 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3662 # bits four and five of the fp extension word separate the monadic and dyadic
3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664 # will never take this exception, but fsincos will.
3665 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3666 beq.b finex_extract # monadic
3668 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3669 bne.b finex_extract # yes
3671 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672 bsr.l load_fpn2 # load dst into FP_DST
3674 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3675 bsr.l set_tag_x # tag the operand type
3676 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3677 bne.b finex_op2_done # no
3678 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3679 finex_op2_done:
3680 mov.b %d0,DTAG(%a6) # save dst optype tag
# call the emulation routine: d0 = rnd prec/mode, a0 = src, a1 = dst,
# routine selected from tbl_unsupp by the low 7 bits of the command word.
3682 finex_extract:
3683 clr.l %d0
3684 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3686 mov.b 1+EXC_CMDREG(%a6),%d1
3687 andi.w &0x007f,%d1 # extract extension
3689 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3690 lea FP_DST(%a6),%a1 # pass: ptr to dst op
3692 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693 jsr (tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + offset
3695 # the operation has been emulated. the result is in fp0.
3696 finex_save:
3697 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst register number
3698 bsr.l store_fpreg # store result to that FP register
3700 finex_exit:
3701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3705 frestore FP_SRC(%a6)
3707 unlk %a6
3708 bra.l _real_inex
# fmovecr: emulate with smovcr, then store the result like any other op.
3710 finex_fmovcr:
3711 clr.l %d0
3712 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3713 mov.b 1+EXC_CMDREG(%a6),%d1
3714 andi.l &0x0000007f,%d1 # pass rom offset
3715 bsr.l smovcr
3716 bra.b finex_save
3718 ########################################################################
3721 # the hardware does not save the default result to memory on enabled
3722 # inexact exceptions. we do this here before passing control to
3723 # the user inexact handler.
3725 # byte, word, and long destination format operations can pass
3726 # through here. so can double and single precision.
3727 # although packed opclass three operations can take inexact
3728 # exceptions, they won't pass through here since they are caught
3729 # first by the unsupported data format exception handler. that handler
3730 # sends them directly to _real_inex() if necessary.
3732 finex_out:
3734 mov.b &NORM,STAG(%a6) # src is a NORM
3736 clr.l %d0
3737 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3739 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3741 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3743 bsr.l fout # store the default result
3745 bra.b finex_exit
3747 #########################################################################
3748 # XDEF **************************************************************** #
3749 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3751 # This handler should be the first code executed upon taking #
3752 # the FP DZ exception in an operating system. #
3754 # XREF **************************************************************** #
3755 # _imem_read_long() - read instruction longword from memory #
3756 # fix_skewed_ops() - adjust fsave operand #
3757 # _real_dz() - "callout" exit point from FP DZ handler #
3759 # INPUT *************************************************************** #
3760 # - The system stack contains the FP DZ exception stack. #
3761 # - The fsave frame contains the source operand. #
3763 # OUTPUT ************************************************************** #
3764 # - The system stack contains the FP DZ exception stack. #
3765 # - The fsave frame contains the adjusted source operand. #
3767 # ALGORITHM *********************************************************** #
3768 # In a system where the DZ exception is enabled, the goal is to #
3769 # get to the handler specified at _real_dz(). But, on the 060, when the #
3770 # exception is taken, the input operand in the fsave state frame may #
3771 # be incorrect for some cases and need to be adjusted. So, this package #
3772 # adjusts the operand using fix_skewed_ops() and then branches to #
3773 # _real_dz(). #
3775 #########################################################################
3777 global _fpsp_dz
3778 _fpsp_dz:
# Entry point for the FP divide-by-zero exception. Saves the FPU/integer
# state, re-reads the faulting instruction, un-skews the source operand held
# in the fsave frame, restores everything and exits through _real_dz().
3780 link.w %a6,&-LOCAL_SIZE # init stack frame
3782 fsave FP_SRC(%a6) # grab the "busy" frame
3784 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3785 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3788 # the FPIAR holds the "current PC" of the faulting instruction
3789 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3791 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3792 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3793 bsr.l _imem_read_long # fetch the instruction words
# NOTE(review): no status from _imem_read_long is tested here (the fdbcc path
# tests d1 after _imem_read_word) -- presumably the fetch cannot fail at this
# point; confirm against the callout contract.
3794 mov.l %d0,EXC_OPWORD(%a6) # keep the fetched instruction longword
3796 ##############################################################################
3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800 # this would be the case for opclass two operations with a source zero
3801 # in the sgl or dbl format.
3802 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3803 bsr.l fix_skewed_ops # fix src op
3805 fdz_exit:
# undo the saves made on entry, in reverse order, then put the (possibly
# adjusted) fsave frame back into the FPU before leaving.
3806 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3807 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3810 frestore FP_SRC(%a6) # reload the adjusted fsave frame
3812 unlk %a6
3813 bra.l _real_dz # exit through the DZ "callout"
3815 #########################################################################
3816 # XDEF **************************************************************** #
3817 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
3819 # This handler should be the first code executed upon taking the #
3820 # "Line F Emulator" exception in an operating system. #
3822 # XREF **************************************************************** #
3823 # _fpsp_unimp() - handle "FP Unimplemented" exceptions #
3824 # _real_fpu_disabled() - handle "FPU disabled" exceptions #
3825 # _real_fline() - handle "FLINE" exceptions #
3826 # _imem_read_long() - read instruction longword #
3828 # INPUT *************************************************************** #
3829 # - The system stack contains a "Line F Emulator" exception #
3830 # stack frame. #
3832 # OUTPUT ************************************************************** #
3833 # - The system stack is unchanged #
3835 # ALGORITHM *********************************************************** #
3836 # When a "Line F Emulator" exception occurs, there are 3 possible #
3837 # exception types, denoted by the exception stack frame format number: #
3838 # (1) FPU unimplemented instruction (6 word stack frame) #
3839 # (2) FPU disabled (8 word stack frame) #
3840 # (3) Line F (4 word stack frame) #
3842 # This module determines which and forks the flow off to the #
3843 # appropriate "callout" (for "disabled" and "Line F") or to the #
3844 # correct emulation code (for "FPU unimplemented"). #
3845 # This code also must check for "fmovecr" instructions w/ a #
3846 # non-zero <ea> field. These may get flagged as "Line F" but should #
3847 # really be flagged as "FPU Unimplemented". (This is a "feature" on #
3848 # the '060.) #
3850 #########################################################################
3852 global _fpsp_fline
3853 _fpsp_fline:
# Entry point for the "Line F Emulator" exception. Dispatches on the stacked
# format/vector word: FP unimplemented -> _fpsp_unimp, FPU disabled ->
# _real_fpu_disabled, otherwise a plain F-Line frame, which is only handled
# here if the instruction is an fmovecr; everything else goes to _real_fline.
3855 # check to see if this exception is a "FP Unimplemented Instruction"
3856 # exception. if so, branch directly to that handler's entry point.
3857 cmpi.w 0x6(%sp),&0x202c # fmt = 0x2; voff = 0x02c ?
3858 beq.l _fpsp_unimp
3860 # check to see if the FPU is disabled. if so, jump to the OS entry
3861 # point for that condition.
3862 cmpi.w 0x6(%sp),&0x402c # fmt = 0x4; voff = 0x02c ?
3863 beq.l _real_fpu_disabled
3865 # the exception was an "F-Line Illegal" exception. we check to see
3866 # if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
3867 # so, convert the F-Line exception stack frame to an FP Unimplemented
3868 # Instruction exception stack frame else branch to the OS entry
3869 # point for the F-Line exception handler.
3870 link.w %a6,&-LOCAL_SIZE # init stack frame
3872 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3874 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) # stacked PC is the F-Line instr addr
3875 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3876 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3877 bsr.l _imem_read_long # fetch instruction words
3879 bfextu %d0{&0:&10},%d1 # upper 10 bits of the opword
3880 cmpi.w %d1,&0x03c8 # do they match the fmovecr opword?
3881 bne.b fline_fline # no
3883 bfextu %d0{&16:&6},%d1 # upper 6 bits of the extension word
3884 cmpi.b %d1,&0x17 # do they match the fmovecr cmdreg?
3885 bne.b fline_fline # no
3887 # it's an fmovecr w/ a non-zero <ea> that has entered through
3888 # the F-Line Illegal exception.
3889 # so, we need to convert the F-Line exception stack frame into an
3890 # FP Unimplemented Instruction stack frame and jump to that entry
3891 # point.
3893 # but, if the FPU is disabled, then we need to jump to the FPU disabled
3894 # entry point.
3895 movc %pcr,%d0 # read the processor configuration reg
3896 btst &0x1,%d0 # PCR bit 1 set => treat FPU as disabled
3897 beq.b fline_fmovcr # clear; emulate the fmovecr
3899 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3901 unlk %a6
# grow the 4 word F-Line frame into an 8 word "FPU disabled" frame in place.
3903 sub.l &0x8,%sp # make room for "Next PC", <ea>
3904 mov.w 0x8(%sp),(%sp) # move SR to the new frame bottom
3905 mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
3906 mov.w &0x402c,0x6(%sp) # fmt = 0x4; voff = 0x02c
3907 mov.l 0x2(%sp),0xc(%sp) # keep a copy of "Current PC" at 0xc(%sp)
3908 addq.l &0x4,0x2(%sp) # set "Next PC"
3910 bra.l _real_fpu_disabled
3912 fline_fmovcr:
3913 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3915 unlk %a6
# grow the 4 word F-Line frame into a 6 word FP unimplemented frame in place.
# the <ea> longword of the new frame is left with stale frame data.
3917 fmov.l 0x2(%sp),%fpiar # set current PC
3918 addq.l &0x4,0x2(%sp) # set Next PC
3920 mov.l (%sp),-(%sp) # push a copy of SR,hi(PC); frame +4 bytes
3921 mov.l 0x8(%sp),0x4(%sp) # move lo(PC),voff "down"
3922 mov.b &0x20,0x6(%sp) # frame format = 0x2 (voff byte untouched)
3924 bra.l _fpsp_unimp
3926 fline_fline:
# not an fmovecr: hand the untouched F-Line frame to the OS handler.
3927 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3929 unlk %a6
3931 bra.l _real_fline
3933 #########################################################################
3934 # XDEF **************************************************************** #
3935 # _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
3936 # Instruction" exception. #
3938 # This handler should be the first code executed upon taking the #
3939 # FP Unimplemented Instruction exception in an operating system. #
3941 # XREF **************************************************************** #
3942 # _imem_read_{word,long}() - read instruction word/longword #
3943 # load_fop() - load src/dst ops from memory and/or FP regfile #
3944 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3945 # tbl_trans - addr of table of emulation routines for trnscndls #
3946 # _real_access() - "callout" for access error exception #
3947 # _fpsp_done() - "callout" for exit; work all done #
3948 # _real_trace() - "callout" for Trace enabled exception #
3949 # smovcr() - emulate "fmovecr" instruction #
3950 # funimp_skew() - adjust fsave src ops to "incorrect" value #
3951 # _ftrapcc() - emulate an "ftrapcc" instruction #
3952 # _fdbcc() - emulate an "fdbcc" instruction #
3953 # _fscc() - emulate an "fscc" instruction #
3954 # _real_trap() - "callout" for Trap exception #
3955 # _real_bsun() - "callout" for enabled Bsun exception #
3957 # INPUT *************************************************************** #
3958 # - The system stack contains the "Unimplemented Instr" stk frame #
3960 # OUTPUT ************************************************************** #
3961 # If access error: #
3962 # - The system stack is changed to an access error stack frame #
3963 # If Trace exception enabled: #
3964 # - The system stack is changed to a Trace exception stack frame #
3965 # Else: (normal case) #
3966 # - Correct result has been stored as appropriate #
3968 # ALGORITHM *********************************************************** #
3969 # There are two main cases of instructions that may enter here to #
3970 # be emulated: (1) the FPgen instructions, most of which were also #
3971 # unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
3972 # For the first set, this handler calls the routine load_fop() #
3973 # to load the source and destination (for dyadic) operands to be used #
3974 # for instruction emulation. The correct emulation routine is then #
3975 # chosen by decoding the instruction type and indexing into an #
3976 # emulation subroutine index table. After emulation returns, this #
3977 # handler checks to see if an exception should occur as a result of the #
3978 # FP instruction emulation. If so, then an FP exception of the correct #
3979 # type is inserted into the FPU state frame using the "frestore" #
3980 # instruction before exiting through _fpsp_done(). In either the #
3981 # exceptional or non-exceptional cases, we must check to see if the #
3982 # Trace exception is enabled. If so, then we must create a Trace #
3983 # exception frame from the current exception frame and exit through #
3984 # _real_trace(). #
3985 # For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
3986 # _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
3987 # may flag that a BSUN exception should be taken. If so, then the #
3988 # current exception stack frame is converted into a BSUN exception #
3989 # stack frame and an exit is made through _real_bsun(). If the #
3990 # instruction was "ftrapcc" and a Trap exception should result, a Trap #
3991 # exception stack frame is created from the current frame and an exit #
3992 # is made through _real_trap(). If a Trace exception is pending, then #
3993 # a Trace exception frame is created from the current frame and a jump #
3994 # is made to _real_trace(). Finally, if none of these conditions exist, #
3995 # then the handler exits through the callout _fpsp_done(). #
3997 # In any of the above scenarios, if a _mem_read() or _mem_write() #
3998 # "callout" returns a failing value, then an access error stack frame #
3999 # is created from the current stack frame and an exit is made through #
4000 # _real_access(). #
4002 #########################################################################
4005 # FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
4007 #      *****************
4008 #      *               * => <ea> of fp unimp instr.
4009 #      -      EA       -
4010 #      *               *
4011 #      *****************
4012 #      * 0x2 *  0x02c  * => frame format and vector offset (vector #11)
4013 #      *****************
4014 #      *               *
4015 #      -    Next PC    - => PC of instr to execute after exc handling
4016 #      *               *
4017 #      *****************
4018 #      *      SR       * => SR at the time the exception was taken
4019 #      *****************
4021 # Note: the !NULL bit does not get set in the fsave frame when the
4022 # machine encounters an fp unimp exception. Therefore, it must be set
4023 # before leaving this handler.
4025 global _fpsp_unimp
4026 _fpsp_unimp:
# Entry point for the FP Unimplemented Instruction exception. This prologue
# saves state, records the pre-exception a7, fetches the faulting instruction
# and forks on the opword TYPE field: type 0 falls through to funimp_gen,
# type 1 (fdbcc/fscc/ftrapcc) branches to funimp_misc.
4028 link.w %a6,&-LOCAL_SIZE # init stack frame
4030 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
4031 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
4032 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
4034 btst &0x5,EXC_SR(%a6) # user mode exception?
4035 bne.b funimp_s # no; supervisor mode
4037 # save the value of the user stack pointer onto the stack frame
4038 funimp_u:
4039 mov.l %usp,%a0 # fetch user stack pointer
4040 mov.l %a0,EXC_A7(%a6) # store in stack frame
4041 bra.b funimp_cont
4043 # store the value of the supervisor stack pointer BEFORE the exc occurred.
4044 # old_sp is address just above stacked effective address.
4045 funimp_s:
4046 lea 4+EXC_EA(%a6),%a0 # load old a7'
4047 mov.l %a0,EXC_A7(%a6) # store a7'
4048 mov.l %a0,OLD_A7(%a6) # make a copy (used at exit to compute the a7 delta)
4050 funimp_cont:
4052 # the FPIAR holds the "current PC" of the faulting instruction.
4053 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
4055 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4056 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
4057 bsr.l _imem_read_long # fetch the instruction words
4058 mov.l %d0,EXC_OPWORD(%a6) # keep the fetched opword/extension word
4060 ############################################################################
4062 fmov.l &0x0,%fpcr # clear FPCR
4063 fmov.l &0x0,%fpsr # clear FPSR
4065 clr.b SPCOND_FLG(%a6) # clear "special case" flag
4067 # Divide the fp instructions into 8 types based on the TYPE field in
4068 # bits 6-8 of the opword(classes 6,7 are undefined).
4069 # (for the '060, only two types can take this exception)
4070 # bftst %d0{&7:&3} # test TYPE
# d0 still holds the fetched longword; its bit 22 is bit 6 of the opword,
# i.e. the low bit of TYPE, which is enough to tell type 0 from type 1.
4071 btst &22,%d0 # type 0 or 1 ?
4072 bne.w funimp_misc # type 1
4074 #########################################
4075 # TYPE == 0: General instructions #
4076 #########################################
4077 funimp_gen:
# TYPE 0 path: emulate a general FP instruction. Loads the operands, calls the
# emulation routine selected through tbl_trans, then either stores the result
# (no exceptions enabled) or goes to funimp_ena to see if one must be raised.
4079 clr.b STORE_FLG(%a6) # clear "store result" flag
4081 # clear the ccode byte and exception status byte
4082 andi.l &0x00ff00ff,USER_FPSR(%a6)
4084 bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
4085 cmpi.b %d1,&0x17 # is op an fmovecr?
4086 beq.w funimp_fmovcr # yes
4088 funimp_gen_op:
4089 bsr.l _load_fop # load src/dst operands
4091 clr.l %d0
4092 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
# build the tbl_trans index: (extension bits << 3) | src operand tag.
# the table has 8 entries per extension value, one per tag.
4094 mov.b 1+EXC_CMDREG(%a6),%d1
4095 andi.w &0x003f,%d1 # extract extension bits
4096 lsl.w &0x3,%d1 # shift left 3 bits to make room for the tag
4097 or.b STAG(%a6),%d1 # insert src optag bits
4099 lea FP_DST(%a6),%a1 # pass dst ptr in a1
4100 lea FP_SRC(%a6),%a0 # pass src ptr in a0
4102 mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 # fetch 16-bit routine offset
4103 jsr (tbl_trans.w,%pc,%d1.w*1) # emulate: call tbl_trans + offset
4105 funimp_fsave:
4106 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
4107 bne.w funimp_ena # some are enabled
4109 funimp_store:
4110 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
4111 bsr.l store_fpreg # store result to fp regfile
4113 funimp_gen_exit:
4114 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4115 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4116 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
# also entered from funimp_gen_exit2 after its own restore + frestore.
4118 funimp_gen_exit_cmp:
4119 cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
4120 beq.b funimp_gen_exit_a7 # yes
4122 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
4123 beq.b funimp_gen_exit_a7 # yes
4125 funimp_gen_exit_cont:
4126 unlk %a6
4128 funimp_gen_exit_cont2:
4129 btst &0x7,(%sp) # is trace on?
4130 beq.l _fpsp_done # no
4132 # this catches a problem with the case where an exception will be re-inserted
4133 # into the machine. the frestore has already been executed...so, the fmov.l
4134 # alone of the control register would trigger an unwanted exception.
4135 # until I feel like fixing this, we'll sidestep the exception.
# the fsave/frestore pair brackets the FPIAR read; while the fsave frame is on
# the stack, 0x14(%sp) addresses the exception frame slot that receives the
# "Current PC".
4136 fsave -(%sp)
4137 fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
4138 frestore (%sp)+
4139 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
4140 bra.l _real_trace
4142 funimp_gen_exit_a7:
4143 btst &0x5,EXC_SR(%a6) # supervisor or user mode?
4144 bne.b funimp_gen_exit_a7_s # supervisor
# user mode: write the updated a7 back to the USP, preserving a0.
4146 mov.l %a0,-(%sp) # save a0
4147 mov.l EXC_A7(%a6),%a0 # load new a7
4148 mov.l %a0,%usp # set new USP
4149 mov.l (%sp)+,%a0 # restore a0
4150 bra.b funimp_gen_exit_cont
4152 # if the instruction was executed from supervisor mode and the addressing
4153 # mode was (a7)+, then the stack frame for the rte must be shifted "up"
4154 # "n" bytes where "n" is the size of the src operand type.
4155 # f<op>.{b,w,l,s,d,x,p}
4156 funimp_gen_exit_a7_s:
4157 mov.l %d0,-(%sp) # save d0
4158 mov.l EXC_A7(%a6),%d0 # load new a7'
4159 sub.l OLD_A7(%a6),%d0 # subtract old a7'
4160 mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
4161 mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
4162 mov.w %d0,EXC_SR(%a6) # store incr number
4163 mov.l (%sp)+,%d0 # restore d0
4165 unlk %a6
4167 add.w (%sp),%sp # (%sp) is the incr number stored above; step to shifted frame
4168 bra.b funimp_gen_exit_cont2
4170 ######################
4171 # fmovecr.x #ccc,fpn #
4172 ######################
4173 funimp_fmovcr:
4174 clr.l %d0
4175 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode in d0
4176 mov.b 1+EXC_CMDREG(%a6),%d1
4177 andi.l &0x0000007f,%d1 # pass rom offset in d1
4178 bsr.l smovcr
4179 bra.w funimp_fsave # rejoin the common exception check/store path
4181 #########################################################################
4184 # the user has enabled some exceptions. we figure not to see this too
4185 # often so that's why it gets lower priority.
4187 funimp_ena:
# reached from funimp_fsave with d0 = FPCR_ENABLE (non-zero). decides whether
# the emulated instruction must raise an enabled exception and, if so, writes
# the matching status word into the fsave frame before the frestore.
4189 # was an exception set that was also enabled?
4190 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
4191 bfffo %d0{&24:&8},%d0 # find highest priority exception
4192 bne.b funimp_exc # at least one was set
4194 # no exception that was enabled was set BUT if we got an exact overflow
4195 # and overflow wasn't enabled but inexact was (yech!) then this is
4196 # an inexact exception; otherwise, return to normal non-exception flow.
4197 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4198 beq.w funimp_store # no; return to normal flow
4200 # the overflow w/ exact result happened but was inexact set in the FPCR?
4201 funimp_ovfl:
4202 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
4203 beq.w funimp_store # no; return to normal flow
4204 bra.b funimp_exc_ovfl # yes
4206 # some exception happened that was actually enabled.
4207 # we'll insert this new exception into the FPU and then return.
4208 funimp_exc:
4209 subi.l &24,%d0 # fix offset to be 0-7 (index into tbl_funimp_except)
4210 cmpi.b %d0,&0x6 # is exception INEX?
4211 bne.b funimp_exc_force # no
4213 # the enabled exception was inexact. so, if it occurs with an overflow
4214 # or underflow that was disabled, then we have to force an overflow or
4215 # underflow frame. the eventual overflow or underflow handler will see that
4216 # it's actually an inexact and act appropriately. this is the only easy
4217 # way to have the EXOP available for the enabled inexact handler when
4218 # a disabled overflow or underflow has also happened.
4219 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
4220 bne.b funimp_exc_ovfl # yes
4221 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
4222 bne.b funimp_exc_unfl # yes
4224 # force the fsave exception status bits to signal an exception of the
4225 # appropriate type. don't forget to "skew" the source operand in case we
4226 # "unskewed" the one the hardware initially gave us.
4227 funimp_exc_force:
4228 mov.l %d0,-(%sp) # save d0
4229 bsr.l funimp_skew # check for special case
4230 mov.l (%sp)+,%d0 # restore d0
4231 mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception #d0
4232 bra.b funimp_gen_exit2 # exit with frestore
# fsave status words, indexed by the 0-7 exception offset computed above.
# entries 3 and 4 are the same words the ovfl/unfl paths below store directly.
4234 tbl_funimp_except:
4235 short 0xe002, 0xe006, 0xe004, 0xe005
4236 short 0xe003, 0xe002, 0xe001, 0xe001
4238 # insert an overflow frame
4239 funimp_exc_ovfl:
4240 bsr.l funimp_skew # check for special case
4241 mov.w &0xe005,2+FP_SRC(%a6) # overflow status word
4242 bra.b funimp_gen_exit2
4244 # insert an underflow frame
4245 funimp_exc_unfl:
4246 bsr.l funimp_skew # check for special case
4247 mov.w &0xe003,2+FP_SRC(%a6) # underflow status word; fall through
4249 # this is the general exit point for an enabled exception that will be
4250 # restored into the machine for the instruction just emulated.
4251 funimp_gen_exit2:
4252 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4253 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4254 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4256 frestore FP_SRC(%a6) # insert exceptional status
4258 bra.w funimp_gen_exit_cmp # finish via the common a7/trace exit checks
4260 ############################################################################
4263 # TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
4265 # These instructions were implemented on the '881/2 and '040 in hardware but
4266 # are emulated in software on the '060.
4268 funimp_misc:
# TYPE 1 path: tell fdb<cc>, fs<cc> and ftrap<cc> apart from the <ea> mode and
# register fields of the opword (d0 still holds the fetched longword).
# mode 1 is fdb<cc>; mode 7 with register field >= 2 is ftrap<cc>; everything
# else is fs<cc>.
4269 bfextu %d0{&10:&3},%d1 # extract mode field
4270 cmpi.b %d1,&0x1 # is it an fdb<cc>?
4271 beq.w funimp_fdbcc # yes
4272 cmpi.b %d1,&0x7 # is the mode field 7?
4273 bne.w funimp_fscc # no; it's an fs<cc>
4274 bfextu %d0{&13:&3},%d1 # extract the 3 bits after the mode field
4275 cmpi.b %d1,&0x2 # is that field below 2?
4276 blt.w funimp_fscc # yes; it's an fs<cc>
4278 #########################
4279 # ftrap<cc> #
4280 # ftrap<cc>.w #<data> #
4281 # ftrap<cc>.l #<data> #
4282 #########################
4283 funimp_ftrapcc:
4285 bsr.l _ftrapcc # FTRAP<cc>()
4287 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4288 beq.w funimp_bsun # yes
4290 cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
4291 bne.w funimp_done # no
4293 #   FP UNIMP FRAME            TRAP FRAME
4294 #  *****************      *****************
4295 #  **    <EA>     **      ** Current PC  **
4296 #  *****************      *****************
4297 #  * 0x2 *  0x02c  *      * 0x2 *  0x01c  *
4298 #  *****************      *****************
4299 #  **   Next PC   **      **   Next PC   **
4300 #  *****************      *****************
4301 #  *      SR       *      *      SR       *
4302 #  *****************      *****************
4303 #      (6 words)              (6 words)
4305 # the ftrapcc instruction should take a trap. so, here we must create a
4306 # trap stack frame from an unimplemented fp instruction stack frame and
4307 # jump to the user supplied entry point for the trap exception
4308 funimp_ftrapcc_tp:
4309 mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
4310 mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c
4312 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4313 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4314 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4316 unlk %a6
4317 bra.l _real_trap
4319 #########################
4320 # fdb<cc> Dn,<label> #
4321 #########################
4322 funimp_fdbcc:
# fdb<cc>: fetch the 16-bit branch displacement that follows the instruction
# words already read, then let _fdbcc() emulate the instruction.
4324 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4325 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4326 bsr.l _imem_read_word # read displacement
4328 tst.l %d1 # did ifetch fail?
4329 bne.w funimp_iacc # yes (funimp_iacc is defined outside this section)
4331 ext.l %d0 # sign extend displacement
4333 bsr.l _fdbcc # FDB<cc>()
4335 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4336 beq.w funimp_bsun # yes
4338 bra.w funimp_done # branch to finish
4340 #################
4341 # fs<cc>.b <ea> #
4342 #################
4343 funimp_fscc:
4345 bsr.l _fscc # FS<cc>()
4347 # I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
4348 # does not need to update "An" before taking a bsun exception.
4349 cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
4350 beq.w funimp_bsun # yes
4352 btst &0x5,EXC_SR(%a6) # no bsun; is it a user mode exception?
4353 bne.b funimp_fscc_s # no
4355 funimp_fscc_u:
4356 mov.l EXC_A7(%a6),%a0 # yes; set new USP
4357 mov.l %a0,%usp
4358 bra.w funimp_done # branch to finish
4360 # remember, I'm assuming that post-increment is bogus...(it IS!!!)
4361 # so, the least significant WORD of the stacked effective address got
4362 # overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
4363 # so that the rte will work correctly without destroying the result.
4364 # even though the operation size is byte, the stack ptr is decr by 2.
4366 # remember, also, this instruction may be traced.
4367 funimp_fscc_s:
4368 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
4369 bne.w funimp_done # no
4371 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4372 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4373 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4375 unlk %a6
4377 btst &0x7,(%sp) # is trace enabled?
4378 bne.b funimp_fscc_s_trace # yes
# not traced: slide SR, PC and fmt/voff down by one word and finish.
4380 subq.l &0x2,%sp # frame now starts 2 bytes lower
4381 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4382 mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
4383 bra.l _fpsp_done
4385 funimp_fscc_s_trace:
# traced: same one-word slide, but rebuild the frame as a trace frame.
4386 subq.l &0x2,%sp # frame now starts 2 bytes lower
4387 mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
4388 mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
4389 mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
4390 fmov.l %fpiar,0x8(%sp) # insert "current PC"
4392 bra.l _real_trace
4395 # The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
4396 # the fp unimplemented instruction exception stack frame into a bsun stack frame,
4397 # restore a bsun exception into the machine, and branch to the user
4398 # supplied bsun hook.
4400 #   FP UNIMP FRAME            BSUN FRAME
4401 #  *****************      *****************
4402 #  **    <EA>     **      * 0x0 *  0x0c0  *
4403 #  *****************      *****************
4404 #  * 0x2 *  0x02c  *      ** Current PC  **
4405 #  *****************      *****************
4406 #  **   Next PC   **      *      SR       *
4407 #  *****************      *****************
4408 #  *      SR       *         (4 words)
4409 #  *****************
4410 #      (6 words)
4412 funimp_bsun:
# the 4 word bsun frame is built in the upper part of the 6 word frame, so
# the new fmt/voff word lands in the low word of the old <ea> slot.
4413 mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
4414 mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
4415 mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
4417 mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
4419 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4420 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4421 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4423 frestore FP_SRC(%a6) # restore bsun exception
4425 unlk %a6
4427 addq.l &0x4,%sp # erase sludge: skip the 4 bytes left below the shifted frame
4429 bra.l _real_bsun # branch to user bsun hook
4432 # all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
4433 # and return.
4435 # as usual, we have to check for trace mode being on here. since instructions
4436 # modifying the supervisor stack frame don't pass through here, this is a
4437 # relatively easy task.
4439 funimp_done:
# common exit for ftrapcc/fscc/fdbcc when no bsun or trap frame is needed:
# restore state, then leave through _fpsp_done() or, if the stacked SR has
# trace set, through funimp_trace.
4440 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
4441 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4442 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4444 unlk %a6
4446 btst &0x7,(%sp) # is trace enabled?
4447 bne.b funimp_trace # yes
4449 bra.l _fpsp_done
4451 #   FP UNIMP FRAME            TRACE FRAME
4452 #  *****************      *****************
4453 #  **    <EA>     **      ** Current PC  **
4454 #  *****************      *****************
4455 #  * 0x2 *  0x02c  *      * 0x2 *  0x024  *
4456 #  *****************      *****************
4457 #  **   Next PC   **      **   Next PC   **
4458 #  *****************      *****************
4459 #  *      SR       *      *      SR       *
4460 #  *****************      *****************
4461 #      (6 words)              (6 words)
4463 # the emulated instruction (ftrapcc, fscc, or fdbcc) should take a trace
4464 # trap. so, here we must create a trace stack frame from an unimplemented
4465 # fp instruction stack frame and jump to the user supplied entry point for
4466 # the trace exception
4466 funimp_trace:
4467 fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
4468 mov.b &0x24,0x7(%sp) # vector offset = 0x024
4470 bra.l _real_trace
4472 ################################################################
4474 global tbl_trans
4475 swbeg &0x1c0
4476 tbl_trans:
4477 short tbl_trans - tbl_trans # $00-0 fmovecr all
4478 short tbl_trans - tbl_trans # $00-1 fmovecr all
4479 short tbl_trans - tbl_trans # $00-2 fmovecr all
4480 short tbl_trans - tbl_trans # $00-3 fmovecr all
4481 short tbl_trans - tbl_trans # $00-4 fmovecr all
4482 short tbl_trans - tbl_trans # $00-5 fmovecr all
4483 short tbl_trans - tbl_trans # $00-6 fmovecr all
4484 short tbl_trans - tbl_trans # $00-7 fmovecr all
4486 short tbl_trans - tbl_trans # $01-0 fint norm
4487 short tbl_trans - tbl_trans # $01-1 fint zero
4488 short tbl_trans - tbl_trans # $01-2 fint inf
4489 short tbl_trans - tbl_trans # $01-3 fint qnan
4490 short tbl_trans - tbl_trans # $01-5 fint denorm
4491 short tbl_trans - tbl_trans # $01-4 fint snan
4492 short tbl_trans - tbl_trans # $01-6 fint unnorm
4493 short tbl_trans - tbl_trans # $01-7 ERROR
4495 short ssinh - tbl_trans # $02-0 fsinh norm
4496 short src_zero - tbl_trans # $02-1 fsinh zero
4497 short src_inf - tbl_trans # $02-2 fsinh inf
4498 short src_qnan - tbl_trans # $02-3 fsinh qnan
4499 short ssinhd - tbl_trans # $02-5 fsinh denorm
4500 short src_snan - tbl_trans # $02-4 fsinh snan
4501 short tbl_trans - tbl_trans # $02-6 fsinh unnorm
4502 short tbl_trans - tbl_trans # $02-7 ERROR
4504 short tbl_trans - tbl_trans # $03-0 fintrz norm
4505 short tbl_trans - tbl_trans # $03-1 fintrz zero
4506 short tbl_trans - tbl_trans # $03-2 fintrz inf
4507 short tbl_trans - tbl_trans # $03-3 fintrz qnan
4508 short tbl_trans - tbl_trans # $03-5 fintrz denorm
4509 short tbl_trans - tbl_trans # $03-4 fintrz snan
4510 short tbl_trans - tbl_trans # $03-6 fintrz unnorm
4511 short tbl_trans - tbl_trans # $03-7 ERROR
4513 short tbl_trans - tbl_trans # $04-0 fsqrt norm
4514 short tbl_trans - tbl_trans # $04-1 fsqrt zero
4515 short tbl_trans - tbl_trans # $04-2 fsqrt inf
4516 short tbl_trans - tbl_trans # $04-3 fsqrt qnan
4517 short tbl_trans - tbl_trans # $04-5 fsqrt denorm
4518 short tbl_trans - tbl_trans # $04-4 fsqrt snan
4519 short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
4520 short tbl_trans - tbl_trans # $04-7 ERROR
4522 short tbl_trans - tbl_trans # $05-0 ERROR
4523 short tbl_trans - tbl_trans # $05-1 ERROR
4524 short tbl_trans - tbl_trans # $05-2 ERROR
4525 short tbl_trans - tbl_trans # $05-3 ERROR
4526 short tbl_trans - tbl_trans # $05-4 ERROR
4527 short tbl_trans - tbl_trans # $05-5 ERROR
4528 short tbl_trans - tbl_trans # $05-6 ERROR
4529 short tbl_trans - tbl_trans # $05-7 ERROR
4531 short slognp1 - tbl_trans # $06-0 flognp1 norm
4532 short src_zero - tbl_trans # $06-1 flognp1 zero
4533 short sopr_inf - tbl_trans # $06-2 flognp1 inf
4534 short src_qnan - tbl_trans # $06-3 flognp1 qnan
4535 short slognp1d - tbl_trans # $06-5 flognp1 denorm
4536 short src_snan - tbl_trans # $06-4 flognp1 snan
4537 short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
4538 short tbl_trans - tbl_trans # $06-7 ERROR
4540 short tbl_trans - tbl_trans # $07-0 ERROR
4541 short tbl_trans - tbl_trans # $07-1 ERROR
4542 short tbl_trans - tbl_trans # $07-2 ERROR
4543 short tbl_trans - tbl_trans # $07-3 ERROR
4544 short tbl_trans - tbl_trans # $07-4 ERROR
4545 short tbl_trans - tbl_trans # $07-5 ERROR
4546 short tbl_trans - tbl_trans # $07-6 ERROR
4547 short tbl_trans - tbl_trans # $07-7 ERROR
4549 short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
4550 short src_zero - tbl_trans # $08-1 fetoxm1 zero
4551 short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
4552 short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
4553 short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
4554 short src_snan - tbl_trans # $08-4 fetoxm1 snan
4555 short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
4556 short tbl_trans - tbl_trans # $08-7 ERROR
4558 short stanh - tbl_trans # $09-0 ftanh norm
4559 short src_zero - tbl_trans # $09-1 ftanh zero
4560 short src_one - tbl_trans # $09-2 ftanh inf
4561 short src_qnan - tbl_trans # $09-3 ftanh qnan
4562 short stanhd - tbl_trans # $09-5 ftanh denorm
4563 short src_snan - tbl_trans # $09-4 ftanh snan
4564 short tbl_trans - tbl_trans # $09-6 ftanh unnorm
4565 short tbl_trans - tbl_trans # $09-7 ERROR
4567 short satan - tbl_trans # $0a-0 fatan norm
4568 short src_zero - tbl_trans # $0a-1 fatan zero
4569 short spi_2 - tbl_trans # $0a-2 fatan inf
4570 short src_qnan - tbl_trans # $0a-3 fatan qnan
4571 short satand - tbl_trans # $0a-5 fatan denorm
4572 short src_snan - tbl_trans # $0a-4 fatan snan
4573 short tbl_trans - tbl_trans # $0a-6 fatan unnorm
4574 short tbl_trans - tbl_trans # $0a-7 ERROR
4576 short tbl_trans - tbl_trans # $0b-0 ERROR
4577 short tbl_trans - tbl_trans # $0b-1 ERROR
4578 short tbl_trans - tbl_trans # $0b-2 ERROR
4579 short tbl_trans - tbl_trans # $0b-3 ERROR
4580 short tbl_trans - tbl_trans # $0b-4 ERROR
4581 short tbl_trans - tbl_trans # $0b-5 ERROR
4582 short tbl_trans - tbl_trans # $0b-6 ERROR
4583 short tbl_trans - tbl_trans # $0b-7 ERROR
4585 short sasin - tbl_trans # $0c-0 fasin norm
4586 short src_zero - tbl_trans # $0c-1 fasin zero
4587 short t_operr - tbl_trans # $0c-2 fasin inf
4588 short src_qnan - tbl_trans # $0c-3 fasin qnan
4589 short sasind - tbl_trans # $0c-5 fasin denorm
4590 short src_snan - tbl_trans # $0c-4 fasin snan
4591 short tbl_trans - tbl_trans # $0c-6 fasin unnorm
4592 short tbl_trans - tbl_trans # $0c-7 ERROR
4594 short satanh - tbl_trans # $0d-0 fatanh norm
4595 short src_zero - tbl_trans # $0d-1 fatanh zero
4596 short t_operr - tbl_trans # $0d-2 fatanh inf
4597 short src_qnan - tbl_trans # $0d-3 fatanh qnan
4598 short satanhd - tbl_trans # $0d-5 fatanh denorm
4599 short src_snan - tbl_trans # $0d-4 fatanh snan
4600 short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
4601 short tbl_trans - tbl_trans # $0d-7 ERROR
4603 short ssin - tbl_trans # $0e-0 fsin norm
4604 short src_zero - tbl_trans # $0e-1 fsin zero
4605 short t_operr - tbl_trans # $0e-2 fsin inf
4606 short src_qnan - tbl_trans # $0e-3 fsin qnan
4607 short ssind - tbl_trans # $0e-5 fsin denorm
4608 short src_snan - tbl_trans # $0e-4 fsin snan
4609 short tbl_trans - tbl_trans # $0e-6 fsin unnorm
4610 short tbl_trans - tbl_trans # $0e-7 ERROR
4612 short stan - tbl_trans # $0f-0 ftan norm
4613 short src_zero - tbl_trans # $0f-1 ftan zero
4614 short t_operr - tbl_trans # $0f-2 ftan inf
4615 short src_qnan - tbl_trans # $0f-3 ftan qnan
4616 short stand - tbl_trans # $0f-5 ftan denorm
4617 short src_snan - tbl_trans # $0f-4 ftan snan
4618 short tbl_trans - tbl_trans # $0f-6 ftan unnorm
4619 short tbl_trans - tbl_trans # $0f-7 ERROR
4621 short setox - tbl_trans # $10-0 fetox norm
4622 short ld_pone - tbl_trans # $10-1 fetox zero
4623 short szr_inf - tbl_trans # $10-2 fetox inf
4624 short src_qnan - tbl_trans # $10-3 fetox qnan
4625 short setoxd - tbl_trans # $10-5 fetox denorm
4626 short src_snan - tbl_trans # $10-4 fetox snan
4627 short tbl_trans - tbl_trans # $10-6 fetox unnorm
4628 short tbl_trans - tbl_trans # $10-7 ERROR
4630 short stwotox - tbl_trans # $11-0 ftwotox norm
4631 short ld_pone - tbl_trans # $11-1 ftwotox zero
4632 short szr_inf - tbl_trans # $11-2 ftwotox inf
4633 short src_qnan - tbl_trans # $11-3 ftwotox qnan
4634 short stwotoxd - tbl_trans # $11-5 ftwotox denorm
4635 short src_snan - tbl_trans # $11-4 ftwotox snan
4636 short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
4637 short tbl_trans - tbl_trans # $11-7 ERROR
4639 short stentox - tbl_trans # $12-0 ftentox norm
4640 short ld_pone - tbl_trans # $12-1 ftentox zero
4641 short szr_inf - tbl_trans # $12-2 ftentox inf
4642 short src_qnan - tbl_trans # $12-3 ftentox qnan
4643 short stentoxd - tbl_trans # $12-5 ftentox denorm
4644 short src_snan - tbl_trans # $12-4 ftentox snan
4645 short tbl_trans - tbl_trans # $12-6 ftentox unnorm
4646 short tbl_trans - tbl_trans # $12-7 ERROR
4648 short tbl_trans - tbl_trans # $13-0 ERROR
4649 short tbl_trans - tbl_trans # $13-1 ERROR
4650 short tbl_trans - tbl_trans # $13-2 ERROR
4651 short tbl_trans - tbl_trans # $13-3 ERROR
4652 short tbl_trans - tbl_trans # $13-4 ERROR
4653 short tbl_trans - tbl_trans # $13-5 ERROR
4654 short tbl_trans - tbl_trans # $13-6 ERROR
4655 short tbl_trans - tbl_trans # $13-7 ERROR
4657 short slogn - tbl_trans # $14-0 flogn norm
4658 short t_dz2 - tbl_trans # $14-1 flogn zero
4659 short sopr_inf - tbl_trans # $14-2 flogn inf
4660 short src_qnan - tbl_trans # $14-3 flogn qnan
4661 short slognd - tbl_trans # $14-5 flogn denorm
4662 short src_snan - tbl_trans # $14-4 flogn snan
4663 short tbl_trans - tbl_trans # $14-6 flogn unnorm
4664 short tbl_trans - tbl_trans # $14-7 ERROR
4666 short slog10 - tbl_trans # $15-0 flog10 norm
4667 short t_dz2 - tbl_trans # $15-1 flog10 zero
4668 short sopr_inf - tbl_trans # $15-2 flog10 inf
4669 short src_qnan - tbl_trans # $15-3 flog10 qnan
4670 short slog10d - tbl_trans # $15-5 flog10 denorm
4671 short src_snan - tbl_trans # $15-4 flog10 snan
4672 short tbl_trans - tbl_trans # $15-6 flog10 unnorm
4673 short tbl_trans - tbl_trans # $15-7 ERROR
4675 short slog2 - tbl_trans # $16-0 flog2 norm
4676 short t_dz2 - tbl_trans # $16-1 flog2 zero
4677 short sopr_inf - tbl_trans # $16-2 flog2 inf
4678 short src_qnan - tbl_trans # $16-3 flog2 qnan
4679 short slog2d - tbl_trans # $16-5 flog2 denorm
4680 short src_snan - tbl_trans # $16-4 flog2 snan
4681 short tbl_trans - tbl_trans # $16-6 flog2 unnorm
4682 short tbl_trans - tbl_trans # $16-7 ERROR
4684 short tbl_trans - tbl_trans # $17-0 ERROR
4685 short tbl_trans - tbl_trans # $17-1 ERROR
4686 short tbl_trans - tbl_trans # $17-2 ERROR
4687 short tbl_trans - tbl_trans # $17-3 ERROR
4688 short tbl_trans - tbl_trans # $17-4 ERROR
4689 short tbl_trans - tbl_trans # $17-5 ERROR
4690 short tbl_trans - tbl_trans # $17-6 ERROR
4691 short tbl_trans - tbl_trans # $17-7 ERROR
4693 short tbl_trans - tbl_trans # $18-0 fabs norm
4694 short tbl_trans - tbl_trans # $18-1 fabs zero
4695 short tbl_trans - tbl_trans # $18-2 fabs inf
4696 short tbl_trans - tbl_trans # $18-3 fabs qnan
4697 short tbl_trans - tbl_trans # $18-5 fabs denorm
4698 short tbl_trans - tbl_trans # $18-4 fabs snan
4699 short tbl_trans - tbl_trans # $18-6 fabs unnorm
4700 short tbl_trans - tbl_trans # $18-7 ERROR
4702 short scosh - tbl_trans # $19-0 fcosh norm
4703 short ld_pone - tbl_trans # $19-1 fcosh zero
4704 short ld_pinf - tbl_trans # $19-2 fcosh inf
4705 short src_qnan - tbl_trans # $19-3 fcosh qnan
4706 short scoshd - tbl_trans # $19-5 fcosh denorm
4707 short src_snan - tbl_trans # $19-4 fcosh snan
4708 short tbl_trans - tbl_trans # $19-6 fcosh unnorm
4709 short tbl_trans - tbl_trans # $19-7 ERROR
4711 short tbl_trans - tbl_trans # $1a-0 fneg norm
4712 short tbl_trans - tbl_trans # $1a-1 fneg zero
4713 short tbl_trans - tbl_trans # $1a-2 fneg inf
4714 short tbl_trans - tbl_trans # $1a-3 fneg qnan
4715 short tbl_trans - tbl_trans # $1a-5 fneg denorm
4716 short tbl_trans - tbl_trans # $1a-4 fneg snan
4717 short tbl_trans - tbl_trans # $1a-6 fneg unnorm
4718 short tbl_trans - tbl_trans # $1a-7 ERROR
4720 short tbl_trans - tbl_trans # $1b-0 ERROR
4721 short tbl_trans - tbl_trans # $1b-1 ERROR
4722 short tbl_trans - tbl_trans # $1b-2 ERROR
4723 short tbl_trans - tbl_trans # $1b-3 ERROR
4724 short tbl_trans - tbl_trans # $1b-4 ERROR
4725 short tbl_trans - tbl_trans # $1b-5 ERROR
4726 short tbl_trans - tbl_trans # $1b-6 ERROR
4727 short tbl_trans - tbl_trans # $1b-7 ERROR
4729 short sacos - tbl_trans # $1c-0 facos norm
4730 short ld_ppi2 - tbl_trans # $1c-1 facos zero
4731 short t_operr - tbl_trans # $1c-2 facos inf
4732 short src_qnan - tbl_trans # $1c-3 facos qnan
4733 short sacosd - tbl_trans # $1c-5 facos denorm
4734 short src_snan - tbl_trans # $1c-4 facos snan
4735 short tbl_trans - tbl_trans # $1c-6 facos unnorm
4736 short tbl_trans - tbl_trans # $1c-7 ERROR
4738 short scos - tbl_trans # $1d-0 fcos norm
4739 short ld_pone - tbl_trans # $1d-1 fcos zero
4740 short t_operr - tbl_trans # $1d-2 fcos inf
4741 short src_qnan - tbl_trans # $1d-3 fcos qnan
4742 short scosd - tbl_trans # $1d-5 fcos denorm
4743 short src_snan - tbl_trans # $1d-4 fcos snan
4744 short tbl_trans - tbl_trans # $1d-6 fcos unnorm
4745 short tbl_trans - tbl_trans # $1d-7 ERROR
4747 short sgetexp - tbl_trans # $1e-0 fgetexp norm
4748 short src_zero - tbl_trans # $1e-1 fgetexp zero
4749 short t_operr - tbl_trans # $1e-2 fgetexp inf
4750 short src_qnan - tbl_trans # $1e-3 fgetexp qnan
4751 short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
4752 short src_snan - tbl_trans # $1e-4 fgetexp snan
4753 short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
4754 short tbl_trans - tbl_trans # $1e-7 ERROR
4756 short sgetman - tbl_trans # $1f-0 fgetman norm
4757 short src_zero - tbl_trans # $1f-1 fgetman zero
4758 short t_operr - tbl_trans # $1f-2 fgetman inf
4759 short src_qnan - tbl_trans # $1f-3 fgetman qnan
4760 short sgetmand - tbl_trans # $1f-5 fgetman denorm
4761 short src_snan - tbl_trans # $1f-4 fgetman snan
4762 short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
4763 short tbl_trans - tbl_trans # $1f-7 ERROR
4765 short tbl_trans - tbl_trans # $20-0 fdiv norm
4766 short tbl_trans - tbl_trans # $20-1 fdiv zero
4767 short tbl_trans - tbl_trans # $20-2 fdiv inf
4768 short tbl_trans - tbl_trans # $20-3 fdiv qnan
4769 short tbl_trans - tbl_trans # $20-5 fdiv denorm
4770 short tbl_trans - tbl_trans # $20-4 fdiv snan
4771 short tbl_trans - tbl_trans # $20-6 fdiv unnorm
4772 short tbl_trans - tbl_trans # $20-7 ERROR
4774 short smod_snorm - tbl_trans # $21-0 fmod norm
4775 short smod_szero - tbl_trans # $21-1 fmod zero
4776 short smod_sinf - tbl_trans # $21-2 fmod inf
4777 short sop_sqnan - tbl_trans # $21-3 fmod qnan
4778 short smod_sdnrm - tbl_trans # $21-5 fmod denorm
4779 short sop_ssnan - tbl_trans # $21-4 fmod snan
4780 short tbl_trans - tbl_trans # $21-6 fmod unnorm
4781 short tbl_trans - tbl_trans # $21-7 ERROR
4783 short tbl_trans - tbl_trans # $22-0 fadd norm
4784 short tbl_trans - tbl_trans # $22-1 fadd zero
4785 short tbl_trans - tbl_trans # $22-2 fadd inf
4786 short tbl_trans - tbl_trans # $22-3 fadd qnan
4787 short tbl_trans - tbl_trans # $22-5 fadd denorm
4788 short tbl_trans - tbl_trans # $22-4 fadd snan
4789 short tbl_trans - tbl_trans # $22-6 fadd unnorm
4790 short tbl_trans - tbl_trans # $22-7 ERROR
4792 short tbl_trans - tbl_trans # $23-0 fmul norm
4793 short tbl_trans - tbl_trans # $23-1 fmul zero
4794 short tbl_trans - tbl_trans # $23-2 fmul inf
4795 short tbl_trans - tbl_trans # $23-3 fmul qnan
4796 short tbl_trans - tbl_trans # $23-5 fmul denorm
4797 short tbl_trans - tbl_trans # $23-4 fmul snan
4798 short tbl_trans - tbl_trans # $23-6 fmul unnorm
4799 short tbl_trans - tbl_trans # $23-7 ERROR
4801 short tbl_trans - tbl_trans # $24-0 fsgldiv norm
4802 short tbl_trans - tbl_trans # $24-1 fsgldiv zero
4803 short tbl_trans - tbl_trans # $24-2 fsgldiv inf
4804 short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
4805 short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
4806 short tbl_trans - tbl_trans # $24-4 fsgldiv snan
4807 short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
4808 short tbl_trans - tbl_trans # $24-7 ERROR
4810 short srem_snorm - tbl_trans # $25-0 frem norm
4811 short srem_szero - tbl_trans # $25-1 frem zero
4812 short srem_sinf - tbl_trans # $25-2 frem inf
4813 short sop_sqnan - tbl_trans # $25-3 frem qnan
4814 short srem_sdnrm - tbl_trans # $25-5 frem denorm
4815 short sop_ssnan - tbl_trans # $25-4 frem snan
4816 short tbl_trans - tbl_trans # $25-6 frem unnorm
4817 short tbl_trans - tbl_trans # $25-7 ERROR
4819 short sscale_snorm - tbl_trans # $26-0 fscale norm
4820 short sscale_szero - tbl_trans # $26-1 fscale zero
4821 short sscale_sinf - tbl_trans # $26-2 fscale inf
4822 short sop_sqnan - tbl_trans # $26-3 fscale qnan
4823 short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
4824 short sop_ssnan - tbl_trans # $26-4 fscale snan
4825 short tbl_trans - tbl_trans # $26-6 fscale unnorm
4826 short tbl_trans - tbl_trans # $26-7 ERROR
4828 short tbl_trans - tbl_trans # $27-0 fsglmul norm
4829 short tbl_trans - tbl_trans # $27-1 fsglmul zero
4830 short tbl_trans - tbl_trans # $27-2 fsglmul inf
4831 short tbl_trans - tbl_trans # $27-3 fsglmul qnan
4832 short tbl_trans - tbl_trans # $27-5 fsglmul denorm
4833 short tbl_trans - tbl_trans # $27-4 fsglmul snan
4834 short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
4835 short tbl_trans - tbl_trans # $27-7 ERROR
4837 short tbl_trans - tbl_trans # $28-0 fsub norm
4838 short tbl_trans - tbl_trans # $28-1 fsub zero
4839 short tbl_trans - tbl_trans # $28-2 fsub inf
4840 short tbl_trans - tbl_trans # $28-3 fsub qnan
4841 short tbl_trans - tbl_trans # $28-5 fsub denorm
4842 short tbl_trans - tbl_trans # $28-4 fsub snan
4843 short tbl_trans - tbl_trans # $28-6 fsub unnorm
4844 short tbl_trans - tbl_trans # $28-7 ERROR
4846 short tbl_trans - tbl_trans # $29-0 ERROR
4847 short tbl_trans - tbl_trans # $29-1 ERROR
4848 short tbl_trans - tbl_trans # $29-2 ERROR
4849 short tbl_trans - tbl_trans # $29-3 ERROR
4850 short tbl_trans - tbl_trans # $29-4 ERROR
4851 short tbl_trans - tbl_trans # $29-5 ERROR
4852 short tbl_trans - tbl_trans # $29-6 ERROR
4853 short tbl_trans - tbl_trans # $29-7 ERROR
4855 short tbl_trans - tbl_trans # $2a-0 ERROR
4856 short tbl_trans - tbl_trans # $2a-1 ERROR
4857 short tbl_trans - tbl_trans # $2a-2 ERROR
4858 short tbl_trans - tbl_trans # $2a-3 ERROR
4859 short tbl_trans - tbl_trans # $2a-4 ERROR
4860 short tbl_trans - tbl_trans # $2a-5 ERROR
4861 short tbl_trans - tbl_trans # $2a-6 ERROR
4862 short tbl_trans - tbl_trans # $2a-7 ERROR
4864 short tbl_trans - tbl_trans # $2b-0 ERROR
4865 short tbl_trans - tbl_trans # $2b-1 ERROR
4866 short tbl_trans - tbl_trans # $2b-2 ERROR
4867 short tbl_trans - tbl_trans # $2b-3 ERROR
4868 short tbl_trans - tbl_trans # $2b-4 ERROR
4869 short tbl_trans - tbl_trans # $2b-5 ERROR
4870 short tbl_trans - tbl_trans # $2b-6 ERROR
4871 short tbl_trans - tbl_trans # $2b-7 ERROR
4873 short tbl_trans - tbl_trans # $2c-0 ERROR
4874 short tbl_trans - tbl_trans # $2c-1 ERROR
4875 short tbl_trans - tbl_trans # $2c-2 ERROR
4876 short tbl_trans - tbl_trans # $2c-3 ERROR
4877 short tbl_trans - tbl_trans # $2c-4 ERROR
4878 short tbl_trans - tbl_trans # $2c-5 ERROR
4879 short tbl_trans - tbl_trans # $2c-6 ERROR
4880 short tbl_trans - tbl_trans # $2c-7 ERROR
4882 short tbl_trans - tbl_trans # $2d-0 ERROR
4883 short tbl_trans - tbl_trans # $2d-1 ERROR
4884 short tbl_trans - tbl_trans # $2d-2 ERROR
4885 short tbl_trans - tbl_trans # $2d-3 ERROR
4886 short tbl_trans - tbl_trans # $2d-4 ERROR
4887 short tbl_trans - tbl_trans # $2d-5 ERROR
4888 short tbl_trans - tbl_trans # $2d-6 ERROR
4889 short tbl_trans - tbl_trans # $2d-7 ERROR
4891 short tbl_trans - tbl_trans # $2e-0 ERROR
4892 short tbl_trans - tbl_trans # $2e-1 ERROR
4893 short tbl_trans - tbl_trans # $2e-2 ERROR
4894 short tbl_trans - tbl_trans # $2e-3 ERROR
4895 short tbl_trans - tbl_trans # $2e-4 ERROR
4896 short tbl_trans - tbl_trans # $2e-5 ERROR
4897 short tbl_trans - tbl_trans # $2e-6 ERROR
4898 short tbl_trans - tbl_trans # $2e-7 ERROR
4900 short tbl_trans - tbl_trans # $2f-0 ERROR
4901 short tbl_trans - tbl_trans # $2f-1 ERROR
4902 short tbl_trans - tbl_trans # $2f-2 ERROR
4903 short tbl_trans - tbl_trans # $2f-3 ERROR
4904 short tbl_trans - tbl_trans # $2f-4 ERROR
4905 short tbl_trans - tbl_trans # $2f-5 ERROR
4906 short tbl_trans - tbl_trans # $2f-6 ERROR
4907 short tbl_trans - tbl_trans # $2f-7 ERROR
4909 short ssincos - tbl_trans # $30-0 fsincos norm
4910 short ssincosz - tbl_trans # $30-1 fsincos zero
4911 short ssincosi - tbl_trans # $30-2 fsincos inf
4912 short ssincosqnan - tbl_trans # $30-3 fsincos qnan
4913 short ssincosd - tbl_trans # $30-5 fsincos denorm
4914 short ssincossnan - tbl_trans # $30-4 fsincos snan
4915 short tbl_trans - tbl_trans # $30-6 fsincos unnorm
4916 short tbl_trans - tbl_trans # $30-7 ERROR
4918 short ssincos - tbl_trans # $31-0 fsincos norm
4919 short ssincosz - tbl_trans # $31-1 fsincos zero
4920 short ssincosi - tbl_trans # $31-2 fsincos inf
4921 short ssincosqnan - tbl_trans # $31-3 fsincos qnan
4922 short ssincosd - tbl_trans # $31-5 fsincos denorm
4923 short ssincossnan - tbl_trans # $31-4 fsincos snan
4924 short tbl_trans - tbl_trans # $31-6 fsincos unnorm
4925 short tbl_trans - tbl_trans # $31-7 ERROR
4927 short ssincos - tbl_trans # $32-0 fsincos norm
4928 short ssincosz - tbl_trans # $32-1 fsincos zero
4929 short ssincosi - tbl_trans # $32-2 fsincos inf
4930 short ssincosqnan - tbl_trans # $32-3 fsincos qnan
4931 short ssincosd - tbl_trans # $32-5 fsincos denorm
4932 short ssincossnan - tbl_trans # $32-4 fsincos snan
4933 short tbl_trans - tbl_trans # $32-6 fsincos unnorm
4934 short tbl_trans - tbl_trans # $32-7 ERROR
4936 short ssincos - tbl_trans # $33-0 fsincos norm
4937 short ssincosz - tbl_trans # $33-1 fsincos zero
4938 short ssincosi - tbl_trans # $33-2 fsincos inf
4939 short ssincosqnan - tbl_trans # $33-3 fsincos qnan
4940 short ssincosd - tbl_trans # $33-5 fsincos denorm
4941 short ssincossnan - tbl_trans # $33-4 fsincos snan
4942 short tbl_trans - tbl_trans # $33-6 fsincos unnorm
4943 short tbl_trans - tbl_trans # $33-7 ERROR
4945 short ssincos - tbl_trans # $34-0 fsincos norm
4946 short ssincosz - tbl_trans # $34-1 fsincos zero
4947 short ssincosi - tbl_trans # $34-2 fsincos inf
4948 short ssincosqnan - tbl_trans # $34-3 fsincos qnan
4949 short ssincosd - tbl_trans # $34-5 fsincos denorm
4950 short ssincossnan - tbl_trans # $34-4 fsincos snan
4951 short tbl_trans - tbl_trans # $34-6 fsincos unnorm
4952 short tbl_trans - tbl_trans # $34-7 ERROR
4954 short ssincos - tbl_trans # $35-0 fsincos norm
4955 short ssincosz - tbl_trans # $35-1 fsincos zero
4956 short ssincosi - tbl_trans # $35-2 fsincos inf
4957 short ssincosqnan - tbl_trans # $35-3 fsincos qnan
4958 short ssincosd - tbl_trans # $35-5 fsincos denorm
4959 short ssincossnan - tbl_trans # $35-4 fsincos snan
4960 short tbl_trans - tbl_trans # $35-6 fsincos unnorm
4961 short tbl_trans - tbl_trans # $35-7 ERROR
4963 short ssincos - tbl_trans # $36-0 fsincos norm
4964 short ssincosz - tbl_trans # $36-1 fsincos zero
4965 short ssincosi - tbl_trans # $36-2 fsincos inf
4966 short ssincosqnan - tbl_trans # $36-3 fsincos qnan
4967 short ssincosd - tbl_trans # $36-5 fsincos denorm
4968 short ssincossnan - tbl_trans # $36-4 fsincos snan
4969 short tbl_trans - tbl_trans # $36-6 fsincos unnorm
4970 short tbl_trans - tbl_trans # $36-7 ERROR
4972 short ssincos - tbl_trans # $37-0 fsincos norm
4973 short ssincosz - tbl_trans # $37-1 fsincos zero
4974 short ssincosi - tbl_trans # $37-2 fsincos inf
4975 short ssincosqnan - tbl_trans # $37-3 fsincos qnan
4976 short ssincosd - tbl_trans # $37-5 fsincos denorm
4977 short ssincossnan - tbl_trans # $37-4 fsincos snan
4978 short tbl_trans - tbl_trans # $37-6 fsincos unnorm
4979 short tbl_trans - tbl_trans # $37-7 ERROR
4981 ##########
4983 # the instruction fetch access for the displacement word for the
4984 # fdbcc emulation failed. here, we create an access error frame
4985 # from the current frame and branch to _real_access().
4986 funimp_iacc:
# Turn the current exception frame into an access error frame and hand it
# to _real_access(). %a6 is the local frame pointer until the unlk below;
# after that every offset is relative to %sp.
4987 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
4988 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
4989 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
4991 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC (taken from the saved FPIAR)
4993 unlk %a6
# Grow the frame by one longword: the old top longword (SR + hi(PC)) is
# pushed again, so the old frame now starts at 0x4(%sp).
4995 mov.l (%sp),-(%sp) # store SR,hi(PC)
4996 mov.w 0x8(%sp),0x4(%sp) # store lo(PC) (old lo(PC) now sits at 0x8)
4997 mov.w &0x4008,0x6(%sp) # store voff (format/vector word of the new frame)
4998 mov.l 0x2(%sp),0x8(%sp) # store EA = the PC just assembled at 0x2(%sp)
4999 mov.l &0x09428001,0xc(%sp) # store FSLW
# Bit 5 of the first stacked byte is tested; that byte is the high byte of
# the stacked SR.
5001 btst &0x5,(%sp) # user or supervisor mode?
5002 beq.b funimp_iacc_end # user: FSLW is left as stored above
# Byte 0xd is the second byte of the FSLW longword stored at 0xc.
5003 bset &0x2,0xd(%sp) # set supervisor TM bit
5005 funimp_iacc_end:
5006 bra.l _real_access # continue in the access error handler
5008 #########################################################################
5009 # ssin(): computes the sine of a normalized input #
5010 # ssind(): computes the sine of a denormalized input #
5011 # scos(): computes the cosine of a normalized input #
5012 # scosd(): computes the cosine of a denormalized input #
5013 # ssincos(): computes the sine and cosine of a normalized input #
5014 # ssincosd(): computes the sine and cosine of a denormalized input #
5016 # INPUT *************************************************************** #
5017 # a0 = pointer to extended precision input #
5018 # d0 = round precision,mode #
5020 # OUTPUT ************************************************************** #
5021 # fp0 = sin(X) or cos(X) #
5023 # For ssincos(X): #
5024 # fp0 = sin(X) #
5025 # fp1 = cos(X) #
5027 # ACCURACY and MONOTONICITY ******************************************* #
5028 # The returned result is within 1 ulp in 64 significant bits, i.e. #
5029 # within 0.5001 ulp to 53 bits if the result is subsequently #
5030 # rounded to double precision. The result is provably monotonic #
5031 # in double precision. #
5033 # ALGORITHM *********************************************************** #
5035 # SIN and COS: #
5036 # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
5038 # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
5040 # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5041 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5042 # Overwrite k by k := k + AdjN. #
5044 # 4. If k is even, go to 6. #
5046 # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
5047 # Return sgn*cos(r) where cos(r) is approximated by an #
5048 # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
5049 # s = r*r. #
5050 # Exit. #
5052 # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
5053 # where sin(r) is approximated by an odd polynomial in r #
5054 # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
5055 # Exit. #
5057 # 7. If |X| > 1, go to 9. #
5059 # 8. (|X|<2**(-40)) If SIN is invoked, return X; #
5060 # otherwise return 1. #
5062 # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5063 # go back to 3. #
5065 # SINCOS: #
5066 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5068 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5069 # k = N mod 4, so in particular, k = 0,1,2,or 3. #
5071 # 3. If k is even, go to 5. #
5073 # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. #
5074 # j1 exclusive or with the l.s.b. of k. #
5075 # sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
5076 # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
5077 # sin(r) and cos(r) are computed as odd and even #
5078 # polynomials in r, respectively. Exit #
5080 # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
5081 # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
5082 # sin(r) and cos(r) are computed as odd and even #
5083 # polynomials in r, respectively. Exit #
5085 # 6. If |X| > 1, go to 8. #
5087 # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
5089 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
5090 # go back to 2. #
5092 #########################################################################
5094 SINA7: long 0xBD6AAA77,0xCCC994F5
# Polynomial coefficients for the sine (A1..A7) and cosine (B1..B8)
# approximations described in the ALGORITHM section above.
# The storage format of each constant follows from how SINPOLY/COSPOLY and
# the sincos code read it: .d = double, .x = extended, .s = single.
# SINA7..SINA3 are read with .d accesses (SINA7 above included).
5095 SINA6: long 0x3DE61209,0x7AAE8DA1
5096 SINA5: long 0xBE5AE645,0x2A118AE4
5097 SINA4: long 0x3EC71DE3,0xA5341531
# SINA3 is read as a double; the two trailing zero longwords are not part
# of the value (presumably padding -- confirm).
5098 SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
# SINA2 and SINA1 are read with .x accesses; the fourth longword is zero
# fill after the extended value.
5099 SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000
5100 SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000
# COSB8..COSB4 are read with .d accesses.
5102 COSB8: long 0x3D2AC4D0,0xD6011EE3
5103 COSB7: long 0xBDA9396F,0x9F45AC19
5104 COSB6: long 0x3E21EED9,0x0612C972
5105 COSB5: long 0xBE927E4F,0xB79D9FCF
# COSB4 is read as a double; the trailing zero longwords are fill.
5106 COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
# COSB3 and COSB2 are read with .x accesses.
5107 COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
5108 COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
# COSB1 is -1/2, read with a .s access (0xBF000000 is -0.5 in single).
5109 COSB1: long 0xBF000000
5111 set INARG,FP_SCR0
# Scratch-slot aliases used by the sine/cosine code. All of them are
# offsets applied to %a6. Several names share one slot, so values stored
# under one name are overwritten by stores under the other.
# INARG (above), X and RPRIME all name FP_SCR0.
5113 set X,FP_SCR0
5114 # set XDCARE,X+2
5115 set XFRAC,X+4
5117 set RPRIME,FP_SCR0
5118 set SPRIME,FP_SCR1
# POSNEG1 and TWOTO63 both name L_SCR1.
5120 set POSNEG1,L_SCR1
5121 set TWOTO63,L_SCR1
# ENDFLAG and INT both name L_SCR2.
5123 set ENDFLAG,L_SCR2
5124 set INT,L_SCR2
# ADJN has L_SCR3 to itself: 0 for sine, 1 for cosine, 4 for sincos.
5126 set ADJN,L_SCR3
5128 ############################################
5129 global ssin
# ssin(): sine of a normalized input.
# In:  a0 = pointer to extended precision input, d0 = round precision,mode
# Out: fp0 = sin(X)
5130 ssin:
5131 mov.l &0,ADJN(%a6) # SET ADJN TO 0 (selects sine)
5132 bra.b SINBGN # join the code shared with scos
5134 ############################################
5135 global scos
# scos(): cosine of a normalized input.
# In:  a0 = pointer to extended precision input, d0 = round precision,mode
# Out: fp0 = cos(X)
# There is no branch at the end: execution falls through into SINBGN.
5136 scos:
5137 mov.l &1,ADJN(%a6) # SET ADJN TO 1 (selects cosine)
5139 ############################################
5140 SINBGN:
5141 #--LOAD X AND CHECK IF |X| IS TOO SMALL OR LARGE (NOTHING IS SAVED HERE)
5143 fmov.x (%a0),%fp0 # LOAD INPUT
5144 fmov.x %fp0,X(%a6) # save input at X
5146 # "COMPACTIFY" X: d1 = sign/exponent word : upper 16 mantissa bits
5147 mov.l (%a0),%d1 # put exp in hi word
5148 mov.w 4(%a0),%d1 # fetch hi(man)
5149 and.l &0x7FFFFFFF,%d1 # strip sign
# 0x3FD78000 is the compact form of 2**(-40): exponent 0x3FD7, mantissa 0x8000.
5151 cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
5152 bge.b SOK1 # yes; go check the upper bound
5153 bra.w SINSM # no; input is very small
5155 SOK1:
5156 cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
5157 blt.b SINMAIN # yes; use the table-driven reduction
5158 bra.w SREDUCEX # no; input is very large
5160 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5161 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
# On entry fp0 = X. On exit fp0 = R, INT(%a6) = N, and execution falls
# through into SINCONT. a1 and d1 are used as scratch.
5162 SINMAIN:
5163 fmov.x %fp0,%fp1
5164 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
# 0x200 = 32 entries * 16 bytes, so a1 starts at the N = 0 entry.
5166 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
5168 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER (rounded per the FPCR in effect here)
5170 mov.l INT(%a6),%d1 # make a copy of N
5171 asl.l &4,%d1 # N *= 16 (table entries are 16 bytes apart)
5172 add.l %d1,%a1 # tbl_addr = a1 + (N*16)
5174 # A1 IS THE ADDRESS OF N*PIBY2
5175 # ...WHICH IS IN TWO PIECES Y1 & Y2
# Y1 is read as extended with postincrement, then Y2 is read as single
# from the address that follows it.
5176 fsub.x (%a1)+,%fp0 # X-Y1
5177 fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2
5179 SINCONT:
5180 #--continuation from REDUCEX
# On entry fp0 = R and INT(%a6) = N.
5182 #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
5183 mov.l INT(%a6),%d1
5184 add.l ADJN(%a6),%d1 # SEE IF D1 (= N+ADJN) IS ODD OR EVEN
5185 ror.l &1,%d1 # D1 WAS ODD IFF D1 IS NOW NEGATIVE
5186 cmp.l %d1,&0
5187 blt.w COSPOLY # odd: cosine polynomial; even falls through to SINPOLY
5189 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5190 #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
5191 #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
5192 #--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
5193 #--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
5194 #--WHERE T=S*S.
5195 #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
5196 #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
# On entry fp0 = R, d1 = (N+ADJN) rotated right by one bit (done in
# SINCONT), d0 = user round precision,mode. fp2/fp3 are saved on the stack
# and restored before the final add. Exits through t_inx2.
5197 SINPOLY:
5198 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5200 fmov.x %fp0,X(%a6) # X IS R
5201 fmul.x %fp0,%fp0 # FP0 IS S
5203 fmov.d SINA7(%pc),%fp3
5204 fmov.d SINA6(%pc),%fp2
5206 fmov.x %fp0,%fp1
5207 fmul.x %fp1,%fp1 # FP1 IS T
5209 ror.l &1,%d1
5210 and.l &0x80000000,%d1
5211 # ...LEAST SIG. BIT OF D1 (AS IT WAS BEFORE THIS ROTATE) IN SIGN POSITION
5212 eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
5214 fmul.x %fp1,%fp3 # TA7
5215 fmul.x %fp1,%fp2 # TA6
5217 fadd.d SINA5(%pc),%fp3 # A5+TA7
5218 fadd.d SINA4(%pc),%fp2 # A4+TA6
5220 fmul.x %fp1,%fp3 # T(A5+TA7)
5221 fmul.x %fp1,%fp2 # T(A4+TA6)
5223 fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
5224 fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
5226 fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
5228 fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
5229 fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
5230 fmul.x X(%a6),%fp0 # R'*S
5232 fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
5234 fmul.x %fp1,%fp0 # SIN(R')-R'
5236 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
# The user's FPCR is loaded only now, so that only the final add is
# performed under the user's rounding precision and mode.
5238 fmov.l %d0,%fpcr # restore users round mode,prec
5239 fadd.x X(%a6),%fp0 # last inst - possible exception set
5240 bra t_inx2
5242 #--LET J BE THE LEAST SIG. BIT OF D1, LET SGN := (-1)**J.
5243 #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
5244 #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
5245 #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
5246 #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
5247 #--WHERE T=S*S.
5248 #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
5249 #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
5250 #--AND IS THEREFORE STORED AS SINGLE PRECISION.
# On entry fp0 = R, d1 = (N+ADJN) rotated right by one bit (done in
# SINCONT), d0 = user round precision,mode. fp2/fp3 are saved on the stack
# and restored before the final add. Exits through t_inx2.
5251 COSPOLY:
5252 fmovm.x &0x0c,-(%sp) # save fp2/fp3
5254 fmul.x %fp0,%fp0 # FP0 IS S
5256 fmov.d COSB8(%pc),%fp2
5257 fmov.d COSB7(%pc),%fp3
5259 fmov.x %fp0,%fp1
5260 fmul.x %fp1,%fp1 # FP1 IS T
5262 fmov.x %fp0,X(%a6) # X IS S
5263 ror.l &1,%d1
5264 and.l &0x80000000,%d1
5265 # ...LEAST SIG. BIT OF D1 (AS IT WAS BEFORE THIS ROTATE) IN SIGN POSITION
5267 fmul.x %fp1,%fp2 # TB8
5269 eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
5270 and.l &0x80000000,%d1 # d1 is already masked above; this leaves it unchanged
5272 fmul.x %fp1,%fp3 # TB7
5274 or.l &0x3F800000,%d1 # D1 IS SGN IN SINGLE (+1.0 or -1.0)
5275 mov.l %d1,POSNEG1(%a6)
5277 fadd.d COSB6(%pc),%fp2 # B6+TB8
5278 fadd.d COSB5(%pc),%fp3 # B5+TB7
5280 fmul.x %fp1,%fp2 # T(B6+TB8)
5281 fmul.x %fp1,%fp3 # T(B5+TB7)
5283 fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
5284 fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
5286 fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
5287 fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
5289 fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
5290 fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
5292 fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
5294 fadd.x %fp1,%fp0 # [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
5296 fmul.x X(%a6),%fp0 # S' times the bracketed sum
5298 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
# The user's FPCR is loaded only now, so that only the final add is
# performed under the user's rounding precision and mode.
5300 fmov.l %d0,%fpcr # restore users round mode,prec
5301 fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set
5302 bra t_inx2
5304 ##############################################
5306 # SINe: Big OR Small?
5307 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5308 #--IF |X| < 2**(-40), RETURN X OR 1.
# NOTE(review): no branch to SINBORS is visible in the sine/cosine code
# around it; SINBGN branches straight to SINSM or SREDUCEX. Confirm whether
# this label is still reachable.
# 0x3FFF8000 is the compact form of 1.0 (see step 7 of the algorithm).
5309 SINBORS:
5310 cmp.l %d1,&0x3FFF8000
5311 bgt.l SREDUCEX
# Tiny input: ADJN decides whether X (sine) or 1 (cosine) is returned.
5313 SINSM:
5314 mov.l ADJN(%a6),%d1
5315 cmp.l %d1,&0
5316 bgt.b COSTINY # ADJN > 0: cosine was requested
5318 # here, the operation may underflow iff the precision is sgl or dbl.
5319 # extended denorms are handled through another entry point.
5320 SINTINY:
5321 # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
5323 fmov.l %d0,%fpcr # restore users round mode,prec
5324 mov.b &FMOV_OP,%d1 # last inst is MOVE
5325 fmov.x X(%a6),%fp0 # last inst - possible exception set
5326 bra t_catch
# Cosine of a tiny input. 0x80800000 is -2**(-126) in single precision, so
# the final add computes 1 - tiny under the user's rounding mode.
5328 COSTINY:
5329 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5330 fmov.l %d0,%fpcr # restore users round mode,prec
5331 fadd.s &0x80800000,%fp0 # last inst - possible exception set
5332 bra t_pinx2
5334 ################################################
5335 global ssind
# ssind(): sine of a denormalized input (see the routine header above).
5336 #--SIN(X) = X FOR DENORMALIZED X
5337 ssind:
5338 bra t_extdnrm # all work is done in t_extdnrm
5340 ############################################
5341 global scosd
# scosd(): cosine of a denormalized input (see the routine header above).
5342 #--COS(X) = 1 FOR DENORMALIZED X
5343 scosd:
5344 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
5345 bra t_pinx2 # exit through t_pinx2 with fp0 = 1.0
5347 ##################################################
5349 global ssincos
# ssincos(): sine and cosine of a normalized input.
# In:  a0 = pointer to extended precision input, d0 = round precision,mode
# Out: fp0 = sin(X), fp1 = cos(X)
5350 ssincos:
5351 #--SET ADJN TO 4
# ssin uses 0 and scos uses 1, so 4 marks the sincos entry.
# NOTE(review): presumably SREDUCEX tests this value to pick SCCONT over
# SINCONT on return -- confirm.
5352 mov.l &4,ADJN(%a6)
5354 fmov.x (%a0),%fp0 # LOAD INPUT
5355 fmov.x %fp0,X(%a6) # save input at X
# d1 = sign/exponent word : upper 16 mantissa bits, with the sign cleared.
5357 mov.l (%a0),%d1
5358 mov.w 4(%a0),%d1
5359 and.l &0x7FFFFFFF,%d1 # COMPACTIFY X
5361 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5362 bge.b SCOK1 # yes; go check the upper bound
5363 bra.w SCSM # no; input is very small
5365 SCOK1:
5366 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5367 blt.b SCMAIN # yes; use the table-driven reduction
5368 bra.w SREDUCEX # no; input is very large
5371 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5372 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5373 SCMAIN:
5374 fmov.x %fp0,%fp1
5376 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5378 lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32; +0x200 = 32 entries of 16 bytes, i.e. the N = 0 row
5380 fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
5382 mov.l INT(%a6),%d1
5383 asl.l &4,%d1 # N*16: byte offset of the table entry
5384 add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
5386 fsub.x (%a1)+,%fp0 # X-Y1
5387 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5389 SCCONT:
5390 #--continuation point from SREDUCEX (SRESTORE branches here when ADJN >= 4)
5392 mov.l INT(%a6),%d1
5393 ror.l &1,%d1 # bit 0 of N moves to bit 31, bit 1 of N to bit 0
5394 cmp.l %d1,&0 # d1 < 0 IFF N IS ODD
5395 bge.w NEVEN
5397 SNODD:
5398 #--N IS ODD. fp2 IS SAVED BELOW; d2 IS SAVED AND RESTORED AROUND ITS USE AS SCRATCH.
# On entry fp0 = R and d1 = N rotated right by one (set at SCCONT).
# fp1 accumulates the SINA polynomial (stored as the cosine result),
# fp0/fp2 accumulate the COSB polynomial (returned as the sine result).
5399 fmovm.x &0x04,-(%sp) # save fp2
5401 fmov.x %fp0,RPRIME(%a6)
5402 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5403 fmov.d SINA7(%pc),%fp1 # A7
5404 fmov.d COSB8(%pc),%fp2 # B8
5405 fmul.x %fp0,%fp1 # SA7
5406 fmul.x %fp0,%fp2 # SB8
5408 mov.l %d2,-(%sp) # save d2 (scratch)
5409 mov.l %d1,%d2
5410 ror.l &1,%d2 # bit 31 of d2 = bit 1 of N
5411 and.l &0x80000000,%d2
5412 eor.l %d1,%d2 # bit 31 of d2 = (bit 0 of N) xor (bit 1 of N)
5413 and.l &0x80000000,%d2 # keep only that sign bit
5415 fadd.d SINA6(%pc),%fp1 # A6+SA7
5416 fadd.d COSB7(%pc),%fp2 # B7+SB8
5418 fmul.x %fp0,%fp1 # S(A6+SA7)
5419 eor.l %d2,RPRIME(%a6) # conditionally flip the sign of R'
5420 mov.l (%sp)+,%d2 # restore d2
5421 fmul.x %fp0,%fp2 # S(B7+SB8)
5422 ror.l &1,%d1 # bit 31 of d1 = bit 1 of N
5423 and.l &0x80000000,%d1
5424 mov.l &0x3F800000,POSNEG1(%a6) # 1.0 in sgl
5425 eor.l %d1,POSNEG1(%a6) # POSNEG1 = +1.0 or -1.0
5427 fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
5428 fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
5430 fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
5431 fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
5432 fmov.x %fp0,SPRIME(%a6)
5434 fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
5435 eor.l %d1,SPRIME(%a6) # S' = S with the same sign as POSNEG1
5436 fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
5438 fmul.x %fp0,%fp1 # S(A4+...)
5439 fmul.x %fp0,%fp2 # S(B5+...)
5441 fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
5442 fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
5444 fmul.x %fp0,%fp1 # S(A3+...)
5445 fmul.x %fp0,%fp2 # S(B4+...)
5447 fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
5448 fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
5450 fmul.x %fp0,%fp1 # S(A2+...)
5451 fmul.x %fp0,%fp2 # S(B3+...)
5453 fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
5454 fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
5456 fmul.x %fp0,%fp1 # S(A1+...)
5457 fmul.x %fp2,%fp0 # S(B2+...)
5459 fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
5460 fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
5461 fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
5463 fmovm.x (%sp)+,&0x20 # restore fp2
5465 fmov.l %d0,%fpcr # fpcr = d0 before the two result-producing adds
5466 fadd.x RPRIME(%a6),%fp1 # COS(X)
5467 bsr sto_cos # store cosine result
5468 fadd.s POSNEG1(%a6),%fp0 # SIN(X)
5469 bra t_inx2
5471 NEVEN:
5472 #--N IS EVEN. fp2 IS SAVED BELOW.
# On entry fp0 = R and d1 = N rotated right by one (set at SCCONT).
# fp1 accumulates the COSB polynomial (stored as the cosine result),
# fp0/fp2 accumulate the SINA polynomial (returned as the sine result).
5473 fmovm.x &0x04,-(%sp) # save fp2
5475 fmov.x %fp0,RPRIME(%a6)
5476 fmul.x %fp0,%fp0 # FP0 IS S = R*R
5478 fmov.d COSB8(%pc),%fp1 # B8
5479 fmov.d SINA7(%pc),%fp2 # A7
5481 fmul.x %fp0,%fp1 # SB8
5482 fmov.x %fp0,SPRIME(%a6)
5483 fmul.x %fp0,%fp2 # SA7
5485 ror.l &1,%d1 # bit 31 of d1 = bit 1 of N
5486 and.l &0x80000000,%d1 # keep only that sign bit
5488 fadd.d COSB7(%pc),%fp1 # B7+SB8
5489 fadd.d SINA6(%pc),%fp2 # A6+SA7
5491 eor.l %d1,RPRIME(%a6) # conditionally flip the sign of R'
5492 eor.l %d1,SPRIME(%a6) # and of S'
5494 fmul.x %fp0,%fp1 # S(B7+SB8)
5496 or.l &0x3F800000,%d1 # +1.0 or -1.0 in sgl
5497 mov.l %d1,POSNEG1(%a6)
5499 fmul.x %fp0,%fp2 # S(A6+SA7)
5501 fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
5502 fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
5504 fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
5505 fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
5507 fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
5508 fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
5510 fmul.x %fp0,%fp1 # S(B5+...)
5511 fmul.x %fp0,%fp2 # S(A4+...)
5513 fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
5514 fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
5516 fmul.x %fp0,%fp1 # S(B4+...)
5517 fmul.x %fp0,%fp2 # S(A3+...)
5519 fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
5520 fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
5522 fmul.x %fp0,%fp1 # S(B3+...)
5523 fmul.x %fp0,%fp2 # S(A2+...)
5525 fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
5526 fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
5528 fmul.x %fp0,%fp1 # S(B2+...)
5529 fmul.x %fp2,%fp0 # S(A1+...)
5532 fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
5533 fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
5534 fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
5536 fmovm.x (%sp)+,&0x20 # restore fp2
5538 fmov.l %d0,%fpcr # fpcr = d0 before the two result-producing adds
5539 fadd.s POSNEG1(%a6),%fp1 # COS(X)
5540 bsr sto_cos # store cosine result
5541 fadd.x RPRIME(%a6),%fp0 # SIN(X)
5542 bra t_inx2
5544 ################################################
5546 SCBORS:
5547 cmp.l %d1,&0x3FFF8000 # compare compact |X| with compact 1.0
5548 bgt.w SREDUCEX # greater: general argument reduction
5550 ################################################
5552 SCSM:
# Tiny argument: the cosine result is 1.0 - 2**(-126) computed under fpcr = d0,
# the sine result is X itself, re-loaded from the copy saved at ssincos.
5553 # mov.w &0x0000,XDCARE(%a6)
5554 fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5556 fmov.l %d0,%fpcr
5557 fsub.s &0x00800000,%fp1 # subtract 2**(-126) (sgl)
5558 bsr sto_cos # store cosine result
5559 fmov.l %fpcr,%d0 # d0 must have fpcr,too
5560 mov.b &FMOV_OP,%d1 # last inst is MOVE
5561 fmov.x X(%a6),%fp0 # fp0 = X (sine result)
5562 bra t_catch
5564 ##############################################
5566 global ssincosd
5567 #--SIN AND COS OF X FOR DENORMALIZED X
# Cosine result is 1.0 (handed to sto_cos); the sine result is left to t_extdnrm.
5568 ssincosd:
5569 mov.l %d0,-(%sp) # save d0
5570 fmov.s &0x3F800000,%fp1 # fp1 = 1.0
5571 bsr sto_cos # store cosine result
5572 mov.l (%sp)+,%d0 # restore d0
5573 bra t_extdnrm
5575 ############################################
5577 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
5578 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
5579 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
5580 SREDUCEX:
5581 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
5582 mov.l %d2,-(%sp) # save d2
5583 fmov.s &0x00000000,%fp1 # fp1 = 0
5585 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
5586 #--there is a danger of unwanted overflow in first LOOP iteration. In this
5587 #--case, reduce argument by one remainder step to make subsequent reduction
5588 #--safe.
5589 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
5590 bne.b SLOOP # no
5592 # yes; create 2**16383*PI/2
5593 mov.w &0x7ffe,FP_SCR0_EX(%a6)
5594 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
5595 clr.l FP_SCR0_LO(%a6)
5597 # create low half of 2**16383*PI/2 at FP_SCR1
5598 mov.w &0x7fdc,FP_SCR1_EX(%a6)
5599 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
5600 clr.l FP_SCR1_LO(%a6)
5602 ftest.x %fp0 # test sign of argument
5603 fblt.w sred_neg
5605 or.b &0x80,FP_SCR0_EX(%a6) # positive arg: make both constants negative
5606 or.b &0x80,FP_SCR1_EX(%a6)
5607 sred_neg:
5608 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
5609 fmov.x %fp0,%fp1 # save high result in fp1
5610 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
5611 fsub.x %fp0,%fp1 # determine low component of result
5612 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
5614 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
5615 #--integer quotient will be stored in N
5616 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
5617 SLOOP:
5618 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
5619 mov.w INARG(%a6),%d1 # low word of d1 = sign/exponent word of R
5620 mov.l %d1,%a1 # save a copy of d1 (sign is extracted from it below)
5621 and.l &0x00007FFF,%d1
5622 sub.l &0x00003FFF,%d1 # d1 = K
5623 cmp.l %d1,&28
5624 ble.b SLASTLOOP # K <= 28: this is the final pass
5625 SCONTLOOP:
5626 sub.l &27,%d1 # d1 = L := K-27
5627 mov.b &0,ENDFLAG(%a6)
5628 bra.b SWORK
5629 SLASTLOOP:
5630 clr.l %d1 # d1 = L := 0
5631 mov.b &1,ENDFLAG(%a6)
5633 SWORK:
5634 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
5635 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
5637 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
5638 #--2**L * (PIby2_1), 2**L * (PIby2_2)
5640 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
5641 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
5643 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
5644 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
5645 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
5647 fmov.x %fp0,%fp2
5648 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
5650 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
5651 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
5652 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
5653 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
5654 #--US THE DESIRED VALUE IN FLOATING POINT.
5655 mov.l %a1,%d2 # d2 = saved sign/exponent word (in its low word)
5656 swap %d2 # move it to the high word so the sign is bit 31
5657 and.l &0x80000000,%d2
5658 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
5659 mov.l %d2,TWOTO63(%a6)
5660 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
5661 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
5662 # fint.x %fp2
5664 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
5665 mov.l %d1,%d2 # d2 = L
5667 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
5668 mov.w %d2,FP_SCR0_EX(%a6)
5669 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
5670 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
5672 add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
5673 mov.w %d1,FP_SCR1_EX(%a6)
5674 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
5675 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
5677 mov.b ENDFLAG(%a6),%d1 # d1 = ENDFLAG, tested after the update below
5679 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
5680 #--P2 = 2**(L) * Piby2_2
5681 fmov.x %fp2,%fp4 # fp4 = N
5682 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
5683 fmov.x %fp2,%fp5 # fp5 = N
5684 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
5685 fmov.x %fp4,%fp3 # fp3 = W = N*P1
5687 #--we want P+p = W+w but |p| <= half ulp of P
5688 #--Then, we need to compute A := R-P and a := r-p
5689 fadd.x %fp5,%fp3 # fp3 = P
5690 fsub.x %fp3,%fp4 # fp4 = W-P
5692 fsub.x %fp3,%fp0 # fp0 = A := R - P
5693 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
5695 fmov.x %fp0,%fp3 # fp3 = A
5696 fsub.x %fp4,%fp1 # fp1 = a := r - p
5698 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
5699 #--|r| <= half ulp of R.
5700 fadd.x %fp1,%fp0 # fp0 = R := A+a
5701 #--No need to calculate r if this is the last loop
5702 cmp.b %d1,&0
5703 bgt.w SRESTORE # ENDFLAG set: done
5705 #--Need to calculate r
5706 fsub.x %fp0,%fp3 # fp3 = A-R
5707 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
5708 bra.w SLOOP
5710 SRESTORE:
5711 fmov.l %fp2,INT(%a6) # store N of the last pass as an integer
5712 mov.l (%sp)+,%d2 # restore d2
5713 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
5715 mov.l ADJN(%a6),%d1
5716 cmp.l %d1,&4
5718 blt.w SINCONT # ADJN < 4: continue at SINCONT (defined above this chunk)
5719 bra.w SCCONT # ADJN = 4 was set by ssincos
5721 #########################################################################
5722 # stan(): computes the tangent of a normalized input #
5723 # stand(): computes the tangent of a denormalized input #
5725 # INPUT *************************************************************** #
5726 # a0 = pointer to extended precision input #
5727 # d0 = round precision,mode #
5729 # OUTPUT ************************************************************** #
5730 # fp0 = tan(X) #
5732 # ACCURACY and MONOTONICITY ******************************************* #
5733 # The returned result is within 3 ulp in 64 significant bit, i.e. #
5734 # within 0.5001 ulp to 53 bits if the result is subsequently #
5735 # rounded to double precision. The result is provably monotonic #
5736 # in double precision. #
5738 # ALGORITHM *********************************************************** #
5740 # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5742 # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5743 # k = N mod 2, so in particular, k = 0 or 1. #
5745 # 3. If k is odd, go to 5. #
5747 # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5748 # rational function U/V where #
5749 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5750 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5751 # Exit. #
5753 # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5754 # a rational function U/V where #
5755 # U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5756 # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5757 # -Cot(r) = -V/U. Exit. #
5759 # 6. If |X| > 1, go to 8. #
5761 # 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5763 # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5764 # to 2. #
5766 #########################################################################
5768 TANQ4: # coefficient Q4; read by stan with fmov.d (double)
5769 long 0x3EA0B759,0xF50F8688
5770 TANP3: # coefficient P3; read by stan with fmov.d (double)
5771 long 0xBEF2BAA5,0xA8924F04
5773 TANQ3: # coefficient Q3; read by stan with fadd.d, so only the first two longs are used
5774 long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
5776 TANP2: # coefficient P2; read by stan with fadd.x (extended)
5777 long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
5779 TANQ2: # coefficient Q2; read by stan with fadd.x (extended)
5780 long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
5782 TANP1: # coefficient P1; read by stan with fadd.x (extended)
5783 long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
5785 TANQ1: # coefficient Q1; read by stan with fadd.x (extended)
5786 long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
5788 INVTWOPI: # NOTE(review): not referenced by the code in this part of the file - confirm whether still used
5789 long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
5791 TWOPI1: # NOTE(review): not referenced by the code in this part of the file
5792 long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
5793 TWOPI2: # NOTE(review): mantissa ends in ...D4 while REDUCEX builds Piby2_2 with ...D3 - confirm intent
5794 long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5796 #--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
5797 #--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
5798 #--MOST 69 BITS LONG.
5799 # global PITBL
5800 PITBL:
5801 long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
5802 long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
5803 long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
5804 long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
5805 long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
5806 long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
5807 long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
5808 long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
5809 long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
5810 long 0xC0040000,0x90836524,0x88034B96,0x20B00000
5811 long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
5812 long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
5813 long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
5814 long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
5815 long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
5816 long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
5817 long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
5818 long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
5819 long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
5820 long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
5821 long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
5822 long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
5823 long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
5824 long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
5825 long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
5826 long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
5827 long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
5828 long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
5829 long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
5830 long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
5831 long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
5832 long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
5833 long 0x00000000,0x00000000,0x00000000,0x00000000
5834 long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
5835 long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
5836 long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
5837 long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
5838 long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
5839 long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
5840 long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
5841 long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
5842 long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
5843 long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
5844 long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
5845 long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
5846 long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
5847 long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
5848 long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
5849 long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
5850 long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
5851 long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
5852 long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
5853 long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
5854 long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
5855 long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
5856 long 0x40040000,0x90836524,0x88034B96,0xA0B00000
5857 long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
5858 long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
5859 long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
5860 long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
5861 long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
5862 long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
5863 long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
5864 long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
5865 long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5867 set INARG,FP_SCR0 # scratch copy of the running remainder; its exponent word is read back in LOOP/SLOOP
5869 set TWOTO63,L_SCR1 # sgl +-2**63 used to round to an integer
5870 set INT,L_SCR1 # integer quotient N; shares L_SCR1 with TWOTO63
5871 set ENDFLAG,L_SCR2 # byte flag: 1 on the last reduction pass, else 0
5873 global stan
# stan: tangent of a normalized input.
# a0 points to the extended-precision input X; d0 is written to fpcr just
# before the final instruction; the result is left in fp0.
5874 stan:
5875 fmov.x (%a0),%fp0 # LOAD INPUT
5877 mov.l (%a0),%d1 # upper word of d1 = sign/exponent word of X
5878 mov.w 4(%a0),%d1 # lower word of d1 = top 16 bits of the mantissa
5879 and.l &0x7FFFFFFF,%d1 # drop the sign: compact form of |X|
5881 cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
5882 bge.b TANOK1
5883 bra.w TANSM # no: tiny argument
5884 TANOK1:
5885 cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
5886 blt.b TANMAIN
5887 bra.w REDUCEX # no: general argument reduction
5889 TANMAIN:
5890 #--THIS IS THE USUAL CASE, |X| <= 15 PI.
5891 #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
5892 fmov.x %fp0,%fp1
5893 fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
5895 lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32; +0x200 = 32 entries of 16 bytes, i.e. the N = 0 row
5897 fmov.l %fp1,%d1 # CONVERT TO INTEGER
5899 asl.l &4,%d1 # N*16: byte offset of the table entry
5900 add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
5902 fsub.x (%a1)+,%fp0 # X-Y1
5904 fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
5906 ror.l &5,%d1 # undo the shift by 4 and move bit 0 of N to bit 31
5907 and.l &0x80000000,%d1 # N WAS ODD IFF d1 < 0
5909 TANCONT:
5910 fmovm.x &0x0c,-(%sp) # save fp2,fp3
5912 cmp.l %d1,&0
5913 blt.w NODD # N odd: compute -V/U instead of U/V
5915 fmov.x %fp0,%fp1
5916 fmul.x %fp1,%fp1 # S = R*R
5918 fmov.d TANQ4(%pc),%fp3
5919 fmov.d TANP3(%pc),%fp2
5921 fmul.x %fp1,%fp3 # SQ4
5922 fmul.x %fp1,%fp2 # SP3
5924 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5925 fadd.x TANP2(%pc),%fp2 # P2+SP3
5927 fmul.x %fp1,%fp3 # S(Q3+SQ4)
5928 fmul.x %fp1,%fp2 # S(P2+SP3)
5930 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5931 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5933 fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
5934 fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
5936 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5937 fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
5939 fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
5941 fadd.x %fp2,%fp0 # R+RS(P1+S(P2+SP3))
5943 fadd.s &0x3F800000,%fp1 # 1+S(Q1+...)
5945 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5947 fmov.l %d0,%fpcr # restore users round mode,prec
5948 fdiv.x %fp1,%fp0 # U/V; last inst - possible exception set
5949 bra t_inx2
5951 NODD:
5952 fmov.x %fp0,%fp1 # fp1 = R
5953 fmul.x %fp0,%fp0 # S = R*R
5955 fmov.d TANQ4(%pc),%fp3
5956 fmov.d TANP3(%pc),%fp2
5958 fmul.x %fp0,%fp3 # SQ4
5959 fmul.x %fp0,%fp2 # SP3
5961 fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
5962 fadd.x TANP2(%pc),%fp2 # P2+SP3
5964 fmul.x %fp0,%fp3 # S(Q3+SQ4)
5965 fmul.x %fp0,%fp2 # S(P2+SP3)
5967 fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
5968 fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
5970 fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
5971 fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
5973 fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
5974 fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
5976 fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
5978 fadd.x %fp2,%fp1 # R+RS(P1+S(P2+SP3))
5979 fadd.s &0x3F800000,%fp0 # 1+S(Q1+...)
5981 fmovm.x (%sp)+,&0x30 # restore fp2,fp3
5983 fmov.x %fp1,-(%sp) # push U
5984 eor.l &0x80000000,(%sp) # flip its sign in memory: -U
5986 fmov.l %d0,%fpcr # restore users round mode,prec
5987 fdiv.x (%sp)+,%fp0 # V/(-U); last inst - possible exception set
5988 bra t_inx2
5990 TANBORS:
5991 #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
5992 #--IF |X| < 2**(-40), RETURN X.
5993 cmp.l %d1,&0x3FFF8000 # compare compact |X| with compact 1.0
5994 bgt.b REDUCEX
5996 TANSM:
5997 fmov.x %fp0,-(%sp) # park X on the stack so the final move is a memory load
5998 fmov.l %d0,%fpcr # restore users round mode,prec
5999 mov.b &FMOV_OP,%d1 # last inst is MOVE
6000 fmov.x (%sp)+,%fp0 # last inst - possible exception set
6001 bra t_catch
6003 global stand
6004 #--TAN(X) = X FOR DENORMALIZED X
6005 stand:
6006 bra t_extdnrm # nothing is computed here; t_extdnrm is defined outside this chunk
6008 #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
6009 #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
6010 #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
6011 REDUCEX:
6012 fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
6013 mov.l %d2,-(%sp) # save d2
6014 fmov.s &0x00000000,%fp1 # fp1 = 0
6016 #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
6017 #--there is a danger of unwanted overflow in first LOOP iteration. In this
6018 #--case, reduce argument by one remainder step to make subsequent reduction
6019 #--safe.
6020 cmp.l %d1,&0x7ffeffff # is arg dangerously large?
6021 bne.b LOOP # no
6023 # yes; create 2**16383*PI/2
6024 mov.w &0x7ffe,FP_SCR0_EX(%a6)
6025 mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
6026 clr.l FP_SCR0_LO(%a6)
6028 # create low half of 2**16383*PI/2 at FP_SCR1
6029 mov.w &0x7fdc,FP_SCR1_EX(%a6)
6030 mov.l &0x85a308d3,FP_SCR1_HI(%a6)
6031 clr.l FP_SCR1_LO(%a6)
6033 ftest.x %fp0 # test sign of argument
6034 fblt.w red_neg
6036 or.b &0x80,FP_SCR0_EX(%a6) # positive arg: make both constants negative
6037 or.b &0x80,FP_SCR1_EX(%a6)
6038 red_neg:
6039 fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
6040 fmov.x %fp0,%fp1 # save high result in fp1
6041 fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
6042 fsub.x %fp0,%fp1 # determine low component of result
6043 fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
6045 #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
6046 #--integer quotient will be stored in N
6047 #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
6048 LOOP:
6049 fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
6050 mov.w INARG(%a6),%d1 # low word of d1 = sign/exponent word of R
6051 mov.l %d1,%a1 # save a copy of d1 (sign is extracted from it below)
6052 and.l &0x00007FFF,%d1
6053 sub.l &0x00003FFF,%d1 # d1 = K
6054 cmp.l %d1,&28
6055 ble.b LASTLOOP # K <= 28: this is the final pass
6056 CONTLOOP:
6057 sub.l &27,%d1 # d1 = L := K-27
6058 mov.b &0,ENDFLAG(%a6)
6059 bra.b WORK
6060 LASTLOOP:
6061 clr.l %d1 # d1 = L := 0
6062 mov.b &1,ENDFLAG(%a6)
6064 WORK:
6065 #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
6066 #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
6068 #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
6069 #--2**L * (PIby2_1), 2**L * (PIby2_2)
6071 mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
6072 sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
6074 mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
6075 mov.l &0x4E44152A,FP_SCR0_LO(%a6)
6076 mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
6078 fmov.x %fp0,%fp2
6079 fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
6081 #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
6082 #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
6083 #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
6084 #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
6085 #--US THE DESIRED VALUE IN FLOATING POINT.
6086 mov.l %a1,%d2 # d2 = saved sign/exponent word (in its low word)
6087 swap %d2 # move it to the high word so the sign is bit 31
6088 and.l &0x80000000,%d2
6089 or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
6090 mov.l %d2,TWOTO63(%a6)
6091 fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
6092 fsub.s TWOTO63(%a6),%fp2 # fp2 = N
6093 # fintrz.x %fp2,%fp2
6095 #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
6096 mov.l %d1,%d2 # d2 = L
6098 add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
6099 mov.w %d2,FP_SCR0_EX(%a6)
6100 mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
6101 clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
6103 add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
6104 mov.w %d1,FP_SCR1_EX(%a6)
6105 mov.l &0x85A308D3,FP_SCR1_HI(%a6)
6106 clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
6108 mov.b ENDFLAG(%a6),%d1 # d1 = ENDFLAG, tested after the update below
6110 #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
6111 #--P2 = 2**(L) * Piby2_2
6112 fmov.x %fp2,%fp4 # fp4 = N
6113 fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
6114 fmov.x %fp2,%fp5 # fp5 = N
6115 fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
6116 fmov.x %fp4,%fp3 # fp3 = W = N*P1
6118 #--we want P+p = W+w but |p| <= half ulp of P
6119 #--Then, we need to compute A := R-P and a := r-p
6120 fadd.x %fp5,%fp3 # fp3 = P
6121 fsub.x %fp3,%fp4 # fp4 = W-P
6123 fsub.x %fp3,%fp0 # fp0 = A := R - P
6124 fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
6126 fmov.x %fp0,%fp3 # fp3 = A
6127 fsub.x %fp4,%fp1 # fp1 = a := r - p
6129 #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
6130 #--|r| <= half ulp of R.
6131 fadd.x %fp1,%fp0 # fp0 = R := A+a
6132 #--No need to calculate r if this is the last loop
6133 cmp.b %d1,&0
6134 bgt.w RESTORE # ENDFLAG set: done
6136 #--Need to calculate r
6137 fsub.x %fp0,%fp3 # fp3 = A-R
6138 fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
6139 bra.w LOOP
6141 RESTORE:
6142 fmov.l %fp2,INT(%a6) # store N of the last pass as an integer
6143 mov.l (%sp)+,%d2 # restore d2
6144 fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6146 mov.l INT(%a6),%d1
6147 ror.l &1,%d1 # bit 0 of N moves to bit 31, so d1 < 0 iff N is odd
6149 bra.w TANCONT
6151 #########################################################################
6152 # satan(): computes the arctangent of a normalized number #
6153 # satand(): computes the arctangent of a denormalized number #
6155 # INPUT *************************************************************** #
6156 # a0 = pointer to extended precision input #
6157 # d0 = round precision,mode #
6159 # OUTPUT ************************************************************** #
6160 # fp0 = arctan(X) #
6162 # ACCURACY and MONOTONICITY ******************************************* #
6163 # The returned result is within 2 ulps in 64 significant bit, #
6164 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6165 # rounded to double precision. The result is provably monotonic #
6166 # in double precision. #
6168 # ALGORITHM *********************************************************** #
6169 # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6171 # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6172 # Note that k = -4, -3,..., or 3. #
6173 # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6174 # significant bits of X with a bit-1 attached at the 6-th #
6175 # bit position. Define u to be u = (X-F) / (1 + X*F). #
6177 # Step 3. Approximate arctan(u) by a polynomial poly. #
6179 # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6180 # table of values calculated beforehand. Exit. #
6182 # Step 5. If |X| >= 16, go to Step 7. #
6184 # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6186 # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6187 # polynomial in X'. #
6188 # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6190 #########################################################################
6192 ATANA3: long 0xBFF6687E,0x314987D8
6193 ATANA2: long 0x4002AC69,0x34A26DB3
6194 ATANA1: long 0xBFC2476F,0x4E1DA28E
6196 ATANB6: long 0x3FB34444,0x7F876989
6197 ATANB5: long 0xBFB744EE,0x7FAF45DB
6198 ATANB4: long 0x3FBC71C6,0x46940220
6199 ATANB3: long 0xBFC24924,0x921872F9
6200 ATANB2: long 0x3FC99999,0x99998FA9
6201 ATANB1: long 0xBFD55555,0x55555555
6203 ATANC5: long 0xBFB70BF3,0x98539E6A
6204 ATANC4: long 0x3FBC7187,0x962D1D7D
6205 ATANC3: long 0xBFC24924,0x827107B8
6206 ATANC2: long 0x3FC99999,0x9996263E
6207 ATANC1: long 0xBFD55555,0x55555536
6209 PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
6210 NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000
6212 PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000
6213 NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000
6215 ATANTBL:
6216 long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
6217 long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
6218 long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
6219 long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
6220 long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
6221 long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
6222 long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
6223 long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
6224 long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
6225 long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
6226 long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
6227 long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
6228 long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
6229 long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
6230 long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
6231 long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
6232 long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
6233 long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
6234 long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
6235 long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
6236 long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
6237 long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
6238 long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
6239 long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
6240 long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
6241 long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
6242 long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
6243 long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
6244 long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
6245 long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
6246 long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
6247 long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
6248 long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
6249 long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
6250 long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
6251 long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
6252 long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
6253 long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
6254 long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
6255 long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
6256 long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
6257 long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
6258 long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
6259 long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
6260 long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
6261 long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
6262 long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
6263 long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
6264 long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
6265 long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
6266 long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
6267 long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
6268 long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
6269 long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
6270 long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
6271 long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
6272 long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
6273 long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
6274 long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
6275 long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
6276 long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
6277 long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
6278 long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
6279 long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
6280 long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
6281 long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
6282 long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
6283 long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
6284 long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
6285 long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
6286 long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
6287 long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
6288 long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
6289 long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
6290 long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
6291 long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
6292 long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
6293 long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
6294 long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
6295 long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
6296 long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
6297 long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
6298 long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
6299 long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
6300 long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
6301 long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
6302 long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
6303 long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
6304 long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
6305 long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
6306 long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
6307 long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
6308 long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
6309 long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
6310 long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
6311 long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
6312 long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
6313 long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
6314 long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
6315 long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
6316 long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
6317 long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
6318 long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
6319 long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
6320 long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
6321 long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
6322 long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
6323 long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
6324 long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
6325 long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
6326 long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
6327 long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
6328 long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
6329 long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
6330 long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
6331 long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
6332 long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
6333 long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
6334 long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
6335 long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
6336 long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
6337 long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
6338 long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
6339 long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
6340 long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
6341 long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
6342 long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
6343 long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6345 set X,FP_SCR0 # scratch extended slot: holds X, later overwritten with F (ATANMAIN) or -1/X (ATANBIG)
6346 set XDCARE,X+2 # bytes 2-3 of that slot; not referenced in the visible code
6347 set XFRAC,X+4 # upper 32 bits of the mantissa in that slot
6348 set XFRACLO,X+8 # lower 32 bits of the mantissa in that slot
6350 set ATANF,FP_SCR1 # scratch extended slot for ATAN(F): sign/exponent longword
6351 set ATANFHI,ATANF+4 # ATAN(F) mantissa, upper 32 bits
6352 set ATANFLO,ATANF+8 # ATAN(F) mantissa, lower 32 bits
6354 global satan
6355 #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN. a0 POINTS TO X; THE RESULT IS FORMED IN FP0.
6356 satan:
6357 fmov.x (%a0),%fp0 # LOAD INPUT
6359 mov.l (%a0),%d1 # d1 upper word = sign and biased exponent of X
6360 mov.w 4(%a0),%d1 # d1 lower word = top 16 mantissa bits (compact form)
6361 fmov.x %fp0,X(%a6) # keep a copy of X in the scratch slot
6362 and.l &0x7FFFFFFF,%d1 # drop the sign bit: d1 = compact form of |X|
6364 cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
6365 bge.b ATANOK1 # yes: go check the upper bound
6366 bra.w ATANSM # no: small-argument polynomial
6368 ATANOK1:
6369 cmp.l %d1,&0x4002FFFF # |X| < 16 ?
6370 ble.b ATANMAIN # yes: table-driven main path
6371 bra.w ATANBIG # no: large-argument path
6373 #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
6374 #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
6375 #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
6376 #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
6377 #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
6378 #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
6379 #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
6380 #--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
6381 #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
6382 #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
6383 #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
6384 #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
6385 #--WILL INVOLVE A VERY LONG POLYNOMIAL.
6387 #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
6388 #--WE CHOSE F TO BE +-2^K * 1.BBBB1
6389 #--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
6390 #--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
6391 #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
6392 #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
6394 ATANMAIN: # table-driven path; on entry fp0 = X, d1 = compact |X|, X(%a6) = X
6396 and.l &0xF8000000,XFRAC(%a6) # KEEP FIRST 5 MANTISSA BITS (EXPLICIT 1 + 4 FRACTION BITS)
6397 or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
6398 mov.l &0x00000000,XFRACLO(%a6) # CLEAR LOW MANTISSA; LOCATION OF X IS NOW F
6400 fmov.x %fp0,%fp1 # FP1 IS X
6401 fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0
6402 fsub.x X(%a6),%fp0 # FP0 IS X-F
6403 fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F (0x3F800000 IS 1.0 IN SINGLE FORMAT)
6404 fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
6406 #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
6407 #--CREATE ATAN(F) AND STORE IT IN ATANF, AND
6408 #--SAVE REGISTER FP2.
6410 mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
6411 mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
6412 and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION (BITS 14-11)
6413 and.l &0x7FFF0000,%d2 # EXPONENT OF F
6414 sub.l &0x3FFB0000,%d2 # K+4 (IN THE UPPER WORD)
6415 asr.l &1,%d2 # MOVE K+4 DOWN SO IT SITS JUST ABOVE THE 4 FRACTION BITS
6416 add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
6417 asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|): BYTE OFFSET = 16 * ENTRY NUMBER
6418 lea ATANTBL(%pc),%a1 # a1 = BASE OF THE ATAN(|F|) TABLE
6419 add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
6420 mov.l (%a1)+,ATANF(%a6) # COPY SIGN/EXPONENT LONGWORD
6421 mov.l (%a1)+,ATANFHI(%a6) # COPY UPPER MANTISSA LONGWORD
6422 mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) (4TH LONG OF THE ENTRY IS NOT COPIED)
6423 mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN (THE SLOT NOW HOLDS F)
6424 and.l &0x80000000,%d1 # SIGN(F)
6425 or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
6426 mov.l (%sp)+,%d2 # RESTORE d2
6428 #--THAT'S ALL I HAVE TO DO FOR NOW,
6429 #--BUT ALAS, THE DIVIDE IS STILL CRANKING!
6431 #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
6432 #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
6433 #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
6434 #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
6435 #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
6436 #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
6437 #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
6439 fmovm.x &0x04,-(%sp) # save fp2
6441 fmov.x %fp0,%fp1 # FP1 IS U
6442 fmul.x %fp1,%fp1 # FP1 IS V = U*U
6443 fmov.d ATANA3(%pc),%fp2 # FP2 IS A3
6444 fadd.x %fp1,%fp2 # A3+V
6445 fmul.x %fp1,%fp2 # V*(A3+V)
6446 fmul.x %fp0,%fp1 # U*V
6447 fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
6448 fmul.d ATANA1(%pc),%fp1 # A1*U*V
6449 fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V))
6450 fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
6452 fmovm.x (%sp)+,&0x20 # restore fp2
6454 fmov.l %d0,%fpcr # restore users rnd mode,prec
6455 fadd.x ATANF(%a6),%fp0 # ATAN(X) = ATAN(F) + ATAN(U), done under the user's FPCR
6456 bra t_inx2 # NOTE(review): t_inx2 is defined outside this excerpt; presumably the common inexact-result exit
6458 ATANBORS: # NOTE(review): no branch to this label appears in the visible code
6459 #--|X| IS IN d1 IN COMPACT FORM (THE COMPARE BELOW READS d1). FP1, d0 SAVED.
6460 #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
6461 cmp.l %d1,&0x3FFF8000 # compare |X| with 1.0 to tell the two ranges apart
6462 bgt.w ATANBIG # I.E. |X| >= 16
6464 ATANSM: # small-argument path; on entry fp0 = X, d1 = compact |X|, X(%a6) = X
6465 #--|X| <= 1/16
6466 #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
6467 #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
6468 #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
6469 #--WHERE Y = X*X, AND Z = Y*Y.
6471 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40)? (0x3FD78000 is 2^(-40) in compact form)
6472 blt.w ATANTINY # yes: ATAN(X) = X to working precision
6474 #--COMPUTE POLYNOMIAL
6475 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6477 fmul.x %fp0,%fp0 # FP0 IS Y = X*X
6479 fmov.x %fp0,%fp1 # FP1 IS Y
6480 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6482 fmov.d ATANB6(%pc),%fp2 # FP2 IS B6
6483 fmov.d ATANB5(%pc),%fp3 # FP3 IS B5
6485 fmul.x %fp1,%fp2 # Z*B6
6486 fmul.x %fp1,%fp3 # Z*B5
6488 fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
6489 fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
6491 fmul.x %fp1,%fp2 # Z*(B4+Z*B6)
6492 fmul.x %fp3,%fp1 # Z*(B3+Z*B5), FP3 RELEASED
6494 fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6)
6495 fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5)
6497 fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6))
6498 fmul.x X(%a6),%fp0 # X*Y
6500 fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
6502 fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
6504 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6506 fmov.l %d0,%fpcr # restore users rnd mode,prec
6507 fadd.x X(%a6),%fp0 # ATAN(X) = X + correction, done under the user's FPCR
6508 bra t_inx2 # NOTE(review): t_inx2 is defined outside this excerpt; presumably the common inexact-result exit
6510 ATANTINY: # reached from ATANSM when compact |X| is below 0x3FD78000
6511 #--|X| < 2^(-40), ATAN(X) = X
6513 fmov.l %d0,%fpcr # restore users rnd mode,prec
6514 mov.b &FMOV_OP,%d1 # last inst is MOVE
6515 fmov.x X(%a6),%fp0 # last inst - possible exception set; result is X itself
6517 bra t_catch # NOTE(review): t_catch is defined outside this excerpt; presumably handles exceptions raised by the move, using the opcode tag in d1
6519 ATANBIG: # large-argument path; on entry fp0 = X, d1 = compact |X|, a0 still points to X
6520 #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
6521 #--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
6522 cmp.l %d1,&0x40638000 # |X| > 2^(100)? (0x40638000 is 2^100 in compact form)
6523 bgt.w ATANHUGE
6525 #--APPROXIMATE ATAN(-1/X) BY
6526 #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
6527 #--THIS CAN BE RE-WRITTEN AS
6528 #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
6530 fmovm.x &0x0c,-(%sp) # save fp2/fp3
6532 fmov.s &0xBF800000,%fp1 # LOAD -1
6533 fdiv.x %fp0,%fp1 # FP1 IS -1/X
6535 #--DIVIDE IS STILL CRANKING
6537 fmov.x %fp1,%fp0 # FP0 IS X'
6538 fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
6539 fmov.x %fp1,X(%a6) # X IS REALLY X'
6541 fmov.x %fp0,%fp1 # FP1 IS Y
6542 fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
6544 fmov.d ATANC5(%pc),%fp3 # FP3 IS C5
6545 fmov.d ATANC4(%pc),%fp2 # FP2 IS C4
6547 fmul.x %fp1,%fp3 # Z*C5
6548 fmul.x %fp1,%fp2 # Z*C4
6550 fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
6551 fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
6553 fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED
6554 fmul.x %fp0,%fp2 # Y*(C2+Z*C4)
6556 fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5)
6557 fmul.x X(%a6),%fp0 # X'*Y
6559 fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
6561 fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)]
6562 # ... +[Y*(C2+Z*C4)])
6563 fadd.x X(%a6),%fp0 # FP0 IS ATAN(X') = X' + CORRECTION
6565 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
6567 fmov.l %d0,%fpcr # restore users rnd mode,prec
6568 tst.b (%a0) # sign of the original X (top byte of its sign/exponent word)
6569 bpl.b pos_big
6571 neg_big:
6572 fadd.x NPIBY2(%pc),%fp0 # X < 0: add NPIBY2 (presumably -PI/2; constant defined outside this excerpt)
6573 bra t_minx2 # NOTE(review): t_minx2 is defined outside this excerpt; presumably the inexact exit for a negative result
6575 pos_big:
6576 fadd.x PPIBY2(%pc),%fp0 # X > 0: add PPIBY2 (presumably +PI/2; constant defined outside this excerpt)
6577 bra t_pinx2 # NOTE(review): t_pinx2 is defined outside this excerpt; presumably the inexact exit for a positive result
6579 ATANHUGE: # reached from ATANBIG when compact |X| exceeds 0x40638000 (2^100)
6580 #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
6581 tst.b (%a0) # sign of X (top byte of its sign/exponent word)
6582 bpl.b pos_huge
6584 neg_huge:
6585 fmov.x NPIBY2(%pc),%fp0 # load NPIBY2 before the user's FPCR is installed
6586 fmov.l %d0,%fpcr # restore users rnd mode,prec
6587 fadd.x PTINY(%pc),%fp0 # add PTINY under the user's rounding mode
6588 bra t_minx2 # NOTE(review): t_minx2 is defined outside this excerpt; presumably the inexact exit for a negative result
6590 pos_huge:
6591 fmov.x PPIBY2(%pc),%fp0 # load PPIBY2 before the user's FPCR is installed
6592 fmov.l %d0,%fpcr # restore users rnd mode,prec
6593 fadd.x NTINY(%pc),%fp0 # add NTINY under the user's rounding mode
6594 bra t_pinx2 # NOTE(review): t_pinx2 is defined outside this excerpt; presumably the inexact exit for a positive result
6596 global satand
6597 #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
6598 satand:
6599 bra t_extdnrm # all work is delegated to t_extdnrm (defined outside this excerpt; presumably returns the denormalized X itself)
6601 #########################################################################
6602 # sasin(): computes the inverse sine of a normalized input #
6603 # sasind(): computes the inverse sine of a denormalized input #
6605 # INPUT *************************************************************** #
6606 # a0 = pointer to extended precision input #
6607 # d0 = round precision,mode #
6609 # OUTPUT ************************************************************** #
6610 # fp0 = arcsin(X) #
6612 # ACCURACY and MONOTONICITY ******************************************* #
6613 # The returned result is within 3 ulps in 64 significant bits, #
6614 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6615 # rounded to double precision. The result is provably monotonic #
6616 # in double precision. #
6618 # ALGORITHM *********************************************************** #
6620 # ASIN #
6621 # 1. If |X| >= 1, go to 3. #
6623 # 2. (|X| < 1) Calculate asin(X) by #
6624 # z := sqrt( [1-X][1+X] ) #
6625 # asin(X) = atan( x / z ). #
6626 # Exit. #
6628 # 3. If |X| > 1, go to 5. #
6630 # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6632 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6633 # Exit. #
6635 #########################################################################
6637 global sasin
6638 sasin: # a0 = pointer to extended X, d0 = user rounding precision/mode; result is formed in fp0
6639 fmov.x (%a0),%fp0 # LOAD INPUT
6641 mov.l (%a0),%d1 # d1 upper word = sign and biased exponent of X
6642 mov.w 4(%a0),%d1 # d1 lower word = top 16 mantissa bits (compact form)
6643 and.l &0x7FFFFFFF,%d1 # drop the sign bit: d1 = compact form of |X|
6644 cmp.l %d1,&0x3FFF8000 # |X| >= 1?
6645 bge.b ASINBIG # yes: handle |X| = 1 and |X| > 1 separately
6647 # This catch is added here for the '060 QSP. Originally, the call to
6648 # satan() would handle this case by causing the exception which would
6649 # not be caught until gen_except(). Now, with the exceptions being
6650 # detected inside of satan(), the exception would have been handled there
6651 # instead of inside sasin() as expected.
6652 cmp.l %d1,&0x3FD78000 # |X| < 2^(-40)?
6653 blt.w ASINTINY # yes: ASIN(X) = X, handled here rather than inside satan
6655 #--THIS IS THE USUAL CASE, |X| < 1
6656 #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
6658 ASINMAIN:
6659 fmov.s &0x3F800000,%fp1 # FP1 IS 1.0
6660 fsub.x %fp0,%fp1 # 1-X
6661 fmovm.x &0x4,-(%sp) # {fp2}
6662 fmov.s &0x3F800000,%fp2 # FP2 IS 1.0
6663 fadd.x %fp0,%fp2 # 1+X
6664 fmul.x %fp2,%fp1 # (1+X)(1-X)
6665 fmovm.x (%sp)+,&0x20 # {fp2}
6666 fsqrt.x %fp1 # SQRT([1-X][1+X])
6667 fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
6668 fmovm.x &0x01,-(%sp) # save X/SQRT(...) (12-byte extended value)
6669 lea (%sp),%a0 # pass ptr to X/SQRT(...)
6670 bsr satan # fp0 = ATAN(X/SQRT(...)); d0 is passed through unchanged
6671 add.l &0xc,%sp # clear X/SQRT(...) from stack
6672 bra t_inx2 # NOTE(review): t_inx2 is defined outside this excerpt; presumably the common inexact-result exit
6674 ASINBIG: # reached from sasin when |X| >= 1; fp0 = X, a0 points to X
6675 fabs.x %fp0 # |X|
6676 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
6677 fbgt t_operr # cause an operr exception
6679 #--|X| = 1, ASIN(X) = +- PI/2.
6680 ASINONE:
6681 fmov.x PIBY2(%pc),%fp0 # FP0 IS PIBY2 (constant defined outside this excerpt)
6682 mov.l (%a0),%d1 # sign/exponent longword of X
6683 and.l &0x80000000,%d1 # SIGN BIT OF X
6684 or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
6685 mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
6686 fmov.l %d0,%fpcr # restore users rnd mode,prec
6687 fmul.s (%sp)+,%fp0 # FP0 IS SIGN(X)*PIBY2; the pushed longword is popped by the operand
6688 bra t_inx2 # NOTE(review): t_inx2 is defined outside this excerpt; presumably the common inexact-result exit
6690 #--|X| < 2^(-40), ASIN(X) = X
6691 ASINTINY:
6692 fmov.l %d0,%fpcr # restore users rnd mode,prec
6693 mov.b &FMOV_OP,%d1 # last inst is MOVE
6694 fmov.x (%a0),%fp0 # last inst - possible exception; result is X itself
6695 bra t_catch # NOTE(review): t_catch is defined outside this excerpt; presumably handles exceptions raised by the move, using the opcode tag in d1
6697 global sasind
6698 #--ASIN(X) = X FOR DENORMALIZED X
6699 sasind:
6700 bra t_extdnrm # all work is delegated to t_extdnrm (defined outside this excerpt)
6702 #########################################################################
6703 # sacos(): computes the inverse cosine of a normalized input #
6704 # sacosd(): computes the inverse cosine of a denormalized input #
6706 # INPUT *************************************************************** #
6707 # a0 = pointer to extended precision input #
6708 # d0 = round precision,mode #
6710 # OUTPUT ************************************************************** #
6711 # fp0 = arccos(X) #
6713 # ACCURACY and MONOTONICITY ******************************************* #
6714 # The returned result is within 3 ulps in 64 significant bits, #
6715 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6716 # rounded to double precision. The result is provably monotonic #
6717 # in double precision. #
6719 # ALGORITHM *********************************************************** #
6721 # ACOS #
6722 # 1. If |X| >= 1, go to 3. #
6724 # 2. (|X| < 1) Calculate acos(X) by #
6725 # z := (1-X) / (1+X) #
6726 # acos(X) = 2 * atan( sqrt(z) ). #
6727 # Exit. #
6729 # 3. If |X| > 1, go to 5. #
6731 # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6733 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6734 # Exit. #
6736 #########################################################################
6738 global sacos
6739 sacos: # a0 = pointer to extended X, d0 = user rounding precision/mode; result is formed in fp0
6740 fmov.x (%a0),%fp0 # LOAD INPUT
6742 mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
6743 mov.w 4(%a0),%d1 # d1 lower word = top 16 mantissa bits (compact form)
6744 and.l &0x7FFFFFFF,%d1 # drop the sign bit: d1 = compact form of |X|
6745 cmp.l %d1,&0x3FFF8000 # |X| >= 1?
6746 bge.b ACOSBIG # yes: handle |X| = 1 and |X| > 1 separately
6748 #--THIS IS THE USUAL CASE, |X| < 1
6749 #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
6751 ACOSMAIN:
6752 fmov.s &0x3F800000,%fp1 # FP1 IS 1.0
6753 fadd.x %fp0,%fp1 # 1+X
6754 fneg.x %fp0 # -X
6755 fadd.s &0x3F800000,%fp0 # 1-X
6756 fdiv.x %fp1,%fp0 # (1-X)/(1+X)
6757 fsqrt.x %fp0 # SQRT((1-X)/(1+X))
6758 mov.l %d0,-(%sp) # save original users fpcr
6759 clr.l %d0 # satan loads d0 into FPCR, so it runs with an all-zero FPCR setting
6760 fmovm.x &0x01,-(%sp) # save SQRT(...) to stack (12-byte extended value)
6761 lea (%sp),%a0 # pass ptr to sqrt
6762 bsr satan # ATAN(SQRT([1-X]/[1+X]))
6763 add.l &0xc,%sp # clear SQRT(...) from stack
6765 fmov.l (%sp)+,%fpcr # restore users round prec,mode
6766 fadd.x %fp0,%fp0 # 2 * ATAN( STUFF ), done under the user's FPCR
6767 bra t_pinx2 # NOTE(review): t_pinx2 is defined outside this excerpt; presumably the inexact exit for a positive result
6769 ACOSBIG: # reached from sacos when |X| >= 1; fp0 = X, a0 points to X
6770 fabs.x %fp0 # |X|
6771 fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
6772 fbgt t_operr # cause an operr exception
6774 #--|X| = 1, ACOS(X) = 0 OR PI
6775 tst.b (%a0) # is X positive or negative?
6776 bpl.b ACOSP1
6778 #--X = -1
6779 #Returns PI and inexact exception
6780 ACOSM1:
6781 fmov.x PI(%pc),%fp0 # load PI
6782 fmov.l %d0,%fpcr # load round mode,prec
6783 fadd.s &0x00800000,%fp0 # add a small value (0x00800000 is 2^(-126) in single format)
6784 bra t_pinx2 # NOTE(review): t_pinx2 is defined outside this excerpt; presumably the inexact exit for a positive result
6786 ACOSP1:
6787 bra ld_pzero # answer is positive zero
6789 global sacosd
6790 #--ACOS(X) = PI/2 FOR DENORMALIZED X
6791 sacosd:
6792 fmov.l %d0,%fpcr # load user's rnd mode/prec
6793 fmov.x PIBY2(%pc),%fp0 # FP0 IS PIBY2, loaded under the user's FPCR
6794 bra t_pinx2 # NOTE(review): t_pinx2 is defined outside this excerpt; presumably the inexact exit for a positive result
6796 #########################################################################
6797 # setox(): computes the exponential for a normalized input #
6798 # setoxd(): computes the exponential for a denormalized input #
6799 # setoxm1(): computes the exponential minus 1 for a normalized input #
6800 # setoxm1d(): computes the exponential minus 1 for a denormalized input #
6802 # INPUT *************************************************************** #
6803 # a0 = pointer to extended precision input #
6804 # d0 = round precision,mode #
6806 # OUTPUT ************************************************************** #
6807 # fp0 = exp(X) or exp(X)-1 #
6809 # ACCURACY and MONOTONICITY ******************************************* #
6810 # The returned result is within 0.85 ulps in 64 significant bits, #
6811 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6812 # rounded to double precision. The result is provably monotonic #
6813 # in double precision. #
6815 # ALGORITHM and IMPLEMENTATION **************************************** #
6817 # setoxd #
6818 # ------ #
6819 # Step 1. Set ans := 1.0 #
6821 # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6822 # Notes: This will always generate one exception -- inexact. #
6825 # setox #
6826 # ----- #
6828 # Step 1. Filter out extreme cases of input argument. #
6829 # 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6830 # 1.2 Go to Step 7. #
6831 # 1.3 If |X| < 16380 log(2), go to Step 2. #
6832 # 1.4 Go to Step 8. #
6833 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6834 # To avoid the use of floating-point comparisons, a #
6835 # compact representation of |X| is used. This format is a #
6836 # 32-bit integer, the upper (more significant) 16 bits #
6837 # are the sign and biased exponent field of |X|; the #
6838 # lower 16 bits are the 16 most significant fraction #
6839 # (including the explicit bit) bits of |X|. Consequently, #
6840 # the comparisons in Steps 1.1 and 1.3 can be performed #
6841 # by integer comparison. Note also that the constant #
6842 # 16380 log(2) used in Step 1.3 is also in the compact #
6843 # form. Thus taking the branch to Step 2 guarantees #
6844 # |X| < 16380 log(2). There is no harm to have a small #
6845 # number of cases where |X| is less than, but close to, #
6846 # 16380 log(2) and the branch to Step 8 is taken. #
6848 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6849 # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6850 # was taken) #
6851 # 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6852 # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6853 # or 63. #
6854 # 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6855 # 2.5 Calculate the address of the stored value of #
6856 # 2^(J/64). #
6857 # 2.6 Create the value Scale = 2^M. #
6858 # Notes: The calculation in 2.2 is really performed by #
6859 # Z := X * constant #
6860 # N := round-to-nearest-integer(Z) #
6861 # where #
6862 # constant := single-precision( 64/log 2 ). #
6864 # Using a single-precision constant avoids memory #
6865 # access. Another effect of using a single-precision #
6866 # "constant" is that the calculated value Z is #
6868 # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6870 # This error has to be considered later in Steps 3 and 4. #
6872 # Step 3. Calculate X - N*log2/64. #
6873 # 3.1 R := X + N*L1, #
6874 # where L1 := single-precision(-log2/64). #
6875 # 3.2 R := R + N*L2, #
6876 # L2 := extended-precision(-log2/64 - L1).#
6877 # Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6878 # approximate the value -log2/64 to 88 bits of accuracy. #
6879 # b) N*L1 is exact because N is no longer than 22 bits #
6880 # and L1 is no longer than 24 bits. #
6881 # c) The calculation X+N*L1 is also exact due to #
6882 # cancellation. Thus, R is practically X+N(L1+L2) to full #
6883 # 64 bits. #
6884 # d) It is important to estimate how large can |R| be #
6885 # after Step 3.2. #
6887 # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6888 # X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6889 # X*64/log2 - N = f - eps*X 64/log2 #
6890 # X - N*log2/64 = f*log2/64 - eps*X #
6893 # Now |X| <= 16446 log2, thus #
6895 # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6896 # <= 0.57 log2/64. #
6897 # This bound will be used in Step 4. #
6899 # Step 4. Approximate exp(R)-1 by a polynomial #
6900 # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6901 # Notes: a) In order to reduce memory access, the coefficients #
6902 # are made as "short" as possible: A1 (which is 1/2), A4 #
6903 # and A5 are single precision; A2 and A3 are double #
6904 # precision. #
6905 # b) Even with the restrictions above, #
6906 # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6907 # Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6908 # c) To fully use the pipeline, p is separated into #
6909 # two independent pieces of roughly equal complexities #
6910 # p = [ R + R*S*(A2 + S*A4) ] + #
6911 # [ S*(A1 + S*(A3 + S*A5)) ] #
6912 # where S = R*R. #
6914 # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6915 # ans := T + ( T*p + t) #
6916 # where T and t are the stored values for 2^(J/64). #
6917 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
6918 # 2^(J/64) to roughly 85 bits; T is in extended precision #
6919 # and t is in single precision. Note also that T is #
6920 # rounded to 62 bits so that the last two bits of T are #
6921 # zero. The reason for such a special form is that T-1, #
6922 # T-2, and T-8 will all be exact --- a property that will #
6923 # give much more accurate computation of the function #
6924 # EXPM1. #
6926 # Step 6. Reconstruction of exp(X) #
6927 # exp(X) = 2^M * 2^(J/64) * exp(R). #
6928 # 6.1 If AdjFlag = 0, go to 6.3 #
6929 # 6.2 ans := ans * AdjScale #
6930 # 6.3 Restore the user FPCR #
6931 # 6.4 Return ans := ans * Scale. Exit. #
6932 # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6933 # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6934 # neither overflow nor underflow. If AdjFlag = 1, that #
6935 # means that #
6936 # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6937 # Hence, exp(X) may overflow or underflow or neither. #
6938 # When that is the case, AdjScale = 2^(M1) where M1 is #
6939 # approximately M. Thus 6.2 will never cause #
6940 # over/underflow. Possible exception in 6.4 is overflow #
6941 # or underflow. The inexact exception is not generated in #
6942 # 6.4. Although one can argue that the inexact flag #
6943 # should always be raised, simulating that exception #
6944 # costs more than the flag is worth in practical use. #
6946 # Step 7. Return 1 + X. #
6947 # 7.1 ans := X #
6948 # 7.2 Restore user FPCR. #
6949 # 7.3 Return ans := 1 + ans. Exit #
6950 # Notes: For non-zero X, the inexact exception will always be #
6951 # raised by 7.3. That is the only exception raised by 7.3.#
6952 # Note also that we use the FMOVEM instruction to move X #
6953 # in Step 7.1 to avoid unnecessary trapping. (Although #
6954 # the FMOVEM may not seem relevant since X is normalized, #
6955 # the precaution will be useful in the library version of #
6956 # this code where the separate entry for denormalized #
6957 # inputs will be done away with.) #
6959 # Step 8. Handle exp(X) where |X| >= 16380log2. #
6960 # 8.1 If |X| > 16480 log2, go to Step 9. #
6961 # (mimic 2.2 - 2.6) #
6962 # 8.2 N := round-to-integer( X * 64/log2 ) #
6963 # 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6964 # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6965 # AdjFlag := 1. #
6966 # 8.5 Calculate the address of the stored value #
6967 # 2^(J/64). #
6968 # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6969 # 8.7 Go to Step 3. #
6970 # Notes: Refer to notes for 2.2 - 2.6. #
6972 # Step 9. Handle exp(X), |X| > 16480 log2. #
6973 # 9.1 If X < 0, go to 9.3 #
6974 # 9.2 ans := Huge, go to 9.4 #
6975 # 9.3 ans := Tiny. #
6976 # 9.4 Restore user FPCR. #
6977 # 9.5 Return ans := ans * ans. Exit. #
6978 # Notes: Exp(X) will surely overflow or underflow, depending on #
6979 # X's sign. "Huge" and "Tiny" are respectively large/tiny #
6980 # extended-precision numbers whose square over/underflow #
6981 # with an inexact result. Thus, 9.5 always raises the #
6982 # inexact together with either overflow or underflow. #
6984 # setoxm1d #
6985 # -------- #
6987 # Step 1. Set ans := 0 #
6989 # Step 2. Return ans := X + ans. Exit. #
6990 # Notes: This will return X with the appropriate rounding #
6991 # precision prescribed by the user FPCR. #
6993 # setoxm1 #
6994 # ------- #
6996 # Step 1. Check |X| #
6997 # 1.1 If |X| >= 1/4, go to Step 1.3. #
6998 # 1.2 Go to Step 7. #
6999 # 1.3 If |X| < 70 log(2), go to Step 2. #
7000 # 1.4 Go to Step 10. #
7001 # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
7002 # However, it is conceivable |X| can be small very often #
7003 # because EXPM1 is intended to evaluate exp(X)-1 #
7004 # accurately when |X| is small. For further details on #
7005 # the comparisons, see the notes on Step 1 of setox. #
7007 # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
7008 # 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
7009 # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
7010 # or 63. #
7011 # 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
7012 # 2.4 Calculate the address of the stored value of #
7013 # 2^(J/64). #
7014 # 2.5 Create the values Sc = 2^M and #
7015 # OnebySc := -2^(-M). #
7016 # Notes: See the notes on Step 2 of setox. #
7018 # Step 3. Calculate X - N*log2/64. #
7019 # 3.1 R := X + N*L1, #
7020 # where L1 := single-precision(-log2/64). #
7021 # 3.2 R := R + N*L2, #
7022 # L2 := extended-precision(-log2/64 - L1).#
7023 # Notes: Applying the analysis of Step 3 of setox in this case #
7024 # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
7025 # this case). #
7027 # Step 4. Approximate exp(R)-1 by a polynomial #
7028 # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
7029 # Notes: a) In order to reduce memory access, the coefficients #
7030 # are made as "short" as possible: A1 (which is 1/2), A5 #
7031 # and A6 are single precision; A2, A3 and A4 are double #
7032 # precision. #
7033 # b) Even with the restriction above, #
7034 # |p - (exp(R)-1)| < |R| * 2^(-72.7) #
7035 # for all |R| <= 0.0055. #
7036 # c) To fully use the pipeline, p is separated into #
7037 # two independent pieces of roughly equal complexity #
7038 # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
7039 # [ R + S*(A1 + S*(A3 + S*A5)) ] #
7040 # where S = R*R. #
7042 # Step 5. Compute 2^(J/64)*p by #
7043 # p := T*p #
7044 # where T and t are the stored values for 2^(J/64). #
7045 # Notes: 2^(J/64) is stored as T and t where T+t approximates #
7046 # 2^(J/64) to roughly 85 bits; T is in extended precision #
7047 # and t is in single precision. Note also that T is #
7048 # rounded to 62 bits so that the last two bits of T are #
7049 # zero. The reason for such a special form is that T-1, #
7050 # T-2, and T-8 will all be exact --- a property that will #
7051 # be exploited in Step 6 below. The total relative error #
7052 # in p is no bigger than 2^(-67.7) compared to the final #
7053 # result. #
7055 # Step 6. Reconstruction of exp(X)-1 #
7056 # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
7057 # 6.1 If M <= 63, go to Step 6.3. #
7058 # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
7059 # 6.3 If M >= -3, go to 6.5. #
7060 # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
7061 # 6.5 ans := (T + OnebySc) + (p + t). #
7062 # 6.6 Restore user FPCR. #
7063 # 6.7 Return ans := Sc * ans. Exit. #
7064 # Notes: The various arrangements of the expressions give #
7065 # accurate evaluations. #
7067 # Step 7. exp(X)-1 for |X| < 1/4. #
7068 # 7.1 If |X| >= 2^(-65), go to Step 9. #
7069 # 7.2 Go to Step 8. #
7071 # Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
7072 # 8.1 If |X| < 2^(-16312), goto 8.3 #
7073 # 8.2 Restore FPCR; return ans := X - 2^(-16382). #
7074 # Exit. #
7075 # 8.3 X := X * 2^(140). #
7076 # 8.4 Restore FPCR; ans := ans - 2^(-16382). #
7077 # Return ans := ans*2^(140). Exit #
7078 # Notes: The idea is to return "X - tiny" under the user #
7079 # precision and rounding modes. To avoid unnecessary #
7080 # inefficiency, we stay away from denormalized numbers #
7081 # the best we can. For |X| >= 2^(-16312), the #
7082 # straightforward 8.2 generates the inexact exception as #
7083 # the case warrants. #
7085 # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
7086 # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
7087 # Notes: a) In order to reduce memory access, the coefficients #
7088 # are made as "short" as possible: B1 (which is 1/2), B9 #
7089 # to B12 are single precision; B3 to B8 are double #
7090 # precision; and B2 is double extended. #
7091 # b) Even with the restriction above, #
7092 # |p - (exp(X)-1)| < |X| 2^(-70.6) #
7093 # for all |X| <= 0.251. #
7094 # Note that 0.251 is slightly bigger than 1/4. #
7095 # c) To fully preserve accuracy, the polynomial is #
7096 # computed as #
7097 # X + ( S*B1 + Q ) where S = X*X and #
7098 # Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
7099 # d) To fully use the pipeline, Q is separated into #
7100 # two independent pieces of roughly equal complexity #
7101 # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
7102 # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
7104 # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
7105 # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
7106 # practical purposes. Therefore, go to Step 1 of setox. #
7107 # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
7108 # purposes. #
7109 # ans := -1 #
7110 # Restore user FPCR #
7111 # Return ans := ans + 2^(-126). Exit. #
7112 # Notes: 10.2 will always create an inexact and return -1 + tiny #
7113 # in the user rounding precision and mode. #
7115 #########################################################################
7117 L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # extended (3 longs) + 1 pad long; presumably the L2 term of Step 3.2
7119 EEXPA3: long 0x3FA55555,0x55554CC1 # double, ~1/24; presumably A3 of the setox Step 4 polynomial
7120 EEXPA2: long 0x3FC55555,0x55554A54 # double, ~1/6; presumably A2 of the setox Step 4 polynomial
7122 EM1A4: long 0x3F811111,0x11174385 # double, ~1/120; presumably A4 of the setoxm1 Step 4 polynomial
7123 EM1A3: long 0x3FA55555,0x55554F5A # double, ~1/24; presumably A3 of the setoxm1 Step 4 polynomial
7125 EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # double, ~1/6, followed by two zero longs
7127 EM1B8: long 0x3EC71DE3,0xA5774682 # double, ~1/9!; B3..B8 are the double-precision coefficients of setoxm1 Step 9
7128 EM1B7: long 0x3EFA01A0,0x19D7CB68 # double, ~1/8!
7130 EM1B6: long 0x3F2A01A0,0x1A019DF3 # double, ~1/7!
7131 EM1B5: long 0x3F56C16C,0x16C170E2 # double, ~1/6!
7133 EM1B4: long 0x3F811111,0x11111111 # double, ~1/5!
7134 EM1B3: long 0x3FA55555,0x55555555 # double, ~1/4!
7136 EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # extended, ~1/6 (B2 is double extended per the Step 9 notes)
7137 long 0x00000000 # pad long after EM1B2
7139 TWO140: long 0x48B00000,0x00000000 # double, 2^(140)
7140 TWON140:
7141 long 0x37300000,0x00000000 # double, 2^(-140)
7143 EEXPTBL: # 64 entries for 2^(J/64), J = 0..63; each entry is 4 longs: extended-precision T (3 longs) then single-precision t (1 long)
7144 long 0x3FFF0000,0x80000000,0x00000000,0x00000000
7145 long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7146 long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7147 long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7148 long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7149 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7150 long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7151 long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7152 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7153 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7154 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7155 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7156 long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7157 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7158 long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7159 long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7160 long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7161 long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7162 long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7163 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7164 long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7165 long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7166 long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7167 long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7168 long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7169 long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7170 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7171 long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7172 long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7173 long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7174 long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7175 long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7176 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
7177 long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7178 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7179 long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7180 long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7181 long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7182 long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7183 long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7184 long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7185 long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7186 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7187 long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7188 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7189 long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7190 long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7191 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7192 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7193 long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7194 long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7195 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7196 long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7197 long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7198 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7199 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7200 long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7201 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7202 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7203 long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7204 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7205 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7206 long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7207 long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
# Scratch-slot aliases used by setox and setoxm1.
# SCALE/SC share FP_SCR0 and ADJSCALE/ONEBYSC share FP_SCR1.
7209 set ADJFLAG,L_SCR2 # setox: 0 on the EXPMAIN path, 1 on the EEXPBIG path
7210 set SCALE,FP_SCR0 # setox: 2^(M) built as an extended value
7211 set ADJSCALE,FP_SCR1 # setox: 2^(M1), extra factor applied when ADJFLAG != 0
7212 set SC,FP_SCR0 # setoxm1: 2^(M), or the tiny constant on the EM1TINY paths
7213 set ONEBYSC,FP_SCR1 # setoxm1: -2^(-M)
7215 global setox
7216 setox:
# setox: e^X for finite, non-zero, non-NaN X.
# In:  a0 = pointer to the extended-precision operand X
#      d0 = value loaded into FPCR just before the final FP instruction
# Out: fp0 = result. Every path leaves through t_catch, t_pinx2,
#      t_unfl2 or t_ovfl2 (all defined outside this routine).
# Scratch: d1, a1, fp1, L_SCR1, ADJFLAG, SCALE, ADJSCALE.
# fp2/fp3 are pushed on the stack and popped again before the exit.
7217 #--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7219 #--Step 1.
7220 mov.l (%a0),%d1 # load sign/exponent longword of X
7221 and.l &0x7FFF0000,%d1 # biased expo. of X (sign dropped)
7222 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7223 bge.b EXPC1 # |X| >= 2^(-65): normal case
7224 bra EXPSM # |X| < 2^(-65): result is 1+X
7226 EXPC1:
# NOTE: EM1BIG in setoxm1 also branches here (positive X only), with d1
# holding the first longword of X.
7227 #--The case |X| >= 2^(-65)
7228 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7229 cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7230 blt.b EXPMAIN # normal case
7231 bra EEXPBIG # otherwise take the two-factor scaling path
7233 EXPMAIN:
7234 #--Step 2.
7235 #--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7236 fmov.x (%a0),%fp0 # load input from (a0)
7238 fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
7239 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7240 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7241 mov.l &0,ADJFLAG(%a6) # no extra ADJSCALE multiply on this path
7242 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7243 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7244 fmov.l %d1,%fp0 # convert to floating-format
7246 mov.l %d1,L_SCR1(%a6) # save N temporarily
7247 and.l &0x3F,%d1 # d1 is J = N mod 64
7248 lsl.l &4,%d1 # J * 16 = byte offset into EEXPTBL
7249 add.l %d1,%a1 # address of 2^(J/64)
7250 mov.l L_SCR1(%a6),%d1 # reload N
7251 asr.l &6,%d1 # d1 is M
7252 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7253 mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7255 EXPCONT1:
7256 #--Step 3.
7257 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7258 #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7259 fmov.x %fp0,%fp2
7260 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7261 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7262 fadd.x %fp1,%fp0 # X + N*L1
7263 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7265 #--Step 4.
7266 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7267 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7268 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7269 #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7271 fmov.x %fp0,%fp1
7272 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7274 fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7276 fmul.x %fp1,%fp2 # fp2 IS S*A5
7277 fmov.x %fp1,%fp3
7278 fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7280 fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7281 fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7283 fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7284 mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7285 mov.l &0x80000000,SCALE+4(%a6) # mantissa = 1.0 (only the top bit set)
7286 clr.l SCALE+8(%a6)
7288 fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7290 fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7291 fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7293 fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7294 fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7296 fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7297 fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7299 #--Step 5
7300 #--final reconstruction process
7301 #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7303 fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7304 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7305 fadd.s (%a1),%fp0 # accurate 2^(J/64): add the single-precision word after the lead part
7307 fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7308 mov.l ADJFLAG(%a6),%d1
7310 #--Step 6
7311 tst.l %d1
7312 beq.b NORMAL # ADJFLAG == 0: single scale factor only
7313 ADJUST:
7314 fmul.x ADJSCALE(%a6),%fp0 # apply 2^(M1) first (EEXPBIG path)
7315 NORMAL:
7316 fmov.l %d0,%fpcr # restore user FPCR
7317 mov.b &FMUL_OP,%d1 # last inst is MUL
7318 fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7319 bra t_catch
7321 EXPSM:
7322 #--Step 7
7323 fmovm.x (%a0),&0x80 # load X
7324 fmov.l %d0,%fpcr # FPCR from d0 before the final add
7325 fadd.s &0x3F800000,%fp0 # 1+X in user mode
7326 bra t_pinx2
7328 EEXPBIG:
7329 #--Step 8
7330 cmp.l %d1,&0x400CB27C # 16480 log2
7331 bgt.b EXP2BIG
7332 #--Steps 8.2 -- 8.6
7333 fmov.x (%a0),%fp0 # load input from (a0)
7335 fmov.x %fp0,%fp1
7336 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7337 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7338 mov.l &1,ADJFLAG(%a6) # request the extra ADJSCALE multiply in Step 6
7339 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7340 lea EEXPTBL(%pc),%a1
7341 fmov.l %d1,%fp0 # convert to floating-format
7342 mov.l %d1,L_SCR1(%a6) # save N temporarily
7343 and.l &0x3F,%d1 # d1 is J = N mod 64
7344 lsl.l &4,%d1 # J * 16 = byte offset into EEXPTBL
7345 add.l %d1,%a1 # address of 2^(J/64)
7346 mov.l L_SCR1(%a6),%d1
7347 asr.l &6,%d1 # d1 is K
7348 mov.l %d1,L_SCR1(%a6) # save K temporarily
7349 asr.l &1,%d1 # d1 is M1 = K/2 (arithmetic shift)
7350 sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7351 add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7352 mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7353 mov.l &0x80000000,ADJSCALE+4(%a6)
7354 clr.l ADJSCALE+8(%a6)
7355 mov.l L_SCR1(%a6),%d1 # d1 is M
7356 add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7357 bra.w EXPCONT1 # go back to Step 3
7359 EXP2BIG:
7360 #--Step 9
7361 tst.b (%a0) # is X positive or negative?
7362 bmi t_unfl2 # X < 0: leave through t_unfl2
7363 bra t_ovfl2 # X > 0: leave through t_ovfl2
7365 global setoxd
7366 setoxd:
# setoxd: e^X for a denormalized X. Computes 1 + sign(X)*2^(-126) with
# the FPCR value passed in d0 and leaves through t_pinx2.
# In: a0 = pointer to X, d0 = FPCR value.  Out: fp0.
7367 #--entry point for EXP(X), X is denormalized
7368 mov.l (%a0),-(%sp) # push first longword of X (carries the sign bit)
7369 andi.l &0x80000000,(%sp) # keep only the sign
7370 ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7372 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
7374 fmov.l %d0,%fpcr # FPCR from d0 before the final add
7375 fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126); pops the temporary
7376 bra t_pinx2
7378 global setoxm1
7379 setoxm1:
# setoxm1: e^X - 1 for finite, non-zero, non-NaN X.
# In:  a0 = pointer to the extended-precision operand X
#      d0 = value loaded into FPCR just before the final FP instruction
# Out: fp0 = result. Exits through t_inx2, t_catch or t_minx2, or joins
#      setox at EXPC1 when X is large and positive.
# Scratch: d1, a1, fp1, L_SCR1, SC, ONEBYSC.
# fp2/fp3 are pushed and popped on the paths that use them.
7380 #--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7382 #--Step 1.
7383 #--Step 1.1
7384 mov.l (%a0),%d1 # load part of input X
7385 and.l &0x7FFF0000,%d1 # biased expo. of X
7386 cmp.l %d1,&0x3FFD0000 # 1/4
7387 bge.b EM1CON1 # |X| >= 1/4
7388 bra EM1SM
7390 EM1CON1:
7391 #--Step 1.3
7392 #--The case |X| >= 1/4
7393 mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7394 cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7395 ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7396 bra EM1BIG
7398 EM1MAIN:
7399 #--Step 2.
7400 #--This is the case: 1/4 <= |X| <= 70 log2.
7401 fmov.x (%a0),%fp0 # load input from (a0)
7403 fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
7404 fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7405 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7406 fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7407 lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7408 fmov.l %d1,%fp0 # convert to floating-format
7410 mov.l %d1,L_SCR1(%a6) # save N temporarily
7411 and.l &0x3F,%d1 # d1 is J = N mod 64
7412 lsl.l &4,%d1 # J * 16 = byte offset into EEXPTBL
7413 add.l %d1,%a1 # address of 2^(J/64)
7414 mov.l L_SCR1(%a6),%d1
7415 asr.l &6,%d1 # d1 is M
7416 mov.l %d1,L_SCR1(%a6) # save a copy of M
7418 #--Step 3.
7419 #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7420 #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7421 fmov.x %fp0,%fp2
7422 fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7423 fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7424 fadd.x %fp1,%fp0 # X + N*L1
7425 fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7426 add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
7428 #--Step 4.
7429 #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7430 #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7431 #--TO FULLY USE THE PIPELINE, WE COMPUTE S = R*R
7432 #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7434 fmov.x %fp0,%fp1
7435 fmul.x %fp1,%fp1 # fp1 IS S = R*R
7437 fmov.s &0x3950097B,%fp2 # fp2 IS a6
7439 fmul.x %fp1,%fp2 # fp2 IS S*A6
7440 fmov.x %fp1,%fp3
7441 fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7443 fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7444 fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7445 mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7446 mov.l &0x80000000,SC+4(%a6) # mantissa = 1.0 (only the top bit set)
7447 clr.l SC+8(%a6)
7449 fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7450 mov.l L_SCR1(%a6),%d1 # d1 is M
7451 neg.w %d1 # d1 is -M
7452 fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7453 add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7454 fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7455 fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7457 fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7458 or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7459 mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7460 mov.l &0x80000000,ONEBYSC+4(%a6)
7461 clr.l ONEBYSC+8(%a6)
7462 fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7464 fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7465 fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7467 fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7469 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7471 #--Step 5
7472 #--Compute 2^(J/64)*p
7474 fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7476 #--Step 6
# The three branches below add the same terms (T, t, OnebySc, p) in a
# different order depending on M.
7477 #--Step 6.1
7478 mov.l L_SCR1(%a6),%d1 # retrieve M
7479 cmp.l %d1,&63
7480 ble.b MLE63
7481 #--Step 6.2 M >= 64
7482 fmov.s 12(%a1),%fp1 # fp1 is t
7483 fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7484 fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7485 fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7486 bra EM1SCALE
7487 MLE63:
7488 #--Step 6.3 M <= 63
7489 cmp.l %d1,&-3
7490 bge.b MGEN3
7491 MLTN3:
7492 #--Step 6.4 M <= -4
7493 fadd.s 12(%a1),%fp0 # p+t
7494 fadd.x (%a1),%fp0 # T+(p+t)
7495 fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7496 bra EM1SCALE
7497 MGEN3:
7498 #--Step 6.5 -3 <= M <= 63
7499 fmov.x (%a1)+,%fp1 # fp1 is T
7500 fadd.s (%a1),%fp0 # fp0 is p+t
7501 fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7502 fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7504 EM1SCALE:
7505 #--Step 6.6
7506 fmov.l %d0,%fpcr # FPCR from d0 before the final multiply
7507 fmul.x SC(%a6),%fp0 # scale by 2^(M)
7508 bra t_inx2
7510 EM1SM:
7511 #--Step 7 |X| < 1/4.
7512 cmp.l %d1,&0x3FBE0000 # 2^(-65)
7513 bge.b EM1POLY
7515 EM1TINY:
7516 #--Step 8 |X| < 2^(-65)
7517 cmp.l %d1,&0x00330000 # biased expo. 0x0033 = 2^(-16332); original note said 2^(-16312)
7518 blt.b EM12TINY
7519 #--Step 8.2
7520 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7521 mov.l &0x80000000,SC+4(%a6)
7522 clr.l SC+8(%a6)
7523 fmov.x (%a0),%fp0
7524 fmov.l %d0,%fpcr
7525 mov.b &FADD_OP,%d1 # last inst is ADD
7526 fadd.x SC(%a6),%fp0 # X + (-2^(-16382))
7527 bra t_catch
7529 EM12TINY:
7530 #--Step 8.3
7531 fmov.x (%a0),%fp0
7532 fmul.d TWO140(%pc),%fp0 # scale X by the TWO140 constant
7533 mov.l &0x80010000,SC(%a6) # SC is -2^(-16382), as in Step 8.2
7534 mov.l &0x80000000,SC+4(%a6)
7535 clr.l SC+8(%a6)
7536 fadd.x SC(%a6),%fp0
7537 fmov.l %d0,%fpcr
7538 mov.b &FMUL_OP,%d1 # last inst is MUL
7539 fmul.d TWON140(%pc),%fp0 # rescale by the TWON140 constant
7540 bra t_catch
7542 EM1POLY:
7543 #--Step 9 exp(X)-1 by a simple polynomial
7544 fmov.x (%a0),%fp0 # fp0 is X
7545 fmul.x %fp0,%fp0 # fp0 is S := X*X
7546 fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
7547 fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7548 fmul.x %fp0,%fp1 # fp1 is S*B12
7549 fmov.s &0x310F8290,%fp2 # fp2 is B11
7550 fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7552 fmul.x %fp0,%fp2 # fp2 is S*B11
7553 fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7555 fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7556 fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7558 fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7559 fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7561 fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7562 fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7564 fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7565 fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7567 fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7568 fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7570 fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7571 fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7573 fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7574 fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7576 fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7577 fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7579 fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7580 fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7582 fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7583 fadd.x %fp2,%fp1 # fp1 is Q
7585 fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
7587 fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7589 fmov.l %d0,%fpcr
7590 fadd.x (%a0),%fp0 # X + (S*B1+Q)
7591 bra t_inx2
7593 EM1BIG:
7594 #--Step 10 |X| > 70 log2
7595 mov.l (%a0),%d1 # first longword of X, sign included
7596 cmp.l %d1,&0
7597 bgt.w EXPC1 # X > 0: finish inside setox at EXPC1
7598 #--Step 10.2
7599 fmov.s &0xBF800000,%fp0 # fp0 is -1
7600 fmov.l %d0,%fpcr
7601 fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7602 bra t_minx2
7604 global setoxm1d
7605 setoxm1d:
7606 #--entry point for EXPM1(X), here X is denormalized
7607 #--Step 0.
7608 bra t_extdnrm # no computation here: denormalized X goes straight to t_extdnrm
7610 #########################################################################
7611 # sgetexp(): returns the exponent portion of the input argument. #
7612 # The exponent bias is removed and the exponent value is #
7613 # returned as an extended precision number in fp0. #
7614 # sgetexpd(): handles denormalized numbers. #
7616 # sgetman(): extracts the mantissa of the input argument. The #
7617 # mantissa is converted to an extended precision number w/ #
7618 # an exponent of $3fff and is returned in fp0. The range of #
7619 # the result is [1.0 - 2.0). #
7620 # sgetmand(): handles denormalized numbers. #
7622 # INPUT *************************************************************** #
7623 # a0 = pointer to extended precision input #
7625 # OUTPUT ************************************************************** #
7626 # fp0 = exponent(X) or mantissa(X) #
7628 #########################################################################
7630 global sgetexp
7631 sgetexp:
# sgetexp: unbiased exponent of the operand, returned in fp0.
# In:  a0 = pointer to the extended-precision operand
# Out: fp0 = exponent - 0x3fff; the 'N' bit is written to FPSR_CC(%a6)
#      only when that value is negative.
# Clobbers d0.
7632 mov.w SRC_EX(%a0),%d0 # get the exponent
7633 bclr &0xf,%d0 # clear the sign bit
7634 subi.w &0x3fff,%d0 # subtract off the bias
7635 fmov.w %d0,%fp0 # return exp in fp0
7636 blt.b sgetexpn # it's negative (integer flags still come from subi.w)
7637 rts # exponent >= 0: return without touching FPSR_CC
7639 sgetexpn:
7640 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7641 rts # return; must not run on into sgetexpd
7643 global sgetexpd
7644 sgetexpd:
# sgetexpd: exponent of a denormalized operand, returned in fp0.
# In:  a0 = pointer to the extended-precision operand
# Out: fp0 = -(d0 after norm) - 0x3fff; 'N' is always written to
#      FPSR_CC(%a6).
# Clobbers d0 (plus whatever the external norm routine clobbers).
7645 bsr.l norm # normalize
7646 neg.w %d0 # new exp = -(shft amt)
7647 subi.w &0x3fff,%d0 # subtract off the bias
7648 fmov.w %d0,%fp0 # return exp in fp0
7649 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7650 rts # return; must not run on into sgetman
7652 global sgetman
7653 sgetman:
# sgetman: mantissa of the operand as an extended value with exponent
# 0x3fff and the operand's sign, returned in fp0.
# In:  a0 = pointer to the extended-precision operand
# Out: fp0 = result; 'N' is written to FPSR_CC(%a6) only when the
#      operand's sign bit is set.
# Clobbers d0 and FP_SCR0(%a6).
7654 mov.w SRC_EX(%a0),%d0 # get the exp
7655 ori.w &0x7fff,%d0 # force exponent field to all ones, sign kept
7656 bclr &0xe,%d0 # make it the new exp +-3fff
7658 # here, we build the result in a tmp location so as not to disturb the input
7659 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
7660 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
7661 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent (sets integer N from the sign bit)
7662 fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
7663 bmi.b sgetmann # it's negative
7664 rts # positive operand: return without touching FPSR_CC
7666 sgetmann:
7667 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7668 rts # return; must not run on into sgetmand
7671 # For denormalized numbers, shift the mantissa until the j-bit = 1,
7672 # then load the exponent with +/- $3fff.
7674 global sgetmand
7675 sgetmand:
7676 bsr.l norm # normalize exponent
7677 bra.b sgetman # then build the result through the sgetman path
7679 #########################################################################
7680 # scosh(): computes the hyperbolic cosine of a normalized input #
7681 # scoshd(): computes the hyperbolic cosine of a denormalized input #
7683 # INPUT *************************************************************** #
7684 # a0 = pointer to extended precision input #
7685 # d0 = round precision,mode #
7687 # OUTPUT ************************************************************** #
7688 # fp0 = cosh(X) #
7690 # ACCURACY and MONOTONICITY ******************************************* #
7691 # The returned result is within 3 ulps in 64 significant bit, #
7692 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7693 # rounded to double precision. The result is provably monotonic #
7694 # in double precision. #
7696 # ALGORITHM *********************************************************** #
7698 # COSH #
7699 # 1. If |X| > 16380 log2, go to 3. #
7701 # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
7702 # y = |X|, z = exp(Y), and #
7703 # cosh(X) = (1/2)*( z + 1/z ). #
7704 # Exit. #
7706 # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
7708 # 4. (16380 log2 < |X| <= 16480 log2) #
7709 # cosh(X) = exp(|X|)/2. #
7710 # However, invoking exp(|X|) may cause premature #
7711 # overflow. Thus, we calculate cosh(X) as follows: #
7712 # Y := |X| #
7713 # Fact := 2**(16380) #
7714 # Y' := Y - 16381 log2 #
7715 # cosh(X) := Fact * exp(Y'). #
7716 # Exit. #
7718 # 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
7719 # Huge*Huge to generate overflow and a positive infinity #
7720 # (cosh is even). Huge is the largest finite number #
7721 # in extended format. Exit. #
7723 #########################################################################
7725 TWO16380:
7726 long 0x7FFB0000,0x80000000,0x00000000,0x00000000 # 2^16380 in extended format
7728 global scosh
7729 scosh:
# scosh: cosh(X) for a normalized X.
# In:  a0 = pointer to the extended-precision operand X
#      d0 = value loaded into FPCR just before the final FP instruction
# Out: fp0 = result; exits through t_catch or t_ovfl2.
# Calls setox with d0 = 0 and a pointer to a stack temporary; the
# caller's d0 is saved on the stack across the call.
7730 fmov.x (%a0),%fp0 # LOAD INPUT
7732 mov.l (%a0),%d1
7733 mov.w 4(%a0),%d1 # d1 = sign/expo. in high word, top 16 mantissa bits in low word
7734 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X|
7735 cmp.l %d1,&0x400CB167 # same 16380 log2 constant that setox tests
7736 bgt.b COSHBIG
7738 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7739 #--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
7741 fabs.x %fp0 # |X|
7743 mov.l %d0,-(%sp) # save caller's d0
7744 clr.l %d0 # setox loads FPCR from d0: use 0 for the intermediate result
7745 fmovm.x &0x01,-(%sp) # save |X| to stack
7746 lea (%sp),%a0 # pass ptr to |X|
7747 bsr setox # FP0 IS EXP(|X|)
7748 add.l &0xc,%sp # erase |X| from stack
7749 fmul.s &0x3F000000,%fp0 # (1/2)EXP(|X|)
7750 mov.l (%sp)+,%d0 # restore caller's d0
7752 fmov.s &0x3E800000,%fp1 # (1/4)
7753 fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
7755 fmov.l %d0,%fpcr
7756 mov.b &FADD_OP,%d1 # last inst is ADD
7757 fadd.x %fp1,%fp0 # (1/2)EXP(|X|) + 1/(2 EXP(|X|))
7758 bra t_catch
7760 COSHBIG:
7761 cmp.l %d1,&0x400CB2B3 # upper limit of the scaled-exp path
7762 bgt.b COSHHUGE
7764 fabs.x %fp0
7765 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
7766 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7768 mov.l %d0,-(%sp) # save caller's d0
7769 clr.l %d0 # call setox with d0 = 0
7770 fmovm.x &0x01,-(%sp) # save fp0 to stack
7771 lea (%sp),%a0 # pass ptr to fp0
7772 bsr setox
7773 add.l &0xc,%sp # clear fp0 from stack
7774 mov.l (%sp)+,%d0 # restore caller's d0
7776 fmov.l %d0,%fpcr
7777 mov.b &FMUL_OP,%d1 # last inst is MUL
7778 fmul.x TWO16380(%pc),%fp0 # 2^16380 * exp(|X| - 16381 log2)
7779 bra t_catch
7781 COSHHUGE:
7782 bra t_ovfl2
7784 global scoshd
7785 #--COSH(X) = 1 FOR DENORMALIZED X
7786 scoshd:
# Computes 1 + 2^(-126) with the FPCR value passed in d0, then leaves
# through t_pinx2. The operand itself is never read.
7787 fmov.s &0x3F800000,%fp0 # fp0 = 1.0
7789 fmov.l %d0,%fpcr # FPCR from d0 before the final add
7790 fadd.s &0x00800000,%fp0 # 1 + 2^(-126)
7791 bra t_pinx2
7793 #########################################################################
7794 # ssinh(): computes the hyperbolic sine of a normalized input #
7795 # ssinhd(): computes the hyperbolic sine of a denormalized input #
7797 # INPUT *************************************************************** #
7798 # a0 = pointer to extended precision input #
7799 # d0 = round precision,mode #
7801 # OUTPUT ************************************************************** #
7802 # fp0 = sinh(X) #
7804 # ACCURACY and MONOTONICITY ******************************************* #
7805 # The returned result is within 3 ulps in 64 significant bit, #
7806 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7807 # rounded to double precision. The result is provably monotonic #
7808 # in double precision. #
7810 # ALGORITHM *********************************************************** #
7812 # SINH #
7813 # 1. If |X| > 16380 log2, go to 3. #
7815 # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
7816 # y = |X|, sgn = sign(X), and z = expm1(Y), #
7817 # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
7818 # Exit. #
7820 # 3. If |X| > 16480 log2, go to 5. #
7822 # 4. (16380 log2 < |X| <= 16480 log2) #
7823 # sinh(X) = sign(X) * exp(|X|)/2. #
7824 # However, invoking exp(|X|) may cause premature overflow. #
7825 # Thus, we calculate sinh(X) as follows: #
7826 # Y := |X| #
7827 # sgn := sign(X) #
7828 # sgnFact := sgn * 2**(16380) #
7829 # Y' := Y - 16381 log2 #
7830 # sinh(X) := sgnFact * exp(Y'). #
7831 # Exit. #
7833 # 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
7834 # sign(X)*Huge*Huge to generate overflow and an infinity with #
7835 # the appropriate sign. Huge is the largest finite number in #
7836 # extended format. Exit. #
7838 #########################################################################
7840 global ssinh
7841 ssinh:
# ssinh: sinh(X) for a normalized X.
# In:  a0 = pointer to the extended-precision operand X
#      d0 = value loaded into FPCR just before the final FP instruction
# Out: fp0 = result; exits through t_catch or t_ovfl.
# a1 carries the compacted operand (sign included) so the sign can be
# reapplied after the setoxm1/setox call.
7842 fmov.x (%a0),%fp0 # LOAD INPUT
7844 mov.l (%a0),%d1
7845 mov.w 4(%a0),%d1 # d1 = sign/expo. in high word, top 16 mantissa bits in low word
7846 mov.l %d1,%a1 # save (compacted) operand
7847 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X|
7848 cmp.l %d1,&0x400CB167 # same 16380 log2 constant that setox tests
7849 bgt.b SINHBIG
7851 #--THIS IS THE USUAL CASE, |X| < 16380 LOG2
7852 #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
7854 fabs.x %fp0 # Y = |X|
7856 movm.l &0x8040,-(%sp) # {a1/d0}
7857 fmovm.x &0x01,-(%sp) # save Y on stack
7858 lea (%sp),%a0 # pass ptr to Y
7859 clr.l %d0 # setoxm1 loads FPCR from d0: use 0 for the intermediate result
7860 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7861 add.l &0xc,%sp # clear Y from stack
7862 fmov.l &0,%fpcr # keep FPCR = 0 for the intermediate arithmetic below
7863 movm.l (%sp)+,&0x0201 # {a1/d0}
7865 fmov.x %fp0,%fp1
7866 fadd.s &0x3F800000,%fp1 # 1+Z
7867 fmov.x %fp0,-(%sp) # push Z
7868 fdiv.x %fp1,%fp0 # Z/(1+Z)
7869 mov.l %a1,%d1
7870 and.l &0x80000000,%d1 # sign(X)
7871 or.l &0x3F000000,%d1 # sign(X) * 0.5 in single format
7872 fadd.x (%sp)+,%fp0 # Z + Z/(1+Z); pops Z
7873 mov.l %d1,-(%sp) # push sign(X)*0.5 as operand of the final multiply
7875 fmov.l %d0,%fpcr
7876 mov.b &FMUL_OP,%d1 # last inst is MUL
7877 fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
7878 bra t_catch
7880 SINHBIG:
7881 cmp.l %d1,&0x400CB2B3 # upper limit of the scaled-exp path
7882 bgt t_ovfl
7883 fabs.x %fp0
7884 fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
# Build sgnFact = sign(X) * 2^16380 on the stack, low longword first.
7885 mov.l &0,-(%sp)
7886 mov.l &0x80000000,-(%sp)
7887 mov.l %a1,%d1
7888 and.l &0x80000000,%d1 # sign(X)
7889 or.l &0x7FFB0000,%d1 # sign | biased exponent of 2^16380
7890 mov.l %d1,-(%sp) # EXTENDED FMT
7891 fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
7893 mov.l %d0,-(%sp) # save caller's d0
7894 clr.l %d0 # call setox with d0 = 0
7895 fmovm.x &0x01,-(%sp) # save fp0 on stack
7896 lea (%sp),%a0 # pass ptr to fp0
7897 bsr setox
7898 add.l &0xc,%sp # clear fp0 from stack
7900 mov.l (%sp)+,%d0 # restore caller's d0
7901 fmov.l %d0,%fpcr
7902 mov.b &FMUL_OP,%d1 # last inst is MUL
7903 fmul.x (%sp)+,%fp0 # possible exception; pops sgnFact
7904 bra t_catch
7906 global ssinhd
7907 #--SINH(X) = X FOR DENORMALIZED X
7908 ssinhd:
7909 bra t_extdnrm # no computation here: denormalized X goes straight to t_extdnrm
7911 #########################################################################
7912 # stanh(): computes the hyperbolic tangent of a normalized input #
7913 # stanhd(): computes the hyperbolic tangent of a denormalized input #
7915 # INPUT *************************************************************** #
7916 # a0 = pointer to extended precision input #
7917 # d0 = round precision,mode #
7919 # OUTPUT ************************************************************** #
7920 # fp0 = tanh(X) #
7922 # ACCURACY and MONOTONICITY ******************************************* #
7923 # The returned result is within 3 ulps in 64 significant bit, #
7924 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
7925 # rounded to double precision. The result is provably monotonic #
7926 # in double precision. #
7928 # ALGORITHM *********************************************************** #
7930 # TANH #
7931 # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
7933 # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
7934 # sgn := sign(X), y := 2|X|, z := expm1(Y), and #
7935 # tanh(X) = sgn*( z/(2+z) ). #
7936 # Exit. #
7938 # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
7939 # go to 7. #
7941 # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
7943 # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
7944 # sgn := sign(X), y := 2|X|, z := exp(Y), #
7945 # tanh(X) = sgn - [ sgn*2/(1+z) ]. #
7946 # Exit. #
7948 # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
7949 # calculate Tanh(X) by #
7950 # sgn := sign(X), Tiny := 2**(-126), #
7951 # tanh(X) := sgn - sgn*Tiny. #
7952 # Exit. #
7954 # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
7956 #########################################################################
# X and V both alias FP_SCR0: V reuses the slot once X is no longer needed.
7958 set X,FP_SCR0
7959 set XFRAC,X+4
7961 set SGN,L_SCR3
7963 set V,FP_SCR0
7965 global stanh
7966 stanh:
# stanh: tanh(X) for a normalized X.
# In:  a0 = pointer to the extended-precision operand X
#      d0 = value loaded into FPCR just before the final FP instruction
# Out: fp0 = result; exits through t_inx2 or t_catch.
# Scratch: d1, X/V (FP_SCR0), SGN (L_SCR3), plus what setox/setoxm1 use.
7967 fmov.x (%a0),%fp0 # LOAD INPUT
7969 fmov.x %fp0,X(%a6)
7970 mov.l (%a0),%d1
7971 mov.w 4(%a0),%d1 # d1 = sign/expo. in high word, top 16 mantissa bits in low word
7972 mov.l %d1,X(%a6) # first longword of X now holds the compacted form
7973 and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X|
7974 cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)?
7975 blt.w TANHBORS # yes
7976 cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2?
7977 bgt.w TANHBORS # yes
7979 #--THIS IS THE USUAL CASE
7980 #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
7982 mov.l X(%a6),%d1
7983 mov.l %d1,SGN(%a6)
7984 and.l &0x7FFF0000,%d1 # biased exponent only
7985 add.l &0x00010000,%d1 # EXPONENT OF 2|X|
7986 mov.l %d1,X(%a6) # X slot now holds Y = 2|X|
7987 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit of X
7988 fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X|
7990 mov.l %d0,-(%sp) # save caller's d0
7991 clr.l %d0 # call setoxm1 with d0 = 0
7992 fmovm.x &0x1,-(%sp) # save Y on stack
7993 lea (%sp),%a0 # pass ptr to Y
7994 bsr setoxm1 # FP0 IS Z = EXPM1(Y)
7995 add.l &0xc,%sp # clear Y from stack
7996 mov.l (%sp)+,%d0 # restore caller's d0
7998 fmov.x %fp0,%fp1
7999 fadd.s &0x40000000,%fp1 # Z+2
8000 mov.l SGN(%a6),%d1
8001 fmov.x %fp1,V(%a6)
8002 eor.l %d1,V(%a6) # V = SIGN(X)*(Z+2): flip V's sign bit when X < 0
8004 fmov.l %d0,%fpcr # restore users round prec,mode
8005 fdiv.x V(%a6),%fp0 # Z / (SIGN(X)*(Z+2))
8006 bra t_inx2
8008 TANHBORS:
8009 cmp.l %d1,&0x3FFF8000 # is |X| < 1? (only the tiny inputs get here with |X| < 1)
8010 blt.w TANHSM
8012 cmp.l %d1,&0x40048AA1
8013 bgt.w TANHHUGE
8015 #-- (5/2) LOG2 < |X| < 50 LOG2,
8016 #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
8017 #--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
8019 mov.l X(%a6),%d1
8020 mov.l %d1,SGN(%a6)
8021 and.l &0x7FFF0000,%d1
8022 add.l &0x00010000,%d1 # EXPO OF 2|X|
8023 mov.l %d1,X(%a6) # Y = 2|X|
8024 and.l &0x80000000,SGN(%a6) # SGN keeps only the sign bit of X
8025 mov.l SGN(%a6),%d1
8026 fmov.x X(%a6),%fp0 # Y = 2|X|
8028 mov.l %d0,-(%sp) # save caller's d0
8029 clr.l %d0 # call setox with d0 = 0
8030 fmovm.x &0x01,-(%sp) # save Y on stack
8031 lea (%sp),%a0 # pass ptr to Y
8032 bsr setox # FP0 IS EXP(Y)
8033 add.l &0xc,%sp # clear Y from stack
8034 mov.l (%sp)+,%d0 # restore caller's d0
8035 mov.l SGN(%a6),%d1 # reload the sign (d1 was used by setox)
8036 fadd.s &0x3F800000,%fp0 # EXP(Y)+1
8038 eor.l &0xC0000000,%d1 # -SIGN(X)*2
8039 fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
8040 fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
8042 mov.l SGN(%a6),%d1
8043 or.l &0x3F800000,%d1 # SGN
8044 fmov.s %d1,%fp0 # SGN IN SGL FMT
8046 fmov.l %d0,%fpcr # restore users round prec,mode
8047 mov.b &FADD_OP,%d1 # last inst is ADD
8048 fadd.x %fp1,%fp0 # SGN - SGN*2/[EXP(Y)+1]
8049 bra t_inx2
8051 TANHSM:
8052 fmov.l %d0,%fpcr # restore users round prec,mode
8053 mov.b &FMOV_OP,%d1 # last inst is MOVE
8054 fmov.x X(%a6),%fp0 # last inst - possible exception set
8055 bra t_catch
8057 #---RETURN SGN(X) - SGN(X)EPS
8058 TANHHUGE:
8059 mov.l X(%a6),%d1
8060 and.l &0x80000000,%d1 # sign(X)
8061 or.l &0x3F800000,%d1 # sign(X) * 1.0 in single format
8062 fmov.s %d1,%fp0
8063 and.l &0x80000000,%d1 # back to the bare sign bit
8064 eor.l &0x80800000,%d1 # -SIGN(X)*EPS
8066 fmov.l %d0,%fpcr # restore users round prec,mode
8067 fadd.s %d1,%fp0 # SGN(X) - SGN(X)*EPS
8068 bra t_inx2
8070 global stanhd
8071 #--TANH(X) = X FOR DENORMALIZED X
8072 stanhd:
8073 bra t_extdnrm # no computation here: denormalized X goes straight to t_extdnrm
8075 #########################################################################
8076 # slogn(): computes the natural logarithm of a normalized input #
8077 # slognd(): computes the natural logarithm of a denormalized input #
8078 # slognp1(): computes the log(1+X) of a normalized input #
8079 # slognp1d(): computes the log(1+X) of a denormalized input #
8081 # INPUT *************************************************************** #
8082 # a0 = pointer to extended precision input #
8083 # d0 = round precision,mode #
8085 # OUTPUT ************************************************************** #
8086 # fp0 = log(X) or log(1+X) #
8088 # ACCURACY and MONOTONICITY ******************************************* #
8089 # The returned result is within 2 ulps in 64 significant bit, #
8090 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8091 # rounded to double precision. The result is provably monotonic #
8092 # in double precision. #
8094 # ALGORITHM *********************************************************** #
8095 # LOGN: #
8096 # Step 1. If |X-1| < 1/16, approximate log(X) by an odd #
8097 # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
8098 # move on to Step 2. #
8100 # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
8101 # seven significant bits of Y plus 2**(-7), i.e. #
8102 # F = 1.xxxxxx1 in base 2 where the six "x" match those #
8103 # of Y. Note that |Y-F| <= 2**(-7). #
8105 # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
8106 # polynomial in u, log(1+u) = poly. #
8108 # Step 4. Reconstruct #
8109 # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
8110 # by k*log(2) + (log(F) + poly). The values of log(F) are #
8111 # calculated beforehand and stored in the program. #
8113 # lognp1: #
8114 # Step 1: If |X| < 1/16, approximate log(1+X) by an odd #
8115 # polynomial in u where u = 2X/(2+X). Otherwise, move on #
8116 # to Step 2. #
8118 # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
8119 # in Step 2 of the algorithm for LOGN and compute #
8120 # log(1+X) as k*log(2) + log(F) + poly where poly #
8121 # approximates log(1+u), u = (Y-F)/F. #
8123 # Implementation Notes: #
8124 # Note 1. There are 64 different possible values for F, thus 64 #
8125 # log(F)'s need to be tabulated. Moreover, the values of #
8126 # 1/F are also tabulated so that the division in (Y-F)/F #
8127 # can be performed by a multiplication. #
8129 # Note 2. In Step 2 of lognp1, in order to preserved accuracy, #
8130 # the value Y-F has to be calculated carefully when #
8131 # 1/2 <= X < 3/2. #
8133 # Note 3. To fully exploit the pipeline, polynomials are usually #
8134 # separated into two parts evaluated independently before #
8135 # being added up. #
8137 #########################################################################
# Constants placed ahead of LOGTBL. Decimal values in the comments are
# decoded from the bit patterns and rounded; the code that consumes
# them lies outside this excerpt.
8138 LOGOF2:
8139 long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # extended: log(2) = 0.693147...
8141 one:
8142 long 0x3F800000 # single: 1.0
8143 zero:
8144 long 0x00000000 # single: 0.0
8145 infty:
8146 long 0x7F800000 # single: +infinity
8147 negone:
8148 long 0xBF800000 # single: -1.0
8150 LOGA6:
8151 long 0x3FC2499A,0xB5E4040B # double: approx. 1/7
8152 LOGA5:
8153 long 0xBFC555B5,0x848CB7DB # double: approx. -1/6
8155 LOGA4:
8156 long 0x3FC99999,0x987D8730 # double: approx. 1/5
8157 LOGA3:
8158 long 0xBFCFFFFF,0xFF6F7E97 # double: approx. -1/4
8160 LOGA2:
8161 long 0x3FD55555,0x555555A4 # double: approx. 1/3
8162 LOGA1:
8163 long 0xBFE00000,0x00000008 # double: approx. -1/2
8165 LOGB5:
8166 long 0x3F175496,0xADD7DAD6 # double: approx. 8.9e-5
8167 LOGB4:
8168 long 0x3F3C71C2,0xFE80C7E0 # double: approx. 1/2304
8170 LOGB3:
8171 long 0x3F624924,0x928BCCFF # double: approx. 1/448
8172 LOGB2:
8173 long 0x3F899999,0x999995EC # double: approx. 1/80
8175 LOGB1:
8176 long 0x3FB55555,0x55555555 # double: approx. 1/12
8177 TWO:
8178 long 0x40000000,0x00000000 # double: 2.0
8180 LTHOLD:
8181 long 0x3f990000,0x80000000,0x00000000,0x00000000 # extended: 2^(-102)
8183 LOGTBL:
8184 long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
8185 long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000
8186 long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
8187 long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
8188 long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
8189 long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
8190 long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
8191 long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
8192 long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
8193 long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
8194 long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
8195 long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
8196 long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
8197 long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
8198 long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
8199 long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
8200 long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
8201 long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
8202 long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
8203 long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
8204 long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
8205 long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
8206 long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
8207 long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
8208 long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
8209 long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
8210 long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
8211 long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
8212 long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
8213 long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
8214 long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
8215 long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
8216 long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
8217 long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
8218 long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
8219 long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
8220 long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
8221 long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
8222 long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
8223 long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
8224 long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
8225 long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
8226 long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
8227 long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
8228 long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
8229 long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
8230 long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
8231 long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
8232 long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
8233 long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
8234 long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
8235 long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
8236 long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
8237 long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
8238 long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
8239 long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
8240 long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
8241 long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
8242 long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
8243 long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
8244 long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
8245 long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
8246 long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
8247 long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
8248 long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
8249 long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
8250 long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
8251 long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
8252 long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
8253 long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
8254 long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
8255 long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
8256 long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
8257 long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
8258 long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
8259 long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
8260 long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
8261 long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
8262 long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
8263 long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
8264 long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
8265 long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
8266 long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
8267 long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
8268 long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
8269 long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
8270 long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
8271 long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
8272 long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
8273 long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
8274 long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
8275 long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
8276 long 0x3FFE0000,0x94458094,0x45809446,0x00000000
8277 long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
8278 long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
8279 long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
8280 long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
8281 long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
8282 long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
8283 long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
8284 long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
8285 long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
8286 long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
8287 long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
8288 long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
8289 long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
8290 long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
8291 long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
8292 long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
8293 long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
8294 long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
8295 long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
8296 long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
8297 long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
8298 long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
8299 long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
8300 long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
8301 long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
8302 long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
8303 long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
8304 long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
8305 long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
8306 long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
8307 long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
8308 long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
8309 long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
8310 long 0x3FFE0000,0x80808080,0x80808081,0x00000000
8311 long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
# Scratch-slot aliases used by the log routines below (slogn, slognd, slognp1).
# X, KLOG2 and SAVEU all name the same FP_SCR0 slot, so a store through one
# of them overwrites whatever was stored through the others.
8313 set ADJK,L_SCR1 # exponent adjustment added to K (0 from slogn/slognp1, -k from slognd)
8315 set X,FP_SCR0 # working copy of the operand
8316 set XDCARE,X+2 # low word of X's first longword
8317 set XFRAC,X+4 # second longword of X
8319 set F,FP_SCR1 # F, built from the leading bits of the operand's fraction
8320 set FFRAC,F+4 # second longword of F
8322 set KLOG2,FP_SCR0 # K*LOG2, stored at LP1CONT1
8324 set SAVEU,FP_SCR0 # U, stored at LP1CONT2
8326 global slogn
# slogn: natural logarithm of the extended-precision operand at (%a0).
#   In:   a0 = pointer to the operand; d0 = value written to FPCR just
#         before the final add (the caller's rounding precision/mode).
#   Out:  fp0 = result. Leaves through t_inx2, ld_pzero (input exactly 1)
#         or t_operr (negative input); those labels are defined elsewhere.
#   Uses: d1, a0, fp1 and the ADJK/X/F scratch slots; fp2/fp3 are pushed
#         and popped around each polynomial evaluation.
# LOGBGN is also entered from slognd; LOGMAIN, LP1CONT1 and LP1CONT2 are also
# entered from slognp1, so the register state noted at each label must hold.
8327 #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
8328 slogn:
8329 fmov.x (%a0),%fp0 # LOAD INPUT
8330 mov.l &0x00000000,ADJK(%a6) # NO EXPONENT ADJUSTMENT ON THIS ENTRY
8332 LOGBGN:
8333 #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
8334 #--A FINITE, NON-ZERO, NORMALIZED NUMBER.
# Build the "compact form" of X in d1: upper word = sign/exponent word,
# lower word = top 16 bits of the mantissa.
8336 mov.l (%a0),%d1
8337 mov.w 4(%a0),%d1
# Copy all three longwords of the operand into the X scratch slot.
8339 mov.l (%a0),X(%a6)
8340 mov.l 4(%a0),X+4(%a6)
8341 mov.l 8(%a0),X+8(%a6)
8343 cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
8344 blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
8345 # X IS POSITIVE, CHECK IF X IS NEAR 1
# (LP1NEAR1 tests the same two constants and labels them EXP(-1/16), EXP(1/16).)
8346 cmp.l %d1,&0x3ffef07d # IS X < 15/16?
8347 blt.b LOGMAIN # YES
8348 cmp.l %d1,&0x3fff8841 # IS X > 17/16?
8349 ble.w LOGNEAR1 # NO
# X is above the upper bound: fall through into LOGMAIN.
8351 LOGMAIN:
8352 #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
# On entry: d1 = compact form of X, X(%a6) = X, ADJK(%a6) = exponent adjustment.
8354 #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
8355 #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
8356 #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
8357 #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
8358 #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
8359 #--LOG(1+U) CAN BE VERY EFFICIENT.
8360 #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
8361 #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
8363 #--GET K, Y, F, AND ADDRESS OF 1/F.
8364 asr.l &8,%d1
8365 asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
8366 sub.l &0x3FFF,%d1 # THIS IS K
8367 add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
8368 lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
8369 fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
8371 #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
8372 mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
8373 mov.l XFRAC(%a6),FFRAC(%a6)
8374 and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
8375 or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
8376 mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
8377 and.l &0x7E000000,%d1
8378 asr.l &8,%d1
8379 asr.l &8,%d1
8380 asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT (6-BIT INDEX * 32)
8381 add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
8383 fmov.x X(%a6),%fp0
8384 mov.l &0x3fff0000,F(%a6)
8385 clr.l F+8(%a6)
8386 fsub.x F(%a6),%fp0 # Y-F
8387 fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
8388 #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
8389 #--REGISTERS SAVED: FPCR, FP1, FP2
8391 LP1CONT1:
8392 #--A RE-ENTRY POINT FOR LOGNP1 (REACHED FROM KISNEG1 AND KISZERO)
# On entry: fp0 = Y-F, fp1 = K, a0 -> 1/F table entry, fp2/fp3 already pushed.
8393 fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
8394 fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
8395 fmov.x %fp0,%fp2
8396 fmul.x %fp2,%fp2 # FP2 IS V=U*U
8397 fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
8399 #--LOG(1+U) IS APPROXIMATED BY
8400 #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
8401 #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
# fp1 accumulates the A2/A4/A6 chain and fp2 the A1/A3/A5 chain; fp3 keeps V.
8403 fmov.x %fp2,%fp3
8404 fmov.x %fp2,%fp1
8406 fmul.d LOGA6(%pc),%fp1 # V*A6
8407 fmul.d LOGA5(%pc),%fp2 # V*A5
8409 fadd.d LOGA4(%pc),%fp1 # A4+V*A6
8410 fadd.d LOGA3(%pc),%fp2 # A3+V*A5
8412 fmul.x %fp3,%fp1 # V*(A4+V*A6)
8413 fmul.x %fp3,%fp2 # V*(A3+V*A5)
8415 fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6)
8416 fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5)
8418 fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6))
8419 add.l &16,%a0 # ADDRESS OF LOG(F) (16 BYTES AFTER 1/F)
8420 fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5))
8422 fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6))
8423 fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5))
8425 fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6))
8426 fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
8427 fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
8429 fmov.l %d0,%fpcr # LOAD FPCR FROM D0 SO THE LAST OP USES IT
8430 fadd.x KLOG2(%a6),%fp0 # FINAL ADD
8431 bra t_inx2
8434 LOGNEAR1:
8436 # if the input is exactly equal to one, then exit through ld_pzero.
8437 # if these 2 lines weren't here, the correct answer would be returned
8438 # but the INEX2 bit would be set.
8439 fcmp.b %fp0,&0x1 # is it equal to one?
8440 fbeq.l ld_pzero # yes
8442 #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
8443 fmov.x %fp0,%fp1
8444 fsub.s one(%pc),%fp1 # FP1 IS X-1
8445 fadd.s one(%pc),%fp0 # FP0 IS X+1
8446 fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
8447 #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
8448 #--IN U, U = 2(X-1)/(X+1) = FP1/FP0
8450 LP1CONT2:
8451 #--THIS IS A RE-ENTRY POINT FOR LOGNP1 (REACHED FROM LP1ONE16)
# On entry: fp1 = numerator, fp0 = denominator of U; fp2/fp3 not yet saved.
8452 fdiv.x %fp0,%fp1 # FP1 IS U
8453 fmovm.x &0xc,-(%sp) # SAVE FP2-3
8454 #--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
8455 #--LET V=U*U, W=V*V, CALCULATE
8456 #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
8457 #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
8458 fmov.x %fp1,%fp0
8459 fmul.x %fp0,%fp0 # FP0 IS V
8460 fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
8461 fmov.x %fp0,%fp1
8462 fmul.x %fp1,%fp1 # FP1 IS W
8464 fmov.d LOGB5(%pc),%fp3
8465 fmov.d LOGB4(%pc),%fp2
8467 fmul.x %fp1,%fp3 # W*B5
8468 fmul.x %fp1,%fp2 # W*B4
8470 fadd.d LOGB3(%pc),%fp3 # B3+W*B5
8471 fadd.d LOGB2(%pc),%fp2 # B2+W*B4
8473 fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED
8475 fmul.x %fp0,%fp2 # V*(B2+W*B4)
8477 fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5)
8478 fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
8480 fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
8481 fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
8483 fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
8485 fmov.l %d0,%fpcr # LOAD FPCR FROM D0 SO THE LAST OP USES IT
8486 fadd.x SAVEU(%a6),%fp0 # FINAL ADD: U + U*V*(...)
8487 bra t_inx2
8489 #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
8490 LOGNEG:
8491 bra t_operr
8493 global slognd
# slognd: natural logarithm of a denormalized operand at (%a0).
# Shifts the 64-bit mantissa left by k bits until its top bit is set, stores
# the shifted value in X(%a6) and -k in ADJK(%a6), then joins slogn at LOGBGN
# with a0 pointing at X(%a6). d2-d7 are saved on the stack and restored.
8494 slognd:
8495 #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
# This initial ADJK value is overwritten with -k on both paths below.
8497 mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0
8499 #----normalize the input value by left shifting k bits (k to be determined
8500 #----below), adjusting exponent and storing -k to ADJK
8501 #----the value TWOTO100 is no longer needed.
8502 #----Note that this code assumes the denormalized input is NON-ZERO.
8504 movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
8505 mov.l (%a0),%d3 # D3 is exponent of smallest norm. #
8506 mov.l 4(%a0),%d4
8507 mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
8508 clr.l %d2 # D2 used for holding K
8510 tst.l %d4
8511 bne.b Hi_not0
# High mantissa longword is zero: the low longword becomes the high one
# (a shift by 32), then the remaining leading zeros are shifted out.
8513 Hi_0:
8514 mov.l %d5,%d4
8515 clr.l %d5
8516 mov.l &32,%d2
8517 clr.l %d6
8518 bfffo %d4{&0:&32},%d6 # d6 = bit offset of the first 1 in d4
8519 lsl.l %d6,%d4
8520 add.l %d6,%d2 # (D3,D4,D5) is normalized
8522 mov.l %d3,X(%a6)
8523 mov.l %d4,XFRAC(%a6)
8524 mov.l %d5,XFRAC+4(%a6)
8525 neg.l %d2
8526 mov.l %d2,ADJK(%a6) # ADJK = -k
8527 fmov.x X(%a6),%fp0
8528 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8529 lea X(%a6),%a0 # LOGBGN re-reads the operand through a0
8530 bra.w LOGBGN # begin regular log(X)
# High mantissa longword is non-zero: shift the 64-bit pair (d4,d5) left by
# k bits, moving the top k bits of d5 into the bottom of d4.
8532 Hi_not0:
8533 clr.l %d6
8534 bfffo %d4{&0:&32},%d6 # find first 1
8535 mov.l %d6,%d2 # get k
8536 lsl.l %d6,%d4
8537 mov.l %d5,%d7 # a copy of D5
8538 lsl.l %d6,%d5
8539 neg.l %d6
8540 add.l &32,%d6 # d6 = 32-k
8541 lsr.l %d6,%d7 # d7 = bits of the low longword that cross into d4
8542 or.l %d7,%d4 # (D3,D4,D5) normalized
8544 mov.l %d3,X(%a6)
8545 mov.l %d4,XFRAC(%a6)
8546 mov.l %d5,XFRAC+4(%a6)
8547 neg.l %d2
8548 mov.l %d2,ADJK(%a6) # ADJK = -k
8549 fmov.x X(%a6),%fp0
8550 movm.l (%sp)+,&0xfc # restore registers {d2-d7}
8551 lea X(%a6),%a0 # LOGBGN re-reads the operand through a0
8552 bra.w LOGBGN # begin regular log(X)
8554 global slognp1
# slognp1: log(1+Z) for the extended-precision operand Z at (%a0).
#   In:   a0 = pointer to Z; d0 = value written to FPCR before the result
#         is produced (the caller's rounding precision/mode).
#   Out:  fp0 = result. |Z| at or below LTHOLD returns Z itself via t_catch;
#         1+Z == 0 leaves via t_dz and 1+Z < 0 via t_operr. Every other case
#         finishes inside slogn's code (LOGMAIN, LP1CONT1 or LP1CONT2).
# LTHOLD, one, TWO, negone, zero and the t_* exits are defined elsewhere.
8555 #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8556 slognp1:
8557 fmov.x (%a0),%fp0 # LOAD INPUT
8558 fabs.x %fp0 # test magnitude
8559 fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8560 fbgt.w LP1REAL # if greater, continue
8561 fmov.l %d0,%fpcr
8562 mov.b &FMOV_OP,%d1 # last inst is MOVE
8563 fmov.x (%a0),%fp0 # return signed argument
8564 bra t_catch
8566 LP1REAL:
8567 fmov.x (%a0),%fp0 # LOAD INPUT
8568 mov.l &0x00000000,ADJK(%a6)
8569 fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8570 fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8571 fmov.x %fp0,X(%a6)
# Copy the top 16 mantissa bits into the low word of X's first longword so
# the following longword load yields the compact form of X in d1.
8572 mov.w XFRAC(%a6),XDCARE(%a6)
8573 mov.l X(%a6),%d1
8574 cmp.l %d1,&0
8575 ble.w LP1NEG0 # LOG OF ZERO OR -VE
8576 cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8577 blt.w LOGMAIN
8578 cmp.l %d1,&0x3fffc000
8579 bgt.w LOGMAIN
# (The next three lines explain the two LOGMAIN branches just above.)
8580 #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8581 #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8582 #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8584 LP1NEAR1:
8585 #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
8586 cmp.l %d1,&0x3ffef07d
8587 blt.w LP1CARE
8588 cmp.l %d1,&0x3fff8841
8589 bgt.w LP1CARE
8591 LP1ONE16:
8592 #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8593 #--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
8594 fadd.x %fp1,%fp1 # FP1 IS 2Z
8595 fadd.s one(%pc),%fp0 # FP0 IS 1+X
8596 #--U = FP1/FP0
8597 bra.w LP1CONT2
8599 LP1CARE:
8600 #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8601 #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8602 #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8603 #--THERE ARE ONLY TWO CASES.
8604 #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8605 #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8606 #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8607 #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8609 mov.l XFRAC(%a6),FFRAC(%a6)
8610 and.l &0xFE000000,FFRAC(%a6)
8611 or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8612 cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8613 bge.b KISZERO
8615 KISNEG1:
8616 fmov.s TWO(%pc),%fp0
8617 mov.l &0x3fff0000,F(%a6)
8618 clr.l F+8(%a6)
8619 fsub.x F(%a6),%fp0 # 2-F
8620 mov.l FFRAC(%a6),%d1
8621 and.l &0x7E000000,%d1
8622 asr.l &8,%d1
8623 asr.l &8,%d1
8624 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8625 fadd.x %fp1,%fp1 # GET 2Z
8626 fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8627 fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8628 lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8629 add.l %d1,%a0
8630 fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8631 bra.w LP1CONT1
8633 KISZERO:
8634 fmov.s one(%pc),%fp0
8635 mov.l &0x3fff0000,F(%a6)
8636 clr.l F+8(%a6)
8637 fsub.x F(%a6),%fp0 # 1-F
8638 mov.l FFRAC(%a6),%d1
8639 and.l &0x7E000000,%d1
8640 asr.l &8,%d1
8641 asr.l &8,%d1
8642 asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
8643 fadd.x %fp1,%fp0 # FP0 IS Y-F
8644 fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8645 lea LOGTBL(%pc),%a0
8646 add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8647 fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8648 bra.w LP1CONT1
8650 LP1NEG0:
8651 #--FPCR SAVED. D1 IS X IN COMPACT FORM.
8652 cmp.l %d1,&0
8653 blt.b LP1NEG
# 1+Z is zero: load -1 into fp0 and leave through the divide-by-zero exit.
8654 LP1ZERO:
8655 fmov.s negone(%pc),%fp0
8657 fmov.l %d0,%fpcr
8658 bra t_dz
# 1+Z is negative: load 0 into fp0 and leave through the operand-error exit.
8660 LP1NEG:
8661 fmov.s zero(%pc),%fp0
8663 fmov.l %d0,%fpcr
8664 bra t_operr
8666 global slognp1d
8667 #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8668 # Simply return the denorm
# No computation is done here; control goes straight to t_extdnrm, which is
# defined outside this section.
8669 slognp1d:
8670 bra t_extdnrm
8672 #########################################################################
8673 # satanh(): computes the inverse hyperbolic tangent of a norm input #
8674 # satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8676 # INPUT *************************************************************** #
8677 # a0 = pointer to extended precision input #
8678 # d0 = round precision,mode #
8680 # OUTPUT ************************************************************** #
8681 # fp0 = arctanh(X) #
8683 # ACCURACY and MONOTONICITY ******************************************* #
8684 # The returned result is within 3 ulps in 64 significant bit, #
8685 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8686 # rounded to double precision. The result is provably monotonic #
8687 # in double precision. #
8689 # ALGORITHM *********************************************************** #
8691 # ATANH #
8692 # 1. If |X| >= 1, go to 3. #
8694 # 2. (|X| < 1) Calculate atanh(X) by #
8695 # sgn := sign(X) #
8696 # y := |X| #
8697 # z := 2y/(1-y) #
8698 # atanh(X) := sgn * (1/2) * logp1(z) #
8699 # Exit. #
8701 # 3. If |X| > 1, go to 5. #
8703 # 4. (|X| = 1) Generate infinity with an appropriate sign and #
8704 # divide-by-zero by #
8705 # sgn := sign(X) #
8706 # atanh(X) := sgn / (+0). #
8707 # Exit. #
8709 # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8710 # Exit. #
8712 #########################################################################
8714 global satanh
# satanh: inverse hyperbolic tangent of the extended-precision operand at (%a0).
#   In:   a0 = pointer to X; d0 = caller's rounding precision/mode.
#   Out:  fp0 = result via t_catch; |X| = 1 leaves via t_dz and |X| > 1 via
#         t_operr.
# For |X| < 1 it calls slognp1 on Z = 2|X|/(1-|X|) with d0 cleared, then
# multiplies the result by sign(X)*0.5 under the caller's FPCR value.
8715 satanh:
# d1 = compact form of |X| (exponent word : top 16 mantissa bits, sign cleared).
8716 mov.l (%a0),%d1
8717 mov.w 4(%a0),%d1
8718 and.l &0x7FFFFFFF,%d1
8719 cmp.l %d1,&0x3FFF8000
8720 bge.b ATANHBIG
8722 #--THIS IS THE USUAL CASE, |X| < 1
8723 #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8725 fabs.x (%a0),%fp0 # Y = |X|
8726 fmov.x %fp0,%fp1
8727 fneg.x %fp1 # -Y
8728 fadd.x %fp0,%fp0 # 2Y
8729 fadd.s &0x3F800000,%fp1 # 1-Y
8730 fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8731 mov.l (%a0),%d1
8732 and.l &0x80000000,%d1
8733 or.l &0x3F000000,%d1 # SIGN(X)*HALF
8734 mov.l %d1,-(%sp) # pushed as a single-precision value, popped by fmul.s below
# Stack from here, top first: Z (12 bytes), saved d0, SIGN(X)*HALF.
8736 mov.l %d0,-(%sp) # save rnd prec,mode
8737 clr.l %d0 # pass ext prec,RN
8738 fmovm.x &0x01,-(%sp) # save Z on stack
8739 lea (%sp),%a0 # pass ptr to Z
8740 bsr slognp1 # LOG1P(Z)
8741 add.l &0xc,%sp # clear Z from stack
8743 mov.l (%sp)+,%d0 # fetch old prec,mode
8744 fmov.l %d0,%fpcr # load it
8745 mov.b &FMUL_OP,%d1 # last inst is MUL
8746 fmul.s (%sp)+,%fp0 # multiply by SIGN(X)*HALF and pop it
8747 bra t_catch
# |X| >= 1: operand error if |X| > 1, otherwise (|X| = 1) divide-by-zero.
8749 ATANHBIG:
8750 fabs.x (%a0),%fp0 # |X|
8751 fcmp.s %fp0,&0x3F800000
8752 fbgt t_operr
8753 bra t_dz
8755 global satanhd
8756 #--ATANH(X) = X FOR DENORMALIZED X
# No computation is done here; control goes straight to t_extdnrm, which is
# defined outside this section.
8757 satanhd:
8758 bra t_extdnrm
8760 #########################################################################
8761 # slog10(): computes the base-10 logarithm of a normalized input #
8762 # slog10d(): computes the base-10 logarithm of a denormalized input #
8763 # slog2(): computes the base-2 logarithm of a normalized input #
8764 # slog2d(): computes the base-2 logarithm of a denormalized input #
8766 # INPUT *************************************************************** #
8767 # a0 = pointer to extended precision input #
8768 # d0 = round precision,mode #
8770 # OUTPUT ************************************************************** #
8771 # fp0 = log_10(X) or log_2(X) #
8773 # ACCURACY and MONOTONICITY ******************************************* #
8774 # The returned result is within 1.7 ulps in 64 significant bit, #
8775 # i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8776 # rounded to double precision. The result is provably monotonic #
8777 # in double precision. #
8779 # ALGORITHM *********************************************************** #
8781 # slog10d: #
8783 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8784 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8785 # Notes: Default means round-to-nearest mode, no floating-point #
8786 # traps, and precision control = double extended. #
8788 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8789 # Notes: Even if X is denormalized, log(X) is always normalized. #
8791 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8792 # 2.1 Restore the user FPCR #
8793 # 2.2 Return ans := Y * INV_L10. #
8795 # slog10: #
8797 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8798 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8799 # Notes: Default means round-to-nearest mode, no floating-point #
8800 # traps, and precision control = double extended. #
8802 # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8804 # Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8805 # 2.1 Restore the user FPCR #
8806 # 2.2 Return ans := Y * INV_L10. #
8808 # sLog2d: #
8810 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8811 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8812 # Notes: Default means round-to-nearest mode, no floating-point #
8813 # traps, and precision control = double extended. #
8815 # Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8816 # Notes: Even if X is denormalized, log(X) is always normalized. #
8818 # Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8819 # 2.1 Restore the user FPCR #
8820 # 2.2 Return ans := Y * INV_L2. #
8822 # sLog2: #
8824 # Step 0. If X < 0, create a NaN and raise the invalid operation #
8825 # flag. Otherwise, save FPCR in D1; set FpCR to default. #
8826 # Notes: Default means round-to-nearest mode, no floating-point #
8827 # traps, and precision control = double extended. #
8829 # Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8830 # go to Step 3. #
8832 # Step 2. Return k. #
8833 # 2.1 Get integer k, X = 2^k. #
8834 # 2.2 Restore the user FPCR. #
8835 # 2.3 Return ans := convert-to-double-extended(k). #
8837 # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8839 # Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8840 # 4.1 Restore the user FPCR #
8841 # 4.2 Return ans := Y * INV_L2. #
8843 #########################################################################
# Multipliers that turn the natural log returned by slogn/slognd into a
# base-10 or base-2 log. Both are read with fmul.x.
8845 INV_L10:
8846 long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 # 1/log(10), used by slog10 and slog10d
8848 INV_L2:
8849 long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 # 1/log(2), used by slog2 and slog2d
8851 global slog10
# slog10: base-10 logarithm of the operand at (%a0).
#   In:   a0 = pointer to X; d0 = caller's rounding precision/mode.
#   Out:  fp0 = log(X) * INV_L10 via t_inx2; X == 1 leaves via ld_pzero and
#         negative X via the shared 'invalid' label (t_operr).
8852 #--entry point for Log10(X), X is normalized
8853 slog10:
8854 fmov.b &0x1,%fp0
8855 fcmp.x %fp0,(%a0) # if operand == 1,
8856 fbeq.l ld_pzero # return an EXACT zero
8858 mov.l (%a0),%d1 # the move sets the condition codes from the sign/exponent longword
8859 blt.w invalid # negative operand
8860 mov.l %d0,-(%sp) # save caller's precision/mode across the call
8861 clr.l %d0 # slogn runs with d0 = 0
8862 bsr slogn # log(X), X normal.
8863 fmov.l (%sp)+,%fpcr # pop the saved value straight into FPCR
8864 fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8865 bra t_inx2
8867 global slog10d
# slog10d: same as slog10 but for a denormalized operand. It calls slognd
# instead of slogn, has no X == 1 shortcut, and leaves through t_minx2.
8868 #--entry point for Log10(X), X is denormalized
8869 slog10d:
8870 mov.l (%a0),%d1 # the move sets the condition codes from the sign/exponent longword
8871 blt.w invalid # negative operand
8872 mov.l %d0,-(%sp) # save caller's precision/mode across the call
8873 clr.l %d0 # slognd runs with d0 = 0
8874 bsr slognd # log(X), X denorm.
8875 fmov.l (%sp)+,%fpcr # pop the saved value straight into FPCR
8876 fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8877 bra t_minx2
8879 global slog2
# slog2: base-2 logarithm of the operand at (%a0).
#   In:   a0 = pointer to X; d0 = caller's rounding precision/mode.
#   Out:  fp0 = result via t_inx2. X == 1 leaves via ld_pzero and negative X
#         via 'invalid'. When X is an exact power of two the unbiased
#         exponent is converted directly; otherwise the result is
#         log(X) * INV_L2.
# The 'invalid' label is also branched to by slog10, slog10d and slog2d.
8880 #--entry point for Log2(X), X is normalized
8881 slog2:
8882 mov.l (%a0),%d1 # the move sets the condition codes from the sign/exponent longword
8883 blt.w invalid # negative operand
# X is 2^k only if the mantissa is just its top bit: the low longword is zero
# and the high longword is zero once bit 31 is masked off.
8885 mov.l 8(%a0),%d1
8886 bne.b continue # X is not 2^k
8888 mov.l 4(%a0),%d1
8889 and.l &0x7FFFFFFF,%d1
8890 bne.b continue
8892 #--X = 2^k.
8893 mov.w (%a0),%d1 # exponent word
8894 and.l &0x00007FFF,%d1
8895 sub.l &0x3FFF,%d1 # remove the bias: d1 = k
8896 beq.l ld_pzero # k == 0, i.e. X == 1
8897 fmov.l %d0,%fpcr
8898 fmov.l %d1,%fp0 # result is the integer k
8899 bra t_inx2
8901 continue:
8902 mov.l %d0,-(%sp) # save caller's precision/mode across the call
8903 clr.l %d0 # slogn runs with d0 = 0
8904 bsr slogn # log(X), X normal.
8905 fmov.l (%sp)+,%fpcr # pop the saved value straight into FPCR
8906 fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8907 bra t_inx2
8909 invalid:
8910 bra t_operr
8912 global slog2d
# slog2d: same as slog2 but for a denormalized operand. It calls slognd
# instead of slogn, has no power-of-two shortcut, and leaves through t_minx2.
8913 #--entry point for Log2(X), X is denormalized
8914 slog2d:
8915 mov.l (%a0),%d1 # the move sets the condition codes from the sign/exponent longword
8916 blt.w invalid # negative operand
8917 mov.l %d0,-(%sp) # save caller's precision/mode across the call
8918 clr.l %d0 # slognd runs with d0 = 0
8919 bsr slognd # log(X), X denorm.
8920 fmov.l (%sp)+,%fpcr # pop the saved value straight into FPCR
8921 fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8922 bra t_minx2
8924 #########################################################################
8925 # stwotox(): computes 2**X for a normalized input #
8926 # stwotoxd(): computes 2**X for a denormalized input #
8927 # stentox(): computes 10**X for a normalized input #
8928 # stentoxd(): computes 10**X for a denormalized input #
8930 # INPUT *************************************************************** #
8931 # a0 = pointer to extended precision input #
8932 # d0 = round precision,mode #
8934 # OUTPUT ************************************************************** #
8935 # fp0 = 2**X or 10**X #
8937 # ACCURACY and MONOTONICITY ******************************************* #
8938 # The returned result is within 2 ulps in 64 significant bit, #
8939 # i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8940 # rounded to double precision. The result is provably monotonic #
8941 # in double precision. #
8943 # ALGORITHM *********************************************************** #
8945 # twotox #
8946 # 1. If |X| > 16480, go to ExpBig. #
8948 # 2. If |X| < 2**(-70), go to ExpSm. #
8950 # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8951 # decompose N as #
8952 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8954 # 4. Overwrite r := r * log2. Then #
8955 # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8956 # Go to expr to compute that expression. #
8958 # tentox #
8959 # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8961 # 2. If |X| < 2**(-70), go to ExpSm. #
8963 # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8964 # N := round-to-int(y). Decompose N as #
8965 # N = 64(M + M') + j, j = 0,1,2,...,63. #
8967 # 4. Define r as #
8968 # r := ((X - N*L1)-N*L2) * L10 #
8969 # where L1, L2 are the leading and trailing parts of #
8970 # log_10(2)/64 and L10 is the natural log of 10. Then #
8971 # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8972 # Go to expr to compute that expression. #
8974 # expr #
8975 # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8977 # 2. Overwrite Fact1 and Fact2 by #
8978 # Fact1 := 2**(M) * Fact1 #
8979 # Fact2 := 2**(M) * Fact2 #
8980 # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8982 # 3. Calculate P where 1 + P approximates exp(r): #
8983 # P = r + r*r*(A1+r*(A2+...+r*A5)). #
8985 # 4. Let AdjFact := 2**(M'). Return #
8986 # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8987 # Exit. #
8989 # ExpBig #
8990 # 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8991 # generate underflow by Tiny * Tiny. #
8993 # ExpSm #
8994 # 1. Return 1 + X. #
8996 #########################################################################
# Constants for stwotox/stentox.
# L2TEN64 and L10TWO1 are read with fmul.d by stentox; L10TWO2 and LOG2 are
# read with fmul.x. LOG10 and EXPA5..EXPA1 are not referenced in this part of
# the file; presumably they are the "L10" factor and the A5..A1 coefficients
# of the expr polynomial described in the header above - confirm at expr.
8998 L2TEN64:
8999 long 0x406A934F,0x0979A371 # 64LOG10/LOG2
9000 L10TWO1:
9001 long 0x3F734413,0x509F8000 # LOG2/64LOG10
# Trailing part that pairs with L10TWO1 (stentox labels it (LOG2/64LOG10)_TRAIL).
9003 L10TWO2:
9004 long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
9006 LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
# stwotox multiplies the reduced argument by LOG2 to obtain r.
9008 LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
9010 EXPA5: long 0x3F56C16D,0x6F7BD0B2
9011 EXPA4: long 0x3F811112,0x302C712C
9012 EXPA3: long 0x3FA55555,0x55554CC1
9013 EXPA2: long 0x3FC55555,0x55554A54
9014 EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
# TEXPTBL: 64 entries of 16 bytes; stwotox and stentox address entry j at
# TEXPTBL + j*16 and describe it as the table of 2^(J/64).
# Layout of one entry, as stwotox reads it:
#   longwords 0-2: copied verbatim into the three longwords of FACT1
#   longword 3:    two words - the first is stored into FACT2's first word,
#                  the second into the upper word of FACT2HI; the rest of
#                  FACT2 is cleared.
9016 TEXPTBL:
9017 long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
9018 long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
9019 long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
9020 long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
9021 long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
9022 long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
9023 long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
9024 long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
9025 long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
9026 long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
9027 long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
9028 long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
9029 long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
9030 long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
9031 long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
9032 long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
9033 long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
9034 long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
9035 long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
9036 long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
9037 long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
9038 long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
9039 long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
9040 long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
9041 long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
9042 long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
9043 long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
9044 long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
9045 long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
9046 long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
9047 long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
9048 long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
9049 long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
9050 long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
9051 long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
9052 long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
9053 long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
9054 long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
9055 long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
9056 long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
9057 long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
9058 long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
9059 long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
9060 long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
9061 long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
9062 long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
9063 long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
9064 long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
9065 long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
9066 long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
9067 long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
9068 long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
9069 long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
9070 long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
9071 long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
9072 long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
9073 long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
9074 long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
9075 long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
9076 long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
9077 long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
9078 long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
9079 long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
9080 long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
# Scratch-slot aliases used by stwotox/stentox.
# X, ADJFACT and FACT1 all name the same FP_SCR0 slot, so a store through
# one of them overwrites whatever was stored through the others.
9082 set INT,L_SCR1 # N, the rounded integer stored by TWOMAIN/TENMAIN
9084 set X,FP_SCR0 # copy of the input operand
9085 set XDCARE,X+2
9086 set XFRAC,X+4
9088 set ADJFACT,FP_SCR0
9090 set FACT1,FP_SCR0 # leading part of the 2^(J/64) table entry
9091 set FACT1HI,FACT1+4
9092 set FACT1LOW,FACT1+8
9094 set FACT2,FP_SCR1 # trailing part of the 2^(J/64) table entry
9095 set FACT2HI,FACT2+4
9096 set FACT2LOW,FACT2+8
9098 global stwotox
# stwotox: 2**X for the extended-precision operand at (%a0).
#   In:   a0 = pointer to X; d0 = caller's rounding precision/mode.
#   Out:  Main path (2^(-70) <= |X| <= 16480) branches to expr with
#           fp0 = r, FACT1/FACT2 = table entry with M added to the exponent
#           words, d2 = M' + 0x3FFF,
#           stack (top first) = saved fp2/fp3, caller's d2.
#         expr is outside this section; presumably it pops those saves.
#         Otherwise control goes to EXPBORS, which returns 1+X via t_pinx2
#         for tiny |X| or leaves via t_ovfl2/t_unfl2 for large |X|.
# EXPBORS, TEXPBIG and EXPNEG are also used by stentox.
9099 #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
9100 stwotox:
9101 fmovm.x (%a0),&0x80 # LOAD INPUT
# d1 = compact form of |X| (exponent word : top 16 mantissa bits, sign cleared).
9103 mov.l (%a0),%d1
9104 mov.w 4(%a0),%d1
9105 fmov.x %fp0,X(%a6)
9106 and.l &0x7FFFFFFF,%d1
9108 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9109 bge.b TWOOK1
9110 bra.w EXPBORS
9112 TWOOK1:
9113 cmp.l %d1,&0x400D80C0 # |X| > 16480?
9114 ble.b TWOMAIN
9115 bra.w EXPBORS
9117 TWOMAIN:
9118 #--USUAL CASE, 2^(-70) <= |X| <= 16480
9120 fmov.x %fp0,%fp1
9121 fmul.s &0x42800000,%fp1 # 64 * X
9122 fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9123 mov.l %d2,-(%sp) # save caller's d2; not popped in this routine
9124 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9125 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9126 mov.l INT(%a6),%d1
9127 mov.l %d1,%d2
9128 and.l &0x3F,%d1 # D1 IS J
9129 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9130 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9131 asr.l &6,%d2 # d2 IS L, N = 64L + J
9132 mov.l %d2,%d1
9133 asr.l &1,%d1 # D1 IS M
9134 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9135 add.l &0x3FFF,%d2 # d2 = M' + 0x3FFF
9137 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9138 #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9139 #--ADJFACT = 2^(M').
9140 #--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
9142 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9144 fmul.s &0x3C800000,%fp1 # (1/64)*N
# Copy the table entry: three longwords into FACT1, then the packed fourth
# longword into FACT2 one word at a time (interleaved with the FP ops).
9145 mov.l (%a1)+,FACT1(%a6)
9146 mov.l (%a1)+,FACT1HI(%a6)
9147 mov.l (%a1)+,FACT1LOW(%a6)
9148 mov.w (%a1)+,FACT2(%a6)
9150 fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9152 mov.w (%a1)+,FACT2HI(%a6)
9153 clr.w FACT2HI+2(%a6)
9154 clr.l FACT2LOW(%a6)
9155 add.w %d1,FACT1(%a6) # add M to FACT1's exponent word
9156 fmul.x LOG2(%pc),%fp0 # FP0 IS R
9157 add.w %d1,FACT2(%a6) # add M to FACT2's exponent word
9159 bra.w expr
9161 EXPBORS:
9162 #--FPCR, D0 SAVED
# On entry: d1 = compact form of |X|, fp0 = X, X(%a6) = X. Callers branch
# here only when |X| is below 2^(-70) or above their upper bound, so the
# comparison below separates "tiny" from "huge".
9163 cmp.l %d1,&0x3FFF8000
9164 bgt.b TEXPBIG
9166 #--|X| IS SMALL, RETURN 1 + X
9168 fmov.l %d0,%fpcr # restore users round prec,mode
9169 fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9170 bra t_pinx2
9172 TEXPBIG:
9173 #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9174 #--REGISTERS SAVE SO FAR ARE FPCR AND D0
9175 mov.l X(%a6),%d1 # sign/exponent longword of the saved operand
9176 cmp.l %d1,&0
9177 blt.b EXPNEG
9179 bra t_ovfl2 # t_ovfl expects positive value
9181 EXPNEG:
9182 bra t_unfl2 # t_unfl expects positive value
9184 global stwotoxd
9185 stwotoxd:
9186 #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
# Returns 1 plus a tiny single-precision value carrying the bits of the first
# longword of X, added under the user's rounding mode/precision.
# In: a0 = pointer to X, d0 = value to load into FPCR. Clobbers d1.
9188 fmov.l %d0,%fpcr # set user's rounding mode/precision
9189 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (single)
9190 mov.l (%a0),%d1 # first longword of X
9191 or.l &0x00800001,%d1 # force a nonzero single-format bit pattern
9192 fadd.s %d1,%fp0 # RETURN 1 + (tiny value built from X)
9193 bra t_pinx2
9195 global stentox
9196 #--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
# In: a0 = pointer to X, d0 = value later loaded into FPCR.
# Uses d1, d2 (saved/restored on the stack), a1, fp0-fp3 (fp2/fp3 saved).
# Exits through EXPBORS for out-of-range |X|, otherwise through t_catch.
9197 stentox:
9198 fmovm.x (%a0),&0x80 # LOAD INPUT
9200 mov.l (%a0),%d1 # upper half of d1 = sign/exponent word of X
9201 mov.w 4(%a0),%d1 # lower half of d1 = word at offset 4 of X
9202 fmov.x %fp0,X(%a6)
9203 and.l &0x7FFFFFFF,%d1 # strip the sign: d1 = packed |X|
9205 cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9206 bge.b TENOK1
9207 bra.w EXPBORS
9209 TENOK1:
9210 cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9211 ble.b TENMAIN
9212 bra.w EXPBORS
9214 TENMAIN:
9215 #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
9217 fmov.x %fp0,%fp1
9218 fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9219 fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9220 mov.l %d2,-(%sp) # save d2; popped again in the expr tail
9221 lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9222 fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9223 mov.l INT(%a6),%d1
9224 mov.l %d1,%d2
9225 and.l &0x3F,%d1 # d1 IS J
9226 asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9227 add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9228 asr.l &6,%d2 # d2 IS L, N = 64L + J
9229 mov.l %d2,%d1
9230 asr.l &1,%d1 # d1 IS M
9231 sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9232 add.l &0x3FFF,%d2 # d2 = M' + 0x3FFF (exponent word for ADJFACT)
9234 #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9235 #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9236 #--d2 IS THE EXPONENT WORD OF ADJFACT = 2^(M'); IT IS STORED IN THE expr TAIL.
9237 #--PUSHED ON THE STACK SO FAR (IN ORDER): d2, THEN FP2/FP3 BELOW.
9238 fmovm.x &0x0c,-(%sp) # save fp2/fp3
9240 fmov.x %fp1,%fp2
9242 fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9243 mov.l (%a1)+,FACT1(%a6) # copy the table entry into FACT1/FACT2
9245 fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9247 mov.l (%a1)+,FACT1HI(%a6)
9248 mov.l (%a1)+,FACT1LOW(%a6)
9249 fsub.x %fp1,%fp0 # X - N L_LEAD
9250 mov.w (%a1)+,FACT2(%a6)
9252 fsub.x %fp2,%fp0 # (X - N L_LEAD) - N L_TRAIL
9254 mov.w (%a1)+,FACT2HI(%a6)
9255 clr.w FACT2HI+2(%a6)
9256 clr.l FACT2LOW(%a6)
9258 fmul.x LOG10(%pc),%fp0 # FP0 IS R
9259 add.w %d1,FACT1(%a6) # add M to the first word of FACT1
9260 add.w %d1,FACT2(%a6) # add M to the first word of FACT2
9262 expr:
# Shared tail: stwotox also branches here (bra.w expr) with the same
# register and stack state as the fall-through from stentox.
9263 #--ON THE STACK (PUSH ORDER): d2, THEN FP2/FP3.
9264 #--d2 HOLDS THE EXPONENT WORD OF 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
9265 #--FP0 IS R. THE FOLLOWING CODE COMPUTES
9266 #-- 2**(M'+M) * 2**(J/64) * EXP(R)
9268 fmov.x %fp0,%fp1
9269 fmul.x %fp1,%fp1 # FP1 IS S = R*R
9271 fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9272 fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9274 fmul.x %fp1,%fp2 # FP2 IS S*A5
9275 fmul.x %fp1,%fp3 # FP3 IS S*A4
9277 fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9278 fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9280 fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9281 fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9283 fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9284 fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9286 fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9287 fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9288 fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9290 fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9292 #--FINAL RECONSTRUCTION PROCESS
9293 #--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
9295 fmul.x FACT1(%a6),%fp0 # FACT1*(EXP(R)-1)
9296 fadd.x FACT2(%a6),%fp0 # ... + FACT2
9297 fadd.x FACT1(%a6),%fp0 # ... + FACT1
9299 fmov.l %d0,%fpcr # restore users round prec,mode
9300 mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9301 mov.l (%sp)+,%d2 # restore caller's d2
9302 mov.l &0x80000000,ADJFACT+4(%a6) # mantissa = 1.0, so ADJFACT = 2^(M')
9303 clr.l ADJFACT+8(%a6)
9304 mov.b &FMUL_OP,%d1 # last inst is MUL
9305 fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9306 bra t_catch
9308 global stentoxd
9309 stentoxd:
9310 #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
# Same instruction sequence as stwotoxd: 1 plus a tiny single-precision value
# built from the first longword of X, added under the user's FPCR.
# In: a0 = pointer to X, d0 = value to load into FPCR. Clobbers d1.
9312 fmov.l %d0,%fpcr # set user's rounding mode/precision
9313 fmov.s &0x3F800000,%fp0 # fp0 = 1.0 (single)
9314 mov.l (%a0),%d1 # first longword of X
9315 or.l &0x00800001,%d1 # force a nonzero single-format bit pattern
9316 fadd.s %d1,%fp0 # RETURN 1 + (tiny value built from X)
9317 bra t_pinx2
9319 #########################################################################
9320 # smovcr(): returns the ROM constant at the offset specified in d1 #
9321 # rounded to the mode and precision specified in d0. #
9323 # INPUT *************************************************************** #
9324 # d0 = rnd prec,mode #
9325 # d1 = ROM offset #
9327 # OUTPUT ************************************************************** #
9328 # fp0 = the ROM constant rounded to the user's rounding mode,prec #
9330 #########################################################################
# smovcr: return a constant-ROM value in fp0.
#   In:  d0 = rnd prec,mode byte; d1 = ROM offset (byte).
#   Out: fp0 = constant; INEX2/AINEX set in USER_FPSR for inexact entries.
#   Clobbers d0, d1, a0 (plus whatever ld_pzero / _round clobber).
# Both exits below end in an explicit rts: without them the extended-precision
# path would fall into not_ext and the final path would run into the tables.
9332 global smovcr
9333 smovcr:
9334 mov.l %d1,-(%sp) # save rom offset for a sec
9336 lsr.b &0x4,%d0 # shift ctrl bits to lo
9337 mov.l %d0,%d1 # make a copy
9338 andi.w &0x3,%d1 # extract rnd mode
9339 andi.w &0xc,%d0 # extract rnd prec
9340 swap %d0 # put rnd prec in hi
9341 mov.w %d1,%d0 # put rnd mode in lo
9343 mov.l (%sp)+,%d1 # get rom offset
9346 # check range of offset
9348 tst.b %d1 # if zero, offset is to pi
9349 beq.b pi_tbl # it is pi
9350 cmpi.b %d1,&0x0a # check range $01 - $0a
9351 ble.b z_val # if in this range, return zero
9352 cmpi.b %d1,&0x0e # check range $0b - $0e
9353 ble.b sm_tbl # valid constants in this range
9354 cmpi.b %d1,&0x2f # check range $0f - $2f
9355 ble.b z_val # if in this range, return zero
9356 cmpi.b %d1,&0x3f # check range $30 - $3f
9357 ble.b bg_tbl # valid constants in this range
# anything else falls through and also returns zero
9359 z_val:
9360 bra.l ld_pzero # return a zero
9363 # the answer is PI rounded to the proper precision.
9365 # fetch a pointer to the answer table relating to the proper rounding
9366 # precision.
# d1 is still zero on this path, so the table index computed at no_finx is 0.
9368 pi_tbl:
9369 tst.b %d0 # is rmode RN?
9370 bne.b pi_not_rn # no
9371 pi_rn:
9372 lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
9373 bra.w set_finx
9374 pi_not_rn:
9375 cmpi.b %d0,&rp_mode # is rmode RP?
9376 beq.b pi_rp # yes
9377 pi_rzrm:
9378 lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
9379 bra.b set_finx
9380 pi_rp:
9381 lea.l PIRP(%pc),%a0 # load PI RP table addr
9382 bra.b set_finx
9385 # the answer is one of:
9386 # $0B log10(2) (inexact)
9387 # $0C e (inexact)
9388 # $0D log2(e) (inexact)
9389 # $0E log10(e) (exact)
9391 # fetch a pointer to the answer table relating to the proper rounding
9392 # precision.
9394 sm_tbl:
9395 subi.b &0xb,%d1 # make offset in 0-3 range
9396 tst.b %d0 # is rmode RN?
9397 bne.b sm_not_rn # no
9398 sm_rn:
9399 lea.l SMALRN(%pc),%a0 # yes; load RN table addr
9400 sm_tbl_cont:
9401 cmpi.b %d1,&0x2 # is result log10(e)?
9402 ble.b set_finx # no; answer is inexact
9403 bra.b no_finx # yes; answer is exact
9404 sm_not_rn:
9405 cmpi.b %d0,&rp_mode # is rmode RP?
9406 beq.b sm_rp # yes
9407 sm_rzrm:
9408 lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
9409 bra.b sm_tbl_cont
9410 sm_rp:
9411 lea.l SMALRP(%pc),%a0 # load RP table addr
9412 bra.b sm_tbl_cont
9415 # the answer is one of:
9416 # $30 ln(2) (inexact)
9417 # $31 ln(10) (inexact)
9418 # $32 10^0 (exact)
9419 # $33 10^1 (exact)
9420 # $34 10^2 (exact)
9421 # $35 10^4 (exact)
9422 # $36 10^8 (exact)
9423 # $37 10^16 (exact)
9424 # $38 10^32 (inexact)
9425 # $39 10^64 (inexact)
9426 # $3A 10^128 (inexact)
9427 # $3B 10^256 (inexact)
9428 # $3C 10^512 (inexact)
9429 # $3D 10^1024 (inexact)
9430 # $3E 10^2048 (inexact)
9431 # $3F 10^4096 (inexact)
9433 # fetch a pointer to the answer table relating to the proper rounding
9434 # precision.
9436 bg_tbl:
9437 subi.b &0x30,%d1 # make offset in 0-f range
9438 tst.b %d0 # is rmode RN?
9439 bne.b bg_not_rn # no
9440 bg_rn:
9441 lea.l BIGRN(%pc),%a0 # yes; load RN table addr
9442 bg_tbl_cont:
9443 cmpi.b %d1,&0x1 # is offset <= $31?
9444 ble.b set_finx # yes; answer is inexact
9445 cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
9446 ble.b no_finx # yes; answer is exact
9447 bra.b set_finx # no; answer is inexact
9448 bg_not_rn:
9449 cmpi.b %d0,&rp_mode # is rmode RP?
9450 beq.b bg_rp # yes
9451 bg_rzrm:
9452 lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
9453 bra.b bg_tbl_cont
9454 bg_rp:
9455 lea.l BIGRP(%pc),%a0 # load RP table addr
9456 bra.b bg_tbl_cont
9458 # answer is inexact, so set INEX2 and AINEX in the user's FPSR.
9459 set_finx:
9460 ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
9461 no_finx:
9462 mulu.w &0xc,%d1 # offset points into tables (12 bytes per entry)
9463 swap %d0 # put rnd prec in lo word
9464 tst.b %d0 # is precision extended?
9466 bne.b not_ext # not extended; go round the constant
9468 # Precision is extended
9469 fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
9470 rts
9472 # Precision is single or double
9473 not_ext:
9474 swap %d0 # rnd prec in upper word
9476 # call round() to round the answer to the proper precision.
9477 # exponents out of range for single or double DO NOT cause underflow
9478 # or overflow.
9479 mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
9480 mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
9481 mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
9482 mov.l %d0,%d1 # prec (hi word) / mode (lo word) moved to d1 for _round
9483 clr.l %d0 # clear g,r,s
9484 lea FP_SCR1(%a6),%a0 # pass ptr to answer
9485 clr.w LOCAL_SGN(%a0) # sign always positive
9486 bsr.l _round # round the mantissa
9488 fmovm.x (%a0),&0x80 # return rounded result in fp0
9489 rts
# Constant tables used by smovcr. Each entry is three longwords (12 bytes,
# matching the mulu.w &0xc index in smovcr): the first word of the first
# longword is the sign/exponent word, followed by the high and low mantissa
# longwords. There is one copy of each table per rounding-mode group
# (RN, RZ/RM, RP).
9491 align 0x4
9493 PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
9494 PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
9495 PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
# SMALxx: entries for ROM offsets $0B-$0E, in that order. The trailing 0.0
# entry is not indexed by the smovcr code shown above (offsets map to 0-3).
9497 SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9498 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9499 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9500 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9501 long 0x00000000,0x00000000,0x00000000 # 0.0
9503 SMALRZRM:
9504 long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
9505 long 0x40000000,0xadf85458,0xa2bb4a9a # e
9506 long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
9507 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9508 long 0x00000000,0x00000000,0x00000000 # 0.0
9510 SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
9511 long 0x40000000,0xadf85458,0xa2bb4a9b # e
9512 long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
9513 long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
9514 long 0x00000000,0x00000000,0x00000000 # 0.0
# BIGxx: entries for ROM offsets $30-$3F, in that order.
9516 BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9517 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9519 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9520 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9521 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9522 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9523 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9524 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9525 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9526 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9527 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9528 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9529 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9530 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9531 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9532 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9534 BIGRZRM:
9535 long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
9536 long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
9538 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9539 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9540 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9541 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9542 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9543 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9544 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
9545 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
9546 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
9547 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
9548 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
9549 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
9550 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
9551 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
9553 BIGRP:
9554 long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
9555 long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
9557 long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
9558 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
9559 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
9560 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
9561 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
9562 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
9563 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
9564 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
9565 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
9566 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
9567 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
9568 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
9569 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
9570 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
9572 #########################################################################
9573 # sscale(): computes the destination operand scaled by the source #
9574 # operand. If the absolute value of the source operand is #
9575 # >= 2^14, an overflow or underflow is returned. #
9577 # INPUT *************************************************************** #
9578 # a0 = pointer to double-extended source operand X #
9579 # a1 = pointer to double-extended destination operand Y #
9581 # OUTPUT ************************************************************** #
9582 # fp0 = scale(X,Y) #
9584 #########################################################################
9586 set SIGN, L_SCR1
# sscale: multiply the destination by 2^(integer part of the source).
#   In:  a0 = ptr to source X, a1 = ptr to destination Y, d0 = ctrl bits.
#   The ctrl bits are parked on the stack on entry and popped on every path.
#   Exits via t_catch2, t_unfl, t_ovfl_sc or t_resdnrm.
9588 global sscale
9589 sscale:
9590 mov.l %d0,-(%sp) # store off ctrl bits for now
9592 mov.w DST_EX(%a1),%d1 # get dst exponent
9593 smi.b SIGN(%a6) # use SIGN to hold dst sign
9594 andi.l &0x00007fff,%d1 # strip sign from dst exp
9596 mov.w SRC_EX(%a0),%d0 # check src bounds
9597 andi.w &0x7fff,%d0 # clr src sign bit
9598 cmpi.w %d0,&0x3fff # is src ~ ZERO?
9599 blt.w src_small # yes
9600 cmpi.w %d0,&0x400c # no; is src too big?
9601 bgt.w src_out # yes
9604 # Source is within 2^14 range.
9606 src_ok:
9607 fintrz.x SRC(%a0),%fp0 # calc int of src
9608 fmov.l %fp0,%d0 # int src to d0
9609 # don't want any accrued bits from the fintrz showing up later since
9610 # we may need to read the fpsr for the last fp op in t_catch2().
9611 fmov.l &0x0,%fpsr
9613 tst.b DST_HI(%a1) # is dst denormalized?
9614 bmi.b sok_norm # top mantissa bit set: not a denorm
9616 # the dst is a DENORM. normalize the DENORM and add the adjustment to
9617 # the src value. then, jump to the norm part of the routine.
9618 sok_dnrm:
9619 mov.l %d0,-(%sp) # save src for now
9621 mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9622 mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9623 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9625 lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9626 bsr.l norm # normalize the DENORM
9627 neg.l %d0 # negate the value norm left in d0
9628 add.l (%sp)+,%d0 # add adjustment to src
9630 fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9632 cmpi.w %d0,&-0x3fff # is the shft amt really low?
9633 bge.b sok_norm2 # thank goodness no
9635 # the multiply factor that we're trying to create should be a denorm
9636 # for the multiply to work. therefore, we're going to actually do a
9637 # multiply with a denorm which will cause an unimplemented data type
9638 # exception to be put into the machine which will be caught and corrected
9639 # later. we don't do this with the DENORMs above because this method
9640 # is slower. but, don't fret, I don't see it being used much either.
9641 fmov.l (%sp)+,%fpcr # restore user fpcr
9642 mov.l &0x80000000,%d1 # load normalized mantissa
9643 subi.l &-0x3fff,%d0 # how many should we shift?
9644 neg.l %d0 # make it positive
9645 cmpi.b %d0,&0x20 # is it >= 32?
9646 bge.b sok_dnrm_32 # yes
9647 lsr.l %d0,%d1 # no; bit stays in upper lw
9648 clr.l -(%sp) # insert zero low mantissa
9649 mov.l %d1,-(%sp) # insert new high mantissa
9650 clr.l -(%sp) # make zero exponent
9651 bra.b sok_norm_cont
9652 sok_dnrm_32:
9653 subi.b &0x20,%d0 # get shift count
9654 lsr.l %d0,%d1 # make low mantissa longword
9655 mov.l %d1,-(%sp) # insert new low mantissa
9656 clr.l -(%sp) # insert zero high mantissa
9657 clr.l -(%sp) # make zero exponent
9658 bra.b sok_norm_cont
9660 # the src will force the dst to a DENORM value or worse. so, let's
9661 # create an fp multiply that will create the result.
9662 sok_norm:
9663 fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst
9664 sok_norm2:
9665 fmov.l (%sp)+,%fpcr # restore user fpcr
# build the 12-byte factor 2^(src) on the stack; the pushes go in reverse
# memory order: low mantissa first, exponent longword last.
9667 addi.w &0x3fff,%d0 # turn src amt into exp value
9668 swap %d0 # put exponent in high word
9669 clr.l -(%sp) # insert zero low mantissa
9670 mov.l &0x80000000,-(%sp) # insert new high mantissa
9671 mov.l %d0,-(%sp) # insert new exponent
9673 sok_norm_cont:
9674 fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9675 mov.b &FMUL_OP,%d1 # last inst is MUL
9676 fmul.x (%sp)+,%fp0 # do the multiply (pops the factor)
9677 bra t_catch2 # catch any exceptions
9680 # Source is outside of 2^14 range. Test the sign and branch
9681 # to the appropriate exception handler.
9683 src_out:
9684 mov.l (%sp)+,%d0 # restore ctrl bits
9685 exg %a0,%a1 # swap src,dst ptrs
9686 tst.b SRC_EX(%a1) # is src negative?
9687 bmi t_unfl # yes; underflow
9688 bra t_ovfl_sc # no; overflow
9691 # The source input is below 1, so we check for denormalized numbers
9692 # and set unfl.
9694 src_small:
9695 tst.b DST_HI(%a1) # is dst denormalized?
9696 bpl.b ssmall_done # yes
9698 mov.l (%sp)+,%d0
9699 fmov.l %d0,%fpcr # no; load control bits
9700 mov.b &FMOV_OP,%d1 # last inst is MOVE
9701 fmov.x DST(%a1),%fp0 # simply return dest
9702 bra t_catch2
9703 ssmall_done:
9704 mov.l (%sp)+,%d0 # load control bits into d0
9705 mov.l %a1,%a0 # pass ptr to dst
9706 bra t_resdnrm
9708 #########################################################################
9709 # smod(): computes the fp MOD of the input values X,Y. #
9710 # srem(): computes the fp (IEEE) REM of the input values X,Y. #
9712 # INPUT *************************************************************** #
9713 # a0 = pointer to extended precision input X #
9714 # a1 = pointer to extended precision input Y #
9715 # d0 = round precision,mode #
9717 # The input operands X and Y can be either normalized or #
9718 # denormalized. #
9720 # OUTPUT ************************************************************** #
9721 # fp0 = FREM(X,Y) or FMOD(X,Y) #
9723 # ALGORITHM *********************************************************** #
9725 # Step 1. Save and strip signs of X and Y: signX := sign(X), #
9726 # signY := sign(Y), X := |X|, Y := |Y|, #
9727 # signQ := signX EOR signY. Record whether MOD or REM #
9728 # is requested. #
9730 # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9731 # If (L < 0) then #
9732 # R := X, go to Step 4. #
9733 # else #
9734 # R := 2^(-L)X, j := L. #
9735 # endif #
9737 # Step 3. Perform MOD(X,Y) #
9738 # 3.1 If R = Y, go to Step 9. #
9739 # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9740 # 3.3 If j = 0, go to Step 4. #
9741 # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9742 # Step 3.1. #
9744 # Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9745 # Last_Subtract := false (used in Step 7 below). If #
9746 # MOD is requested, go to Step 6. #
9748 # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9749 # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9750 # Step 6. #
9751 # 5.2 If R > Y/2, then { set Last_Subtract := true, #
9752 # Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9753 # 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9754 # then { Q := Q + 1, signX := -signX }. #
9756 # Step 6. R := signX*R. #
9758 # Step 7. If Last_Subtract = true, R := R - Y. #
9760 # Step 8. Return signQ, last 7 bits of Q, and R as required. #
9762 # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9763 # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9764 # R := 0. Return signQ, last 7 bits of Q, and R. #
9766 #########################################################################
# Scratch layout for smod/srem. Mod_Flag is 0 for smod and 1 for srem;
# Sc_Flag is set when the result still has to be multiplied by Scale.
9768 set Mod_Flag,L_SCR3
9769 set Sc_Flag,L_SCR3+1
9771 set SignY,L_SCR2
9772 set SignX,L_SCR2+2
9773 set SignQ,L_SCR3+2
9775 set Y,FP_SCR0
9776 set Y_Hi,Y+4
9777 set Y_Lo,Y+8
9779 set R,FP_SCR1
9780 set R_Hi,R+4
9781 set R_Lo,R+8
# Factor applied to fp0 in Restore when Sc_Flag is set (exponent word 0x0001,
# mantissa 0x80000000_00000000).
9783 Scale:
9784 long 0x00010000,0x80000000,0x00000000,0x00000000
9786 global smod
9787 smod:
9788 clr.b FPSR_QBYTE(%a6)
9789 mov.l %d0,-(%sp) # save ctrl bits
9790 clr.b Mod_Flag(%a6) # Mod_Flag = 0: MOD requested
9791 bra.b Mod_Rem
9793 global srem
9794 srem:
9795 clr.b FPSR_QBYTE(%a6)
9796 mov.l %d0,-(%sp) # save ctrl bits
9797 mov.b &0x1,Mod_Flag(%a6) # Mod_Flag = 1: REM requested
# Common body. Register use below: (d3,d4,d5) = |Y| until Init, then d3 = Q;
# (d0,d1,d2) = |X| / R, with d0 becoming j inside the loop; d6 = carry;
# a1 = k. d2-d7 are saved here and restored at Restore.
9799 Mod_Rem:
9800 #..Save sign of X and Y
9801 movm.l &0x3f00,-(%sp) # save data registers
9802 mov.w SRC_EX(%a0),%d3
9803 mov.w %d3,SignY(%a6)
9804 and.l &0x00007FFF,%d3 # Y := |Y|
9807 mov.l SRC_HI(%a0),%d4
9808 mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
9810 tst.l %d3
9811 bne.b Y_Normal
# exponent field is zero: normalize the mantissa by hand
9813 mov.l &0x00003FFE,%d3 # $3FFD + 1
9814 tst.l %d4
9815 bne.b HiY_not0
9817 HiY_0:
9818 mov.l %d5,%d4
9819 clr.l %d5
9820 sub.l &32,%d3
9821 clr.l %d6
9822 bfffo %d4{&0:&32},%d6
9823 lsl.l %d6,%d4
9824 sub.l %d6,%d3 # (D3,D4,D5) is normalized
9825 # ...with bias $7FFD
9826 bra.b Chk_X
9828 HiY_not0:
9829 clr.l %d6
9830 bfffo %d4{&0:&32},%d6
9831 sub.l %d6,%d3
9832 lsl.l %d6,%d4
9833 mov.l %d5,%d7 # a copy of D5
9834 lsl.l %d6,%d5
9835 neg.l %d6
9836 add.l &32,%d6
9837 lsr.l %d6,%d7
9838 or.l %d7,%d4 # (D3,D4,D5) normalized
9839 # ...with bias $7FFD
9840 bra.b Chk_X
9842 Y_Normal:
9843 add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
9844 # ...with bias $7FFD
9846 Chk_X:
9847 mov.w DST_EX(%a1),%d0
9848 mov.w %d0,SignX(%a6)
9849 mov.w SignY(%a6),%d1
9850 eor.l %d0,%d1
9851 and.l &0x00008000,%d1
9852 mov.w %d1,SignQ(%a6) # sign(Q) obtained
9853 and.l &0x00007FFF,%d0
9854 mov.l DST_HI(%a1),%d1
9855 mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
9856 tst.l %d0
9857 bne.b X_Normal
# exponent field is zero: normalize the mantissa by hand, as done for Y
9858 mov.l &0x00003FFE,%d0
9859 tst.l %d1
9860 bne.b HiX_not0
9862 HiX_0:
9863 mov.l %d2,%d1
9864 clr.l %d2
9865 sub.l &32,%d0
9866 clr.l %d6
9867 bfffo %d1{&0:&32},%d6
9868 lsl.l %d6,%d1
9869 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9870 # ...with bias $7FFD
9871 bra.b Init
9873 HiX_not0:
9874 clr.l %d6
9875 bfffo %d1{&0:&32},%d6
9876 sub.l %d6,%d0
9877 lsl.l %d6,%d1
9878 mov.l %d2,%d7 # a copy of D2
9879 lsl.l %d6,%d2
9880 neg.l %d6
9881 add.l &32,%d6
9882 lsr.l %d6,%d7
9883 or.l %d7,%d1 # (D0,D1,D2) normalized
9884 # ...with bias $7FFD
9885 bra.b Init
9887 X_Normal:
9888 add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
9889 # ...with bias $7FFD
9891 Init:
9893 mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
9894 mov.l %d0,-(%sp) # save biased exp(X)
9895 sub.l %d3,%d0 # L := expo(X)-expo(Y)
9897 clr.l %d6 # D6 := carry <- 0
9898 clr.l %d3 # D3 is Q
9899 mov.l &0,%a1 # A1 is k; j+k=L, Q=0
9901 #..(Carry,D1,D2) is R
9902 tst.l %d0
9903 bge.b Mod_Loop_pre
9905 #..expo(X) < expo(Y). Thus X = mod(X,Y)
9907 mov.l (%sp)+,%d0 # restore d0
9908 bra.w Get_Mod
9910 Mod_Loop_pre:
9911 addq.l &0x4,%sp # erase exp(X)
9912 #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
9913 Mod_Loop:
9914 tst.l %d6 # test carry bit
9915 bgt.b R_GT_Y
9917 #..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
9918 cmp.l %d1,%d4 # compare hi(R) and hi(Y)
9919 bne.b R_NE_Y
9920 cmp.l %d2,%d5 # compare lo(R) and lo(Y)
9921 bne.b R_NE_Y
9923 #..At this point, R = Y
9924 bra.w Rem_is_0
9926 R_NE_Y:
9927 #..use the borrow of the previous compare
9928 bcs.b R_LT_Y # borrow is set iff R < Y
9930 R_GT_Y:
9931 #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
9932 #..and Y < (D1,D2) < 2Y. Either way, perform R - Y
9933 sub.l %d5,%d2 # lo(R) - lo(Y)
9934 subx.l %d4,%d1 # hi(R) - hi(Y)
9935 clr.l %d6 # clear carry
9936 addq.l &1,%d3 # Q := Q + 1
9938 R_LT_Y:
9939 #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
9940 tst.l %d0 # see if j = 0.
9941 beq.b PostLoop
9943 add.l %d3,%d3 # Q := 2Q
9944 add.l %d2,%d2 # lo(R) = 2lo(R)
9945 roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
9946 scs %d6 # set Carry if 2(R) overflows
9947 addq.l &1,%a1 # k := k+1
9948 subq.l &1,%d0 # j := j - 1
9949 #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
9951 bra.b Mod_Loop
9953 PostLoop:
9954 #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
9956 #..normalize R.
9957 mov.l L_SCR1(%a6),%d0 # new biased expo of R
9958 tst.l %d1
9959 bne.b HiR_not0
9961 HiR_0:
9962 mov.l %d2,%d1
9963 clr.l %d2
9964 sub.l &32,%d0
9965 clr.l %d6
9966 bfffo %d1{&0:&32},%d6
9967 lsl.l %d6,%d1
9968 sub.l %d6,%d0 # (D0,D1,D2) is normalized
9969 # ...with bias $7FFD
9970 bra.b Get_Mod
9972 HiR_not0:
9973 clr.l %d6
9974 bfffo %d1{&0:&32},%d6
9975 bmi.b Get_Mod # already normalized
9976 sub.l %d6,%d0
9977 lsl.l %d6,%d1
9978 mov.l %d2,%d7 # a copy of D2
9979 lsl.l %d6,%d2
9980 neg.l %d6
9981 add.l &32,%d6
9982 lsr.l %d6,%d7
9983 or.l %d7,%d1 # (D0,D1,D2) normalized
9986 Get_Mod:
# Store R and Y as extended values in the scratch area and load R into fp0.
# Below the 0x41FE threshold the exponents are stored as-is and Sc_Flag is
# set; otherwise 0x3FFE is subtracted from both exponents first.
9987 cmp.l %d0,&0x000041FE
9988 bge.b No_Scale
9989 Do_Scale:
9990 mov.w %d0,R(%a6)
9991 mov.l %d1,R_Hi(%a6)
9992 mov.l %d2,R_Lo(%a6)
9993 mov.l L_SCR1(%a6),%d6
9994 mov.w %d6,Y(%a6)
9995 mov.l %d4,Y_Hi(%a6)
9996 mov.l %d5,Y_Lo(%a6)
9997 fmov.x R(%a6),%fp0 # no exception
9998 mov.b &1,Sc_Flag(%a6)
9999 bra.b ModOrRem
10000 No_Scale:
10001 mov.l %d1,R_Hi(%a6)
10002 mov.l %d2,R_Lo(%a6)
10003 sub.l &0x3FFE,%d0
10004 mov.w %d0,R(%a6)
10005 mov.l L_SCR1(%a6),%d6
10006 sub.l &0x3FFE,%d6
10007 mov.l %d6,L_SCR1(%a6)
10008 fmov.x R(%a6),%fp0
10009 mov.w %d6,Y(%a6)
10010 mov.l %d4,Y_Hi(%a6)
10011 mov.l %d5,Y_Lo(%a6)
10012 clr.b Sc_Flag(%a6)
10015 ModOrRem:
10016 tst.b Mod_Flag(%a6) # 0 = MOD: R is already the answer
10017 beq.b Fix_Sign
# REM: compare R against Y/2 (exponent first, then mantissa)
10019 mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
10020 subq.l &1,%d6 # biased expo(Y/2)
10021 cmp.l %d0,%d6
10022 blt.b Fix_Sign # R < Y/2
10023 bgt.b Last_Sub # R > Y/2
10025 cmp.l %d1,%d4
10026 bne.b Not_EQ
10027 cmp.l %d2,%d5
10028 bne.b Not_EQ
10029 bra.w Tie_Case # R = Y/2 exactly
10031 Not_EQ:
10032 bcs.b Fix_Sign # borrow from the last compare: R < Y/2
10034 Last_Sub:
10036 fsub.x Y(%a6),%fp0 # no exceptions
10037 addq.l &1,%d3 # Q := Q + 1
10040 Fix_Sign:
10041 #..Get sign of X
10042 mov.w SignX(%a6),%d6
10043 bge.b Get_Q # sign bit clear: keep fp0 as is
10044 fneg.x %fp0
10046 #..Get Q
10048 Get_Q:
10049 clr.l %d6
10050 mov.w SignQ(%a6),%d6 # D6 is sign(Q)
10051 mov.l &8,%d7
10052 lsr.l %d7,%d6 # move sign from bit 15 down to bit 7
10053 and.l &0x0000007F,%d3 # 7 bits of Q
10054 or.l %d6,%d3 # sign and bits of Q
10055 # swap %d3
10056 # fmov.l %fpsr,%d6
10057 # and.l &0xFF00FFFF,%d6
10058 # or.l %d3,%d6
10059 # fmov.l %d6,%fpsr # put Q in fpsr
10060 mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
10063 Restore:
10064 movm.l (%sp)+,&0xfc # {%d2-%d7}
10065 mov.l (%sp)+,%d0 # ctrl bits pushed at smod/srem entry
10066 fmov.l %d0,%fpcr
10067 tst.b Sc_Flag(%a6)
10068 beq.b Finish
10069 mov.b &FMUL_OP,%d1 # last inst is MUL
10070 fmul.x Scale(%pc),%fp0 # may cause underflow
10071 bra t_catch2
10072 # the '040 package did this apparently to see if the dst operand for the
10073 # preceding fmul was a denorm. but, it better not have been since the
10074 # algorithm just got done playing with fp0 and expected no exceptions
10075 # as a result. trust me...
10076 # bra t_avoid_unsupp # check for denorm as a
10077 # ;result of the scaling
10079 Finish:
10080 mov.b &FMOV_OP,%d1 # last inst is MOVE
10081 fmov.x %fp0,%fp0 # capture exceptions & round
10082 bra t_catch2
10084 Rem_is_0:
10085 #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
10086 addq.l &1,%d3
10087 cmp.l %d0,&8 # D0 is j
10088 bge.b Q_Big
10090 lsl.l %d0,%d3
10091 bra.b Set_R_0
10093 Q_Big:
10094 clr.l %d3 # j >= 8: the 7 reported bits of Q are all zero
10096 Set_R_0:
10097 fmov.s &0x00000000,%fp0
10098 clr.b Sc_Flag(%a6)
10099 bra.w Fix_Sign
10101 Tie_Case:
10102 #..Check parity of Q
10103 mov.l %d3,%d6
10104 and.l &0x00000001,%d6
10105 tst.l %d6
10106 beq.w Fix_Sign # Q is even
10108 #..Q is odd, Q := Q + 1, signX := -signX
10109 addq.l &1,%d3
10110 mov.w SignX(%a6),%d6
10111 eor.l &0x00008000,%d6
10112 mov.w %d6,SignX(%a6)
10113 bra.w Fix_Sign
# default NaN image loaded into fp0 by t_operr
10115 qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
10117 #########################################################################
10118 # XDEF **************************************************************** #
10119 # t_dz(): Handle DZ exception during transcendental emulation. #
10120 # Sets N bit according to sign of source operand. #
10121 # t_dz2(): Handle DZ exception during transcendental emulation. #
10122 # Sets N bit always. #
10124 # XREF **************************************************************** #
10125 # None #
10127 # INPUT *************************************************************** #
10128 # a0 = pointer to source operand #
10130 # OUTPUT ************************************************************** #
10131 # fp0 = default result #
10133 # ALGORITHM *********************************************************** #
10134 # - Store properly signed INF into fp0. #
10135 # - Set FPSR exception status dz bit, ccode inf bit, and #
10136 # accrued dz bit. #
10138 #########################################################################
# t_dz: divide-by-zero result with the sign of the source operand.
#   In:  a0 = pointer to source operand.
#   Out: fp0 = +INF or -INF; I/DZ/ADZ (and N for -INF) set in USER_FPSR.
# Each entry point returns explicitly so the +INF path cannot fall into
# t_dz2 and be overwritten with -INF.
10140 global t_dz
10141 t_dz:
10142 tst.b SRC_EX(%a0) # is src negative?
10143 bmi.b t_dz2 # yes
10145 dz_pinf:
10146 fmov.s &0x7f800000,%fp0 # return +INF in fp0
10147 ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
10148 rts
# t_dz2: always returns -INF and sets the N bit.
10150 global t_dz2
10151 t_dz2:
10152 fmov.s &0xff800000,%fp0 # return -INF in fp0
10153 ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
10154 rts
10156 #################################################################
10157 # OPERR exception: #
10158 # - set FPSR exception status operr bit, condition code #
10159 # nan bit; Store default NAN into fp0 #
10160 #################################################################
# t_operr: set NaN/OPERR/AIOP in USER_FPSR and return the default NaN
# (the qnan image) in fp0. Returns explicitly so control does not run on
# into t_resdnrm.
10161 global t_operr
10162 t_operr:
10163 ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
10164 fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
10165 rts
10167 #################################################################
10168 # Extended DENORM: #
10169 # - For all functions that have a denormalized input and #
10170 # that f(x)=x, this is the entry point. #
10171 # - we only return the EXOP here if either underflow or #
10172 # inexact is enabled. #
10173 #################################################################
10175 # Entry point for scale w/ extended denorm. The function does
10176 # NOT set INEX2/AUNFL/AINEX.
#   In:  a0 = pointer to the denormalized operand, d0 = rnd prec,mode.
#   Out: fp0 = default result; fp1 = EXOP when UNFL or INEX is enabled.
10177 global t_resdnrm
10178 t_resdnrm:
10179 ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
10180 bra.b xdnrm_con
10182 global t_extdnrm
10183 t_extdnrm:
10184 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10186 xdnrm_con:
10187 mov.l %a0,%a1 # make copy of src ptr
10188 mov.l %d0,%d1 # make copy of rnd prec,mode
10189 andi.b &0xc0,%d1 # extended precision?
10190 bne.b xdnrm_sd # no
10192 # result precision is extended.
10193 tst.b LOCAL_EX(%a0) # is denorm negative?
10194 bpl.b xdnrm_exit # no
10196 bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
10197 bra.b xdnrm_exit
10199 # result precision is single or double
10200 xdnrm_sd:
10201 mov.l %a1,-(%sp) # keep the src ptr copy across unf_sub
10202 tst.b LOCAL_EX(%a0) # is denorm pos or neg?
10203 smi.b %d1 # set d1 accordingly
10204 bsr.l unf_sub
10205 mov.l (%sp)+,%a1
10206 xdnrm_exit:
10207 fmovm.x (%a0),&0x80 # return default result in fp0
10209 mov.b FPCR_ENABLE(%a6),%d0
10210 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
10211 bne.b xdnrm_ena # yes
# neither enabled: no EXOP is needed, so return now rather than falling
# into the enabled-exception code below
10212 rts
10214 ################
10215 # unfl enabled #
10216 ################
10217 # we have a DENORM that needs to be converted into an EXOP.
10218 # so, normalize the mantissa, add 0x6000 to the new exponent,
10219 # and return the result in fp1.
10220 xdnrm_ena:
10221 mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6)
10222 mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
10223 mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6)
10225 lea FP_SCR0(%a6),%a0
10226 bsr.l norm # normalize mantissa
# NOTE(review): sscale negates the d0 left by norm before using it as an
# exponent adjustment; here it is added un-negated. Confirm against norm's
# return convention.
10227 addi.l &0x6000,%d0 # add extra bias
10228 andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
10229 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
10231 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10232 rts
10234 #################################################################
10235 # UNFL exception: #
10236 # - This routine is for cases where even an EXOP isn't #
10237 # large enough to hold the range of this result. #
10238 # In such a case, the EXOP equals zero. #
10239 # - Return the default result to the proper precision #
10240 # with the sign of this result being the same as that #
10241 # of the src operand. #
10242 # - t_unfl2() is provided to force the result sign to #
10243 # positive which is the desired result for fetox(). #
10244 #################################################################
# t_unfl(): signed default underflow result.
#   in : a0 -> operand whose first byte carries the result sign
#        d0 =  rnd prec,mode (consumed by unf_sub)
#   out: fp0 = default underflow result, fp1 = EXOP (zero)
10245 global t_unfl
10246 t_unfl:
10247 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10249 tst.b (%a0) # is result pos or neg?
10250 smi.b %d1 # set d1 accordingly
10251 bsr.l unf_sub # calc default unfl result
10252 fmovm.x (%a0),&0x80 # return default result in fp0
10254 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10255 rts # done; must not fall into t_unfl2 (would force a positive result)
10257 # t_unfl2 ALWAYS tells unf_sub to create a positive result
#   in : d0 =  rnd prec,mode (consumed by unf_sub)
#   out: fp0 = positive default underflow result, fp1 = EXOP (zero)
10258 global t_unfl2
10259 t_unfl2:
10260 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10262 sf.b %d1 # clear d1 to request a positive result
10263 bsr.l unf_sub # calc default unfl result
10264 fmovm.x (%a0),&0x80 # return default result in fp0
10266 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10267 rts # done; must not fall into t_ovfl_sc
10269 #################################################################
10270 # OVFL exception: #
10271 # - This routine is for cases where even an EXOP isn't #
10272 # large enough to hold the range of this result. #
10273 # - Return the default result to the proper precision #
10274 # with the sign of this result being the same as that #
10275 # of the src operand. #
10276 # - t_ovfl2() is provided to force the result sign to #
10277 # positive which is the desired result for fcosh(). #
10278 # - t_ovfl_sc() is provided for scale() which only sets #
10279 # the inexact bits if the number is inexact for the #
10280 # precision indicated. #
10281 #################################################################
# t_ovfl_sc(): overflow entry used by scale(). OVFL/AOVFL/AINEX are always
# set; INEX2 is added only when the (normalized) mantissa has bits below the
# rounding precision selected in d0. Every path ends in ovfl_work.
#   in : a0 -> dst operand, d0 = rnd prec,mode
10283 global t_ovfl_sc
10284 t_ovfl_sc:
10285 ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
10287 mov.b %d0,%d1 # fetch rnd mode/prec
10288 andi.b &0xc0,%d1 # extract rnd prec
10289 beq.b ovfl_work # prec is extended
10291 tst.b LOCAL_HI(%a0) # is dst a DENORM?
10292 bmi.b ovfl_sc_norm # no
10294 # dst op is a DENORM. we have to normalize the mantissa to see if the
10295 # result would be inexact for the given precision. make a copy of the
10296 # dst so we don't screw up the version passed to us.
10297 mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
10298 mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
10299 mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
10300 lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
10301 movm.l &0xc080,-(%sp) # save d0-d1/a0
10302 bsr.l norm # normalize mantissa
10303 movm.l (%sp)+,&0x0103 # restore d0-d1/a0
10305 ovfl_sc_norm:
10306 cmpi.b %d1,&0x40 # is prec sgl?
10307 bne.b ovfl_sc_dbl # no; dbl
10308 ovfl_sc_sgl:
10309 tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
10310 bne.b ovfl_sc_inx # yes
10311 tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
10312 bne.b ovfl_sc_inx # yes
10313 bra.b ovfl_work # don't set INEX2
10314 ovfl_sc_dbl:
10315 mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
10316 andi.l &0x7ff,%d1 # dbl mantissa set?
10317 beq.b ovfl_work # no; don't set INEX2
10318 ovfl_sc_inx:
10319 ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
10320 bra.b ovfl_work # continue
# t_ovfl(): signed default overflow result.
#   in : a0 -> operand (sign taken from LOCAL_EX), d0 = rnd prec,mode
#   out: fp0 = default overflow result, fp1 = EXOP (zero),
#        FPSR_CC = ccodes handed back by ovf_res in d0
10322 global t_ovfl
10323 t_ovfl:
10324 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
# ovfl_work is also the common tail of t_ovfl_sc
10326 ovfl_work:
10327 tst.b LOCAL_EX(%a0) # what is the sign?
10328 smi.b %d1 # set d1 accordingly
10329 bsr.l ovf_res # calc default ovfl result
10330 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10331 fmovm.x (%a0),&0x80 # return default result in fp0
10333 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10334 rts # done; must not fall into t_ovfl2 (would force a positive result)
10336 # t_ovfl2 ALWAYS tells ovf_res to create a positive result
#   in : d0 =  rnd prec,mode (consumed by ovf_res)
#   out: fp0 = positive default overflow result, fp1 = EXOP (zero)
10337 global t_ovfl2
10338 t_ovfl2:
10339 ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
10341 sf.b %d1 # clear sign flag for positive
10342 bsr.l ovf_res # calc default ovfl result
10343 mov.b %d0,FPSR_CC(%a6) # insert new ccodes
10344 fmovm.x (%a0),&0x80 # return default result in fp0
10346 fmov.s &0x00000000,%fp1 # return EXOP in fp1
10347 rts # done; must not fall into t_catch
10349 #################################################################
10350 # t_catch(): #
10351 # - the last operation of a transcendental emulation #
10352 # routine may have caused an underflow or overflow. #
10353 # we find out if this occurred by doing an fsave and #
10354 # checking the exception bit. if one did occur, then we #
10355 # jump to fgen_except() which creates the default #
10356 # result and EXOP for us. #
10357 #################################################################
# t_catch(): fsave the FPU state and test the sign bit of frame byte 2.
# If it is set, go to catch (frame still on the stack); otherwise drop the
# 0xc-byte frame and continue straight into t_inx2, which follows.
10358 global t_catch
10359 t_catch:
10361 fsave -(%sp) # push FPU state frame
10362 tst.b 0x2(%sp) # exception bit set in the frame?
10363 bmi.b catch # yes; frame stays on the stack for catch
10364 add.l &0xc,%sp # no; discard the frame
10366 #################################################################
10367 # INEX2 exception: #
10368 # - The inex2 and ainex bits are set. #
10369 #################################################################
# t_inx2(): set INEX2/AINEX and pick the ccodes from the FPU condition
# codes left by the last FP operation: negative -> t_minx2,
# zero -> inx2_zero, anything else falls through to t_pinx2.
10370 global t_inx2
10371 t_inx2:
10372 fblt.w t_minx2 # FP ccodes: less than zero
10373 fbeq.w inx2_zero # FP ccodes: equal to zero
# t_pinx2(): positive result; only INEX2/AINEX are set.
10375 global t_pinx2
10376 t_pinx2:
10377 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10378 rts # done; must not fall into t_minx2 (would set 'N')
# t_minx2(): negative result; also sets the 'N' ccode.
10380 global t_minx2
10381 t_minx2:
10382 ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
10383 rts # done; must not fall into inx2_zero (would overwrite ccodes)
# inx2_zero: zero result; ccodes are replaced by 'Z' alone.
10385 inx2_zero:
10386 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10387 ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
10388 rts # done; must not fall into catch
10390 # an underflow or overflow exception occurred.
10391 # we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
# Entered from t_catch (catch) or t_catch2 (catch2) with the fsave frame
# still on the stack; the frame is released after fgen_except returns.
10392 catch:
10393 ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
10394 catch2:
10395 bsr.l fgen_except # build default result and EXOP
10396 add.l &0xc,%sp # discard the fsave frame
10397 rts # done; must not fall into t_catch2 (would fsave again)
# t_catch2(): like t_catch, but an exception goes to catch2 (INEX2/AINEX
# are not forced) and the no-exception path merges the live FPSR into
# USER_FPSR instead of continuing into t_inx2.
10399 global t_catch2
10400 t_catch2:
10402 fsave -(%sp) # push FPU state frame
10404 tst.b 0x2(%sp) # exception bit set in the frame?
10405 bmi.b catch2 # yes; frame stays on the stack for catch2
10406 add.l &0xc,%sp # no; discard the frame
10408 fmov.l %fpsr,%d0 # fetch resulting status
10409 or.l %d0,USER_FPSR(%a6) # merge it into the user's FPSR
10411 rts # done; must not run on into unf_sub
10413 #########################################################################
10415 #########################################################################
10416 # unf_sub(): underflow default result calculation for transcendentals #
10418 # INPUT: #
10419 # d0 : rnd mode,precision #
10420 # d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
10421 # OUTPUT: #
10422 # a0 : points to result (in instruction memory) #
10423 #########################################################################
# Also sets UNFL/INEX2/AUNFL/AINEX in USER_FPSR and writes FPSR_CC.
# Table index = {sgn, prec, mode}: tbl_unf_cc holds one byte per index,
# tbl_unf_result one 16-byte entry per index. d0 and d1 are modified.
10424 unf_sub:
10425 ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
10427 andi.w &0x10,%d1 # keep sign bit in 4th spot
10429 lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
10430 andi.b &0xf,%d0 # keep the 4-bit {prec,mode} field
10431 or.b %d1,%d0 # concat {sgn,prec,mode}
10433 mov.l %d0,%d1 # make a copy
10434 lsl.b &0x1,%d1 # index*2; the *8 scale below gives 16-byte entries
10436 mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
10437 lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
10438 rts # done; the ccode table data follows immediately
# tbl_unf_cc: ccode byte for each unf_sub index, one row per precision and
# one column per rounding mode; rows line up with tbl_unf_result.
# Rows 0-2 are the positive ext/sgl/dbl results, row 3 is the unused
# precision slot, rows 4-6 are the negative ext/sgl/dbl results.
# 0x4 appears wherever tbl_unf_result holds a ZERO and 0x8 on every
# negative entry (presumably the 'Z' and 'N' ccode bits - confirm against
# the z_bmask/neg_bmask equates).
10440 tbl_unf_cc:
10441 byte 0x4, 0x4, 0x4, 0x0
10442 byte 0x4, 0x4, 0x4, 0x0
10443 byte 0x4, 0x4, 0x4, 0x0
10444 byte 0x0, 0x0, 0x0, 0x0
10445 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10446 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
10447 byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
# tbl_unf_result: default underflow results, one 16-byte entry (three
# operand longwords plus a pad longword) per unf_sub index. Groups of four
# entries cover the four rounding modes of one precision; the first 16
# entries are positive, the last 12 negative. The all-zero group fills the
# unused precision slot so that the negative half starts at index 16.
10449 tbl_unf_result:
10450 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10451 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10452 long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10453 long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10455 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10456 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10457 long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10458 long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10460 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10461 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10462 long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10463 long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10465 long 0x0,0x0,0x0,0x0 # unused precision slot
10466 long 0x0,0x0,0x0,0x0 # unused precision slot
10467 long 0x0,0x0,0x0,0x0 # unused precision slot
10468 long 0x0,0x0,0x0,0x0 # unused precision slot
10470 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10471 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10472 long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
10473 long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
10475 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10476 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10477 long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
10478 long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
10480 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10481 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10482 long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
10483 long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
10485 ############################################################
10487 #########################################################################
10488 # src_zero(): Return signed zero according to sign of src operand. #
10489 #########################################################################
# in: a0 -> src operand. A negative src branches to ld_mzero; a positive
# one continues into ld_pzero, which follows.
10490 global src_zero
10491 src_zero:
10492 tst.b SRC_EX(%a0) # get sign of src operand
10493 bmi.b ld_mzero # if neg, load neg zero
10496 # ld_pzero(): return a positive zero.
#   out: fp0 = +0, FPSR_CC = 'Z'
10498 global ld_pzero
10499 ld_pzero:
10500 fmov.s &0x00000000,%fp0 # load +0
10501 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10502 rts # done; must not fall into ld_mzero (would return -0)
10504 # ld_mzero(): return a negative zero.
#   out: fp0 = -0, FPSR_CC = 'N','Z'
10505 global ld_mzero
10506 ld_mzero:
10507 fmov.s &0x80000000,%fp0 # load -0
10508 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10509 rts # done; must not fall into dst_zero
10511 #########################################################################
10512 # dst_zero(): Return signed zero according to sign of dst operand. #
10513 #########################################################################
# in: a1 -> dst operand. Tail-branches to ld_mzero or ld_pzero.
10514 global dst_zero
10515 dst_zero:
10516 tst.b DST_EX(%a1) # get sign of dst operand
10517 bmi.b ld_mzero # if neg, load neg zero
10518 bra.b ld_pzero # load positive zero
10520 #########################################################################
10521 # src_inf(): Return signed inf according to sign of src operand. #
10522 #########################################################################
# in: a0 -> src operand. A negative src branches to ld_minf; a positive
# one continues into ld_pinf, which follows.
10523 global src_inf
10524 src_inf:
10525 tst.b SRC_EX(%a0) # get sign of src operand
10526 bmi.b ld_minf # if negative branch
10529 # ld_pinf(): return a positive infinity.
#   out: fp0 = +INF, FPSR_CC = 'INF'
10531 global ld_pinf
10532 ld_pinf:
10533 fmov.s &0x7f800000,%fp0 # load +INF
10534 mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10535 rts # done; must not fall into ld_minf (would return -INF)
10538 # ld_minf(): return a negative infinity.
#   out: fp0 = -INF, FPSR_CC = 'N','INF'
10540 global ld_minf
10541 ld_minf:
10542 fmov.s &0xff800000,%fp0 # load -INF
10543 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10544 rts # done; must not fall into dst_inf
10546 #########################################################################
10547 # dst_inf(): Return signed inf according to sign of dst operand. #
10548 #########################################################################
# in: a1 -> dst operand. Tail-branches to ld_minf or ld_pinf.
10549 global dst_inf
10550 dst_inf:
10551 tst.b DST_EX(%a1) # get sign of dst operand
10552 bmi.b ld_minf # if negative branch
10553 bra.b ld_pinf # otherwise load positive infinity
10555 global szr_inf
10556 #################################################################
10557 # szr_inf(): Return +ZERO for a negative src operand or #
10558 # +INF for a positive src operand. #
10559 # Routine used for fetox, ftwotox, and ftentox. #
10560 #################################################################
# in: a0 -> src operand. Tail-branches to ld_pzero or ld_pinf.
10561 szr_inf:
10562 tst.b SRC_EX(%a0) # check sign of source
10563 bmi.b ld_pzero # negative src: result is +0
10564 bra.b ld_pinf # positive src: result is +INF
10566 #########################################################################
10567 # sopr_inf(): Return +INF for a positive src operand or #
10568 # jump to operand error routine for a negative src operand. #
10569 # Routine used for flogn, flognp1, flog10, and flog2. #
10570 #########################################################################
# in: a0 -> src operand. Tail-branches to t_operr or ld_pinf.
10571 global sopr_inf
10572 sopr_inf:
10573 tst.b SRC_EX(%a0) # check sign of source
10574 bmi.w t_operr # negative src: operand error
10575 bra.b ld_pinf # positive src: result is +INF
10577 #################################################################
10578 # setoxm1i(): Return minus one for a negative src operand or #
10579 # positive infinity for a positive src operand. #
10580 # Routine used for fetoxm1. #
10581 #################################################################
# in: a0 -> src operand. Tail-branches to ld_mone or ld_pinf.
10582 global setoxm1i
10583 setoxm1i:
10584 tst.b SRC_EX(%a0) # check sign of source
10585 bmi.b ld_mone # negative src: result is -1
10586 bra.b ld_pinf # positive src: result is +INF
10588 #########################################################################
10589 # src_one(): Return signed one according to sign of src operand. #
10590 #########################################################################
# in: a0 -> src operand. A negative src branches to ld_mone; a positive
# one continues into ld_pone, which follows.
10591 global src_one
10592 src_one:
10593 tst.b SRC_EX(%a0) # check sign of source
10594 bmi.b ld_mone # if negative, load -1
10597 # ld_pone(): return positive one.
#   out: fp0 = +1, FPSR_CC cleared
10599 global ld_pone
10600 ld_pone:
10601 fmov.s &0x3f800000,%fp0 # load +1
10602 clr.b FPSR_CC(%a6) # no ccode bits for a positive, finite, non-zero result
10603 rts # done; must not fall into ld_mone (would return -1)
10606 # ld_mone(): return negative one.
#   out: fp0 = -1, FPSR_CC = 'N'
10608 global ld_mone
10609 ld_mone:
10610 fmov.s &0xbf800000,%fp0 # load -1
10611 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10612 rts # done; the pi/2 constants follow immediately
# +pi/2 and -pi/2 constants, three longwords each, loaded with fmov.x by
# ld_ppi2 and ld_mpi2; they differ only in the sign bit of the first longword.
10614 ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10615 mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10617 #################################################################
10618 # spi_2(): Return signed PI/2 according to sign of src operand. #
10619 #################################################################
# in: a0 -> src operand. A negative src branches to ld_mpi2; a positive
# one continues into ld_ppi2, which follows.
10620 global spi_2
10621 spi_2:
10622 tst.b SRC_EX(%a0) # check sign of source
10623 bmi.b ld_mpi2 # if negative, load -pi/2
10626 # ld_ppi2(): return positive PI/2.
# d0 is written to the FPCR before the load (presumably the caller's
# rounding precision/mode - confirm against callers).
10628 global ld_ppi2
10629 ld_ppi2:
10630 fmov.l %d0,%fpcr # install d0 as the FPCR for the load
10631 fmov.x ppiby2(%pc),%fp0 # load +pi/2
10632 bra.w t_pinx2 # set INEX2
10635 # ld_mpi2(): return negative PI/2.
# d0 is written to the FPCR before the load (presumably the caller's
# rounding precision/mode - confirm against callers).
10637 global ld_mpi2
10638 ld_mpi2:
10639 fmov.l %d0,%fpcr # install d0 as the FPCR for the load
10640 fmov.x mpiby2(%pc),%fp0 # load -pi/2
10641 bra.w t_minx2 # set INEX2
10643 ####################################################
10644 # The following routines give support for fsincos. #
10645 ####################################################
10648 # ssincosz(): When the src operand is ZERO, store a one in the
10649 # cosine register and return a ZERO in fp0 w/ the same sign
10650 # as the src operand.
# in: a0 -> src operand. Both paths finish by branching to sto_cos.
10652 global ssincosz
10653 ssincosz:
10654 fmov.s &0x3f800000,%fp1 # cosine result = +1
10655 tst.b SRC_EX(%a0) # test sign
10656 bpl.b sincoszp # positive src
10657 fmov.s &0x80000000,%fp0 # return sin result in fp0
10658 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10659 bra.b sto_cos # store cosine result
10660 sincoszp:
10661 fmov.s &0x00000000,%fp0 # return sin result in fp0
10662 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10663 bra.b sto_cos # store cosine result
10666 # ssincosi(): When the src operand is INF, store a QNAN in the cosine
10667 # register and jump to the operand error routine. The branch is
10668 # unconditional: the sign of the src operand is not examined here.
10670 global ssincosi
10671 ssincosi:
10672 fmov.x qnan(%pc),%fp1 # load NAN
10673 bsr.l sto_cos # store cosine result
10674 bra.w t_operr # sine result comes from the operand error routine
10677 # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10678 # register and branch to the src QNAN routine.
# in: a0 -> src operand.
10680 global ssincosqnan
10681 ssincosqnan:
10682 fmov.x LOCAL_EX(%a0),%fp1 # fp1 = src operand
10683 bsr.l sto_cos # store cosine result
10684 bra.w src_qnan # sine result: the src QNAN
10687 # ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
10688 # in the cosine register and branch to the src SNAN routine.
# in: a0 -> src operand.
10690 global ssincossnan
10691 ssincossnan:
10692 fmov.x LOCAL_EX(%a0),%fp1 # fp1 = src operand, loaded through an fmove
10693 bsr.l sto_cos # store cosine result
10694 bra.w src_snan # sine result: handled by the src SNAN routine
10696 ########################################################################
10698 #########################################################################
10699 # sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
10700 # fp1 holds the result of the cosine portion of ssincos(). #
10701 # the value in fp1 will not take any exceptions when moved. #
10702 # INPUT: #
10703 # fp1 : fp value to store #
10704 # MODIFIED: #
10705 # d0 #
10706 #########################################################################
# sto_cos(): store fp1 into the destination selected by the low three bits
# of the second EXC_CMDREG byte. Selections 0 and 1 go to EXC_FP0/EXC_FP1 in
# the frame addressed through a6; selections 2-7 are written directly to
# fp2-fp7. d0 is modified.
10707 global sto_cos
10708 sto_cos:
10709 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch low byte of the command word
10710 andi.w &0x7,%d0 # keep the 3-bit register number
10711 mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0 # d0 = offset of the handler
10712 jmp (tbl_sto_cos.b,%pc,%d0.w*1) # dispatch
# 16-bit offsets of the handlers, relative to the table itself
10714 tbl_sto_cos:
10715 short sto_cos_0 - tbl_sto_cos
10716 short sto_cos_1 - tbl_sto_cos
10717 short sto_cos_2 - tbl_sto_cos
10718 short sto_cos_3 - tbl_sto_cos
10719 short sto_cos_4 - tbl_sto_cos
10720 short sto_cos_5 - tbl_sto_cos
10721 short sto_cos_6 - tbl_sto_cos
10722 short sto_cos_7 - tbl_sto_cos
# each handler performs exactly one store and returns
10724 sto_cos_0:
10725 fmovm.x &0x40,EXC_FP0(%a6) # fp1 -> EXC_FP0 slot
10726 rts
10727 sto_cos_1:
10728 fmovm.x &0x40,EXC_FP1(%a6) # fp1 -> EXC_FP1 slot
10729 rts
10730 sto_cos_2:
10731 fmov.x %fp1,%fp2
10732 rts
10733 sto_cos_3:
10734 fmov.x %fp1,%fp3
10735 rts
10736 sto_cos_4:
10737 fmov.x %fp1,%fp4
10738 rts
10739 sto_cos_5:
10740 fmov.x %fp1,%fp5
10741 rts
10742 sto_cos_6:
10743 fmov.x %fp1,%fp6
10744 rts
10745 sto_cos_7:
10746 fmov.x %fp1,%fp7
10747 rts
10749 ##################################################################
# smod_snorm()/smod_sdnrm(): second-level fmod dispatch, reached from fmod
# for its NORM and DENORM src cases; routes on the dst operand tag (DTAG).
10750 global smod_sdnrm
10751 global smod_snorm
10752 smod_sdnrm:
10753 smod_snorm:
10754 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10755 beq.l smod # zero tag: do the real fmod
10756 cmpi.b %d1,&ZERO
10757 beq.w smod_zro
10758 cmpi.b %d1,&INF
10759 beq.l t_operr
10760 cmpi.b %d1,&DENORM
10761 beq.l smod
10762 cmpi.b %d1,&SNAN
10763 beq.l dst_snan
10764 bra.l dst_qnan # no match above: handled as a dst QNAN
# smod_szero(): second-level fmod dispatch, reached from fmod for its ZERO
# src case. Every non-NAN dst tag is an operand error.
10766 global smod_szero
10767 smod_szero:
10768 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10769 beq.l t_operr # zero tag
10770 cmpi.b %d1,&ZERO
10771 beq.l t_operr
10772 cmpi.b %d1,&INF
10773 beq.l t_operr
10774 cmpi.b %d1,&DENORM
10775 beq.l t_operr
10776 cmpi.b %d1,&QNAN
10777 beq.l dst_qnan
10778 bra.l dst_snan # no match above: handled as a dst SNAN
# smod_sinf(): second-level fmod dispatch, reached from fmod for its INF
# src case; routes on the dst operand tag (DTAG).
10780 global smod_sinf
10781 smod_sinf:
10782 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10783 beq.l smod_fpn # zero tag: result is the dst itself
10784 cmpi.b %d1,&ZERO
10785 beq.l smod_zro
10786 cmpi.b %d1,&INF
10787 beq.l t_operr
10788 cmpi.b %d1,&DENORM
10789 beq.l smod_fpn
10790 cmpi.b %d1,&QNAN
10791 beq.l dst_qnan
10792 bra.l dst_snan # no match above: handled as a dst SNAN
# smod_zro/srem_zro: the dst operand is ZERO. Record the quotient sign
# (src sign XOR dst sign) in bit 7 of FPSR_QBYTE and return a zero that
# carries the sign of the dst operand.
#   in: a0 -> src operand, a1 -> dst operand. d0 and d1 are modified.
10794 smod_zro:
10795 srem_zro:
10796 mov.b SRC_EX(%a0),%d1 # get src sign
10797 mov.b DST_EX(%a1),%d0 # get dst sign
10798 eor.b %d0,%d1 # get qbyte sign
10799 andi.b &0x80,%d1 # keep only the sign bit
10800 mov.b %d1,FPSR_QBYTE(%a6) # quotient byte = sign, zero magnitude
10801 tst.b %d0 # dst positive or negative?
10802 bpl.w ld_pzero
10803 bra.w ld_mzero
# smod_fpn/srem_fpn: reached when the src tag is INF and the dst tag is zero
# or DENORM; the result is the dst operand itself.
# Records the quotient sign (src sign XOR dst sign) in FPSR_QBYTE. A DENORM
# dst is handed to t_resdnrm with a0 -> dst and d0 restored; any other dst
# is loaded into fp0 under the FPCR value that was passed in d0.
#   in: a0 -> src operand, a1 -> dst operand, d0 = value to load into FPCR
10805 smod_fpn:
10806 srem_fpn:
10807 clr.b FPSR_QBYTE(%a6)
10808 mov.l %d0,-(%sp) # save d0; it is used as scratch below
10809 mov.b SRC_EX(%a0),%d1 # get src sign
10810 mov.b DST_EX(%a1),%d0 # get dst sign
10811 eor.b %d0,%d1 # get qbyte sign
10812 andi.b &0x80,%d1 # keep only the sign bit
10813 mov.b %d1,FPSR_QBYTE(%a6)
10814 cmpi.b DTAG(%a6),&DENORM
10815 bne.b smod_nrm
10816 lea DST(%a1),%a0 # pass: ptr to the denormalized dst
10817 mov.l (%sp)+,%d0 # restore d0
10818 bra t_resdnrm
10819 smod_nrm:
10820 fmov.l (%sp)+,%fpcr # saved d0 becomes the FPCR
10821 fmov.x DST(%a1),%fp0 # result = dst operand
10822 tst.b DST_EX(%a1) # is the result negative?
10823 bmi.b smod_nrm_neg # yes
10824 rts # positive: done; must not fall into the 'N' path
10826 smod_nrm_neg:
10827 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
10828 rts
10830 #########################################################################
# srem_snorm()/srem_sdnrm(): second-level frem dispatch, reached from frem
# for its NORM and DENORM src cases; routes on the dst operand tag (DTAG).
10831 global srem_snorm
10832 global srem_sdnrm
10833 srem_sdnrm:
10834 srem_snorm:
10835 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10836 beq.l srem # zero tag: do the real frem
10837 cmpi.b %d1,&ZERO
10838 beq.w srem_zro
10839 cmpi.b %d1,&INF
10840 beq.l t_operr
10841 cmpi.b %d1,&DENORM
10842 beq.l srem
10843 cmpi.b %d1,&QNAN
10844 beq.l dst_qnan
10845 bra.l dst_snan # no match above: handled as a dst SNAN
# srem_szero(): second-level frem dispatch, reached from frem for its ZERO
# src case. Every non-NAN dst tag is an operand error.
10847 global srem_szero
10848 srem_szero:
10849 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10850 beq.l t_operr # zero tag
10851 cmpi.b %d1,&ZERO
10852 beq.l t_operr
10853 cmpi.b %d1,&INF
10854 beq.l t_operr
10855 cmpi.b %d1,&DENORM
10856 beq.l t_operr
10857 cmpi.b %d1,&QNAN
10858 beq.l dst_qnan
10859 bra.l dst_snan # no match above: handled as a dst SNAN
# srem_sinf(): second-level frem dispatch, reached from frem for its INF
# src case; routes on the dst operand tag (DTAG).
10861 global srem_sinf
10862 srem_sinf:
10863 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10864 beq.w srem_fpn # zero tag: result is the dst itself
10865 cmpi.b %d1,&ZERO
10866 beq.w srem_zro
10867 cmpi.b %d1,&INF
10868 beq.l t_operr
10869 cmpi.b %d1,&DENORM
10870 beq.l srem_fpn
10871 cmpi.b %d1,&QNAN
10872 beq.l dst_qnan
10873 bra.l dst_snan # no match above: handled as a dst SNAN
10875 #########################################################################
# sscale_snorm()/sscale_sdnrm(): second-level fscale dispatch, reached from
# fscale for its NORM and DENORM src cases; routes on the dst operand tag.
10876 global sscale_snorm
10877 global sscale_sdnrm
10878 sscale_snorm:
10879 sscale_sdnrm:
10880 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10881 beq.l sscale # zero tag: do the real fscale
10882 cmpi.b %d1,&ZERO
10883 beq.l dst_zero
10884 cmpi.b %d1,&INF
10885 beq.l dst_inf
10886 cmpi.b %d1,&DENORM
10887 beq.l sscale
10888 cmpi.b %d1,&QNAN
10889 beq.l dst_qnan
10890 bra.l dst_snan # no match above: handled as a dst SNAN
# sscale_szero(): second-level fscale dispatch, reached from fscale for its
# ZERO src case. The branch targets are the same as in sscale_snorm.
10892 global sscale_szero
10893 sscale_szero:
10894 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10895 beq.l sscale # zero tag: do the real fscale
10896 cmpi.b %d1,&ZERO
10897 beq.l dst_zero
10898 cmpi.b %d1,&INF
10899 beq.l dst_inf
10900 cmpi.b %d1,&DENORM
10901 beq.l sscale
10902 cmpi.b %d1,&QNAN
10903 beq.l dst_qnan
10904 bra.l dst_snan # no match above: handled as a dst SNAN
# sscale_sinf(): second-level fscale dispatch, reached from fscale for its
# INF src case. Only a NAN dst escapes the operand error.
10906 global sscale_sinf
10907 sscale_sinf:
10908 mov.b DTAG(%a6),%d1 # d1 = dst tag; the mov sets Z for a zero tag
10909 beq.l t_operr # zero tag
10910 cmpi.b %d1,&QNAN
10911 beq.l dst_qnan
10912 cmpi.b %d1,&SNAN
10913 beq.l dst_snan
10914 bra.l t_operr # every remaining dst tag: operand error
10916 ########################################################################
10919 # sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
# A NAN in the dst operand takes priority; the src QNAN is returned only
# when the dst is not a NAN.
10921 global sop_sqnan
10922 sop_sqnan:
10923 mov.b DTAG(%a6),%d1 # d1 = dst tag
10924 cmpi.b %d1,&QNAN
10925 beq.b dst_qnan
10926 cmpi.b %d1,&SNAN
10927 beq.b dst_snan
10928 bra.b src_qnan # dst is not a NAN
10931 # sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
# A NAN in the dst operand takes priority. When the dst is a QNAN it is
# returned, but the SNAN status bits are set first on behalf of the src.
10933 global sop_ssnan
10934 sop_ssnan:
10935 mov.b DTAG(%a6),%d1 # d1 = dst tag
10936 cmpi.b %d1,&QNAN
10937 beq.b dst_qnan_src_snan
10938 cmpi.b %d1,&SNAN
10939 beq.b dst_snan
10940 bra.b src_snan # dst is not a NAN
10942 dst_qnan_src_snan:
10943 ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
10944 bra.b dst_qnan # then return the dst QNAN
10947 # dst_snan(): Return the dst SNAN w/ the SNAN bit set.
#   in : a1 -> dst operand
#   out: fp0 = result of moving the dst operand; the FPSR produced by that
#        move is merged into USER_FPSR. d0 is modified.
10949 global dst_snan
10950 dst_snan:
10951 fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
10952 fmov.l %fpsr,%d0 # catch resulting status
10953 or.l %d0,USER_FPSR(%a6) # store status
10954 rts # done; must not fall into dst_qnan (would rewrite FPSR_CC)
10957 # dst_qnan(): Return the dst QNAN.
#   in : a1 -> dst operand
#   out: fp0 = dst operand, FPSR_CC = 'NAN' (plus 'N' when it is negative)
10959 global dst_qnan
10960 dst_qnan:
10961 fmov.x DST(%a1),%fp0 # return the non-signalling nan
10962 tst.b DST_EX(%a1) # set ccodes according to QNAN sign
10963 bmi.b dst_qnan_m
10964 dst_qnan_p:
10965 mov.b &nan_bmask,FPSR_CC(%a6) # positive: 'NAN' only
10966 rts # done; must not fall into the negative case
10967 dst_qnan_m:
10968 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # negative: 'N','NAN'
10969 rts # done; must not fall into src_snan
10972 # src_snan(): Return the src SNAN w/ the SNAN bit set.
#   in : a0 -> src operand
#   out: fp0 = result of moving the src operand; the FPSR produced by that
#        move is merged into USER_FPSR. d0 is modified.
10974 global src_snan
10975 src_snan:
10976 fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
10977 fmov.l %fpsr,%d0 # catch resulting status
10978 or.l %d0,USER_FPSR(%a6) # store status
10979 rts # done; must not fall into src_qnan (would rewrite FPSR_CC)
10982 # src_qnan(): Return the src QNAN.
#   in : a0 -> src operand
#   out: fp0 = src operand, FPSR_CC = 'NAN' (plus 'N' when it is negative)
10984 global src_qnan
10985 src_qnan:
10986 fmov.x SRC(%a0),%fp0 # return the non-signalling nan
10987 tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
10988 bmi.b src_qnan_m # use the local negative case (same effect as dst_qnan_m)
10989 src_qnan_p:
10990 mov.b &nan_bmask,FPSR_CC(%a6) # positive: 'NAN' only
10991 rts # done; must not fall into the negative case
10992 src_qnan_m:
10993 mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # negative: 'N','NAN'
10994 rts # done; must not run on into fsinh
10997 # fkern2.s:
10998 # These entry points are used by the exception handler
10999 # routines where an instruction is selected by an index into
11000 # a large jump table corresponding to a given instruction which
11001 # has been decoded. Flow continues here where we now decode
11002 # further according to the source operand type.
# fsinh(): route on the src operand tag (STAG). A zero tag (presumably NORM
# - confirm against the tag equates) and DENORM go to the sinh code; ZERO
# and INF return the src-signed zero / infinity.
11005 global fsinh
11006 fsinh:
11007 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11008 beq.l ssinh # zero tag
11009 cmpi.b %d1,&ZERO
11010 beq.l src_zero
11011 cmpi.b %d1,&INF
11012 beq.l src_inf
11013 cmpi.b %d1,&DENORM
11014 beq.l ssinhd
11015 cmpi.b %d1,&QNAN
11016 beq.l src_qnan
11017 bra.l src_snan # no match above: handled as a src SNAN
# flognp1(): route on the src operand tag (STAG). ZERO returns the
# src-signed zero; INF goes through sopr_inf.
11019 global flognp1
11020 flognp1:
11021 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11022 beq.l slognp1 # zero tag
11023 cmpi.b %d1,&ZERO
11024 beq.l src_zero
11025 cmpi.b %d1,&INF
11026 beq.l sopr_inf
11027 cmpi.b %d1,&DENORM
11028 beq.l slognp1d
11029 cmpi.b %d1,&QNAN
11030 beq.l src_qnan
11031 bra.l src_snan # no match above: handled as a src SNAN
# fetoxm1(): route on the src operand tag (STAG). ZERO returns the
# src-signed zero; INF goes through setoxm1i.
11033 global fetoxm1
11034 fetoxm1:
11035 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11036 beq.l setoxm1 # zero tag
11037 cmpi.b %d1,&ZERO
11038 beq.l src_zero
11039 cmpi.b %d1,&INF
11040 beq.l setoxm1i
11041 cmpi.b %d1,&DENORM
11042 beq.l setoxm1d
11043 cmpi.b %d1,&QNAN
11044 beq.l src_qnan
11045 bra.l src_snan # no match above: handled as a src SNAN
# ftanh(): route on the src operand tag (STAG). ZERO returns the src-signed
# zero; INF returns the src-signed one.
11047 global ftanh
11048 ftanh:
11049 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11050 beq.l stanh # zero tag
11051 cmpi.b %d1,&ZERO
11052 beq.l src_zero
11053 cmpi.b %d1,&INF
11054 beq.l src_one
11055 cmpi.b %d1,&DENORM
11056 beq.l stanhd
11057 cmpi.b %d1,&QNAN
11058 beq.l src_qnan
11059 bra.l src_snan # no match above: handled as a src SNAN
# fatan(): route on the src operand tag (STAG). ZERO returns the src-signed
# zero; INF returns the src-signed pi/2.
11061 global fatan
11062 fatan:
11063 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11064 beq.l satan # zero tag
11065 cmpi.b %d1,&ZERO
11066 beq.l src_zero
11067 cmpi.b %d1,&INF
11068 beq.l spi_2
11069 cmpi.b %d1,&DENORM
11070 beq.l satand
11071 cmpi.b %d1,&QNAN
11072 beq.l src_qnan
11073 bra.l src_snan # no match above: handled as a src SNAN
# fasin(): route on the src operand tag (STAG). ZERO returns the src-signed
# zero; INF is an operand error.
11075 global fasin
11076 fasin:
11077 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11078 beq.l sasin # zero tag
11079 cmpi.b %d1,&ZERO
11080 beq.l src_zero
11081 cmpi.b %d1,&INF
11082 beq.l t_operr
11083 cmpi.b %d1,&DENORM
11084 beq.l sasind
11085 cmpi.b %d1,&QNAN
11086 beq.l src_qnan
11087 bra.l src_snan # no match above: handled as a src SNAN
# fatanh(): route on the src operand tag (STAG). ZERO returns the
# src-signed zero; INF is an operand error.
11089 global fatanh
11090 fatanh:
11091 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11092 beq.l satanh # zero tag
11093 cmpi.b %d1,&ZERO
11094 beq.l src_zero
11095 cmpi.b %d1,&INF
11096 beq.l t_operr
11097 cmpi.b %d1,&DENORM
11098 beq.l satanhd
11099 cmpi.b %d1,&QNAN
11100 beq.l src_qnan
11101 bra.l src_snan # no match above: handled as a src SNAN
# fsine(): route on the src operand tag (STAG) for the sine operation.
# ZERO returns the src-signed zero; INF is an operand error.
11103 global fsine
11104 fsine:
11105 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11106 beq.l ssin # zero tag
11107 cmpi.b %d1,&ZERO
11108 beq.l src_zero
11109 cmpi.b %d1,&INF
11110 beq.l t_operr
11111 cmpi.b %d1,&DENORM
11112 beq.l ssind
11113 cmpi.b %d1,&QNAN
11114 beq.l src_qnan
11115 bra.l src_snan # no match above: handled as a src SNAN
# ftan(): route on the src operand tag (STAG). ZERO returns the src-signed
# zero; INF is an operand error.
11117 global ftan
11118 ftan:
11119 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11120 beq.l stan # zero tag
11121 cmpi.b %d1,&ZERO
11122 beq.l src_zero
11123 cmpi.b %d1,&INF
11124 beq.l t_operr
11125 cmpi.b %d1,&DENORM
11126 beq.l stand
11127 cmpi.b %d1,&QNAN
11128 beq.l src_qnan
11129 bra.l src_snan # no match above: handled as a src SNAN
# fetox(): route on the src operand tag (STAG). ZERO returns +1; INF goes
# through szr_inf.
11131 global fetox
11132 fetox:
11133 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11134 beq.l setox # zero tag
11135 cmpi.b %d1,&ZERO
11136 beq.l ld_pone
11137 cmpi.b %d1,&INF
11138 beq.l szr_inf
11139 cmpi.b %d1,&DENORM
11140 beq.l setoxd
11141 cmpi.b %d1,&QNAN
11142 beq.l src_qnan
11143 bra.l src_snan # no match above: handled as a src SNAN
# ftwotox(): route on the src operand tag (STAG). ZERO returns +1; INF goes
# through szr_inf.
11145 global ftwotox
11146 ftwotox:
11147 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11148 beq.l stwotox # zero tag
11149 cmpi.b %d1,&ZERO
11150 beq.l ld_pone
11151 cmpi.b %d1,&INF
11152 beq.l szr_inf
11153 cmpi.b %d1,&DENORM
11154 beq.l stwotoxd
11155 cmpi.b %d1,&QNAN
11156 beq.l src_qnan
11157 bra.l src_snan # no match above: handled as a src SNAN
# ftentox(): route on the src operand tag (STAG). ZERO returns +1; INF goes
# through szr_inf.
11159 global ftentox
11160 ftentox:
11161 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11162 beq.l stentox # zero tag
11163 cmpi.b %d1,&ZERO
11164 beq.l ld_pone
11165 cmpi.b %d1,&INF
11166 beq.l szr_inf
11167 cmpi.b %d1,&DENORM
11168 beq.l stentoxd
11169 cmpi.b %d1,&QNAN
11170 beq.l src_qnan
11171 bra.l src_snan # no match above: handled as a src SNAN
# flogn(): route on the src operand tag (STAG). ZERO goes to t_dz2; INF
# goes through sopr_inf.
11173 global flogn
11174 flogn:
11175 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11176 beq.l slogn # zero tag
11177 cmpi.b %d1,&ZERO
11178 beq.l t_dz2
11179 cmpi.b %d1,&INF
11180 beq.l sopr_inf
11181 cmpi.b %d1,&DENORM
11182 beq.l slognd
11183 cmpi.b %d1,&QNAN
11184 beq.l src_qnan
11185 bra.l src_snan # no match above: handled as a src SNAN
# flog10(): route on the src operand tag (STAG). ZERO goes to t_dz2; INF
# goes through sopr_inf.
11187 global flog10
11188 flog10:
11189 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11190 beq.l slog10 # zero tag
11191 cmpi.b %d1,&ZERO
11192 beq.l t_dz2
11193 cmpi.b %d1,&INF
11194 beq.l sopr_inf
11195 cmpi.b %d1,&DENORM
11196 beq.l slog10d
11197 cmpi.b %d1,&QNAN
11198 beq.l src_qnan
11199 bra.l src_snan # no match above: handled as a src SNAN
# flog2(): route on the src operand tag (STAG). ZERO goes to t_dz2; INF
# goes through sopr_inf.
11201 global flog2
11202 flog2:
11203 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11204 beq.l slog2 # zero tag
11205 cmpi.b %d1,&ZERO
11206 beq.l t_dz2
11207 cmpi.b %d1,&INF
11208 beq.l sopr_inf
11209 cmpi.b %d1,&DENORM
11210 beq.l slog2d
11211 cmpi.b %d1,&QNAN
11212 beq.l src_qnan
11213 bra.l src_snan # no match above: handled as a src SNAN
# fcosh(): route on the src operand tag (STAG). ZERO returns +1; INF
# returns +INF whatever the src sign.
11215 global fcosh
11216 fcosh:
11217 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11218 beq.l scosh # zero tag
11219 cmpi.b %d1,&ZERO
11220 beq.l ld_pone
11221 cmpi.b %d1,&INF
11222 beq.l ld_pinf
11223 cmpi.b %d1,&DENORM
11224 beq.l scoshd
11225 cmpi.b %d1,&QNAN
11226 beq.l src_qnan
11227 bra.l src_snan # no match above: handled as a src SNAN
# facos(): route on the src operand tag (STAG). ZERO returns +pi/2; INF is
# an operand error.
11229 global facos
11230 facos:
11231 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11232 beq.l sacos # zero tag
11233 cmpi.b %d1,&ZERO
11234 beq.l ld_ppi2
11235 cmpi.b %d1,&INF
11236 beq.l t_operr
11237 cmpi.b %d1,&DENORM
11238 beq.l sacosd
11239 cmpi.b %d1,&QNAN
11240 beq.l src_qnan
11241 bra.l src_snan # no match above: handled as a src SNAN
# fcos(): route on the src operand tag (STAG). ZERO returns +1; INF is an
# operand error.
11243 global fcos
11244 fcos:
11245 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11246 beq.l scos # zero tag
11247 cmpi.b %d1,&ZERO
11248 beq.l ld_pone
11249 cmpi.b %d1,&INF
11250 beq.l t_operr
11251 cmpi.b %d1,&DENORM
11252 beq.l scosd
11253 cmpi.b %d1,&QNAN
11254 beq.l src_qnan
11255 bra.l src_snan # no match above: handled as a src SNAN
# fgetexp(): route on the src operand tag (STAG). ZERO returns the
# src-signed zero; INF is an operand error.
11257 global fgetexp
11258 fgetexp:
11259 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11260 beq.l sgetexp # zero tag
11261 cmpi.b %d1,&ZERO
11262 beq.l src_zero
11263 cmpi.b %d1,&INF
11264 beq.l t_operr
11265 cmpi.b %d1,&DENORM
11266 beq.l sgetexpd
11267 cmpi.b %d1,&QNAN
11268 beq.l src_qnan
11269 bra.l src_snan # no match above: handled as a src SNAN
# fgetman(): route on the src operand tag (STAG). ZERO returns the
# src-signed zero; INF is an operand error.
11271 global fgetman
11272 fgetman:
11273 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11274 beq.l sgetman # zero tag
11275 cmpi.b %d1,&ZERO
11276 beq.l src_zero
11277 cmpi.b %d1,&INF
11278 beq.l t_operr
11279 cmpi.b %d1,&DENORM
11280 beq.l sgetmand
11281 cmpi.b %d1,&QNAN
11282 beq.l src_qnan
11283 bra.l src_snan # no match above: handled as a src SNAN
# fsincos(): route on the src operand tag (STAG). Every special case has
# its own ssincos* handler because the cosine register must be stored too.
11285 global fsincos
11286 fsincos:
11287 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11288 beq.l ssincos # zero tag
11289 cmpi.b %d1,&ZERO
11290 beq.l ssincosz
11291 cmpi.b %d1,&INF
11292 beq.l ssincosi
11293 cmpi.b %d1,&DENORM
11294 beq.l ssincosd
11295 cmpi.b %d1,&QNAN
11296 beq.l ssincosqnan
11297 bra.l ssincossnan # no match above: handled as a src SNAN
# fmod(): first-level dispatch on the src operand tag (STAG); each smod_s*
# target then dispatches on the dst operand tag.
11299 global fmod
11300 fmod:
11301 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11302 beq.l smod_snorm # zero tag
11303 cmpi.b %d1,&ZERO
11304 beq.l smod_szero
11305 cmpi.b %d1,&INF
11306 beq.l smod_sinf
11307 cmpi.b %d1,&DENORM
11308 beq.l smod_sdnrm
11309 cmpi.b %d1,&QNAN
11310 beq.l sop_sqnan
11311 bra.l sop_ssnan # no match above: handled as a src SNAN
# frem(): first-level dispatch on the src operand tag (STAG); each srem_s*
# target then dispatches on the dst operand tag.
11313 global frem
11314 frem:
11315 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11316 beq.l srem_snorm # zero tag
11317 cmpi.b %d1,&ZERO
11318 beq.l srem_szero
11319 cmpi.b %d1,&INF
11320 beq.l srem_sinf
11321 cmpi.b %d1,&DENORM
11322 beq.l srem_sdnrm
11323 cmpi.b %d1,&QNAN
11324 beq.l sop_sqnan
11325 bra.l sop_ssnan # no match above: handled as a src SNAN
# fscale(): first-level dispatch on the src operand tag (STAG); each
# sscale_s* target then dispatches on the dst operand tag.
11327 global fscale
11328 fscale:
11329 mov.b STAG(%a6),%d1 # d1 = src tag; the mov sets Z for a zero tag
11330 beq.l sscale_snorm # zero tag
11331 cmpi.b %d1,&ZERO
11332 beq.l sscale_szero
11333 cmpi.b %d1,&INF
11334 beq.l sscale_sinf
11335 cmpi.b %d1,&DENORM
11336 beq.l sscale_sdnrm
11337 cmpi.b %d1,&QNAN
11338 beq.l sop_sqnan
11339 bra.l sop_ssnan # no match above: handled as a src SNAN
11341 #########################################################################
11342 # XDEF **************************************************************** #
11343 # fgen_except(): catch an exception during transcendental #
11344 # emulation #
11346 # XREF **************************************************************** #
11347 # fmul() - emulate a multiply instruction #
11348 # fadd() - emulate an add instruction #
11349 # fin() - emulate an fmove instruction #
11351 # INPUT *************************************************************** #
11352 # fp0 = destination operand #
11353 # d1 = type of instruction that took exception #
11354 # fsave frame = source operand #
11356 # OUTPUT ************************************************************** #
11357 # fp0 = result #
11358 # fp1 = EXOP #
11360 # ALGORITHM *********************************************************** #
11361 # An exception occurred on the last instruction of the #
11362 # transcendental emulation. hopefully, this won't be happening much #
11363 # because it will be VERY slow. #
11364 # The only exceptions capable of passing through here are #
11365 # Overflow, Underflow, and Unsupported Data Type. #
11367 #########################################################################
# Exactly one of fmul/fadd/fin is called, selected by d1, and control then
# returns to the caller.
# NOTE(review): this routine is entered through bsr.l, and the src operand
# pointer is taken as 0x4(%sp), so the byte tested at 0x3(%sp) lies below
# the fsave frame, inside the return address. This may have been meant as
# 0x7(%sp) - confirm against the fsave frame layout before changing it.
11369 global fgen_except
11370 fgen_except:
11371 cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
11372 beq.b fge_unsupp # yes
11374 mov.b &NORM,STAG(%a6) # no; src is tagged NORM
11376 fge_cont:
11377 mov.b &NORM,DTAG(%a6) # dst is always tagged NORM
11379 # ok, I have a problem with putting the dst op at FP_DST. the emulation
11380 # routines aren't supposed to alter the operands but we've just squashed
11381 # FP_DST here...
11383 # 8/17/93 - this turns out to be more of a "cleanliness" standpoint
11384 # then a potential bug. to begin with, only the dyadic functions
11385 # frem,fmod, and fscale would get the dst trashed here. But, for
11386 # the 060SP, the FP_DST is never used again anyways.
11387 fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
11389 lea 0x4(%sp),%a0 # pass: ptr to src op
11390 lea FP_DST(%a6),%a1 # pass: ptr to dst op
11392 cmpi.b %d1,&FMOV_OP
11393 beq.b fge_fin # it was an "fmov"
11394 cmpi.b %d1,&FADD_OP
11395 beq.b fge_fadd # it was an "fadd"
11396 fge_fmul:
11397 bsr.l fmul # anything else is emulated as a multiply
11398 rts # done; must not go on to call fadd and fin
11399 fge_fadd:
11400 bsr.l fadd
11401 rts # done; must not go on to call fin
11402 fge_fin:
11403 bsr.l fin
11404 rts # done; must not fall into fge_unsupp (would loop back to fge_cont)
11406 fge_unsupp:
11407 mov.b &DENORM,STAG(%a6) # unsupported data type: src is tagged DENORM
11408 bra.b fge_cont
11411 # This table holds the offsets of the emulation routines for each individual
11412 # math operation relative to the address of this table. Included are
11413 # routines like fadd/fmul/fabs as well as the transcendentals.
11414 # The location within the table is determined by the extension bits of the
11415 # operation longword.
# Slots written as "tbl_unsupp - tbl_unsupp" hold a zero offset, i.e. no
# emulation routine is listed for that index. The table has 109 (0x6d)
# longword entries, matching the count given to swbeg below.
11418 swbeg &109
11419 tbl_unsupp:
11420 long fin - tbl_unsupp # 00: fmove
11421 long fint - tbl_unsupp # 01: fint
11422 long fsinh - tbl_unsupp # 02: fsinh
11423 long fintrz - tbl_unsupp # 03: fintrz
11424 long fsqrt - tbl_unsupp # 04: fsqrt
11425 long tbl_unsupp - tbl_unsupp # 05: no routine (offset 0)
11426 long flognp1 - tbl_unsupp # 06: flognp1
11427 long tbl_unsupp - tbl_unsupp # 07: no routine (offset 0)
11428 long fetoxm1 - tbl_unsupp # 08: fetoxm1
11429 long ftanh - tbl_unsupp # 09: ftanh
11430 long fatan - tbl_unsupp # 0a: fatan
11431 long tbl_unsupp - tbl_unsupp # 0b: no routine (offset 0)
11432 long fasin - tbl_unsupp # 0c: fasin
11433 long fatanh - tbl_unsupp # 0d: fatanh
11434 long fsine - tbl_unsupp # 0e: fsin
11435 long ftan - tbl_unsupp # 0f: ftan
11436 long fetox - tbl_unsupp # 10: fetox
11437 long ftwotox - tbl_unsupp # 11: ftwotox
11438 long ftentox - tbl_unsupp # 12: ftentox
11439 long tbl_unsupp - tbl_unsupp # 13: no routine (offset 0)
11440 long flogn - tbl_unsupp # 14: flogn
11441 long flog10 - tbl_unsupp # 15: flog10
11442 long flog2 - tbl_unsupp # 16: flog2
11443 long tbl_unsupp - tbl_unsupp # 17: no routine (offset 0)
11444 long fabs - tbl_unsupp # 18: fabs
11445 long fcosh - tbl_unsupp # 19: fcosh
11446 long fneg - tbl_unsupp # 1a: fneg
11447 long tbl_unsupp - tbl_unsupp # 1b: no routine (offset 0)
11448 long facos - tbl_unsupp # 1c: facos
11449 long fcos - tbl_unsupp # 1d: fcos
11450 long fgetexp - tbl_unsupp # 1e: fgetexp
11451 long fgetman - tbl_unsupp # 1f: fgetman
11452 long fdiv - tbl_unsupp # 20: fdiv
11453 long fmod - tbl_unsupp # 21: fmod
11454 long fadd - tbl_unsupp # 22: fadd
11455 long fmul - tbl_unsupp # 23: fmul
11456 long fsgldiv - tbl_unsupp # 24: fsgldiv
11457 long frem - tbl_unsupp # 25: frem
11458 long fscale - tbl_unsupp # 26: fscale
11459 long fsglmul - tbl_unsupp # 27: fsglmul
11460 long fsub - tbl_unsupp # 28: fsub
11461 long tbl_unsupp - tbl_unsupp # 29: no routine (offset 0)
11462 long tbl_unsupp - tbl_unsupp # 2a: no routine (offset 0)
11463 long tbl_unsupp - tbl_unsupp # 2b: no routine (offset 0)
11464 long tbl_unsupp - tbl_unsupp # 2c: no routine (offset 0)
11465 long tbl_unsupp - tbl_unsupp # 2d: no routine (offset 0)
11466 long tbl_unsupp - tbl_unsupp # 2e: no routine (offset 0)
11467 long tbl_unsupp - tbl_unsupp # 2f: no routine (offset 0)
11468 long fsincos - tbl_unsupp # 30: fsincos
11469 long fsincos - tbl_unsupp # 31: fsincos
11470 long fsincos - tbl_unsupp # 32: fsincos
11471 long fsincos - tbl_unsupp # 33: fsincos
11472 long fsincos - tbl_unsupp # 34: fsincos
11473 long fsincos - tbl_unsupp # 35: fsincos
11474 long fsincos - tbl_unsupp # 36: fsincos
11475 long fsincos - tbl_unsupp # 37: fsincos
11476 long fcmp - tbl_unsupp # 38: fcmp
11477 long tbl_unsupp - tbl_unsupp # 39: no routine (offset 0)
11478 long ftst - tbl_unsupp # 3a: ftst
11479 long tbl_unsupp - tbl_unsupp # 3b: no routine (offset 0)
11480 long tbl_unsupp - tbl_unsupp # 3c: no routine (offset 0)
11481 long tbl_unsupp - tbl_unsupp # 3d: no routine (offset 0)
11482 long tbl_unsupp - tbl_unsupp # 3e: no routine (offset 0)
11483 long tbl_unsupp - tbl_unsupp # 3f: no routine (offset 0)
11484 long fsin - tbl_unsupp # 40: fsmove
11485 long fssqrt - tbl_unsupp # 41: fssqrt
11486 long tbl_unsupp - tbl_unsupp # 42: no routine (offset 0)
11487 long tbl_unsupp - tbl_unsupp # 43: no routine (offset 0)
11488 long fdin - tbl_unsupp # 44: fdmove
11489 long fdsqrt - tbl_unsupp # 45: fdsqrt
11490 long tbl_unsupp - tbl_unsupp # 46: no routine (offset 0)
11491 long tbl_unsupp - tbl_unsupp # 47: no routine (offset 0)
11492 long tbl_unsupp - tbl_unsupp # 48: no routine (offset 0)
11493 long tbl_unsupp - tbl_unsupp # 49: no routine (offset 0)
11494 long tbl_unsupp - tbl_unsupp # 4a: no routine (offset 0)
11495 long tbl_unsupp - tbl_unsupp # 4b: no routine (offset 0)
11496 long tbl_unsupp - tbl_unsupp # 4c: no routine (offset 0)
11497 long tbl_unsupp - tbl_unsupp # 4d: no routine (offset 0)
11498 long tbl_unsupp - tbl_unsupp # 4e: no routine (offset 0)
11499 long tbl_unsupp - tbl_unsupp # 4f: no routine (offset 0)
11500 long tbl_unsupp - tbl_unsupp # 50: no routine (offset 0)
11501 long tbl_unsupp - tbl_unsupp # 51: no routine (offset 0)
11502 long tbl_unsupp - tbl_unsupp # 52: no routine (offset 0)
11503 long tbl_unsupp - tbl_unsupp # 53: no routine (offset 0)
11504 long tbl_unsupp - tbl_unsupp # 54: no routine (offset 0)
11505 long tbl_unsupp - tbl_unsupp # 55: no routine (offset 0)
11506 long tbl_unsupp - tbl_unsupp # 56: no routine (offset 0)
11507 long tbl_unsupp - tbl_unsupp # 57: no routine (offset 0)
11508 long fsabs - tbl_unsupp # 58: fsabs
11509 long tbl_unsupp - tbl_unsupp # 59: no routine (offset 0)
11510 long fsneg - tbl_unsupp # 5a: fsneg
11511 long tbl_unsupp - tbl_unsupp # 5b: no routine (offset 0)
11512 long fdabs - tbl_unsupp # 5c: fdabs
11513 long tbl_unsupp - tbl_unsupp # 5d: no routine (offset 0)
11514 long fdneg - tbl_unsupp # 5e: fdneg
11515 long tbl_unsupp - tbl_unsupp # 5f: no routine (offset 0)
11516 long fsdiv - tbl_unsupp # 60: fsdiv
11517 long tbl_unsupp - tbl_unsupp # 61: no routine (offset 0)
11518 long fsadd - tbl_unsupp # 62: fsadd
11519 long fsmul - tbl_unsupp # 63: fsmul
11520 long fddiv - tbl_unsupp # 64: fddiv
11521 long tbl_unsupp - tbl_unsupp # 65: no routine (offset 0)
11522 long fdadd - tbl_unsupp # 66: fdadd
11523 long fdmul - tbl_unsupp # 67: fdmul
11524 long fssub - tbl_unsupp # 68: fssub
11525 long tbl_unsupp - tbl_unsupp # 69: no routine (offset 0)
11526 long tbl_unsupp - tbl_unsupp # 6a: no routine (offset 0)
11527 long tbl_unsupp - tbl_unsupp # 6b: no routine (offset 0)
11528 long fdsub - tbl_unsupp # 6c: fdsub
11530 #########################################################################
11531 # XDEF **************************************************************** #
11532 # fmul(): emulates the fmul instruction #
11533 # fsmul(): emulates the fsmul instruction #
11534 # fdmul(): emulates the fdmul instruction #
11536 # XREF **************************************************************** #
11537 # scale_to_zero_src() - scale src exponent to zero #
11538 # scale_to_zero_dst() - scale dst exponent to zero #
11539 # unf_res() - return default underflow result #
11540 # ovf_res() - return default overflow result #
11541 # res_qnan() - return QNAN result #
11542 # res_snan() - return SNAN result #
11544 # INPUT *************************************************************** #
11545 # a0 = pointer to extended precision source operand #
11546 # a1 = pointer to extended precision destination operand #
11547 # d0 = rnd prec,mode #
11549 # OUTPUT ************************************************************** #
11550 # fp0 = result #
11551 # fp1 = EXOP (if exception occurred) #
11553 # ALGORITHM *********************************************************** #
11554 # Handle NANs, infinities, and zeroes as special cases. Divide #
11555 # norms/denorms into ext/sgl/dbl precision. #
11556 # For norms/denorms, scale the exponents such that a multiply #
11557 # instruction won't cause an exception. Use the regular fmul to #
11558 # compute a result. Check if the regular operands would have taken #
11559 # an exception. If so, return the default overflow/underflow result #
11560 # and return the EXOP if exceptions are enabled. Else, scale the #
11561 # result operand to the proper exponent. #
11563 #########################################################################
# Threshold tables for fmul. Each holds three longwords (ext, sgl, dbl)
# and is indexed by the rounding precision extracted from L_SCR3 below.
# The combined scale factor in d0 is compared against these entries.
11565 align 0x10
11566 tbl_fmul_ovfl:
11567 long 0x3fff - 0x7ffe # ext_max
11568 long 0x3fff - 0x407e # sgl_max
11569 long 0x3fff - 0x43fe # dbl_max
11570 tbl_fmul_unfl:
11571 long 0x3fff + 0x0001 # ext_unfl
11572 long 0x3fff - 0x3f80 # sgl_unfl
11573 long 0x3fff - 0x3c00 # dbl_unfl
# fsmul: force single precision in d0, then join the common fmul code.
11575 global fsmul
11576 fsmul:
11577 andi.b &0x30,%d0 # clear rnd prec
11578 ori.b &s_mode*0x10,%d0 # insert sgl prec
11579 bra.b fmul
# fdmul: force double precision in d0, then fall through into fmul.
11581 global fdmul
11582 fdmul:
11583 andi.b &0x30,%d0 # clear rnd prec
11584 ori.b &d_mode*0x10,%d0 # insert dbl prec
# fmul: common entry. a0 = ptr to src op, a1 = ptr to dst op, d0 = rnd info.
11586 global fmul
11587 fmul:
11588 mov.l %d0,L_SCR3(%a6) # store rnd info
11590 clr.w %d1
11591 mov.b DTAG(%a6),%d1
11592 lsl.b &0x3,%d1 # dst tag selects the group of 8 in tbl_fmul_op
11593 or.b STAG(%a6),%d1 # combine dst,src tags into one table index
11594 bne.w fmul_not_norm # optimize on non-norm input
# index 0 (NORM x NORM) falls through; denorm cases re-enter here via tbl_fmul_op
11596 fmul_norm:
11597 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
11598 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11599 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11601 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
11602 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11603 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11605 bsr.l scale_to_zero_src # scale src exponent
11606 mov.l %d0,-(%sp) # save scale factor 1
11608 bsr.l scale_to_zero_dst # scale dst exponent
11610 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
11612 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
11613 lsr.b &0x6,%d1 # shift to lo bits
11614 mov.l (%sp)+,%d0 # load S.F.
11615 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
11616 beq.w fmul_may_ovfl # result may rnd to overflow
11617 blt.w fmul_ovfl # result will overflow
11619 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
11620 beq.w fmul_may_unfl # result may rnd to no unfl
11621 bgt.w fmul_unfl # result will underflow
# otherwise fall through into fmul_normal
11624 # NORMAL:
11625 # - the result of the multiply operation will neither overflow nor underflow.
11626 # - do the multiply to the proper precision and rounding mode.
11627 # - scale the result exponent using the scale factor. if both operands were
11628 # normalized then we really don't need to go through this scaling. but for now,
11629 # this will do.
# - d0 holds the combined scale factor on entry. fmul_normal_exit is also a
# branch target of the may-overflow/may-underflow paths (result in fp0).
# - the routine ends with rts; without it execution would run on into
# fmul_ovfl and repeat the multiply as an overflow.
11631 fmul_normal:
11632 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11634 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11635 fmov.l &0x0,%fpsr # clear FPSR
11637 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11639 fmov.l %fpsr,%d1 # save status
11640 fmov.l &0x0,%fpcr # clear FPCR
11642 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11644 fmul_normal_exit:
11645 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11646 mov.l %d2,-(%sp) # save d2
11647 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
11648 mov.l %d1,%d2 # make a copy
11649 andi.l &0x7fff,%d1 # strip sign
11650 andi.w &0x8000,%d2 # keep old sign
11651 sub.l %d0,%d1 # exp - scale factor (unscale the result)
11652 or.w %d2,%d1 # concat old sign,new exp
11653 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11654 mov.l (%sp)+,%d2 # restore d2
11655 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
11656 rts # done; do not fall through into fmul_ovfl
11659 # OVERFLOW:
11660 # - the result of the multiply operation is an overflow.
11661 # - do the multiply to the proper precision and rounding mode in order to
11662 # set the inexact bits.
11663 # - calculate the default result and return it in fp0.
11664 # - if overflow or inexact is enabled, we need a multiply result rounded to
11665 # extended precision. if the original operation was extended, then we have this
11666 # result. if the original operation was single or double, we have to do another
11667 # multiply using extended precision and the correct rounding mode. the result
11668 # of this operation then has its exponent scaled by -0x6000 to create the
11669 # exceptional operand.
# - d0 holds the combined scale factor on entry.
11671 fmul_ovfl:
11672 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11674 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11675 fmov.l &0x0,%fpsr # clear FPSR
11677 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11679 fmov.l %fpsr,%d1 # save status
11680 fmov.l &0x0,%fpcr # clear FPCR
11682 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11684 # save setting this until now because this is where fmul_may_ovfl may jump in
11685 fmul_ovfl_tst:
11686 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11688 mov.b FPCR_ENABLE(%a6),%d1
11689 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11690 bne.b fmul_ovfl_ena # yes
11692 # calculate the default result
11693 fmul_ovfl_dis:
11694 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11695 sne %d1 # set sign param accordingly
11696 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
11697 bsr.l ovf_res # calculate default result
11698 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11699 fmovm.x (%a0),&0x80 # return default result in fp0
11700 rts # done; falling into fmul_ovfl_ena would loop back here forever
11703 # OVFL is enabled; Create EXOP:
11704 # - if precision is extended, then we have the EXOP. simply bias the exponent
11705 # with an extra -0x6000. if the precision is single or double, we need to
11706 # calculate a result rounded to extended precision.
11708 fmul_ovfl_ena:
11709 mov.l L_SCR3(%a6),%d1
11710 andi.b &0xc0,%d1 # test the rnd prec
11711 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
11713 fmul_ovfl_ena_cont:
11714 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
11716 mov.l %d2,-(%sp) # save d2
11717 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11718 mov.w %d1,%d2 # make a copy
11719 andi.l &0x7fff,%d1 # strip sign
11720 sub.l %d0,%d1 # exp - scale factor (unscale the result)
11721 subi.l &0x6000,%d1 # subtract bias
11722 andi.w &0x7fff,%d1 # clear sign bit
11723 andi.w &0x8000,%d2 # keep old sign
11724 or.w %d2,%d1 # concat old sign,new exp
11725 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11726 mov.l (%sp)+,%d2 # restore d2
11727 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11728 bra.b fmul_ovfl_dis # now build the default result for fp0
# sgl/dbl: redo the multiply with the rounding mode only, so the EXOP
# candidate in fp0 is not rounded to the sgl/dbl precision.
11730 fmul_ovfl_ena_sd:
11731 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11733 mov.l L_SCR3(%a6),%d1
11734 andi.b &0x30,%d1 # keep rnd mode only
11735 fmov.l %d1,%fpcr # set FPCR
11737 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11739 fmov.l &0x0,%fpcr # clear FPCR
11740 bra.b fmul_ovfl_ena_cont
11743 # may OVERFLOW:
11744 # - the result of the multiply operation MAY overflow.
11745 # - do the multiply to the proper precision and rounding mode in order to
11746 # set the inexact bits.
11747 # - calculate the default result and return it in fp0.
# - reached when the scale factor equals the tbl_fmul_ovfl entry; the
# magnitude of the scaled product decides which way to go.
11749 fmul_may_ovfl:
11750 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11752 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11753 fmov.l &0x0,%fpsr # clear FPSR
11755 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11757 fmov.l %fpsr,%d1 # save status
11758 fmov.l &0x0,%fpcr # clear FPCR
11760 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11762 fabs.x %fp0,%fp1 # make a copy of result
11763 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
11764 fbge.w fmul_ovfl_tst # yes; overflow has occurred
11766 # no, it didn't overflow; we have correct result
11767 bra.w fmul_normal_exit
11770 # UNDERFLOW:
11771 # - the result of the multiply operation is an underflow.
11772 # - do the multiply to the proper precision and rounding mode in order to
11773 # set the inexact bits.
11774 # - calculate the default result and return it in fp0.
11775 # - if underflow or inexact is enabled, we need a multiply result rounded to
11776 # extended precision. if the original operation was extended, then we have this
11777 # result. if the original operation was single or double, we have to do another
11778 # multiply using extended precision and the correct rounding mode. the result
11779 # of this operation then has its exponent scaled by +0x6000 to create the
11780 # exceptional operand.
# - d0 holds the combined scale factor on entry.
11782 fmul_unfl:
11783 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11785 # for fun, let's use only extended precision, round to zero. then, let
11786 # the unf_res() routine figure out all the rest.
11787 # will we get the correct answer?
11788 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11790 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11791 fmov.l &0x0,%fpsr # clear FPSR
11793 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11795 fmov.l %fpsr,%d1 # save status
11796 fmov.l &0x0,%fpcr # clear FPCR
11798 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11800 mov.b FPCR_ENABLE(%a6),%d1
11801 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11802 bne.b fmul_unfl_ena # yes
11804 fmul_unfl_dis:
11805 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11807 lea FP_SCR0(%a6),%a0 # pass: result addr
11808 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11809 bsr.l unf_res # calculate default result
11810 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
11811 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11812 rts # done; falling into fmul_unfl_ena would loop back here forever
11815 # UNFL is enabled.
11817 fmul_unfl_ena:
11818 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11820 mov.l L_SCR3(%a6),%d1
11821 andi.b &0xc0,%d1 # is precision extended?
11822 bne.b fmul_unfl_ena_sd # no, sgl or dbl
11824 # if the rnd mode is anything but RZ, then we have to re-do the above
11825 # multiplication because we used RZ for all.
11826 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11828 fmul_unfl_ena_cont:
11829 fmov.l &0x0,%fpsr # clear FPSR
11831 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11833 fmov.l &0x0,%fpcr # clear FPCR
11835 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11836 mov.l %d2,-(%sp) # save d2
11837 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11838 mov.l %d1,%d2 # make a copy
11839 andi.l &0x7fff,%d1 # strip sign
11840 andi.w &0x8000,%d2 # keep old sign
11841 sub.l %d0,%d1 # exp - scale factor (unscale the result)
11842 addi.l &0x6000,%d1 # add bias
11843 andi.w &0x7fff,%d1 # clear sign bit
11844 or.w %d2,%d1 # concat old sign,new exp
11845 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11846 mov.l (%sp)+,%d2 # restore d2
11847 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11848 bra.w fmul_unfl_dis # now build the default result from fp0
11850 fmul_unfl_ena_sd:
11851 mov.l L_SCR3(%a6),%d1
11852 andi.b &0x30,%d1 # use only rnd mode
11853 fmov.l %d1,%fpcr # set FPCR
11855 bra.b fmul_unfl_ena_cont
11857 # MAY UNDERFLOW:
11858 # -use the correct rounding mode and precision. this code favors operations
11859 # that do not underflow.
# -reached when the scale factor equals the tbl_fmul_unfl entry.
11860 fmul_may_unfl:
11861 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
11863 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11864 fmov.l &0x0,%fpsr # clear FPSR
11866 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
11868 fmov.l %fpsr,%d1 # save status
11869 fmov.l &0x0,%fpcr # clear FPCR
11871 or.l %d1,USER_FPSR(%a6) # save INEX2,N
11873 fabs.x %fp0,%fp1 # make a copy of result
11874 fcmp.b %fp1,&0x2 # compare |result| with 2.b
11875 fbgt.w fmul_normal_exit # |result| > 2; no underflow occurred
11876 fblt.w fmul_unfl # |result| < 2; underflow occurred
11879 # we still don't know if underflow occurred. result is ~ equal to 2. but,
11880 # we don't know if the result was an underflow that rounded up to a 2 or
11881 # a normalized number that rounded down to a 2. so, redo the entire operation
11882 # using RZ as the rounding mode to see what the pre-rounded result is.
11883 # this case should be relatively rare.
11885 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
11887 mov.l L_SCR3(%a6),%d1
11888 andi.b &0xc0,%d1 # keep rnd prec
11889 ori.b &rz_mode*0x10,%d1 # insert RZ
11891 fmov.l %d1,%fpcr # set FPCR
11892 fmov.l &0x0,%fpsr # clear FPSR
11894 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
11896 fmov.l &0x0,%fpcr # clear FPCR
11897 fabs.x %fp1 # make absolute value
11898 fcmp.b %fp1,&0x2 # is |result| < 2.b?
11899 fbge.w fmul_normal_exit # no; no underflow occurred
11900 bra.w fmul_unfl # yes, underflow occurred
11902 ################################################################################
11905 # Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry. It indexes tbl_fmul_op, whose 16-bit
# entries are handler offsets relative to the table itself. Each group of
# eight entries is one destination type; comments read "dst x src".
11907 fmul_not_norm:
11908 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1 # fetch handler offset
11909 jmp (tbl_fmul_op.b,%pc,%d1.w) # jump to handler
11911 swbeg &48
11912 tbl_fmul_op:
11913 short fmul_norm - tbl_fmul_op # NORM x NORM
11914 short fmul_zero - tbl_fmul_op # NORM x ZERO
11915 short fmul_inf_src - tbl_fmul_op # NORM x INF
11916 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
11917 short fmul_norm - tbl_fmul_op # NORM x DENORM
11918 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
11919 short tbl_fmul_op - tbl_fmul_op #
11920 short tbl_fmul_op - tbl_fmul_op #
11922 short fmul_zero - tbl_fmul_op # ZERO x NORM
11923 short fmul_zero - tbl_fmul_op # ZERO x ZERO
11924 short fmul_res_operr - tbl_fmul_op # ZERO x INF
11925 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
11926 short fmul_zero - tbl_fmul_op # ZERO x DENORM
11927 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
11928 short tbl_fmul_op - tbl_fmul_op #
11929 short tbl_fmul_op - tbl_fmul_op #
11931 short fmul_inf_dst - tbl_fmul_op # INF x NORM
11932 short fmul_res_operr - tbl_fmul_op # INF x ZERO
11933 short fmul_inf_dst - tbl_fmul_op # INF x INF
11934 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
11935 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
11936 short fmul_res_snan - tbl_fmul_op # INF x SNAN
11937 short tbl_fmul_op - tbl_fmul_op #
11938 short tbl_fmul_op - tbl_fmul_op #
11940 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
11941 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
11942 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
11943 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
11944 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
11945 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
11946 short tbl_fmul_op - tbl_fmul_op #
11947 short tbl_fmul_op - tbl_fmul_op #
11949 short fmul_norm - tbl_fmul_op # DENORM x NORM
11950 short fmul_zero - tbl_fmul_op # DENORM x ZERO
11951 short fmul_inf_src - tbl_fmul_op # DENORM x INF
11952 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
11953 short fmul_norm - tbl_fmul_op # DENORM x DENORM
11954 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
11955 short tbl_fmul_op - tbl_fmul_op #
11956 short tbl_fmul_op - tbl_fmul_op #
11958 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
11959 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
11960 short fmul_res_snan - tbl_fmul_op # SNAN x INF
11961 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
11962 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
11963 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
11964 short tbl_fmul_op - tbl_fmul_op #
11965 short tbl_fmul_op - tbl_fmul_op #
# long-branch stubs so the 16-bit table offsets can reach the shared handlers
11967 fmul_res_operr:
11968 bra.l res_operr
11969 fmul_res_snan:
11970 bra.l res_snan
11971 fmul_res_qnan:
11972 bra.l res_qnan
11975 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# Returns a zero in fp0 whose sign is the exclusive-or of the operand signs
# and sets the matching condition codes. Each sign case ends in its own rts;
# otherwise the -ZERO case would run on and be overwritten by +ZERO.
11977 global fmul_zero # global for fsglmul
11978 fmul_zero:
11979 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11980 mov.b DST_EX(%a1),%d1
11981 eor.b %d0,%d1
11982 bpl.b fmul_zero_p # result ZERO is pos.
11983 fmul_zero_n:
11984 fmov.s &0x80000000,%fp0 # load -ZERO
11985 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
11986 rts # return -ZERO; do not fall through into fmul_zero_p
11987 fmul_zero_p:
11988 fmov.s &0x00000000,%fp0 # load +ZERO
11989 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11990 rts # return +ZERO
11993 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
11995 # Note: The j-bit for an infinity is a don't-care. However, to be
11996 # strictly compatible w/ the 68881/882, we make sure to return an
11997 # INF w/ the j-bit set if the input INF j-bit was set. Destination
11998 # INFs take priority.
# The result sign is the exclusive-or of the operand signs. Each sign case
# ends in its own rts; otherwise the negative case would run on into
# fmul_inf_dst_p and come back as +INF.
12000 global fmul_inf_dst # global for fsglmul
12001 fmul_inf_dst:
12002 fmovm.x DST(%a1),&0x80 # return INF result in fp0
12003 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12004 mov.b DST_EX(%a1),%d1
12005 eor.b %d0,%d1
12006 bpl.b fmul_inf_dst_p # result INF is pos.
12007 fmul_inf_dst_n:
12008 fabs.x %fp0 # clear result sign
12009 fneg.x %fp0 # set result sign
12010 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12011 rts # return -INF; do not fall through into fmul_inf_dst_p
12012 fmul_inf_dst_p:
12013 fabs.x %fp0 # clear result sign
12014 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12015 rts # return +INF; do not fall through into fmul_inf_src
# source operand is the INF: load it, then share the sign handling above
12017 global fmul_inf_src # global for fsglmul
12018 fmul_inf_src:
12019 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
12020 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
12021 mov.b DST_EX(%a1),%d1
12022 eor.b %d0,%d1
12023 bpl.b fmul_inf_dst_p # result INF is pos.
12024 bra.b fmul_inf_dst_n # result INF is neg.
12026 #########################################################################
12027 # XDEF **************************************************************** #
12028 # fin(): emulates the fmove instruction #
12029 # fsin(): emulates the fsmove instruction #
12030 # fdin(): emulates the fdmove instruction #
12032 # XREF **************************************************************** #
12033 # norm() - normalize mantissa for EXOP on denorm #
12034 # scale_to_zero_src() - scale src exponent to zero #
12035 # ovf_res() - return default overflow result #
12036 # unf_res() - return default underflow result #
12037 # res_qnan_1op() - return QNAN result #
12038 # res_snan_1op() - return SNAN result #
12040 # INPUT *************************************************************** #
12041 # a0 = pointer to extended precision source operand #
12042 # d0 = round prec/mode #
12044 # OUTPUT ************************************************************** #
12045 # fp0 = result #
12046 # fp1 = EXOP (if exception occurred) #
12048 # ALGORITHM *********************************************************** #
12049 # Handle NANs, infinities, and zeroes as special cases. Divide #
12050 # norms into extended, single, and double precision. #
12051 # Norms can be emulated w/ a regular fmove instruction. For #
12052 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
12053 # if the result would have overflowed/underflowed. If so, use unf_res() #
12054 # or ovf_res() to return the default result. Also return EXOP if #
12055 # exception is enabled. If no exception, return the default result. #
12056 # Unnorms don't pass through here. #
12058 #########################################################################
# fsin: force single precision in d0, then join the common fin code.
12060 global fsin
12061 fsin:
12062 andi.b &0x30,%d0 # clear rnd prec
12063 ori.b &s_mode*0x10,%d0 # insert sgl precision
12064 bra.b fin
# fdin: force double precision in d0, then fall through into fin.
12066 global fdin
12067 fdin:
12068 andi.b &0x30,%d0 # clear rnd prec
12069 ori.b &d_mode*0x10,%d0 # insert dbl precision
# fin: common entry. a0 = ptr to src op, d0 = rnd prec/mode.
12071 global fin
12072 fin:
12073 mov.l %d0,L_SCR3(%a6) # store rnd info
12075 mov.b STAG(%a6),%d1 # fetch src optype tag
12076 bne.w fin_not_norm # optimize on non-norm input
12079 # FP MOVE IN: NORMs and DENORMs ONLY!
12081 fin_norm:
12082 andi.b &0xc0,%d0 # is precision extended?
12083 bne.w fin_not_ext # no, so go handle dbl or sgl
12086 # precision selected is extended. so...we cannot get an underflow
12087 # or overflow because of rounding to the correct precision. so...
12088 # skip the scaling and unscaling...
12090 tst.b SRC_EX(%a0) # is the operand negative?
12091 bpl.b fin_norm_done # no
12092 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12093 fin_norm_done:
12094 fmovm.x SRC(%a0),&0x80 # return result in fp0
12095 rts # done; do not fall through into fin_denorm
12098 # for an extended precision DENORM, the UNFL exception bit is set
12099 # the accrued bit is NOT set in this instance(no inexactness!)
12101 fin_denorm:
12102 andi.b &0xc0,%d0 # is precision extended?
12103 bne.w fin_not_ext # no, so go handle dbl or sgl
12105 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12106 tst.b SRC_EX(%a0) # is the operand negative?
12107 bpl.b fin_denorm_done # no
12108 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
12109 fin_denorm_done:
12110 fmovm.x SRC(%a0),&0x80 # return result in fp0
12111 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12112 bne.b fin_denorm_unfl_ena # yes
12113 rts # UNFL disabled: no EXOP needed, so return here
12116 # the input is an extended DENORM and underflow is enabled in the FPCR.
12117 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12118 # exponent and insert back into the operand.
12120 fin_denorm_unfl_ena:
12121 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
12122 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12123 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12124 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12125 bsr.l norm # normalize result
12126 neg.w %d0 # new exponent = -(shft val)
12127 addi.w &0x6000,%d0 # add new bias to exponent
12128 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12129 andi.w &0x8000,%d1 # keep old sign
12130 andi.w &0x7fff,%d0 # clear sign position
12131 or.w %d1,%d0 # concat new exp,old sign
12132 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12133 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12134 rts # done; do not fall through into fin_not_ext
12137 # operand is to be rounded to single or double precision
12139 fin_not_ext:
12140 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12141 bne.b fin_dbl
12144 # operand is to be rounded to single precision
12146 fin_sgl:
12147 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
12148 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12149 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12150 bsr.l scale_to_zero_src # calculate scale factor
12152 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12153 bge.w fin_sd_unfl # yes; go handle underflow
12154 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12155 beq.w fin_sd_may_ovfl # maybe; go check
12156 blt.w fin_sd_ovfl # yes; go handle overflow
12159 # operand will NOT overflow or underflow when moved into the fp reg file
# (shared by the sgl and dbl paths; d0 still holds the scale factor)
12161 fin_sd_normal:
12162 fmov.l &0x0,%fpsr # clear FPSR
12163 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12165 fmov.x FP_SCR0(%a6),%fp0 # perform move
12167 fmov.l %fpsr,%d1 # save FPSR
12168 fmov.l &0x0,%fpcr # clear FPCR
12170 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12172 fin_sd_normal_exit:
12173 mov.l %d2,-(%sp) # save d2
12174 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12175 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12176 mov.w %d1,%d2 # make a copy
12177 andi.l &0x7fff,%d1 # strip sign
12178 sub.l %d0,%d1 # exp - scale factor (unscale the result)
12179 andi.w &0x8000,%d2 # keep old sign
12180 or.w %d1,%d2 # concat old sign,new exponent
12181 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12182 mov.l (%sp)+,%d2 # restore d2
12183 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12184 rts # done; do not fall through into fin_dbl
12187 # operand is to be rounded to double precision
# same flow as fin_sgl, but with the double-precision exponent limits
12189 fin_dbl:
12190 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
12191 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12192 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12193 bsr.l scale_to_zero_src # calculate scale factor
12195 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12196 bge.w fin_sd_unfl # yes; go handle underflow
12197 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12198 beq.w fin_sd_may_ovfl # maybe; go check
12199 blt.w fin_sd_ovfl # yes; go handle overflow
12200 bra.w fin_sd_normal # no; go handle normalized op
12203 # operand WILL underflow when moved in to the fp register file
# d0 holds the scale factor from scale_to_zero_src on entry.
12205 fin_sd_unfl:
12206 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12208 tst.b FP_SCR0_EX(%a6) # is operand negative?
12209 bpl.b fin_sd_unfl_tst
12210 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12212 # if underflow or inexact is enabled, then go calculate the EXOP first.
12213 fin_sd_unfl_tst:
12214 mov.b FPCR_ENABLE(%a6),%d1
12215 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12216 bne.b fin_sd_unfl_ena # yes
12218 fin_sd_unfl_dis:
12219 lea FP_SCR0(%a6),%a0 # pass: result addr
12220 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12221 bsr.l unf_res # calculate default result
12222 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
12223 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12224 rts
12227 # operand will underflow AND underflow or inexact is enabled.
12228 # therefore, we must return the result rounded to extended precision.
12230 fin_sd_unfl_ena:
12231 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # build the EXOP in FP_SCR1
12232 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12233 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12235 mov.l %d2,-(%sp) # save d2
12236 mov.w %d1,%d2 # make a copy
12237 andi.l &0x7fff,%d1 # strip sign
12238 sub.l %d0,%d1 # subtract scale factor
12239 andi.w &0x8000,%d2 # extract old sign
12240 addi.l &0x6000,%d1 # add new bias
12241 andi.w &0x7fff,%d1 # clear sign bit
12242 or.w %d1,%d2 # concat old sign,new exp
12243 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
12244 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12245 mov.l (%sp)+,%d2 # restore d2
12246 bra.b fin_sd_unfl_dis # now build the default result for fp0
12249 # operand WILL overflow.
# d0 holds the scale factor from scale_to_zero_src on entry.
12251 fin_sd_ovfl:
12252 fmov.l &0x0,%fpsr # clear FPSR
12253 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12255 fmov.x FP_SCR0(%a6),%fp0 # perform move
12257 fmov.l &0x0,%fpcr # clear FPCR
12258 fmov.l %fpsr,%d1 # save FPSR
12260 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# fin_sd_may_ovfl branches in here once it has detected the overflow
12262 fin_sd_ovfl_tst:
12263 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12265 mov.b FPCR_ENABLE(%a6),%d1
12266 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12267 bne.b fin_sd_ovfl_ena # yes
12270 # OVFL is not enabled; therefore, we must create the default result by
12271 # calling ovf_res().
12273 fin_sd_ovfl_dis:
12274 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12275 sne %d1 # set sign param accordingly
12276 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12277 bsr.l ovf_res # calculate default result
12278 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12279 fmovm.x (%a0),&0x80 # return default result in fp0
12280 rts # done; falling into fin_sd_ovfl_ena would loop back here forever
12283 # OVFL is enabled.
12284 # the INEX2 bit has already been updated by the round to the correct precision.
12285 # now, round to extended(and don't alter the FPSR).
12287 fin_sd_ovfl_ena:
12288 mov.l %d2,-(%sp) # save d2
12289 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12290 mov.l %d1,%d2 # make a copy
12291 andi.l &0x7fff,%d1 # strip sign
12292 andi.w &0x8000,%d2 # keep old sign
12293 sub.l %d0,%d1 # exp - scale factor (unscale the operand)
12294 sub.l &0x6000,%d1 # subtract bias
12295 andi.w &0x7fff,%d1 # clear sign bit
12296 or.w %d2,%d1 # concat old sign,new exp
12297 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12298 mov.l (%sp)+,%d2 # restore d2
12299 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12300 bra.b fin_sd_ovfl_dis # now build the default result for fp0
12303 # the move in MAY overflow. so...
# reached when the scale factor equals the sgl/dbl overflow threshold; the
# magnitude of the rounded, scaled result decides which way to go.
12305 fin_sd_may_ovfl:
12306 fmov.l &0x0,%fpsr # clear FPSR
12307 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12309 fmov.x FP_SCR0(%a6),%fp0 # perform the move
12311 fmov.l %fpsr,%d1 # save status
12312 fmov.l &0x0,%fpcr # clear FPCR
12314 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12316 fabs.x %fp0,%fp1 # make a copy of result
12317 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
12318 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
12320 # no, it didn't overflow; we have correct result
12321 bra.w fin_sd_normal_exit
12323 ##########################################################################
12326 # operand is not a NORM: check its optype and branch accordingly
# d1 = src optype tag on entry (loaded from STAG in fin).
12328 fin_not_norm:
12329 cmpi.b %d1,&DENORM # weed out DENORM
12330 beq.w fin_denorm
12331 cmpi.b %d1,&SNAN # weed out SNANs
12332 beq.l res_snan_1op
12333 cmpi.b %d1,&QNAN # weed out QNANs
12334 beq.l res_qnan_1op
12337 # do the fmove in; at this point, only possible ops are ZERO and INF.
12338 # use fmov to determine ccodes.
12339 # prec:mode should be zero at this point but it won't affect answer anyways.
12341 fmov.x SRC(%a0),%fp0 # do fmove in
12342 fmov.l %fpsr,%d0 # no exceptions possible
12343 rol.l &0x8,%d0 # put ccodes in lo byte
12344 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
12345 rts # done; do not run on into the fdiv tables that follow
12347 #########################################################################
12348 # XDEF **************************************************************** #
12349 # fdiv(): emulates the fdiv instruction #
12350 # fsdiv(): emulates the fsdiv instruction #
12351 # fddiv(): emulates the fddiv instruction #
12353 # XREF **************************************************************** #
12354 # scale_to_zero_src() - scale src exponent to zero #
12355 # scale_to_zero_dst() - scale dst exponent to zero #
12356 # unf_res() - return default underflow result #
12357 # ovf_res() - return default overflow result #
12358 # res_qnan() - return QNAN result #
12359 # res_snan() - return SNAN result #
12361 # INPUT *************************************************************** #
12362 # a0 = pointer to extended precision source operand #
12363 # a1 = pointer to extended precision destination operand #
12364 # d0 = rnd prec,mode #
12366 # OUTPUT ************************************************************** #
12367 # fp0 = result #
12368 # fp1 = EXOP (if exception occurred) #
12370 # ALGORITHM *********************************************************** #
12371 # Handle NANs, infinities, and zeroes as special cases. Divide #
12372 # norms/denorms into ext/sgl/dbl precision. #
12373 # For norms/denorms, scale the exponents such that a divide #
12374 # instruction won't cause an exception. Use the regular fdiv to #
12375 # compute a result. Check if the regular operands would have taken #
12376 # an exception. If so, return the default overflow/underflow result #
12377 # and return the EXOP if exceptions are enabled. Else, scale the #
12378 # result operand to the proper exponent. #
12380 #########################################################################
# Threshold tables used by fdiv_norm.  Each table has one longword per
# rounding precision (ext, sgl, dbl, in that order) and is indexed with the
# precision number scaled by 4.  The combined scale factor in d0 is compared
# against the selected entry to decide whether the divide may underflow
# (tbl_fdiv_unfl) or overflow (tbl_fdiv_ovfl).
12382 align 0x10
12383 tbl_fdiv_unfl:
12384 long 0x3fff - 0x0000 # ext_unfl
12385 long 0x3fff - 0x3f81 # sgl_unfl
12386 long 0x3fff - 0x3c01 # dbl_unfl
12388 tbl_fdiv_ovfl:
12389 long 0x3fff - 0x7ffe # ext overflow exponent
12390 long 0x3fff - 0x407e # sgl overflow exponent
12391 long 0x3fff - 0x43fe # dbl overflow exponent
# fsdiv / fddiv / fdiv entry points.
# fsdiv and fddiv keep only the rounding-mode bits (0x30) of d0, insert
# single or double precision, and continue in fdiv.  fdiv saves the rounding
# info in L_SCR3 and builds a combined operand tag in d1; a non-zero tag
# means at least one operand is not a NORM and is dispatched through
# fdiv_not_norm, otherwise execution continues in fdiv_norm.
12393 global fsdiv
12394 fsdiv:
12395 andi.b &0x30,%d0 # clear rnd prec
12396 ori.b &s_mode*0x10,%d0 # insert sgl prec
12397 bra.b fdiv
12399 global fddiv
12400 fddiv:
12401 andi.b &0x30,%d0 # clear rnd prec
12402 ori.b &d_mode*0x10,%d0 # insert dbl prec
# no branch here: fddiv continues straight into fdiv
12404 global fdiv
12405 fdiv:
12406 mov.l %d0,L_SCR3(%a6) # store rnd info
12408 clr.w %d1
12409 mov.b DTAG(%a6),%d1
12410 lsl.b &0x3,%d1 # dst tag selects the table row (8 entries each)
12411 or.b STAG(%a6),%d1 # combine src tags
12413 bne.w fdiv_not_norm # optimize on non-norm input
# fdiv_norm: both operands are NORMs or DENORMs.
# Copies dst into FP_SCR1 and src into FP_SCR0, scales both through
# scale_to_zero_src / scale_to_zero_dst, and leaves the combined scale
# factor in d0.  d1 becomes the rounding-precision index taken from
# L_SCR3, which selects the overflow / underflow threshold the scale factor
# is compared against.  Falls into fdiv_normal when neither is possible.
12416 # DIVIDE: NORMs and DENORMs ONLY!
12418 fdiv_norm:
12419 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
12420 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
12421 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
12423 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12425 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12427 bsr.l scale_to_zero_src # scale src exponent
12428 mov.l %d0,-(%sp) # save src scale factor
12430 bsr.l scale_to_zero_dst # scale dst exponent
12432 neg.l (%sp) # negate the saved src scale factor
12433 add.l %d0,(%sp) # SCALE FACTOR = dst scale - src scale
12435 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
12436 lsr.b &0x6,%d1 # shift to lo bits
12437 mov.l (%sp)+,%d0 # load S.F.
12438 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
12439 ble.w fdiv_may_ovfl # result may overflow; go check
12441 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
12442 beq.w fdiv_may_unfl # maybe
12443 bgt.w fdiv_unfl # yes; go handle underflow
# fdiv_normal: the divide can neither overflow nor underflow.
# Divides the scaled dst (FP_SCR1) by the scaled src (FP_SCR0) with the
# user's rounding info and merges the resulting FPSR into USER_FPSR.
#
# fdiv_normal_exit: undo the scaling.  Expects the scaled result in fp0 and
# the scale factor in d0; rewrites the exponent word of the result in
# FP_SCR0 (sign preserved) and reloads it into fp0.  d2 is preserved.
12445 fdiv_normal:
12446 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12448 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12449 fmov.l &0x0,%fpsr # clear FPSR
12451 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
12453 fmov.l %fpsr,%d1 # save FPSR
12454 fmov.l &0x0,%fpcr # clear FPCR
12456 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12458 fdiv_normal_exit:
12459 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
12460 mov.l %d2,-(%sp) # store d2
12461 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
12462 mov.l %d1,%d2 # make a copy
12463 andi.l &0x7fff,%d1 # strip sign
12464 andi.w &0x8000,%d2 # keep old sign
12465 sub.l %d0,%d1 # add scale factor
12466 or.w %d2,%d1 # concat old sign,new exp
12467 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12468 mov.l (%sp)+,%d2 # restore d2
12469 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12470 rts # return; the data table tbl_fdiv_ovfl2 follows
# tbl_fdiv_ovfl2: one longword per rounding precision (indexed by d1*4 in
# fdiv_may_ovfl), compared against the unscaled result exponent.
12472 tbl_fdiv_ovfl2:
12473 long 0x7fff
12474 long 0x407f
12475 long 0x43ff
# fdiv_no_ovfl: the check in fdiv_may_ovfl found no overflow; pop the saved
# scale factor back into d0 and finish through the normal exit.
12477 fdiv_no_ovfl:
12478 mov.l (%sp)+,%d0 # restore scale factor
12479 bra.b fdiv_normal_exit
# fdiv_may_ovfl: the scale factor says the result might overflow.  Perform
# the divide on the scaled operands, then compute the exponent the result
# would have after unscaling and compare it with the tbl_fdiv_ovfl2 entry
# for the current precision (d1 still holds the precision index set up in
# fdiv_norm).  The scale factor is kept on the stack while d0 is reused.
# Continues into fdiv_ovfl_tst when overflow is confirmed.
12481 fdiv_may_ovfl:
12482 mov.l %d0,-(%sp) # save scale factor
12484 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12486 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12487 fmov.l &0x0,%fpsr # clear FPSR
12489 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12491 fmov.l %fpsr,%d0
12492 fmov.l &0x0,%fpcr
12494 or.l %d0,USER_FPSR(%a6) # save INEX,N
12496 fmovm.x &0x01,-(%sp) # save result to stack
12497 mov.w (%sp),%d0 # fetch new exponent
12498 add.l &0xc,%sp # clear result from stack
12499 andi.l &0x7fff,%d0 # strip sign
12500 sub.l (%sp),%d0 # add scale factor
12501 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
12502 blt.b fdiv_no_ovfl
12503 mov.l (%sp)+,%d0 # overflow: restore scale factor
# fdiv_ovfl_tst: the divide overflowed.  Sets the overflow-related bits in
# USER_FPSR and, if the OVFL or INEX exception is enabled, goes to
# fdiv_ovfl_ena to build the EXOP first.
#
# fdiv_ovfl_dis: produce the default overflow result.  ovf_res is called
# with the rounding info in d0 and a sign flag in d1 (derived from the N bit
# of FPSR_CC); the byte it returns in d0 is OR-ed into FPSR_CC and the
# result is loaded into fp0 from the address left in a0.
12505 fdiv_ovfl_tst:
12506 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12508 mov.b FPCR_ENABLE(%a6),%d1
12509 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12510 bne.b fdiv_ovfl_ena # yes
12512 fdiv_ovfl_dis:
12513 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12514 sne %d1 # set sign param accordingly
12515 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
12516 bsr.l ovf_res # calculate default result
12517 or.b %d0,FPSR_CC(%a6) # set INF if applicable
12518 fmovm.x (%a0),&0x80 # return default result in fp0
12519 rts # return; fdiv_ovfl_ena below branches back here
# fdiv_ovfl_ena: overflow with OVFL or INEX enabled -- build the EXOP in fp1.
# For extended precision the result already in fp0 is used; for sgl/dbl
# (fdiv_ovfl_ena_sd) the divide is redone with only the rounding-mode bits
# (0x30) of L_SCR3 in the FPCR.  The exponent is then unscaled with d0,
# reduced by 0x6000, truncated to 15 bits, recombined with the sign and
# returned in fp1 before the default result is produced by fdiv_ovfl_dis.
12521 fdiv_ovfl_ena:
12522 mov.l L_SCR3(%a6),%d1
12523 andi.b &0xc0,%d1 # is precision extended?
12524 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
12526 fdiv_ovfl_ena_cont:
12527 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
12529 mov.l %d2,-(%sp) # save d2
12530 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12531 mov.w %d1,%d2 # make a copy
12532 andi.l &0x7fff,%d1 # strip sign
12533 sub.l %d0,%d1 # add scale factor
12534 subi.l &0x6000,%d1 # subtract bias
12535 andi.w &0x7fff,%d1 # clear sign bit
12536 andi.w &0x8000,%d2 # keep old sign
12537 or.w %d2,%d1 # concat old sign,new exp
12538 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12539 mov.l (%sp)+,%d2 # restore d2
12540 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12541 bra.b fdiv_ovfl_dis
12543 fdiv_ovfl_ena_sd:
12544 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
12546 mov.l L_SCR3(%a6),%d1
12547 andi.b &0x30,%d1 # keep rnd mode
12548 fmov.l %d1,%fpcr # set FPCR
12550 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12552 fmov.l &0x0,%fpcr # clear FPCR
12553 bra.b fdiv_ovfl_ena_cont
# fdiv_unfl: the divide underflows.  Sets the UNFL exception bit, performs
# the divide on the scaled operands in round-to-zero mode, merges the FPSR
# into USER_FPSR and, if UNFL or INEX is enabled, goes to fdiv_unfl_ena to
# build the EXOP first.
#
# fdiv_unfl_dis: produce the default underflow result.  The scaled result
# is stored in FP_SCR0 and passed by address (a0) to unf_res together with
# the rounding info (d1); the byte unf_res returns in d0 is OR-ed into
# FPSR_CC and FP_SCR0 is reloaded into fp0.  d0 must still hold the scale
# factor on entry (unf_res contract is defined outside this excerpt).
12555 fdiv_unfl:
12556 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12560 fmov.l &rz_mode*0x10,%fpcr # set FPCR
12561 fmov.l &0x0,%fpsr # clear FPSR
12563 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12565 fmov.l %fpsr,%d1 # save status
12566 fmov.l &0x0,%fpcr # clear FPCR
12568 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12570 mov.b FPCR_ENABLE(%a6),%d1
12571 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12572 bne.b fdiv_unfl_ena # yes
12574 fdiv_unfl_dis:
12575 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12577 lea FP_SCR0(%a6),%a0 # pass: result addr
12578 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12579 bsr.l unf_res # calculate default result
12580 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
12581 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12582 rts # return; fdiv_unfl_ena below branches back here
# fdiv_unfl_ena: underflow with UNFL or INEX enabled -- build the EXOP in
# fp1 while leaving the result in fp0 untouched.  The divide is redone in
# fp1 using L_SCR3 as-is for extended precision, or only its rounding-mode
# bits (0x30) for sgl/dbl (fdiv_unfl_ena_sd).  The exponent is unscaled
# with d0, increased by 0x6000, truncated to 15 bits and recombined with
# the sign.  FP_SCR0 is used as scratch after the divide has consumed it.
12585 # UNFL is enabled.
12587 fdiv_unfl_ena:
12588 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
12590 mov.l L_SCR3(%a6),%d1
12591 andi.b &0xc0,%d1 # is precision extended?
12592 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
12594 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12596 fdiv_unfl_ena_cont:
12597 fmov.l &0x0,%fpsr # clear FPSR
12599 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12601 fmov.l &0x0,%fpcr # clear FPCR
12603 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
12604 mov.l %d2,-(%sp) # save d2
12605 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12606 mov.l %d1,%d2 # make a copy
12607 andi.l &0x7fff,%d1 # strip sign
12608 andi.w &0x8000,%d2 # keep old sign
12609 sub.l %d0,%d1 # add scale factor
12610 addi.l &0x6000,%d1 # add bias
12611 andi.w &0x7fff,%d1
12612 or.w %d2,%d1 # concat old sign,new exp
12613 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
12614 mov.l (%sp)+,%d2 # restore d2
12615 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12616 bra.w fdiv_unfl_dis
12618 fdiv_unfl_ena_sd:
12619 mov.l L_SCR3(%a6),%d1
12620 andi.b &0x30,%d1 # use only rnd mode
12621 fmov.l %d1,%fpcr # set FPCR
12623 bra.b fdiv_unfl_ena_cont
# fdiv_may_unfl: the scale factor equals the underflow threshold, so the
# outcome depends on the magnitude of the scaled quotient.  The divide is
# done with the user's rounding info and |result| is compared with 1:
# greater means no underflow, less means underflow, equal is resolved by a
# second divide in round-to-zero mode (same precision).  d0 (scale factor)
# is not modified here.
12626 # the divide operation MAY underflow:
12628 fdiv_may_unfl:
12629 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
12631 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12632 fmov.l &0x0,%fpsr # clear FPSR
12634 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
12636 fmov.l %fpsr,%d1 # save status
12637 fmov.l &0x0,%fpcr # clear FPCR
12639 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12641 fabs.x %fp0,%fp1 # make a copy of result
12642 fcmp.b %fp1,&0x1 # compare |result| with 1
12643 fbgt.w fdiv_normal_exit # |result| > 1; no underflow occurred
12644 fblt.w fdiv_unfl # |result| < 1; underflow occurred
12647 # we still don't know if underflow occurred. result is ~ equal to 1. but,
12648 # we don't know if the result was an underflow that rounded up to a 1
12649 # or a normalized number that rounded down to a 1. so, redo the entire
12650 # operation using RZ as the rounding mode to see what the pre-rounded
12651 # result is. this case should be relatively rare.
12653 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
12655 mov.l L_SCR3(%a6),%d1
12656 andi.b &0xc0,%d1 # keep rnd prec
12657 ori.b &rz_mode*0x10,%d1 # insert RZ
12659 fmov.l %d1,%fpcr # set FPCR
12660 fmov.l &0x0,%fpsr # clear FPSR
12662 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
12664 fmov.l &0x0,%fpcr # clear FPCR
12665 fabs.x %fp1 # make absolute value
12666 fcmp.b %fp1,&0x1 # is |result| < 1.b?
12667 fbge.w fdiv_normal_exit # no; no underflow occurred
12668 bra.w fdiv_unfl # yes; underflow occurred
12670 ############################################################################
12673 # Divide: inputs are not both normalized; what are they?
12675 fdiv_not_norm:
12676 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
12677 jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
12679 swbeg &48
12680 tbl_fdiv_op:
12681 short fdiv_norm - tbl_fdiv_op # NORM / NORM
12682 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
12683 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
12684 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
12685 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
12686 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
12687 short tbl_fdiv_op - tbl_fdiv_op #
12688 short tbl_fdiv_op - tbl_fdiv_op #
12690 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
12691 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
12692 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
12693 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
12694 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
12695 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
12696 short tbl_fdiv_op - tbl_fdiv_op #
12697 short tbl_fdiv_op - tbl_fdiv_op #
12699 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
12700 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
12701 short fdiv_res_operr - tbl_fdiv_op # INF / INF
12702 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
12703 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
12704 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
12705 short tbl_fdiv_op - tbl_fdiv_op #
12706 short tbl_fdiv_op - tbl_fdiv_op #
12708 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
12709 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
12710 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
12711 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
12712 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
12713 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
12714 short tbl_fdiv_op - tbl_fdiv_op #
12715 short tbl_fdiv_op - tbl_fdiv_op #
12717 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
12718 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
12719 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
12720 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
12721 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
12722 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
12723 short tbl_fdiv_op - tbl_fdiv_op #
12724 short tbl_fdiv_op - tbl_fdiv_op #
12726 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
12727 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
12728 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
12729 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
12730 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
12731 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
12732 short tbl_fdiv_op - tbl_fdiv_op #
12733 short tbl_fdiv_op - tbl_fdiv_op #
12735 fdiv_res_qnan:
12736 bra.l res_qnan
12737 fdiv_res_snan:
12738 bra.l res_snan
12739 fdiv_res_operr:
12740 bra.l res_operr
# fdiv_zero_load: return a signed ZERO in fp0.
# In:  a0 = src operand, a1 = dst operand (only the sign bytes are read)
# Out: fp0 = -0 and FPSR_CC = Z+N when the operand signs differ,
#      fp0 = +0 and FPSR_CC = Z otherwise.
# Clobbers d0, d1.
12742 global fdiv_zero_load # global for fsgldiv
12743 fdiv_zero_load:
12744 mov.b SRC_EX(%a0),%d0 # result sign is exclusive
12745 mov.b DST_EX(%a1),%d1 # or of input signs.
12746 eor.b %d0,%d1
12747 bpl.b fdiv_zero_load_p # result is positive
12748 fmov.s &0x80000000,%fp0 # load a -ZERO
12749 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
12750 rts # return; must not fall into the +ZERO path
12751 fdiv_zero_load_p:
12752 fmov.s &0x00000000,%fp0 # load a +ZERO
12753 mov.b &z_bmask,FPSR_CC(%a6) # set Z
12754 rts
# fdiv_inf_load: divide by zero -- return a signed INF in fp0.
# In:  a0 = src operand, a1 = dst operand (only the sign bytes are read)
# Out: DZ/ADZ set in USER_FPSR;
#      fp0 = -INF and FPSR_CC = INF+N when the operand signs differ,
#      fp0 = +INF and FPSR_CC = INF otherwise.
# Clobbers d0, d1.
12757 # The destination was In Range and the source was a ZERO. The result,
12758 # therefore, is an INF w/ the proper sign.
12759 # So, determine the sign and return a new INF (w/ the j-bit cleared).
12761 global fdiv_inf_load # global for fsgldiv
12762 fdiv_inf_load:
12763 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
12764 mov.b SRC_EX(%a0),%d0 # load both signs
12765 mov.b DST_EX(%a1),%d1
12766 eor.b %d0,%d1
12767 bpl.b fdiv_inf_load_p # result is positive
12768 fmov.s &0xff800000,%fp0 # make result -INF
12769 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
12770 rts # return; must not fall into the +INF path
12771 fdiv_inf_load_p:
12772 fmov.s &0x7f800000,%fp0 # make result +INF
12773 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12774 rts
# fdiv_inf_dst: the destination is an INF -- return it with the result sign.
# In:  a0 = src operand, a1 = dst operand
# Out: fp0 = dst INF with the sign forced negative (FPSR_CC = INF+N) when
#      the operand signs differ, forced positive (FPSR_CC = INF) otherwise.
# Clobbers d0, d1.
12777 # The destination was an INF w/ an In Range or ZERO source, the result is
12778 # an INF w/ the proper sign.
12779 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
12780 # dst INF is set, then the j-bit of the result INF is also set).
12782 global fdiv_inf_dst # global for fsgldiv
12783 fdiv_inf_dst:
12784 mov.b DST_EX(%a1),%d0 # load both signs
12785 mov.b SRC_EX(%a0),%d1
12786 eor.b %d0,%d1
12787 bpl.b fdiv_inf_dst_p # result is positive
12789 fmovm.x DST(%a1),&0x80 # return result in fp0
12790 fabs.x %fp0 # clear sign bit
12791 fneg.x %fp0 # set sign bit
12792 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
12793 rts # return; must not fall into the positive path
12795 fdiv_inf_dst_p:
12796 fmovm.x DST(%a1),&0x80 # return result in fp0
12797 fabs.x %fp0 # return positive INF
12798 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
12799 rts
12801 #########################################################################
12802 # XDEF **************************************************************** #
12803 # fneg(): emulates the fneg instruction #
12804 # fsneg(): emulates the fsneg instruction #
12805 # fdneg(): emulates the fdneg instruction #
12807 # XREF **************************************************************** #
12808 # norm() - normalize a denorm to provide EXOP #
12809 # scale_to_zero_src() - scale sgl/dbl source exponent #
12810 # ovf_res() - return default overflow result #
12811 # unf_res() - return default underflow result #
12812 # res_qnan_1op() - return QNAN result #
12813 # res_snan_1op() - return SNAN result #
12815 # INPUT *************************************************************** #
12816 # a0 = pointer to extended precision source operand #
12817 # d0 = rnd prec,mode #
12819 # OUTPUT ************************************************************** #
12820 # fp0 = result #
12821 # fp1 = EXOP (if exception occurred) #
12823 # ALGORITHM *********************************************************** #
12824 # Handle NANs, zeroes, and infinities as special cases. Separate #
12825 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
12826 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
12827 # and an actual fneg performed to see if overflow/underflow would have #
12828 # occurred. If so, return default underflow/overflow result. Else, #
12829 # scale the result exponent and return result. FPSR gets set based on #
12830 # the result value. #
12832 #########################################################################
# fsneg / fdneg / fneg entry points.
# fsneg and fdneg keep only the rounding-mode bits (0x30) of d0, insert
# single or double precision, and continue in fneg.  fneg saves the rounding
# info in L_SCR3 and reads the source tag; a non-zero tag is handled by
# fneg_not_norm, otherwise execution continues in fneg_norm.
12834 global fsneg
12835 fsneg:
12836 andi.b &0x30,%d0 # clear rnd prec
12837 ori.b &s_mode*0x10,%d0 # insert sgl precision
12838 bra.b fneg
12840 global fdneg
12841 fdneg:
12842 andi.b &0x30,%d0 # clear rnd prec
12843 ori.b &d_mode*0x10,%d0 # insert dbl prec
# no branch here: fdneg continues straight into fneg
12845 global fneg
12846 fneg:
12847 mov.l %d0,L_SCR3(%a6) # store rnd info
12848 mov.b STAG(%a6),%d1
12849 bne.w fneg_not_norm # optimize on non-norm input
# fneg_norm: negate a NORM.
# For sgl/dbl rounding precision the work is done by fneg_not_ext.  For
# extended precision the operand is copied to FP_SCR0 with its sign bit
# flipped, 'N' is written to FPSR_CC when the new sign is negative, and the
# result is loaded into fp0.
12852 # NEGATE SIGN : norms and denorms ONLY!
12854 fneg_norm:
12855 andi.b &0xc0,%d0 # is precision extended?
12856 bne.w fneg_not_ext # no; go handle sgl or dbl
12859 # precision selected is extended. so...we can not get an underflow
12860 # or overflow because of rounding to the correct precision. so...
12861 # skip the scaling and unscaling...
12863 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12864 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12865 mov.w SRC_EX(%a0),%d0
12866 eori.w &0x8000,%d0 # negate sign
12867 bpl.b fneg_norm_load # sign is positive
12868 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
12869 fneg_norm_load:
12870 mov.w %d0,FP_SCR0_EX(%a6)
12871 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12872 rts # return; must not fall into fneg_denorm
# fneg_denorm: negate a DENORM.
# For sgl/dbl rounding precision the work is done by fneg_not_ext.  For
# extended precision the UNFL exception bit is set, the operand is copied
# to FP_SCR0 with its sign flipped and returned in fp0.  Only when UNFL is
# enabled in the FPCR does fneg_ext_unfl_ena additionally build the EXOP in
# fp1 (normalized mantissa, exponent = 0x6000 - shift count).
12875 # for an extended precision DENORM, the UNFL exception bit is set
12876 # the accrued bit is NOT set in this instance(no inexactness!)
12878 fneg_denorm:
12879 andi.b &0xc0,%d0 # is precision extended?
12880 bne.b fneg_not_ext # no; go handle sgl or dbl
12882 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12884 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12885 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12886 mov.w SRC_EX(%a0),%d0
12887 eori.w &0x8000,%d0 # negate sign
12888 bpl.b fneg_denorm_done # no
12889 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
12890 fneg_denorm_done:
12891 mov.w %d0,FP_SCR0_EX(%a6)
12892 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12894 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
12895 bne.b fneg_ext_unfl_ena # yes
12896 rts # UNFL disabled: no EXOP needed
12899 # the input is an extended DENORM and underflow is enabled in the FPCR.
12900 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
12901 # exponent and insert back into the operand.
12903 fneg_ext_unfl_ena:
12904 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
12905 bsr.l norm # normalize result
12906 neg.w %d0 # new exponent = -(shft val)
12907 addi.w &0x6000,%d0 # add new bias to exponent
12908 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
12909 andi.w &0x8000,%d1 # keep old sign
12910 andi.w &0x7fff,%d0 # clear sign position
12911 or.w %d1,%d0 # concat old sign, new exponent
12912 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
12913 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12914 rts # return; must not fall into fneg_not_ext
# fneg_not_ext / fneg_sgl: rounding precision is single or double.
# d0 holds the precision bits (masked with 0xc0 by the caller); anything
# other than single goes to fneg_dbl.  For single precision the operand is
# copied to FP_SCR0 and scaled; the scale factor returned in d0 is checked
# against the single-precision underflow and overflow limits.  Continues
# into fneg_sd_normal when the operand is safely in range.
12917 # operand is either single or double
12919 fneg_not_ext:
12920 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12921 bne.b fneg_dbl
12924 # operand is to be rounded to single precision
12926 fneg_sgl:
12927 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12928 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12929 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12930 bsr.l scale_to_zero_src # calculate scale factor
12932 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
12933 bge.w fneg_sd_unfl # yes; go handle underflow
12934 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
12935 beq.w fneg_sd_may_ovfl # maybe; go check
12936 blt.w fneg_sd_ovfl # yes; go handle overflow
# fneg_sd_normal: sgl/dbl negate with no overflow or underflow possible.
# Negates the scaled operand in FP_SCR0 with the user's rounding info and
# merges the resulting FPSR into USER_FPSR.
#
# fneg_sd_normal_exit: undo the scaling.  Expects the scaled result in fp0
# and the scale factor in d0; rewrites the exponent word of the result in
# FP_SCR0 (sign preserved) and reloads it into fp0.  d2 is preserved.
12939 # operand will NOT overflow or underflow when moved in to the fp reg file
12941 fneg_sd_normal:
12942 fmov.l &0x0,%fpsr # clear FPSR
12943 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12945 fneg.x FP_SCR0(%a6),%fp0 # perform negation
12947 fmov.l %fpsr,%d1 # save FPSR
12948 fmov.l &0x0,%fpcr # clear FPCR
12950 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12952 fneg_sd_normal_exit:
12953 mov.l %d2,-(%sp) # save d2
12954 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12955 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12956 mov.w %d1,%d2 # make a copy
12957 andi.l &0x7fff,%d1 # strip sign
12958 sub.l %d0,%d1 # add scale factor
12959 andi.w &0x8000,%d2 # keep old sign
12960 or.w %d1,%d2 # concat old sign,new exp
12961 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12962 mov.l (%sp)+,%d2 # restore d2
12963 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12964 rts # return; must not fall into fneg_dbl
# fneg_dbl: double-precision variant of fneg_sgl.  Copies the operand to
# FP_SCR0, scales it, and checks the scale factor in d0 against the
# double-precision underflow and overflow limits before joining the shared
# sgl/dbl paths.
12967 # operand is to be rounded to double precision
12969 fneg_dbl:
12970 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12971 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12972 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12973 bsr.l scale_to_zero_src # calculate scale factor
12975 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
12976 bge.b fneg_sd_unfl # yes; go handle underflow
12977 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
12978 beq.w fneg_sd_may_ovfl # maybe; go check
12979 blt.w fneg_sd_ovfl # yes; go handle overflow
12980 bra.w fneg_sd_normal # no; go handle normalized op
# fneg_sd_unfl: the sgl/dbl negate underflows.  Sets the UNFL exception
# bit, flips the sign of the scaled operand in FP_SCR0 in place and sets
# 'N' in FPSR_CC when the new sign is negative.  If UNFL or INEX is enabled
# the EXOP is built first (fneg_sd_unfl_ena); the default result is then
# produced by unf_res from FP_SCR0 and returned in fp0.
12983 # operand WILL underflow when moved in to the fp register file
12985 fneg_sd_unfl:
12986 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12988 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
12989 bpl.b fneg_sd_unfl_tst
12990 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
12992 # if underflow or inexact is enabled, go calculate EXOP first.
12993 fneg_sd_unfl_tst:
12994 mov.b FPCR_ENABLE(%a6),%d1
12995 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12996 bne.b fneg_sd_unfl_ena # yes
12998 fneg_sd_unfl_dis:
12999 lea FP_SCR0(%a6),%a0 # pass: result addr
13000 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13001 bsr.l unf_res # calculate default result
13002 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
13003 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13004 rts
# fneg_sd_unfl_ena: build the EXOP for an enabled underflow.  The mantissa
# of the (already negated) operand is copied from FP_SCR0 to FP_SCR1; the
# exponent is unscaled with d0, increased by 0x6000, truncated to 15 bits
# and recombined with the sign.  The EXOP is returned in fp1 and control
# goes back to fneg_sd_unfl_dis for the default result.  d2 is preserved.
13007 # operand will underflow AND underflow is enabled.
13008 # therefore, we must return the result rounded to extended precision.
13010 fneg_sd_unfl_ena:
13011 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13012 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13013 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13015 mov.l %d2,-(%sp) # save d2
13016 mov.l %d1,%d2 # make a copy
13017 andi.l &0x7fff,%d1 # strip sign
13018 andi.w &0x8000,%d2 # keep old sign
13019 sub.l %d0,%d1 # subtract scale factor
13020 addi.l &0x6000,%d1 # add new bias
13021 andi.w &0x7fff,%d1
13022 or.w %d2,%d1 # concat new sign,new exp
13023 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13024 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13025 mov.l (%sp)+,%d2 # restore d2
13026 bra.b fneg_sd_unfl_dis
# fneg_sd_ovfl: the sgl/dbl negate overflows.  The negate is still executed
# on the scaled operand so that the FPSR bits it produces can be merged into
# USER_FPSR.
#
# fneg_sd_ovfl_tst: sets the overflow-related bits in USER_FPSR and, if
# OVFL or INEX is enabled, goes to fneg_sd_ovfl_ena to build the EXOP first.
#
# fneg_sd_ovfl_dis: produce the default overflow result via ovf_res (d0 =
# rounding info, d1 = sign flag derived from the N bit of FPSR_CC) and load
# it into fp0 from the address left in a0.
13029 # operand WILL overflow.
13031 fneg_sd_ovfl:
13032 fmov.l &0x0,%fpsr # clear FPSR
13033 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13035 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13037 fmov.l &0x0,%fpcr # clear FPCR
13038 fmov.l %fpsr,%d1 # save FPSR
13040 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13042 fneg_sd_ovfl_tst:
13043 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13045 mov.b FPCR_ENABLE(%a6),%d1
13046 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13047 bne.b fneg_sd_ovfl_ena # yes
13050 # OVFL is not enabled; therefore, we must create the default result by
13051 # calling ovf_res().
13053 fneg_sd_ovfl_dis:
13054 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13055 sne %d1 # set sign param accordingly
13056 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13057 bsr.l ovf_res # calculate default result
13058 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13059 fmovm.x (%a0),&0x80 # return default result in fp0
13060 rts # return; fneg_sd_ovfl_ena below branches back here
# fneg_sd_ovfl_ena: build the EXOP for an enabled overflow.  The exponent
# word in FP_SCR0 is unscaled with d0, reduced by 0x6000, truncated to 15
# bits and recombined with its sign; the EXOP is returned in fp1 and control
# goes back to fneg_sd_ovfl_dis for the default result.  d2 is preserved.
13063 # OVFL is enabled.
13064 # the INEX2 bit has already been updated by the round to the correct precision.
13065 # now, round to extended(and don't alter the FPSR).
13067 fneg_sd_ovfl_ena:
13068 mov.l %d2,-(%sp) # save d2
13069 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13070 mov.l %d1,%d2 # make a copy
13071 andi.l &0x7fff,%d1 # strip sign
13072 andi.w &0x8000,%d2 # keep old sign
13073 sub.l %d0,%d1 # add scale factor
13074 subi.l &0x6000,%d1 # subtract bias
13075 andi.w &0x7fff,%d1
13076 or.w %d2,%d1 # concat sign,exp
13077 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13078 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13079 mov.l (%sp)+,%d2 # restore d2
13080 bra.b fneg_sd_ovfl_dis
# fneg_sd_may_ovfl: borderline overflow case.  Perform the negate on the
# scaled operand with the user's rounding info, merge the FPSR into
# USER_FPSR, then compare |result| with 2 to find out whether rounding
# pushed the value out of range.  d0 (scale factor) is not modified here.
13083 # the move in MAY overflow. so...
13085 fneg_sd_may_ovfl:
13086 fmov.l &0x0,%fpsr # clear FPSR
13087 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13089 fneg.x FP_SCR0(%a6),%fp0 # perform negation
13091 fmov.l %fpsr,%d1 # save status
13092 fmov.l &0x0,%fpcr # clear FPCR
13094 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13096 fabs.x %fp0,%fp1 # make a copy of result
13097 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13098 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
13100 # no, it didn't overflow; we have correct result
13101 bra.w fneg_sd_normal_exit
13103 ##########################################################################
13106 # input is not normalized; what is it?
13108 fneg_not_norm:
13109 cmpi.b %d1,&DENORM # weed out DENORM
13110 beq.w fneg_denorm
13111 cmpi.b %d1,&SNAN # weed out SNAN
13112 beq.l res_snan_1op
13113 cmpi.b %d1,&QNAN # weed out QNAN
13114 beq.l res_qnan_1op
13117 # do the fneg; at this point, only possible ops are ZERO and INF.
13118 # use fneg to determine ccodes.
13119 # prec:mode should be zero at this point but it won't affect answer anyways.
13121 fneg.x SRC_EX(%a0),%fp0 # do fneg
13122 fmov.l %fpsr,%d0
13123 rol.l &0x8,%d0 # put ccodes in lo byte
13124 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
13127 #########################################################################
13128 # XDEF **************************************************************** #
13129 # ftst(): emulates the ftst instruction #
13131 # XREF **************************************************************** #
13132 # res_{s,q}nan_1op() - set NAN result for monadic instruction #
13134 # INPUT *************************************************************** #
13135 # a0 = pointer to extended precision source operand #
13137 # OUTPUT ************************************************************** #
13138 # none #
13140 # ALGORITHM *********************************************************** #
13141 # Check the source operand tag (STAG) and set the FPSR condition #
13142 # codes according to the operand type and sign. #
13144 #########################################################################
# ftst: set the condition-code byte FPSR_CC from the type and sign of the
# source operand.
# In:  a0 = source operand, STAG(a6) = its tag
# Out: FPSR_CC written as follows (left untouched for a positive NORM or
#      DENORM):
#        negative NORM/DENORM -> N
#        +INF -> I        -INF  -> I+N
#        +ZERO -> Z       -ZERO -> Z+N
#      SNAN/QNAN are handed to res_snan_1op / res_qnan_1op.
# Clobbers d1.  Every case ends with its own rts so that a positive
# operand never picks up the flags of the negative case that follows it.
13146 global ftst
13147 ftst:
13148 mov.b STAG(%a6),%d1
13149 bne.b ftst_not_norm # optimize on non-norm input
13152 # Norm:
13154 ftst_norm:
13155 tst.b SRC_EX(%a0) # is operand negative?
13156 bmi.b ftst_norm_m # yes
13157 rts # positive: nothing to set
13158 ftst_norm_m:
13159 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13160 rts
13163 # input is not normalized; what is it?
13165 ftst_not_norm:
13166 cmpi.b %d1,&ZERO # weed out ZERO
13167 beq.b ftst_zero
13168 cmpi.b %d1,&INF # weed out INF
13169 beq.b ftst_inf
13170 cmpi.b %d1,&SNAN # weed out SNAN
13171 beq.l res_snan_1op
13172 cmpi.b %d1,&QNAN # weed out QNAN
13173 beq.l res_qnan_1op
13176 # Denorm:
13178 ftst_denorm:
13179 tst.b SRC_EX(%a0) # is operand negative?
13180 bmi.b ftst_denorm_m # yes
13181 rts # positive: nothing to set
13182 ftst_denorm_m:
13183 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
13184 rts
13187 # Infinity:
13189 ftst_inf:
13190 tst.b SRC_EX(%a0) # is operand negative?
13191 bmi.b ftst_inf_m # yes
13192 ftst_inf_p:
13193 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13194 rts
13195 ftst_inf_m:
13196 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
13197 rts
13200 # Zero:
13202 ftst_zero:
13203 tst.b SRC_EX(%a0) # is operand negative?
13204 bmi.b ftst_zero_m # yes
13205 ftst_zero_p:
13206 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13207 rts
13208 ftst_zero_m:
13209 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13210 rts
13212 #########################################################################
13213 # XDEF **************************************************************** #
13214 # fint(): emulates the fint instruction #
13216 # XREF **************************************************************** #
13217 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13219 # INPUT *************************************************************** #
13220 # a0 = pointer to extended precision source operand #
13221 # d0 = round precision/mode #
13223 # OUTPUT ************************************************************** #
13224 # fp0 = result #
13226 # ALGORITHM *********************************************************** #
13227 # Separate according to operand type. Unnorms don't pass through #
13228 # here. For norms, load the rounding mode/prec, execute a "fint", then #
13229 # store the resulting FPSR bits. #
13230 # For denorms, force the j-bit to a one and do the same as for #
13231 # norms. Denorms are so low that the answer will either be a zero or a #
13232 # one. #
13233 # For zeroes/infs/NANs, return the same while setting the FPSR #
13234 # as appropriate. #
13236 #########################################################################
# fint: emulate fint for an operand described by STAG.
# In:  a0 = source operand, d0 = rounding precision/mode
# Out: fp0 = result; exception bits OR-ed into USER_FPSR (NORM/DENORM) or
#      FPSR_CC written directly (ZERO/INF).  NANs are handed to
#      res_snan_1op / res_qnan_1op.
# Clobbers d0, d1; the DENORM path also redirects a0 to FP_SCR0.
# Every case ends with its own rts so that one case never runs on into the
# code of the next.
13238 global fint
13239 fint:
13240 mov.b STAG(%a6),%d1
13241 bne.b fint_not_norm # optimize on non-norm input
13244 # Norm:
13246 fint_norm:
13247 andi.b &0x30,%d0 # set prec = ext
13249 fmov.l %d0,%fpcr # set FPCR
13250 fmov.l &0x0,%fpsr # clear FPSR
13252 fint.x SRC(%a0),%fp0 # execute fint
13254 fmov.l &0x0,%fpcr # clear FPCR
13255 fmov.l %fpsr,%d0 # save FPSR
13256 or.l %d0,USER_FPSR(%a6) # set exception bits
13258 rts
13261 # input is not normalized; what is it?
13263 fint_not_norm:
13264 cmpi.b %d1,&ZERO # weed out ZERO
13265 beq.b fint_zero
13266 cmpi.b %d1,&INF # weed out INF
13267 beq.b fint_inf
13268 cmpi.b %d1,&DENORM # weed out DENORM
13269 beq.b fint_denorm
13270 cmpi.b %d1,&SNAN # weed out SNAN
13271 beq.l res_snan_1op
13272 bra.l res_qnan_1op # weed out QNAN
13275 # Denorm:
13277 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
13278 # also, the INEX2 and AINEX exception bits will be set.
13279 # so, we could either set these manually or force the DENORM
13280 # to a very small NORM and ship it to the NORM routine.
13281 # I do the latter.
13283 fint_denorm:
13284 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13285 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13286 lea FP_SCR0(%a6),%a0
13287 bra.b fint_norm
13290 # Zero:
13292 fint_zero:
13293 tst.b SRC_EX(%a0) # is ZERO negative?
13294 bmi.b fint_zero_m # yes
13295 fint_zero_p:
13296 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13297 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
13298 rts
13299 fint_zero_m:
13300 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13301 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
13302 rts
13305 # Infinity:
13307 fint_inf:
13308 fmovm.x SRC(%a0),&0x80 # return result in fp0
13309 tst.b SRC_EX(%a0) # is INF negative?
13310 bmi.b fint_inf_m # yes
13311 fint_inf_p:
13312 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
13313 rts
13314 fint_inf_m:
13315 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
13316 rts
13318 #########################################################################
13319 # XDEF **************************************************************** #
13320 # fintrz(): emulates the fintrz instruction #
13322 # XREF **************************************************************** #
13323 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13325 # INPUT *************************************************************** #
13326 # a0 = pointer to extended precision source operand #
13327 # d0 = round precision/mode #
13329 # OUTPUT ************************************************************** #
13330 # fp0 = result #
13332 # ALGORITHM *********************************************************** #
13333 # Separate according to operand type. Unnorms don't pass through #
13334 # here. For norms, load the rounding mode/prec, execute a "fintrz", #
13335 # then store the resulting FPSR bits. #
13336 # For denorms, force the j-bit to a one and do the same as for #
13337 # norms. Denorms are so low that the answer will either be a zero or a #
13338 # one. #
13339 # For zeroes/infs/NANs, return the same while setting the FPSR #
13340 # as appropriate. #
13342 #########################################################################
# fintrz entry: fetch the source operand tag. A zero tag (NORM) falls
# straight through to fintrz_norm; anything else is classified at
# fintrz_not_norm. fintrz_denorm also re-enters at fintrz_norm.
13344 global fintrz
13345 fintrz:
13346 mov.b STAG(%a6),%d1 # d1 = source operand tag
13347 bne.b fintrz_not_norm # optimize on non-norm input
13350 # Norm:
13352 fintrz_norm:
13353 fmov.l &0x0,%fpsr # clear FPSR
13355 fintrz.x SRC(%a0),%fp0 # execute fintrz
13357 fmov.l %fpsr,%d0 # save FPSR
13358 or.l %d0,USER_FPSR(%a6) # set exception bits
# NOTE(review): listing numbers jump 13358 -> 13363 here; the return
# (rts) is presumably among the elided lines -- confirm against upstream.
13363 # input is not normalized; what is it?
# d1 still holds the STAG byte loaded at fintrz; it is tested against
# each operand class in turn.
13365 fintrz_not_norm:
13366 cmpi.b %d1,&ZERO # weed out ZERO
13367 beq.b fintrz_zero
13368 cmpi.b %d1,&INF # weed out INF
13369 beq.b fintrz_inf
13370 cmpi.b %d1,&DENORM # weed out DENORM
13371 beq.b fintrz_denorm
13372 cmpi.b %d1,&SNAN # weed out SNAN
13373 beq.l res_snan_1op
13374 bra.l res_qnan_1op # nothing else matched; handle as QNAN
13377 # Denorm:
13379 # for DENORMs, the result will be (+/-)ZERO.
13380 # also, the INEX2 and AINEX exception bits will be set.
13381 # so, we could either set these manually or force the DENORM
13382 # to a very small NORM and ship it to the NORM routine.
13383 # I do the latter.
# Only the sign/exponent word and the top mantissa byte of FP_SCR0 are
# written here; the remaining scratch mantissa bytes are left untouched.
13385 fintrz_denorm:
13386 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
13387 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
13388 lea FP_SCR0(%a6),%a0 # a0 = ptr to the patched copy
13389 bra.b fintrz_norm # run it through the NORM path
13392 # Zero:
# Return a zero of the same sign as the source and set the matching
# condition-code byte.
13394 fintrz_zero:
13395 tst.b SRC_EX(%a0) # is ZERO negative?
13396 bmi.b fintrz_zero_m # yes
13397 fintrz_zero_p:
13398 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
13399 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
# NOTE(review): listing line 13400 is missing from this extract;
# presumably the rts that ends the +ZERO path -- confirm upstream.
13401 fintrz_zero_m:
13402 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
13403 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
# NOTE(review): listing lines 13404-13406 are missing; presumably they
# include the closing rts -- confirm upstream.
13407 # Infinity:
# Infinity passes through unchanged; only the condition codes depend on
# its sign.
13409 fintrz_inf:
13410 fmovm.x SRC(%a0),&0x80 # return result in fp0
13411 tst.b SRC_EX(%a0) # is INF negative?
13412 bmi.b fintrz_inf_m # yes
13413 fintrz_inf_p:
13414 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
# NOTE(review): listing line 13415 is missing from this extract;
# presumably the rts that ends the +INF path -- confirm upstream.
13416 fintrz_inf_m:
13417 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
# NOTE(review): listing lines 13418-13419 are missing; presumably they
# include the closing rts -- confirm upstream.
13420 #########################################################################
13421 # XDEF **************************************************************** #
13422 # fabs(): emulates the fabs instruction #
13423 # fsabs(): emulates the fsabs instruction #
13424 # fdabs(): emulates the fdabs instruction #
13426 # XREF **************************************************************** #
13427 # norm() - normalize denorm mantissa to provide EXOP #
13428 # scale_to_zero_src() - make exponent = 0; get scale factor #
13429 # unf_res() - calculate underflow result #
13430 # ovf_res() - calculate overflow result #
13431 # res_{s,q}nan_1op() - set NAN result for monadic operation #
13433 # INPUT *************************************************************** #
13434 # a0 = pointer to extended precision source operand #
13435 # d0 = rnd precision/mode #
13437 # OUTPUT ************************************************************** #
13438 # fp0 = result #
13439 # fp1 = EXOP (if exception occurred) #
13441 # ALGORITHM *********************************************************** #
13442 # Handle NANs, infinities, and zeroes as special cases. Divide #
13443 # norms into extended, single, and double precision. #
13444 # Simply clear sign for extended precision norm. Ext prec denorm #
13445 # gets an EXOP created for it since it's an underflow. #
13446 # Double and single precision can overflow and underflow. First, #
13447 # scale the operand such that the exponent is zero. Perform an "fabs" #
13448 # using the correct rnd mode/prec. Check to see if the original #
13449 # exponent would take an exception. If so, use unf_res() or ovf_res() #
13450 # to calculate the default result. Also, create the EXOP for the #
13451 # exceptional case. If no exception should occur, insert the correct #
13452 # result exponent and return. #
13453 # Unnorms don't pass through here. #
13455 #########################################################################
# Three entry points share one body. fsabs and fdabs overwrite the
# precision field of d0 (the 0xc0 bits tested at fabs_norm) while the
# 0x30 mask keeps the rounding-mode bits; fabs uses d0 as passed in.
13457 global fsabs
13458 fsabs:
13459 andi.b &0x30,%d0 # clear rnd prec
13460 ori.b &s_mode*0x10,%d0 # insert sgl precision
13461 bra.b fabs
13463 global fdabs
13464 fdabs:
13465 andi.b &0x30,%d0 # clear rnd prec
13466 ori.b &d_mode*0x10,%d0 # insert dbl precision
# no branch here: fdabs runs straight on into fabs
13468 global fabs
13469 fabs:
13470 mov.l %d0,L_SCR3(%a6) # store rnd info
13471 mov.b STAG(%a6),%d1 # d1 = source operand tag
13472 bne.w fabs_not_norm # optimize on non-norm input
13475 # ABSOLUTE VALUE: norms and denorms ONLY!
13477 fabs_norm:
13478 andi.b &0xc0,%d0 # is precision extended?
13479 bne.b fabs_not_ext # no; go handle sgl or dbl
13482 # precision selected is extended. so...we can not get an underflow
13483 # or overflow because of rounding to the correct precision. so...
13484 # skip the scaling and unscaling...
# copy the operand to FP_SCR0 with the sign bit cleared and load it.
13486 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13487 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13488 mov.w SRC_EX(%a0),%d1
13489 bclr &15,%d1 # force absolute value
13490 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
13491 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# NOTE(review): listing lines 13492-13494 are missing from this extract;
# presumably they include the rts -- confirm upstream.
13495 # for an extended precision DENORM, the UNFL exception bit is set
13496 # the accrued bit is NOT set in this instance(no inexactness!)
# sgl/dbl precision DENORMs are handed to fabs_not_ext instead.
13498 fabs_denorm:
13499 andi.b &0xc0,%d0 # is precision extended?
13500 bne.b fabs_not_ext # no
13502 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13504 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13505 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13506 mov.w SRC_EX(%a0),%d0
13507 bclr &15,%d0 # clear sign
13508 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
13510 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13512 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
13513 bne.b fabs_ext_unfl_ena # yes; also build the EXOP
# NOTE(review): listing lines 13514-13516 are missing from this extract;
# presumably the rts for the UNFL-disabled case -- confirm upstream.
13517 # the input is an extended DENORM and underflow is enabled in the FPCR.
13518 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
13519 # exponent and insert back into the operand.
# FP_SCR0 already holds |operand| (built in fabs_denorm); the rebiased
# value is returned in fp1 as the EXOP.
13521 fabs_ext_unfl_ena:
13522 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
13523 bsr.l norm # normalize result
13524 neg.w %d0 # new exponent = -(shft val)
13525 addi.w &0x6000,%d0 # add new bias to exponent
13526 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
13527 andi.w &0x8000,%d1 # keep old sign
13528 andi.w &0x7fff,%d0 # clear sign position
13529 or.w %d1,%d0 # concat old sign, new exponent
13530 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
13531 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
# NOTE(review): listing lines 13532-13534 are missing from this extract;
# presumably they include the rts -- confirm upstream.
13535 # operand is either single or double
# d0 holds only the precision bits here (masked with 0xc0 by the caller).
13537 fabs_not_ext:
13538 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
13539 bne.b fabs_dbl
13542 # operand is to be rounded to single precision
# copy the operand to FP_SCR0, get the scale factor in d0, and classify
# the result against the single precision exponent limits.
13544 fabs_sgl:
13545 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13546 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13547 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13548 bsr.l scale_to_zero_src # calculate scale factor
13550 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
13551 bge.w fabs_sd_unfl # yes; go handle underflow
13552 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
13553 beq.w fabs_sd_may_ovfl # maybe; go check
13554 blt.w fabs_sd_ovfl # yes; go handle overflow
# no branch taken: execution continues into fabs_sd_normal, which follows.
13557 # operand will NOT overflow or underflow when moved in to the fp reg file
# d0 = scale factor from scale_to_zero_src; it must survive until the
# exponent is rebuilt in fabs_sd_normal_exit, so d1/d2 are the scratch regs.
13559 fabs_sd_normal:
13560 fmov.l &0x0,%fpsr # clear FPSR
13561 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13563 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13565 fmov.l %fpsr,%d1 # save FPSR
13566 fmov.l &0x0,%fpcr # clear FPCR
13568 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# shared exit (also reached from fabs_sd_may_ovfl): undo the scaling on
# the result held in fp0.
13570 fabs_sd_normal_exit:
13571 mov.l %d2,-(%sp) # save d2
13572 fmovm.x &0x80,FP_SCR0(%a6) # store out result
13573 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
13574 mov.l %d1,%d2 # make a copy
13575 andi.l &0x7fff,%d1 # strip sign
13576 sub.l %d0,%d1 # subtract scale factor
13577 andi.w &0x8000,%d2 # keep old sign
13578 or.w %d1,%d2 # concat old sign,new exp
13579 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
13580 mov.l (%sp)+,%d2 # restore d2
13581 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# NOTE(review): listing lines 13582-13584 are missing from this extract;
# presumably they include the rts -- confirm upstream.
13585 # operand is to be rounded to double precision
# same flow as fabs_sgl, but checked against the double precision
# exponent limits.
13587 fabs_dbl:
13588 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13589 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13590 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13591 bsr.l scale_to_zero_src # calculate scale factor
13593 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
13594 bge.b fabs_sd_unfl # yes; go handle underflow
13595 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
13596 beq.w fabs_sd_may_ovfl # maybe; go check
13597 blt.w fabs_sd_ovfl # yes; go handle overflow
13598 bra.w fabs_sd_normal # no; go handle normalized op
13601 # operand WILL underflow when moved in to the fp register file
13603 fabs_sd_unfl:
13604 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
13606 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
13608 # if underflow or inexact is enabled, go calculate EXOP first.
13609 mov.b FPCR_ENABLE(%a6),%d1
13610 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
13611 bne.b fabs_sd_unfl_ena # yes
# default result path; fabs_sd_unfl_ena branches back here after it has
# placed the EXOP in fp1.
13613 fabs_sd_unfl_dis:
13614 lea FP_SCR0(%a6),%a0 # pass: result addr
13615 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
13616 bsr.l unf_res # calculate default result
13617 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
13618 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
13619 rts
13622 # operand will underflow AND underflow (or inexact) is enabled.
13623 # therefore, we must return the result rounded to extended precision.
# the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for the
# default-result code at fabs_sd_unfl_dis. d0 = scale factor on entry.
13625 fabs_sd_unfl_ena:
13626 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
13627 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
13628 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
13630 mov.l %d2,-(%sp) # save d2
13631 mov.l %d1,%d2 # make a copy
13632 andi.l &0x7fff,%d1 # strip sign
13633 andi.w &0x8000,%d2 # keep old sign
13634 sub.l %d0,%d1 # subtract scale factor
13635 addi.l &0x6000,%d1 # add new bias
13636 andi.w &0x7fff,%d1 # keep the result to 15 exponent bits
13637 or.w %d2,%d1 # concat new sign,new exp
13638 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
13639 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
13640 mov.l (%sp)+,%d2 # restore d2
13641 bra.b fabs_sd_unfl_dis # now produce the default result too
13644 # operand WILL overflow.
# the fabs is still executed on the scaled operand so that INEX2/N are
# captured from the round to the requested precision.
13646 fabs_sd_ovfl:
13647 fmov.l &0x0,%fpsr # clear FPSR
13648 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13650 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13652 fmov.l &0x0,%fpcr # clear FPCR
13653 fmov.l %fpsr,%d1 # save FPSR
13655 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# fabs_sd_may_ovfl joins here once it has confirmed the overflow.
13657 fabs_sd_ovfl_tst:
13658 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
13660 mov.b FPCR_ENABLE(%a6),%d1
13661 andi.b &0x13,%d1 # is OVFL or INEX enabled?
13662 bne.b fabs_sd_ovfl_ena # yes
13665 # OVFL is not enabled; therefore, we must create the default result by
13666 # calling ovf_res().
# (fabs_sd_ovfl_ena also branches here after building the EXOP.)
13668 fabs_sd_ovfl_dis:
13669 btst &neg_bit,FPSR_CC(%a6) # is result negative?
13670 sne %d1 # set sign param accordingly
13671 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
13672 bsr.l ovf_res # calculate default result
13673 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
13674 fmovm.x (%a0),&0x80 # return default result in fp0
# presumably a0 was set by ovf_res to point at the default result -- verify.
# NOTE(review): listing lines 13675-13677 are missing from this extract;
# presumably they include the rts -- confirm upstream.
13678 # OVFL (or INEX) is enabled.
13679 # the INEX2 bit has already been updated by the round to the correct precision.
13680 # now, round to extended(and don't alter the FPSR).
# d0 = scale factor on entry. The EXOP is built in place in FP_SCR0.
# NOTE(review): unlike fabs_sd_unfl, this path does not clear the sign bit
# of FP_SCR0_EX before building the EXOP -- confirm whether a negative
# source can produce a negative EXOP here.
13682 fabs_sd_ovfl_ena:
13683 mov.l %d2,-(%sp) # save d2
13684 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
13685 mov.l %d1,%d2 # make a copy
13686 andi.l &0x7fff,%d1 # strip sign
13687 andi.w &0x8000,%d2 # keep old sign
13688 sub.l %d0,%d1 # subtract scale factor
13689 subi.l &0x6000,%d1 # subtract bias
13690 andi.w &0x7fff,%d1 # keep the result to 15 exponent bits
13691 or.w %d2,%d1 # concat sign,exp
13692 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
13693 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
13694 mov.l (%sp)+,%d2 # restore d2
13695 bra.b fabs_sd_ovfl_dis # now produce the default result too
13698 # the move in MAY overflow. so...
# perform the operation on the scaled operand and look at the magnitude
# of the rounded result: |result| >= 2 means the rounding carried into
# the next exponent, i.e. the overflow really happened.
13700 fabs_sd_may_ovfl:
13701 fmov.l &0x0,%fpsr # clear FPSR
13702 fmov.l L_SCR3(%a6),%fpcr # set FPCR
13704 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
13706 fmov.l %fpsr,%d1 # save status
13707 fmov.l &0x0,%fpcr # clear FPCR
13709 or.l %d1,USER_FPSR(%a6) # save INEX2,N
13711 fabs.x %fp0,%fp1 # make a copy of result
13712 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
13713 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
13715 # no, it didn't overflow; we have correct result
13716 bra.w fabs_sd_normal_exit
13718 ##########################################################################
13721 # input is not normalized; what is it?
# d1 = STAG as loaded at fabs. DENORMs and NANs are dispatched first;
# whatever remains (INF or ZERO) gets a plain fabs plus the ccode byte.
13723 fabs_not_norm:
13724 cmpi.b %d1,&DENORM # weed out DENORM
13725 beq.w fabs_denorm
13726 cmpi.b %d1,&SNAN # weed out SNAN
13727 beq.l res_snan_1op
13728 cmpi.b %d1,&QNAN # weed out QNAN
13729 beq.l res_qnan_1op
13731 fabs.x SRC(%a0),%fp0 # force absolute value
13733 cmpi.b %d1,&INF # weed out INF
13734 beq.b fabs_inf
13735 fabs_zero:
13736 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
# NOTE(review): listing line 13737 is missing from this extract;
# presumably the rts that ends the ZERO path -- confirm upstream.
13738 fabs_inf:
13739 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
# NOTE(review): listing lines 13740-13741 are missing; presumably they
# include the closing rts -- confirm upstream.
13742 #########################################################################
13743 # XDEF **************************************************************** #
13744 # fcmp(): fp compare op routine #
13746 # XREF **************************************************************** #
13747 # res_qnan() - return QNAN result #
13748 # res_snan() - return SNAN result #
13750 # INPUT *************************************************************** #
13751 # a0 = pointer to extended precision source operand #
13752 # a1 = pointer to extended precision destination operand #
13753 # d0 = round prec/mode #
13755 # OUTPUT ************************************************************** #
13756 # None #
13758 # ALGORITHM *********************************************************** #
13759 # Handle NANs and denorms as special cases. For everything else, #
13760 # just use the actual fcmp instruction to produce the correct condition #
13761 # codes. #
13763 #########################################################################
# fcmp entry: build a table index d1 = (DTAG << 3) | STAG. An index of
# zero means both tags are zero (NORM/NORM) and falls into fcmp_norm.
13765 global fcmp
13766 fcmp:
13767 clr.w %d1
13768 mov.b DTAG(%a6),%d1
13769 lsl.b &0x3,%d1
13770 or.b STAG(%a6),%d1
13771 bne.b fcmp_not_norm # optimize on non-norm input
13774 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
# a1 -> dst operand, a0 -> src operand (the DENORM helpers redirect these
# to patched scratch copies before branching here).
13776 fcmp_norm:
13777 fmovm.x DST(%a1),&0x80 # load dst op
13779 fcmp.x %fp0,SRC(%a0) # do compare
13781 fmov.l %fpsr,%d0 # save FPSR
13782 rol.l &0x8,%d0 # extract ccode bits
13783 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
# NOTE(review): listing lines 13784-13787 are missing from this extract;
# presumably they include the rts -- confirm upstream.
13788 # fcmp: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG selects a 16-bit offset from the table, and
# the jmp goes to tbl_fcmp_op + that offset. Rows are 8 entries wide
# (one row per dst tag); the last two entries of each row are unused
# fillers. Entry comments read "dst - src".
13790 fcmp_not_norm:
13791 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
13792 jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
13794 swbeg &48
13795 tbl_fcmp_op:
13796 short fcmp_norm - tbl_fcmp_op # NORM - NORM
13797 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
13798 short fcmp_norm - tbl_fcmp_op # NORM - INF
13799 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
13800 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
13801 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
13802 short tbl_fcmp_op - tbl_fcmp_op #
13803 short tbl_fcmp_op - tbl_fcmp_op #
13805 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
13806 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
13807 short fcmp_norm - tbl_fcmp_op # ZERO - INF
13808 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
13809 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
13810 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
13811 short tbl_fcmp_op - tbl_fcmp_op #
13812 short tbl_fcmp_op - tbl_fcmp_op #
13814 short fcmp_norm - tbl_fcmp_op # INF - NORM
13815 short fcmp_norm - tbl_fcmp_op # INF - ZERO
13816 short fcmp_norm - tbl_fcmp_op # INF - INF
13817 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
13818 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
13819 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
13820 short tbl_fcmp_op - tbl_fcmp_op #
13821 short tbl_fcmp_op - tbl_fcmp_op #
13823 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
13824 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
13825 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
13826 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
13827 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
13828 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
13829 short tbl_fcmp_op - tbl_fcmp_op #
13830 short tbl_fcmp_op - tbl_fcmp_op #
13832 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
13833 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
13834 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
13835 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
13836 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
13837 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
13838 short tbl_fcmp_op - tbl_fcmp_op #
13839 short tbl_fcmp_op - tbl_fcmp_op #
13841 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
13842 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
13843 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
13844 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
13845 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
13846 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
13847 short tbl_fcmp_op - tbl_fcmp_op #
13848 short tbl_fcmp_op - tbl_fcmp_op #
13850 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
13851 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
13852 fcmp_res_qnan:
13853 bsr.l res_qnan
13854 andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 ('N') of the ccode byte
# NOTE(review): listing line 13855 is missing from this extract;
# presumably the rts that ends the QNAN path -- confirm upstream.
13856 fcmp_res_snan:
13857 bsr.l res_snan
13858 andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 ('N') of the ccode byte
# NOTE(review): listing lines 13859-13861 are missing; presumably they
# include the closing rts -- confirm upstream.
13862 # DENORMs are a little more difficult.
13863 # If you have 2 DENORMs, then you can just force the j-bit to a one
13864 # and use the fcmp_norm routine.
13865 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
13866 # and use the fcmp_norm routine.
13867 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
13868 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
13869 # (1) signs are (+) and the DENORM is the dst or
13870 # (2) signs are (-) and the DENORM is the src
# fcmp_dnrm_s: the src is the DENORM. Copy it to FP_SCR0 with the j-bit
# set, point a0 at the copy, and compare as usual.
13873 fcmp_dnrm_s:
13874 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13875 mov.l SRC_HI(%a0),%d0
13876 bset &31,%d0 # DENORM src; make into small norm
13877 mov.l %d0,FP_SCR0_HI(%a6)
13878 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13879 lea FP_SCR0(%a6),%a0 # a0 = ptr to patched src
13880 bra.w fcmp_norm
# fcmp_dnrm_d: the dst is the DENORM. Copy it to FP_SCR0 with the j-bit
# set, point a1 at the copy, and compare as usual.
# NOTE(review): the exponent copy below is a mov.l whereas fcmp_dnrm_s and
# fcmp_dnrm_sd use mov.w; the extra word lands before FP_SCR0_HI is
# rewritten, so it looks harmless -- confirm it is intentional.
13882 fcmp_dnrm_d:
13883 mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
13884 mov.l DST_HI(%a1),%d0
13885 bset &31,%d0 # DENORM dst; make into small norm
13886 mov.l %d0,FP_SCR0_HI(%a6)
13887 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
13888 lea FP_SCR0(%a6),%a1 # a1 = ptr to patched dst
13889 bra.w fcmp_norm
# fcmp_dnrm_sd: both operands are DENORMs. The dst copy goes to FP_SCR1,
# the src copy to FP_SCR0, each with its j-bit forced to one.
13891 fcmp_dnrm_sd:
13892 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13893 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13894 mov.l DST_HI(%a1),%d0
13895 bset &31,%d0 # DENORM dst; make into small norm
13896 mov.l %d0,FP_SCR1_HI(%a6)
13897 mov.l SRC_HI(%a0),%d0
13898 bset &31,%d0 # DENORM src; make into small norm
13899 mov.l %d0,FP_SCR0_HI(%a6)
13900 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13901 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13902 lea FP_SCR1(%a6),%a1 # a1 = ptr to patched dst
13903 lea FP_SCR0(%a6),%a0 # a0 = ptr to patched src
13904 bra.w fcmp_norm
# fcmp_nrm_dnrm: dst is a NORM, src is a DENORM (table entry
# "NORM - DENORM"). Opposite signs go through the real compare; for like
# signs the 'N' ccode is set only when both operands are negative.
13906 fcmp_nrm_dnrm:
13907 mov.b SRC_EX(%a0),%d0 # determine if like signs
13908 mov.b DST_EX(%a1),%d1
13909 eor.b %d0,%d1
13910 bmi.w fcmp_dnrm_s
13912 # signs are the same, so must determine the answer ourselves.
13913 tst.b %d0 # is src op negative?
13914 bmi.b fcmp_nrm_dnrm_m # yes
# NOTE(review): listing line 13915 is missing from this extract;
# presumably the rts for the positive case -- confirm upstream.
13916 fcmp_nrm_dnrm_m:
13917 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
# NOTE(review): listing lines 13918-13919 are missing; presumably they
# include the closing rts -- confirm upstream.
# fcmp_dnrm_nrm: dst is a DENORM, src is a NORM (table entry
# "DENORM - NORM"). Opposite signs go through the real compare; for like
# signs the 'N' ccode is set only when both operands are positive.
13920 fcmp_dnrm_nrm:
13921 mov.b SRC_EX(%a0),%d0 # determine if like signs
13922 mov.b DST_EX(%a1),%d1
13923 eor.b %d0,%d1
13924 bmi.w fcmp_dnrm_d
13926 # signs are the same, so must determine the answer ourselves.
13927 tst.b %d0 # is src op negative?
13928 bpl.b fcmp_dnrm_nrm_m # no
# NOTE(review): listing line 13929 is missing from this extract;
# presumably the rts for the negative case -- confirm upstream.
13930 fcmp_dnrm_nrm_m:
13931 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
# NOTE(review): listing lines 13932-13933 are missing; presumably they
# include the closing rts -- confirm upstream.
13934 #########################################################################
13935 # XDEF **************************************************************** #
13936 # fsglmul(): emulates the fsglmul instruction #
13938 # XREF **************************************************************** #
13939 # scale_to_zero_src() - scale src exponent to zero #
13940 # scale_to_zero_dst() - scale dst exponent to zero #
13941 # unf_res4() - return default underflow result for sglop #
13942 # ovf_res() - return default overflow result #
13943 # res_qnan() - return QNAN result #
13944 # res_snan() - return SNAN result #
13946 # INPUT *************************************************************** #
13947 # a0 = pointer to extended precision source operand #
13948 # a1 = pointer to extended precision destination operand #
13949 # d0 rnd prec,mode #
13951 # OUTPUT ************************************************************** #
13952 # fp0 = result #
13953 # fp1 = EXOP (if exception occurred) #
13955 # ALGORITHM *********************************************************** #
13956 # Handle NANs, infinities, and zeroes as special cases. Divide #
13957 # norms/denorms into ext/sgl/dbl precision. #
13958 # For norms/denorms, scale the exponents such that a multiply #
13959 # instruction won't cause an exception. Use the regular fsglmul to #
13960 # compute a result. Check if the regular operands would have taken #
13961 # an exception. If so, return the default overflow/underflow result #
13962 # and return the EXOP if exceptions are enabled. Else, scale the #
13963 # result operand to the proper exponent. #
13965 #########################################################################
# fsglmul entry: save the rounding info and build the dispatch index
# d1 = (DTAG << 3) | STAG; zero means NORM x NORM.
13967 global fsglmul
13968 fsglmul:
13969 mov.l %d0,L_SCR3(%a6) # store rnd info
13971 clr.w %d1
13972 mov.b DTAG(%a6),%d1
13973 lsl.b &0x3,%d1
13974 or.b STAG(%a6),%d1
13976 bne.w fsglmul_not_norm # optimize on non-norm input
# copy dst to FP_SCR1 and src to FP_SCR0, scale both, and use the summed
# scale factor (kept in d0) to predict overflow/underflow.
13978 fsglmul_norm:
13979 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
13980 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
13981 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
13983 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
13984 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
13985 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
13987 bsr.l scale_to_zero_src # scale exponent
13988 mov.l %d0,-(%sp) # save scale factor 1
13990 bsr.l scale_to_zero_dst # scale dst exponent
13992 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
13994 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
13995 beq.w fsglmul_may_ovfl # result may rnd to overflow
13996 blt.w fsglmul_ovfl # result will overflow
13998 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
13999 beq.w fsglmul_may_unfl # result may rnd to no unfl
14000 bgt.w fsglmul_unfl # result will underflow
# no branch taken: execution continues into fsglmul_normal, which follows.
# no exception predicted: multiply the scaled operands, then undo the
# scaling on the result. d0 = combined scale factor throughout.
14002 fsglmul_normal:
14003 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14005 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14006 fmov.l &0x0,%fpsr # clear FPSR
14008 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14010 fmov.l %fpsr,%d1 # save status
14011 fmov.l &0x0,%fpcr # clear FPCR
14013 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# shared exit (also reached from fsglmul_may_ovfl and fsglmul_may_unfl).
14015 fsglmul_normal_exit:
14016 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14017 mov.l %d2,-(%sp) # save d2
14018 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14019 mov.l %d1,%d2 # make a copy
14020 andi.l &0x7fff,%d1 # strip sign
14021 andi.w &0x8000,%d2 # keep old sign
14022 sub.l %d0,%d1 # subtract scale factor
14023 or.w %d2,%d1 # concat old sign,new exp
14024 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14025 mov.l (%sp)+,%d2 # restore d2
14026 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# NOTE(review): listing lines 14027-14028 are missing from this extract;
# presumably they include the rts -- confirm upstream.
# overflow is certain: the multiply is still executed so that INEX2/N
# are recorded, then the overflow bits are set.
14029 fsglmul_ovfl:
14030 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14032 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14033 fmov.l &0x0,%fpsr # clear FPSR
14035 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14037 fmov.l %fpsr,%d1 # save status
14038 fmov.l &0x0,%fpcr # clear FPCR
14040 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14042 fsglmul_ovfl_tst:
14044 # save setting this until now because this is where fsglmul_may_ovfl may jump in
14045 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
14047 mov.b FPCR_ENABLE(%a6),%d1
14048 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14049 bne.b fsglmul_ovfl_ena # yes
# default result path; fsglmul_ovfl_ena branches back here after it has
# placed the EXOP in fp1.
14051 fsglmul_ovfl_dis:
14052 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14053 sne %d1 # set sign param accordingly
14054 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14055 andi.b &0x30,%d0 # force prec = ext
14056 bsr.l ovf_res # calculate default result
14057 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14058 fmovm.x (%a0),&0x80 # return default result in fp0
# presumably a0 was set by ovf_res to point at the default result -- verify.
# NOTE(review): listing lines 14059-14060 are missing from this extract;
# presumably they include the rts -- confirm upstream.
# OVFL or INEX is enabled: build the EXOP from the result in fp0 by
# removing the scale factor and subtracting the 0x6000 bias, return it
# in fp1, then fall back to the default-result code.
14061 fsglmul_ovfl_ena:
14062 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14064 mov.l %d2,-(%sp) # save d2
14065 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14066 mov.l %d1,%d2 # make a copy
14067 andi.l &0x7fff,%d1 # strip sign
14068 sub.l %d0,%d1 # subtract scale factor
14069 subi.l &0x6000,%d1 # subtract bias
14070 andi.w &0x7fff,%d1 # keep the result to 15 exponent bits
14071 andi.w &0x8000,%d2 # keep old sign
14072 or.w %d2,%d1 # concat old sign,new exp
14073 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14074 mov.l (%sp)+,%d2 # restore d2
14075 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14076 bra.b fsglmul_ovfl_dis
# borderline overflow: do the multiply on the scaled operands and check
# whether rounding pushed |result| up to 2 or beyond.
14078 fsglmul_may_ovfl:
14079 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14081 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14082 fmov.l &0x0,%fpsr # clear FPSR
14084 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14086 fmov.l %fpsr,%d1 # save status
14087 fmov.l &0x0,%fpcr # clear FPCR
14089 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14091 fabs.x %fp0,%fp1 # make a copy of result
14092 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
14093 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
14095 # no, it didn't overflow; we have correct result
14096 bra.w fsglmul_normal_exit
# underflow is certain. The multiply is done with the FPCR set to
# rz_mode*0x10 (not the user's setting in L_SCR3); unf_res4 then builds
# the default result from the stored product.
14098 fsglmul_unfl:
14099 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14101 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14103 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14104 fmov.l &0x0,%fpsr # clear FPSR
14106 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14108 fmov.l %fpsr,%d1 # save status
14109 fmov.l &0x0,%fpcr # clear FPCR
14111 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14113 mov.b FPCR_ENABLE(%a6),%d1
14114 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14115 bne.b fsglmul_unfl_ena # yes
# default result path; fsglmul_unfl_ena branches back here after it has
# placed the EXOP in fp1.
14117 fsglmul_unfl_dis:
14118 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14120 lea FP_SCR0(%a6),%a0 # pass: result addr
14121 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14122 bsr.l unf_res4 # calculate default result
14123 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14124 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
# NOTE(review): listing lines 14125-14127 are missing from this extract;
# presumably they include the rts -- confirm upstream.
14128 # UNFL (or INEX) is enabled.
# redo the multiply in fp1 with the user's rounding info, then turn that
# product into the EXOP by removing the scale factor and adding the
# 0x6000 bias. fp0 (the product computed in fsglmul_unfl) is left alone.
14130 fsglmul_unfl_ena:
14131 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14133 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14134 fmov.l &0x0,%fpsr # clear FPSR
14136 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14138 fmov.l &0x0,%fpcr # clear FPCR
14140 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14141 mov.l %d2,-(%sp) # save d2
14142 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14143 mov.l %d1,%d2 # make a copy
14144 andi.l &0x7fff,%d1 # strip sign
14145 andi.w &0x8000,%d2 # keep old sign
14146 sub.l %d0,%d1 # subtract scale factor
14147 addi.l &0x6000,%d1 # add bias
14148 andi.w &0x7fff,%d1 # keep the result to 15 exponent bits
14149 or.w %d2,%d1 # concat old sign,new exp
14150 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14151 mov.l (%sp)+,%d2 # restore d2
14152 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14153 bra.w fsglmul_unfl_dis
# borderline underflow: do the multiply on the scaled operands and
# compare |result| with 2. Only the exact-equality case needs the
# second, round-to-zero multiply below.
14155 fsglmul_may_unfl:
14156 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14158 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14159 fmov.l &0x0,%fpsr # clear FPSR
14161 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
14163 fmov.l %fpsr,%d1 # save status
14164 fmov.l &0x0,%fpcr # clear FPCR
14166 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14168 fabs.x %fp0,%fp1 # make a copy of result
14169 fcmp.b %fp1,&0x2 # compare |result| with 2.b
14170 fbgt.w fsglmul_normal_exit # |result| > 2; no underflow occurred
14171 fblt.w fsglmul_unfl # |result| < 2; underflow occurred
14174 # we still don't know if underflow occurred. result is ~ equal to 2. but,
14175 # we don't know if the result was an underflow that rounded up to a 2 or
14176 # a normalized number that rounded down to a 2. so, redo the entire operation
14177 # using RZ as the rounding mode to see what the pre-rounded result is.
14178 # this case should be relatively rare.
# the retry is done in fp1 so the first result in fp0 is preserved for
# fsglmul_normal_exit; d0 (scale factor) is untouched as well.
14180 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14182 mov.l L_SCR3(%a6),%d1
14183 andi.b &0xc0,%d1 # keep rnd prec
14184 ori.b &rz_mode*0x10,%d1 # insert RZ
14186 fmov.l %d1,%fpcr # set FPCR
14187 fmov.l &0x0,%fpsr # clear FPSR
14189 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
14191 fmov.l &0x0,%fpcr # clear FPCR
14192 fabs.x %fp1 # make absolute value
14193 fcmp.b %fp1,&0x2 # is |result| < 2.b?
14194 fbge.w fsglmul_normal_exit # no; no underflow occurred
14195 bra.w fsglmul_unfl # yes, underflow occurred
14197 ##############################################################################
14200 # Single Precision Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG selects a 16-bit offset from the table, and
# the jmp goes to tbl_fsglmul_op + that offset. Rows are 8 entries wide
# (one row per dst tag); the last two entries of each row are unused
# fillers. Entry comments read "dst x src". The DENORM row dispatches
# exactly like the NORM row.
14202 fsglmul_not_norm:
14203 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
14204 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
14206 swbeg &48
14207 tbl_fsglmul_op:
14208 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
14209 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
14210 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
14211 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
14212 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
14213 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
14214 short tbl_fsglmul_op - tbl_fsglmul_op #
14215 short tbl_fsglmul_op - tbl_fsglmul_op #
14217 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
14218 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
14219 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
14220 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
14221 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
14222 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
14223 short tbl_fsglmul_op - tbl_fsglmul_op #
14224 short tbl_fsglmul_op - tbl_fsglmul_op #
14226 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
14227 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
14228 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
14229 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
14230 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
14231 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
14232 short tbl_fsglmul_op - tbl_fsglmul_op #
14233 short tbl_fsglmul_op - tbl_fsglmul_op #
14235 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
14236 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
14237 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
14238 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
14239 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
14240 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
14241 short tbl_fsglmul_op - tbl_fsglmul_op #
14242 short tbl_fsglmul_op - tbl_fsglmul_op #
14244 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
14245 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
14246 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
14247 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
14248 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
14249 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
14250 short tbl_fsglmul_op - tbl_fsglmul_op #
14251 short tbl_fsglmul_op - tbl_fsglmul_op #
14253 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
14254 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
14255 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
14256 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
14257 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
14258 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
14259 short tbl_fsglmul_op - tbl_fsglmul_op #
14260 short tbl_fsglmul_op - tbl_fsglmul_op #
# Local trampolines: the table above holds 16-bit offsets, so each
# special case lands here and is forwarded with a long branch to the
# shared handler (the zero/inf cases reuse the fmul_* handlers).
14262 fsglmul_res_operr:
14263 bra.l res_operr
14264 fsglmul_res_snan:
14265 bra.l res_snan
14266 fsglmul_res_qnan:
14267 bra.l res_qnan
14268 fsglmul_zero:
14269 bra.l fmul_zero
14270 fsglmul_inf_src:
14271 bra.l fmul_inf_src
14272 fsglmul_inf_dst:
14273 bra.l fmul_inf_dst
14275 #########################################################################
14276 # XDEF **************************************************************** #
14277 # fsgldiv(): emulates the fsgldiv instruction #
14279 # XREF **************************************************************** #
14280 # scale_to_zero_src() - scale src exponent to zero #
14281 # scale_to_zero_dst() - scale dst exponent to zero #
14282 # unf_res4() - return default underflow result for sglop #
14283 # ovf_res() - return default overflow result #
14284 # res_qnan() - return QNAN result #
14285 # res_snan() - return SNAN result #
14287 # INPUT *************************************************************** #
14288 # a0 = pointer to extended precision source operand #
14289 # a1 = pointer to extended precision destination operand #
14290 # d0 rnd prec,mode #
14292 # OUTPUT ************************************************************** #
14293 # fp0 = result #
14294 # fp1 = EXOP (if exception occurred) #
14296 # ALGORITHM *********************************************************** #
14297 # Handle NANs, infinities, and zeroes as special cases. Divide #
14298 # norms/denorms into ext/sgl/dbl precision. #
14299 # For norms/denorms, scale the exponents such that a divide #
14300 # instruction won't cause an exception. Use the regular fsgldiv to #
14301 # compute a result. Check if the regular operands would have taken #
14302 # an exception. If so, return the default overflow/underflow result #
14303 # and return the EXOP if exceptions are enabled. Else, scale the #
14304 # result operand to the proper exponent. #
14306 #########################################################################
14308 global fsgldiv
# Entry: d0 = rnd prec,mode; a0 -> src operand; a1 -> dst operand.
# d1 is built as (DTAG << 3) | STAG. Zero means both tags are NORM, in which
# case execution continues at fsgldiv_norm; otherwise fsgldiv_not_norm
# dispatches through tbl_fsgldiv_op.
14309 fsgldiv:
14310 mov.l %d0,L_SCR3(%a6) # store rnd info
14312 clr.w %d1
14313 mov.b DTAG(%a6),%d1
14314 lsl.b &0x3,%d1 # dst tag becomes bits 5:3
14315 or.b STAG(%a6),%d1 # combine src tags
14317 bne.w fsgldiv_not_norm # optimize on non-norm input
14320 # DIVIDE: NORMs and DENORMs ONLY!
14322 fsgldiv_norm:
14323 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
14324 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
14325 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
14327 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
14328 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
14329 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
14331 bsr.l scale_to_zero_src # calculate scale factor 1
14332 mov.l %d0,-(%sp) # save scale factor 1
14334 bsr.l scale_to_zero_dst # calculate scale factor 2
# the two instructions below leave (scale2 - scale1) on the stack
14336 neg.l (%sp) # (%sp) = -scale1
14337 add.l %d0,(%sp) # S.F. = scale2 - scale1
# NOTE(review): d1 computed here is overwritten on every path visible in this
# routine before it is read -- confirm whether it is still needed.
14339 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
14340 lsr.b &0x6,%d1
14341 mov.l (%sp)+,%d0 # d0 = combined scale factor
14342 cmpi.l %d0,&0x3fff-0x7ffe # might the result overflow?
14343 ble.w fsgldiv_may_ovfl # maybe
14345 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
14346 beq.w fsgldiv_may_unfl # maybe
14347 bgt.w fsgldiv_unfl # yes; go handle underflow
# none of the above: continue with the normal-case divide that follows
# Normal case: divide the scaled operands under the caller's rounding
# control, merge the resulting FPSR bits into USER_FPSR, then apply the
# scale factor (d0) to the result exponent.
14349 fsgldiv_normal:
14350 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14352 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14353 fmov.l &0x0,%fpsr # clear FPSR
14355 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
14357 fmov.l %fpsr,%d1 # save FPSR
14358 fmov.l &0x0,%fpcr # clear FPCR
14360 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# Shared exit, also branched to from fsgldiv_may_ovfl and fsgldiv_may_unfl.
# In: fp0 = scaled result, d0 = scale factor. d2 is preserved.
14362 fsgldiv_normal_exit:
14363 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
14364 mov.l %d2,-(%sp) # save d2
14365 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
14366 mov.l %d1,%d2 # make a copy
14367 andi.l &0x7fff,%d1 # strip sign
14368 andi.w &0x8000,%d2 # keep old sign
14369 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
14370 or.w %d2,%d1 # concat old sign,new exp
14371 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14372 mov.l (%sp)+,%d2 # restore d2
14373 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
rts # done; must not fall into fsgldiv_may_ovfl
# The scale factor says overflow is possible. Perform the divide, then test
# the unscaled result exponent: below 0x7fff goes to the normal exit, anything
# else continues into the overflow handling that follows this block.
14376 fsgldiv_may_ovfl:
14377 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14379 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14380 fmov.l &0x0,%fpsr # clear FPSR
14382 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
14384 fmov.l %fpsr,%d1 # fetch status
14385 fmov.l &0x0,%fpcr # clear FPCR
14387 or.l %d1,USER_FPSR(%a6) # save INEX,N
14389 fmovm.x &0x01,-(%sp) # save result to stack
14390 mov.w (%sp),%d1 # fetch new exponent
14391 add.l &0xc,%sp # discard the 12-byte stack copy
14392 andi.l &0x7fff,%d1 # strip sign
14393 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
14394 cmp.l %d1,&0x7fff # did divide overflow?
14395 blt.b fsgldiv_normal_exit # no; finish through the normal exit
# Overflow: record ovfl/aovfl/ainex, then either build the EXOP first
# (OVFL or INEX enabled) or go straight to the default result.
14397 fsgldiv_ovfl_tst:
14398 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
14400 mov.b FPCR_ENABLE(%a6),%d1
14401 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14402 bne.b fsgldiv_ovfl_ena # yes
# Default overflow result. ovf_res is called with d1 = sign flag and
# d0 = rounding mode (precision bits cleared); afterwards d0 is OR'ed into
# the condition codes and (a0) is loaded as the result.
14404 fsgldiv_ovfl_dis:
14405 btst &neg_bit,FPSR_CC(%a6) # is result negative
14406 sne %d1 # set sign param accordingly
14407 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14408 andi.b &0x30,%d0 # kill precision
14409 bsr.l ovf_res # calculate default result
14410 or.b %d0,FPSR_CC(%a6) # set INF if applicable
14411 fmovm.x (%a0),&0x80 # return default result in fp0
rts # done; must not fall into fsgldiv_ovfl_ena
# OVFL or INEX is enabled: produce the EXOP in fp1 from the scaled result
# in fp0 (exponent = unscaled exponent - 0x6000, kept to 15 bits), then
# rejoin fsgldiv_ovfl_dis for the default result. d2 is preserved.
14414 fsgldiv_ovfl_ena:
14415 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
14417 mov.l %d2,-(%sp) # save d2
14418 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14419 mov.l %d1,%d2 # make a copy
14420 andi.l &0x7fff,%d1 # strip sign
14421 andi.w &0x8000,%d2 # keep old sign
14422 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
14423 subi.l &0x6000,%d1 # subtract new bias
14424 andi.w &0x7fff,%d1 # clear ms bit
14425 or.w %d2,%d1 # concat old sign,new exp
14426 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14427 mov.l (%sp)+,%d2 # restore d2
14428 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14429 bra.b fsgldiv_ovfl_dis # now produce the default result
# Underflow: set the UNFL exception bit and redo the divide in round-to-zero
# mode. If UNFL or INEX is enabled, fsgldiv_unfl_ena builds the EXOP first.
14431 fsgldiv_unfl:
14432 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14434 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14436 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14437 fmov.l &0x0,%fpsr # clear FPSR
14439 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14441 fmov.l %fpsr,%d1 # save status
14442 fmov.l &0x0,%fpcr # clear FPCR
14444 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14446 mov.b FPCR_ENABLE(%a6),%d1
14447 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14448 bne.b fsgldiv_unfl_ena # yes
# Default underflow result: unf_res4 is called with a0 -> FP_SCR0 and
# d1 = rnd prec,mode; d0 on return is OR'ed into the condition codes.
14450 fsgldiv_unfl_dis:
14451 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14453 lea FP_SCR0(%a6),%a0 # pass: result addr
14454 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14455 bsr.l unf_res4 # calculate default result
14456 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14457 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
rts # done; must not fall into fsgldiv_unfl_ena
14461 # UNFL or INEX is enabled.
# Re-execute the divide into fp1 under the caller's rounding control and
# return it as the EXOP with exponent = unscaled exponent + 0x6000 (kept to
# 15 bits). fp0 is not modified here, so fsgldiv_unfl_dis still sees the
# round-to-zero result. d2 is preserved.
14463 fsgldiv_unfl_ena:
14464 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14466 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14467 fmov.l &0x0,%fpsr # clear FPSR
14469 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14471 fmov.l &0x0,%fpcr # clear FPCR
14473 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14474 mov.l %d2,-(%sp) # save d2
14475 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14476 mov.l %d1,%d2 # make a copy
14477 andi.l &0x7fff,%d1 # strip sign
14478 andi.w &0x8000,%d2 # keep old sign
14479 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
14480 addi.l &0x6000,%d1 # add bias
14481 andi.w &0x7fff,%d1 # clear top bit
14482 or.w %d2,%d1 # concat old sign, new exp
14483 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14484 mov.l (%sp)+,%d2 # restore d2
14485 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14486 bra.b fsgldiv_unfl_dis # now produce the default result
14489 # the divide operation MAY underflow:
# Perform the divide and compare |result| with 1: greater means no underflow,
# less means underflow. When equal, the divide is repeated in round-to-zero
# mode (fp1 only) to see what the pre-rounded magnitude was.
14491 fsgldiv_may_unfl:
14492 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14494 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14495 fmov.l &0x0,%fpsr # clear FPSR
14497 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
14499 fmov.l %fpsr,%d1 # save status
14500 fmov.l &0x0,%fpcr # clear FPCR
14502 or.l %d1,USER_FPSR(%a6) # save INEX2,N
14504 fabs.x %fp0,%fp1 # fp1 = |result|; fp0 kept intact
14505 fcmp.b %fp1,&0x1 # compare |result| with 1
14506 fbgt.w fsgldiv_normal_exit # |result| > 1; no underflow occurred
14507 fblt.w fsgldiv_unfl # |result| < 1; underflow occurred
14510 # we still don't know if underflow occurred. result is ~ equal to 1. but,
14511 # we don't know if the result was an underflow that rounded up to a 1
14512 # or a normalized number that rounded down to a 1. so, redo the entire
14513 # operation using RZ as the rounding mode to see what the pre-rounded
14514 # result is. this case should be relatively rare.
14516 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
14518 clr.l %d1 # clear scratch register
14519 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
14521 fmov.l %d1,%fpcr # set FPCR
14522 fmov.l &0x0,%fpsr # clear FPSR
14524 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
14526 fmov.l &0x0,%fpcr # clear FPCR
14527 fabs.x %fp1 # make absolute value
14528 fcmp.b %fp1,&0x1 # is |result| < 1.b?
14529 fbge.w fsgldiv_normal_exit # no; no underflow occurred
14530 bra.w fsgldiv_unfl # yes; underflow occurred
14532 ############################################################################
14535 # Divide: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry. The table holds 16-bit offsets relative to
# tbl_fsgldiv_op: the first instruction fetches the offset for this tag pair,
# the second jumps to tbl_fsgldiv_op + offset. Rows are selected by the dst
# tag, columns by the src tag; the two unlabelled slots in each row hold
# offset 0.
14537 fsgldiv_not_norm:
14538 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
14539 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
14541 swbeg &48
14542 tbl_fsgldiv_op:
14543 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
14544 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
14545 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
14546 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
14547 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
14548 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
14549 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14550 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14552 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
14553 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
14554 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
14555 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
14556 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
14557 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
14558 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14559 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14561 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
14562 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
14563 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
14564 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
14565 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
14566 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
14567 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14568 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14570 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
14571 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
14572 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
14573 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
14574 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
14575 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
14576 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14577 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14579 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
14580 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
14581 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
14582 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
14583 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
14584 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
14585 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14586 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14588 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
14589 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
14590 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
14591 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
14592 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
14593 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
14594 short tbl_fsgldiv_op - tbl_fsgldiv_op #
14595 short tbl_fsgldiv_op - tbl_fsgldiv_op #
# Long-branch stubs: the table can only hold 16-bit offsets, so each special
# case lands here and is forwarded with bra.l.
14597 fsgldiv_res_qnan:
14598 bra.l res_qnan
14599 fsgldiv_res_snan:
14600 bra.l res_snan
14601 fsgldiv_res_operr:
14602 bra.l res_operr
14603 fsgldiv_inf_load:
14604 bra.l fdiv_inf_load
14605 fsgldiv_zero_load:
14606 bra.l fdiv_zero_load
14607 fsgldiv_inf_dst:
14608 bra.l fdiv_inf_dst
14610 #########################################################################
14611 # XDEF **************************************************************** #
14612 # fadd(): emulates the fadd instruction #
14613 # fsadd(): emulates the fsadd instruction #
14614 # fdadd(): emulates the fdadd instruction #
14616 # XREF **************************************************************** #
14617 # addsub_scaler2() - scale the operands so they won't take exc #
14618 # ovf_res() - return default overflow result #
14619 # unf_res() - return default underflow result #
14620 # res_qnan() - set QNAN result #
14621 # res_snan() - set SNAN result #
14622 # res_operr() - set OPERR result #
14623 # scale_to_zero_src() - set src operand exponent equal to zero #
14624 # scale_to_zero_dst() - set dst operand exponent equal to zero #
14626 # INPUT *************************************************************** #
14627 # a0 = pointer to extended precision source operand #
14628 # a1 = pointer to extended precision destination operand #
14630 # OUTPUT ************************************************************** #
14631 # fp0 = result #
14632 # fp1 = EXOP (if exception occurred) #
14634 # ALGORITHM *********************************************************** #
14635 # Handle NANs, infinities, and zeroes as special cases. Divide #
14636 # norms into extended, single, and double precision. #
14637 # Do addition after scaling exponents such that exception won't #
14638 # occur. Then, check result exponent to see if exception would have #
14639 # occurred. If so, return default result and maybe EXOP. Else, insert #
14640 # the correct result exponent and return. Set FPSR bits as appropriate. #
14642 #########################################################################
14644 global fsadd
# fsadd / fdadd: replace the precision bits of d0 (keeping the 0x30 rounding
# mode field) with single / double, then share the fadd body.
14645 fsadd:
14646 andi.b &0x30,%d0 # clear rnd prec
14647 ori.b &s_mode*0x10,%d0 # insert sgl prec
14648 bra.b fadd
14650 global fdadd
14651 fdadd:
14652 andi.b &0x30,%d0 # clear rnd prec
14653 ori.b &d_mode*0x10,%d0 # insert dbl prec
# no branch needed: fadd follows immediately
14655 global fadd
# fadd entry: d0 = rnd prec,mode; a0 -> src operand; a1 -> dst operand.
# d1 = (DTAG << 3) | STAG; non-zero is dispatched through tbl_fadd_op.
14656 fadd:
14657 mov.l %d0,L_SCR3(%a6) # store rnd info
14659 clr.w %d1
14660 mov.b DTAG(%a6),%d1
14661 lsl.b &0x3,%d1 # dst tag becomes bits 5:3
14662 or.b STAG(%a6),%d1 # combine src tags
14664 bne.w fadd_not_norm # optimize on non-norm input
14667 # ADD: norms and denorms
# Scale both operands, add them under the caller's rounding control, then
# classify the unscaled result exponent against the per-precision overflow and
# underflow thresholds. fadd_zero_entry is also entered from fadd_zero_dst and
# fadd_zero_src with d0 already holding the scale factor.
# Stack while classifying: 12-byte result on top, saved d2 beneath it.
14669 fadd_norm:
14670 bsr.l addsub_scaler2 # scale exponents
14672 fadd_zero_entry:
14673 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14675 fmov.l &0x0,%fpsr # clear FPSR
14676 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14678 fadd.x FP_SCR0(%a6),%fp0 # execute add
14680 fmov.l &0x0,%fpcr # clear FPCR
14681 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
14683 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
14685 fbeq.w fadd_zero_exit # if result is zero, end now
14687 mov.l %d2,-(%sp) # save d2
14689 fmovm.x &0x01,-(%sp) # save result to stack
14691 mov.w 2+L_SCR3(%a6),%d1 # low word of rnd info
14692 lsr.b &0x6,%d1 # d1 = precision index for the tables
14694 mov.w (%sp),%d2 # fetch new sign, exp
14695 andi.l &0x7fff,%d2 # strip sign
14696 sub.l %d0,%d2 # apply scale factor (exp - S.F.)
14698 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
14699 bge.b fadd_ovfl # yes
14701 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
14702 blt.w fadd_unfl # yes
14703 beq.w fadd_may_unfl # maybe; go find out
# In range: write the unscaled exponent into the stacked result and return it.
14705 fadd_normal:
14706 mov.w (%sp),%d1
14707 andi.w &0x8000,%d1 # keep sign
14708 or.w %d2,%d1 # concat sign,new exp
14709 mov.w %d1,(%sp) # insert new exponent
14711 fmovm.x (%sp)+,&0x80 # return result in fp0
14713 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fadd_zero_exit
# Zero result: fp0 already holds it and its condition codes were saved above.
14716 fadd_zero_exit:
14717 # fmov.s &0x00000000,%fp0 # return zero in fp0
rts # done; the threshold tables follow as data
# Exponent thresholds used by fadd, indexed by the precision field taken from
# the rnd info (entry order: ext, sgl, dbl). An unscaled result exponent
# >= the tbl_fadd_ovfl entry is treated as overflow.
14720 tbl_fadd_ovfl:
14721 long 0x7fff # ext ovfl
14722 long 0x407f # sgl ovfl
14723 long 0x43ff # dbl ovfl
# An unscaled result exponent below the tbl_fadd_unfl entry is an underflow;
# equal to it is checked further in fadd_may_unfl.
14725 tbl_fadd_unfl:
14726 long 0x0000 # ext unfl
14727 long 0x3f81 # sgl unfl
14728 long 0x3c01 # dbl unfl
# Overflow. On entry the stack holds the 12-byte result above the saved d2.
# With OVFL and INEX both disabled the stacked result is simply discarded;
# otherwise fadd_ovfl_ena consumes it to build the EXOP.
14730 fadd_ovfl:
14731 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
14733 mov.b FPCR_ENABLE(%a6),%d1
14734 andi.b &0x13,%d1 # is OVFL or INEX enabled?
14735 bne.b fadd_ovfl_ena # yes
14737 add.l &0xc,%sp # drop the stacked result
# Default overflow result; only the saved d2 remains on the stack here.
14738 fadd_ovfl_dis:
14739 btst &neg_bit,FPSR_CC(%a6) # is result negative?
14740 sne %d1 # set sign param accordingly
14741 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
14742 bsr.l ovf_res # calculate default result
14743 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
14744 fmovm.x (%a0),&0x80 # return default result in fp0
14745 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fadd_ovfl_ena
# Overflow with OVFL or INEX enabled: return the EXOP in fp1, then rejoin
# fadd_ovfl_dis for the default result.
# In: stack = 12-byte result above saved d2; d2 = unscaled result exponent;
#     d0 = scale factor.
# For extended precision the stacked result is used directly. For sgl/dbl the
# add is redone with only the rounding mode in FPCR (precision bits cleared)
# and that result is used instead.
#
# The precision test must look at the low byte of the stored rnd info
# (3+L_SCR3), the byte the other readers in this routine use; a byte read at
# L_SCR3 itself fetches the most-significant byte of the long, so the sgl/dbl
# branch below could not see the precision bits.
14748 fadd_ovfl_ena:
14749 mov.b 3+L_SCR3(%a6),%d1 # low byte of rnd info: prec,mode
14750 andi.b &0xc0,%d1 # is precision extended?
14751 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
14753 fadd_ovfl_ena_cont:
14754 mov.w (%sp),%d1 # fetch {sgn,exp} of stacked result
14755 andi.w &0x8000,%d1 # keep sign
14756 subi.l &0x6000,%d2 # subtract EXOP bias
14757 andi.w &0x7fff,%d2 # keep 15 exponent bits
14758 or.w %d2,%d1 # concat sign,new exp
14759 mov.w %d1,(%sp) # insert new exponent
14761 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
14762 bra.b fadd_ovfl_dis
14764 fadd_ovfl_ena_sd:
14765 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14767 mov.l L_SCR3(%a6),%d1
14768 andi.b &0x30,%d1 # keep rnd mode
14769 fmov.l %d1,%fpcr # set FPCR
14771 fadd.x FP_SCR0(%a6),%fp0 # execute add
14773 fmov.l &0x0,%fpcr # clear FPCR
14775 add.l &0xc,%sp # drop the earlier stacked result
14776 fmovm.x &0x01,-(%sp) # stack the re-executed result
# The re-executed result can round to a different exponent than the first one,
# so recompute the unscaled exponent from what is now on the stack.
mov.w (%sp),%d2 # fetch new sign, exp
andi.l &0x7fff,%d2 # strip sign
sub.l %d0,%d2 # apply scale factor (exp - S.F.)
14777 bra.b fadd_ovfl_ena_cont
# Underflow. On entry the stack holds the 12-byte result above the saved d2.
# The stacked result is discarded and the add is redone in round-to-zero mode;
# if UNFL or INEX is enabled, fadd_unfl_ena builds the EXOP first.
14779 fadd_unfl:
14780 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
14782 add.l &0xc,%sp # drop the stacked result
14784 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
14786 fmov.l &rz_mode*0x10,%fpcr # set FPCR
14787 fmov.l &0x0,%fpsr # clear FPSR
14789 fadd.x FP_SCR0(%a6),%fp0 # execute add
14791 fmov.l &0x0,%fpcr # clear FPCR
14792 fmov.l %fpsr,%d1 # save status
14794 or.l %d1,USER_FPSR(%a6) # save INEX,N
14796 mov.b FPCR_ENABLE(%a6),%d1
14797 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
14798 bne.b fadd_unfl_ena # yes
# Default underflow result: unf_res is called with a0 -> FP_SCR0 and
# d1 = rnd prec,mode; d0 on return is OR'ed into the condition codes.
14800 fadd_unfl_dis:
14801 fmovm.x &0x80,FP_SCR0(%a6) # store out result
14803 lea FP_SCR0(%a6),%a0 # pass: result addr
14804 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
14805 bsr.l unf_res # calculate default result
14806 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
14807 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
14808 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fadd_unfl_ena
# Underflow with UNFL or INEX enabled: re-execute the add into fp1 and return
# it as the EXOP with exponent = unscaled exponent + 0x6000 (kept to 15 bits).
# Extended precision uses the caller's full rounding control; sgl/dbl use the
# rounding mode only (fadd_unfl_ena_sd). d2 is used as scratch here; the copy
# saved on the stack is restored later in fadd_unfl_dis.
14811 fadd_unfl_ena:
14812 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
14814 mov.l L_SCR3(%a6),%d1
14815 andi.b &0xc0,%d1 # is precision extended?
14816 bne.b fadd_unfl_ena_sd # no; sgl or dbl
14818 fmov.l L_SCR3(%a6),%fpcr # set FPCR
14820 fadd_unfl_ena_cont:
14821 fmov.l &0x0,%fpsr # clear FPSR
14823 fadd.x FP_SCR0(%a6),%fp1 # execute add
14825 fmov.l &0x0,%fpcr # clear FPCR
14827 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
14828 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
14829 mov.l %d1,%d2 # make a copy
14830 andi.l &0x7fff,%d1 # strip sign
14831 andi.w &0x8000,%d2 # keep old sign
14832 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
14833 addi.l &0x6000,%d1 # add new bias
14834 andi.w &0x7fff,%d1 # clear top bit
14835 or.w %d2,%d1 # concat sign,new exp
14836 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
14837 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
14838 bra.w fadd_unfl_dis # now produce the default result
14840 fadd_unfl_ena_sd:
14841 mov.l L_SCR3(%a6),%d1
14842 andi.b &0x30,%d1 # use only rnd mode
14843 fmov.l %d1,%fpcr # set FPCR
14845 bra.b fadd_unfl_ena_cont
14848 # result is equal to the smallest normalized number in the selected precision
14849 # if the precision is extended, this result could not have come from an
14850 # underflow that rounded up.
# On entry the stack holds the 12-byte result above the saved d2, and fp0
# still holds the scaled result. Every exit goes to fadd_normal or fadd_unfl
# with the stack unchanged.
14852 fadd_may_unfl:
14853 mov.l L_SCR3(%a6),%d1
14854 andi.b &0xc0,%d1 # is precision extended?
14855 beq.w fadd_normal # yes; no underflow occurred
14857 mov.l 0x4(%sp),%d1 # extract hi(man)
14858 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
14859 bne.w fadd_normal # no; no underflow occurred
14861 tst.l 0x8(%sp) # is lo(man) = 0x0?
14862 bne.w fadd_normal # no; no underflow occurred
14864 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
14865 beq.w fadd_normal # no; no underflow occurred
14868 # ok, so now the result has a exponent equal to the smallest normalized
14869 # exponent for the selected precision. also, the mantissa is equal to
14870 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
14871 # g,r,s.
14872 # now, we must determine whether the pre-rounded result was an underflow
14873 # rounded "up" or a normalized number rounded "down".
14874 # so, we do this by re-executing the add using RZ as the rounding mode and
14875 # seeing if the new result is smaller or equal to the current result.
14877 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
14879 mov.l L_SCR3(%a6),%d1
14880 andi.b &0xc0,%d1 # keep rnd prec
14881 ori.b &rz_mode*0x10,%d1 # insert rnd mode
14882 fmov.l %d1,%fpcr # set FPCR
14883 fmov.l &0x0,%fpsr # clear FPSR
14885 fadd.x FP_SCR0(%a6),%fp1 # execute add
14887 fmov.l &0x0,%fpcr # clear FPCR
14889 fabs.x %fp0 # compare absolute values
14890 fabs.x %fp1
14891 fcmp.x %fp0,%fp1 # is first result > second?
14893 fbgt.w fadd_unfl # yes; it's an underflow
14894 bra.w fadd_normal # no; it's not an underflow
14896 ##########################################################################
14899 # Add: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry. The table holds 16-bit offsets relative to
# tbl_fadd_op: the first instruction fetches the offset for this tag pair, the
# second jumps to tbl_fadd_op + offset. Rows are selected by the dst tag,
# columns by the src tag; the two unlabelled slots in each row hold offset 0.
14901 fadd_not_norm:
14902 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
14903 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
14905 swbeg &48
14906 tbl_fadd_op:
14907 short fadd_norm - tbl_fadd_op # NORM + NORM
14908 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
14909 short fadd_inf_src - tbl_fadd_op # NORM + INF
14910 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
14911 short fadd_norm - tbl_fadd_op # NORM + DENORM
14912 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
14913 short tbl_fadd_op - tbl_fadd_op #
14914 short tbl_fadd_op - tbl_fadd_op #
14916 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
14917 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
14918 short fadd_inf_src - tbl_fadd_op # ZERO + INF
14919 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
14920 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
14921 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
14922 short tbl_fadd_op - tbl_fadd_op #
14923 short tbl_fadd_op - tbl_fadd_op #
14925 short fadd_inf_dst - tbl_fadd_op # INF + NORM
14926 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
14927 short fadd_inf_2 - tbl_fadd_op # INF + INF
14928 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
14929 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
14930 short fadd_res_snan - tbl_fadd_op # INF + SNAN
14931 short tbl_fadd_op - tbl_fadd_op #
14932 short tbl_fadd_op - tbl_fadd_op #
14934 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
14935 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
14936 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
14937 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
14938 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
14939 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
14940 short tbl_fadd_op - tbl_fadd_op #
14941 short tbl_fadd_op - tbl_fadd_op #
14943 short fadd_norm - tbl_fadd_op # DENORM + NORM
14944 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
14945 short fadd_inf_src - tbl_fadd_op # DENORM + INF
14946 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
14947 short fadd_norm - tbl_fadd_op # DENORM + DENORM
14948 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
14949 short tbl_fadd_op - tbl_fadd_op #
14950 short tbl_fadd_op - tbl_fadd_op #
14952 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
14953 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
14954 short fadd_res_snan - tbl_fadd_op # SNAN + INF
14955 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
14956 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
14957 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
14958 short tbl_fadd_op - tbl_fadd_op #
14959 short tbl_fadd_op - tbl_fadd_op #
# Long-branch stubs for the NAN cases (the table only holds 16-bit offsets).
14961 fadd_res_qnan:
14962 bra.l res_qnan
14963 fadd_res_snan:
14964 bra.l res_snan
14967 # both operands are ZEROes
# The byte at SRC_EX / DST_EX is the high byte of the {sgn,exp} word, so its
# top bit is the operand's sign; the EOR's N flag is set when the signs differ.
14969 fadd_zero_2:
14970 mov.b SRC_EX(%a0),%d0 # are the signs opposite
14971 mov.b DST_EX(%a1),%d1
14972 eor.b %d0,%d1
14973 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
14975 # the signs are the same. so determine whether they are positive or negative
14976 # and return the appropriately signed zero.
14977 tst.b %d0 # are ZEROes positive or negative?
14978 bmi.b fadd_zero_rm # negative
14979 fmov.s &0x00000000,%fp0 # return +ZERO
14980 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts # done; must not fall into fadd_zero_2_chk_rm
14984 # the ZEROes have opposite signs:
14985 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
14986 # - -ZERO is returned in the case of RM.
14988 fadd_zero_2_chk_rm:
14989 mov.b 3+L_SCR3(%a6),%d1 # low byte of rnd info
14990 andi.b &0x30,%d1 # extract rnd mode
14991 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
14992 beq.b fadd_zero_rm # yes
14993 fmov.s &0x00000000,%fp0 # return +ZERO
14994 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts # done; must not fall into fadd_zero_rm
14997 fadd_zero_rm:
14998 fmov.s &0x80000000,%fp0 # return -ZERO
14999 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
rts # done; must not fall into fadd_zero_dst
15003 # one operand is a ZERO and the other is a DENORM or NORM. scale
15004 # the DENORM or NORM and jump to the regular fadd routine.
# fadd_zero_dst: dst is the ZERO. Copy src to FP_SCR0, scale it (scale factor
# returned in d0), and present an all-zero FP_SCR1 as the dst operand.
15006 fadd_zero_dst:
15007 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15008 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15009 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15010 bsr.l scale_to_zero_src # scale the operand
15011 clr.w FP_SCR1_EX(%a6) # FP_SCR1 = all-zero operand
15012 clr.l FP_SCR1_HI(%a6)
15013 clr.l FP_SCR1_LO(%a6)
15014 bra.w fadd_zero_entry # go execute fadd
# fadd_zero_src: src is the ZERO. Mirror image of the above with the roles of
# FP_SCR0 and FP_SCR1 swapped.
15016 fadd_zero_src:
15017 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15018 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15019 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15020 bsr.l scale_to_zero_dst # scale the operand
15021 clr.w FP_SCR0_EX(%a6) # FP_SCR0 = all-zero operand
15022 clr.l FP_SCR0_HI(%a6)
15023 clr.l FP_SCR0_LO(%a6)
15024 bra.w fadd_zero_entry # go execute fadd
15027 # both operands are INFs. an OPERR will result if the INFs have
15028 # different signs. else, an INF of the same sign is returned
15030 fadd_inf_2:
15031 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15032 mov.b DST_EX(%a1),%d1
15033 eor.b %d1,%d0
15034 bmi.l res_operr # weed out (-INF)+(+INF)
15036 # ok, so it's not an OPERR. but, we do have to remember to return the
15037 # src INF since that's where the 881/882 gets the j-bit from...
# (deliberate fall-through into fadd_inf_src)
15040 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15042 fadd_inf_src:
15043 fmovm.x SRC(%a0),&0x80 # return src INF
15044 tst.b SRC_EX(%a0) # is INF positive?
15045 bpl.b fadd_inf_done # yes; we're done
15046 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts # done; falling into fadd_inf_dst would reload fp0
15050 # operands are INF and one of {ZERO, INF, DENORM, NORM}
15052 fadd_inf_dst:
15053 fmovm.x DST(%a1),&0x80 # return dst INF
15054 tst.b DST_EX(%a1) # is INF positive?
15055 bpl.b fadd_inf_done # yes; we're done
15056 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts # done; falling into fadd_inf_done would drop NEG
# positive INF: only the INF condition code is set
15059 fadd_inf_done:
15060 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
rts # done
15063 #########################################################################
15064 # XDEF **************************************************************** #
15065 # fsub(): emulates the fsub instruction #
15066 # fssub(): emulates the fssub instruction #
15067 # fdsub(): emulates the fdsub instruction #
15069 # XREF **************************************************************** #
15070 # addsub_scaler2() - scale the operands so they won't take exc #
15071 # ovf_res() - return default overflow result #
15072 # unf_res() - return default underflow result #
15073 # res_qnan() - set QNAN result #
15074 # res_snan() - set SNAN result #
15075 # res_operr() - set OPERR result #
15076 # scale_to_zero_src() - set src operand exponent equal to zero #
15077 # scale_to_zero_dst() - set dst operand exponent equal to zero #
15079 # INPUT *************************************************************** #
15080 # a0 = pointer to extended precision source operand #
15081 # a1 = pointer to extended precision destination operand #
15083 # OUTPUT ************************************************************** #
15084 # fp0 = result #
15085 # fp1 = EXOP (if exception occurred) #
15087 # ALGORITHM *********************************************************** #
15088 # Handle NANs, infinities, and zeroes as special cases. Divide #
15089 # norms into extended, single, and double precision. #
15090 # Do subtraction after scaling exponents such that exception won't#
15091 # occur. Then, check result exponent to see if exception would have #
15092 # occurred. If so, return default result and maybe EXOP. Else, insert #
15093 # the correct result exponent and return. Set FPSR bits as appropriate. #
15095 #########################################################################
15097 global fssub
# fssub / fdsub: replace the precision bits of d0 (keeping the 0x30 rounding
# mode field) with single / double, then share the fsub body.
15098 fssub:
15099 andi.b &0x30,%d0 # clear rnd prec
15100 ori.b &s_mode*0x10,%d0 # insert sgl prec
15101 bra.b fsub
15103 global fdsub
15104 fdsub:
15105 andi.b &0x30,%d0 # clear rnd prec
15106 ori.b &d_mode*0x10,%d0 # insert dbl prec
# no branch needed: fsub follows immediately
15108 global fsub
# fsub entry: d0 = rnd prec,mode; a0 -> src operand; a1 -> dst operand.
# d1 = (DTAG << 3) | STAG; non-zero goes to fsub_not_norm.
15109 fsub:
15110 mov.l %d0,L_SCR3(%a6) # store rnd info
15112 clr.w %d1
15113 mov.b DTAG(%a6),%d1
15114 lsl.b &0x3,%d1 # dst tag becomes bits 5:3
15115 or.b STAG(%a6),%d1 # combine src tags
15117 bne.w fsub_not_norm # optimize on non-norm input
15120 # SUB: norms and denorms
# Scale both operands, subtract under the caller's rounding control, then
# classify the unscaled result exponent against the per-precision overflow and
# underflow thresholds.
# Stack while classifying: 12-byte result on top, saved d2 beneath it.
15122 fsub_norm:
15123 bsr.l addsub_scaler2 # scale exponents
15125 fsub_zero_entry:
15126 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15128 fmov.l &0x0,%fpsr # clear FPSR
15129 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15131 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15133 fmov.l &0x0,%fpcr # clear FPCR
15134 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
15136 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
15138 fbeq.w fsub_zero_exit # if result zero, end now
15140 mov.l %d2,-(%sp) # save d2
15142 fmovm.x &0x01,-(%sp) # save result to stack
15144 mov.w 2+L_SCR3(%a6),%d1 # low word of rnd info
15145 lsr.b &0x6,%d1 # d1 = precision index for the tables
15147 mov.w (%sp),%d2 # fetch new exponent
15148 andi.l &0x7fff,%d2 # strip sign
15149 sub.l %d0,%d2 # apply scale factor (exp - S.F.)
15151 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
15152 bge.b fsub_ovfl # yes
15154 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
15155 blt.w fsub_unfl # yes
15156 beq.w fsub_may_unfl # maybe; go find out
# In range: write the unscaled exponent into the stacked result and return it.
15158 fsub_normal:
15159 mov.w (%sp),%d1
15160 andi.w &0x8000,%d1 # keep sign
15161 or.w %d2,%d1 # concat sign,new exp
15162 mov.w %d1,(%sp) # insert new exponent
15164 fmovm.x (%sp)+,&0x80 # return result in fp0
15166 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fsub_zero_exit
# Zero result: fp0 already holds it and its condition codes were saved above.
15169 fsub_zero_exit:
15170 # fmov.s &0x00000000,%fp0 # return zero in fp0
rts # done; the threshold tables follow as data
# Exponent thresholds used by fsub, indexed by the precision field taken from
# the rnd info (entry order: ext, sgl, dbl). An unscaled result exponent
# >= the tbl_fsub_ovfl entry is treated as overflow.
15173 tbl_fsub_ovfl:
15174 long 0x7fff # ext ovfl
15175 long 0x407f # sgl ovfl
15176 long 0x43ff # dbl ovfl
# An unscaled result exponent below the tbl_fsub_unfl entry is an underflow;
# equal to it is checked further in fsub_may_unfl.
15178 tbl_fsub_unfl:
15179 long 0x0000 # ext unfl
15180 long 0x3f81 # sgl unfl
15181 long 0x3c01 # dbl unfl
# Overflow. On entry the stack holds the 12-byte result above the saved d2.
# With OVFL and INEX both disabled the stacked result is simply discarded;
# otherwise fsub_ovfl_ena consumes it to build the EXOP.
15183 fsub_ovfl:
15184 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15186 mov.b FPCR_ENABLE(%a6),%d1
15187 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15188 bne.b fsub_ovfl_ena # yes
15190 add.l &0xc,%sp # drop the stacked result
# Default overflow result; only the saved d2 remains on the stack here.
15191 fsub_ovfl_dis:
15192 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15193 sne %d1 # set sign param accordingly
15194 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
15195 bsr.l ovf_res # calculate default result
15196 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15197 fmovm.x (%a0),&0x80 # return default result in fp0
15198 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fsub_ovfl_ena
# Overflow with OVFL or INEX enabled: return the EXOP in fp1, then rejoin
# fsub_ovfl_dis for the default result.
# In: stack = 12-byte result above saved d2; d2 = unscaled result exponent;
#     d0 = scale factor.
# For extended precision the stacked result is used directly. For sgl/dbl the
# subtract is redone with only the rounding mode in FPCR (precision bits
# cleared) and that result is used instead.
#
# The precision test must look at the low byte of the stored rnd info
# (3+L_SCR3); a byte read at L_SCR3 itself fetches the most-significant byte
# of the long, so the sgl/dbl branch below could not see the precision bits.
15201 fsub_ovfl_ena:
15202 mov.b 3+L_SCR3(%a6),%d1 # low byte of rnd info: prec,mode
15203 andi.b &0xc0,%d1 # is precision extended?
15204 bne.b fsub_ovfl_ena_sd # no
15206 fsub_ovfl_ena_cont:
15207 mov.w (%sp),%d1 # fetch {sgn,exp}
15208 andi.w &0x8000,%d1 # keep sign
15209 subi.l &0x6000,%d2 # subtract new bias
15210 andi.w &0x7fff,%d2 # clear top bit
15211 or.w %d2,%d1 # concat sign,exp
15212 mov.w %d1,(%sp) # insert new exponent
15214 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
15215 bra.b fsub_ovfl_dis
15217 fsub_ovfl_ena_sd:
15218 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15220 mov.l L_SCR3(%a6),%d1
15221 andi.b &0x30,%d1 # clear rnd prec
15222 fmov.l %d1,%fpcr # set FPCR
15224 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15226 fmov.l &0x0,%fpcr # clear FPCR
15228 add.l &0xc,%sp # drop the earlier stacked result
15229 fmovm.x &0x01,-(%sp) # stack the re-executed result
# The re-executed result can round to a different exponent than the first one,
# so recompute the unscaled exponent from what is now on the stack.
mov.w (%sp),%d2 # fetch new sign, exp
andi.l &0x7fff,%d2 # strip sign
sub.l %d0,%d2 # apply scale factor (exp - S.F.)
15230 bra.b fsub_ovfl_ena_cont
# Underflow. On entry the stack holds the 12-byte result above the saved d2.
# The stacked result is discarded and the subtract is redone in round-to-zero
# mode; if UNFL or INEX is enabled, fsub_unfl_ena builds the EXOP first.
15232 fsub_unfl:
15233 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15235 add.l &0xc,%sp # drop the stacked result
15237 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
15239 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15240 fmov.l &0x0,%fpsr # clear FPSR
15242 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
15244 fmov.l &0x0,%fpcr # clear FPCR
15245 fmov.l %fpsr,%d1 # save status
15247 or.l %d1,USER_FPSR(%a6) # merge status into USER_FPSR
15249 mov.b FPCR_ENABLE(%a6),%d1
15250 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15251 bne.b fsub_unfl_ena # yes
# Default underflow result: unf_res is called with a0 -> FP_SCR0 and
# d1 = rnd prec,mode; d0 on return is OR'ed into the condition codes.
15253 fsub_unfl_dis:
15254 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15256 lea FP_SCR0(%a6),%a0 # pass: result addr
15257 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15258 bsr.l unf_res # calculate default result
15259 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
15260 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15261 mov.l (%sp)+,%d2 # restore d2
rts # done; must not fall into fsub_unfl_ena
# Underflow with UNFL or INEX enabled: re-execute the subtract into fp1 and
# return it as the EXOP with exponent = unscaled exponent + 0x6000 (kept to
# 15 bits). Extended precision uses the caller's full rounding control;
# sgl/dbl use the rounding mode only (fsub_unfl_ena_sd). d2 is used as scratch
# here; the copy saved on the stack is restored later in fsub_unfl_dis.
15264 fsub_unfl_ena:
15265 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15267 mov.l L_SCR3(%a6),%d1
15268 andi.b &0xc0,%d1 # is precision extended?
15269 bne.b fsub_unfl_ena_sd # no
15271 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15273 fsub_unfl_ena_cont:
15274 fmov.l &0x0,%fpsr # clear FPSR
15276 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15278 fmov.l &0x0,%fpcr # clear FPCR
15280 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
15281 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15282 mov.l %d1,%d2 # make a copy
15283 andi.l &0x7fff,%d1 # strip sign
15284 andi.w &0x8000,%d2 # keep old sign
15285 sub.l %d0,%d1 # apply scale factor (exp - S.F.)
15286 addi.l &0x6000,%d1 # add new bias
15287 andi.w &0x7fff,%d1 # clear top bit
15288 or.w %d2,%d1 # concat sgn,exp
15289 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15290 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15291 bra.w fsub_unfl_dis # now produce the default result
15293 fsub_unfl_ena_sd:
15294 mov.l L_SCR3(%a6),%d1
15295 andi.b &0x30,%d1 # clear rnd prec
15296 fmov.l %d1,%fpcr # set FPCR
15298 bra.b fsub_unfl_ena_cont
15301 # result is equal to the smallest normalized number in the selected precision
15302 # if the precision is extended, this result could not have come from an
15303 # underflow that rounded up.
# The result mantissa is read from the stack (0x4(sp) = hi, 0x8(sp) = lo) and
# the result itself is expected in fp0.
15305 fsub_may_unfl:
15306 mov.l L_SCR3(%a6),%d1
15307 andi.b &0xc0,%d1 # is precision extended?
15308 beq.w fsub_normal # yes; no underflow occurred
15310 mov.l 0x4(%sp),%d1
15311 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
15312 bne.w fsub_normal # no; no underflow occurred
15314 tst.l 0x8(%sp) # is lo(man) = 0x0?
15315 bne.w fsub_normal # no; no underflow occurred
15317 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
15318 beq.w fsub_normal # no; no underflow occurred
15321 # ok, so now the result has an exponent equal to the smallest normalized
15322 # exponent for the selected precision. also, the mantissa is equal to
15323 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
15324 # g,r,s.
15325 # now, we must determine whether the pre-rounded result was an underflow
15326 # rounded "up" or a normalized number rounded "down".
15327 # so, we do this by re-executing the subtract using RZ as the rounding mode
15328 # and seeing if the new result is smaller or equal to the current result.
15330 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
15332 mov.l L_SCR3(%a6),%d1
15333 andi.b &0xc0,%d1 # keep rnd prec
15334 ori.b &rz_mode*0x10,%d1 # insert rnd mode
15335 fmov.l %d1,%fpcr # set FPCR
15336 fmov.l &0x0,%fpsr # clear FPSR
15338 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
15340 fmov.l &0x0,%fpcr # clear FPCR
15342 fabs.x %fp0 # compare absolute values
15343 fabs.x %fp1
15344 fcmp.x %fp0,%fp1 # is first result > second?
15346 fbgt.w fsub_unfl # yes; it's an underflow
15347 bra.w fsub_normal # no; it's not an underflow
15349 ##########################################################################
15352 # Sub: inputs are not both normalized; what are they?
# d1 indexes tbl_fsub_op. Each entry is a 16-bit offset from the start of the
# table to the handler; the entry comments read "DST type - SRC type" and the
# table has eight entries per DST type. Entries whose offset is zero
# (tbl_fsub_op - tbl_fsub_op) appear to be fillers for unused type codes.
15354 fsub_not_norm:
15355 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # fetch handler offset
15356 jmp (tbl_fsub_op.b,%pc,%d1.w*1) # jump to handler
15358 swbeg &48
15359 tbl_fsub_op:
15360 short fsub_norm - tbl_fsub_op # NORM - NORM
15361 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
15362 short fsub_inf_src - tbl_fsub_op # NORM - INF
15363 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
15364 short fsub_norm - tbl_fsub_op # NORM - DENORM
15365 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
15366 short tbl_fsub_op - tbl_fsub_op #
15367 short tbl_fsub_op - tbl_fsub_op #
15369 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
15370 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
15371 short fsub_inf_src - tbl_fsub_op # ZERO - INF
15372 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
15373 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
15374 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
15375 short tbl_fsub_op - tbl_fsub_op #
15376 short tbl_fsub_op - tbl_fsub_op #
15378 short fsub_inf_dst - tbl_fsub_op # INF - NORM
15379 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
15380 short fsub_inf_2 - tbl_fsub_op # INF - INF
15381 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
15382 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
15383 short fsub_res_snan - tbl_fsub_op # INF - SNAN
15384 short tbl_fsub_op - tbl_fsub_op #
15385 short tbl_fsub_op - tbl_fsub_op #
15387 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
15388 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
15389 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
15390 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
15391 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
15392 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
15393 short tbl_fsub_op - tbl_fsub_op #
15394 short tbl_fsub_op - tbl_fsub_op #
15396 short fsub_norm - tbl_fsub_op # DENORM - NORM
15397 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
15398 short fsub_inf_src - tbl_fsub_op # DENORM - INF
15399 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
15400 short fsub_norm - tbl_fsub_op # DENORM - DENORM
15401 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
15402 short tbl_fsub_op - tbl_fsub_op #
15403 short tbl_fsub_op - tbl_fsub_op #
15405 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
15406 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
15407 short fsub_res_snan - tbl_fsub_op # SNAN - INF
15408 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
15409 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
15410 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
15411 short tbl_fsub_op - tbl_fsub_op #
15412 short tbl_fsub_op - tbl_fsub_op #
# long-branch stubs to the shared NAN result routines
15414 fsub_res_qnan:
15415 bra.l res_qnan
15416 fsub_res_snan:
15417 bra.l res_snan
15420 # both operands are ZEROes
# fsub_zero_2: ZERO - ZERO.
#   in : a0 = src operand, a1 = dst operand
#   out: fp0 = signed ZERO result, FPSR_CC(a6) = Z (and NEG for -ZERO)
#   opposite signs -> ZERO with the sign of the dst
#   same signs     -> -ZERO if the rounding mode is RM, +ZERO otherwise
15422 fsub_zero_2:
15423 mov.b SRC_EX(%a0),%d0 # d0 = src sign byte
15424 mov.b DST_EX(%a1),%d1 # d1 = dst sign byte (not modified below)
15425 eor.b %d1,%d0 # d0 = src sign ^ dst sign
15426 bpl.b fsub_zero_2_chk_rm # same signs; result depends on rnd mode
15428 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# d0 now holds the XOR of the signs and is always negative on this path, so
# the dst sign has to be tested in d1; testing d0 would always yield -ZERO.
15429 tst.b %d1 # is dst negative?
15430 bmi.b fsub_zero_2_rm # yes
15431 fmov.s &0x00000000,%fp0 # no; return +ZERO
15432 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts # done; do not fall into the rnd mode check
15436 # the ZEROes have the same signs:
15437 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
15438 # - -ZERO is returned in the case of RM.
15440 fsub_zero_2_chk_rm:
15441 mov.b 3+L_SCR3(%a6),%d1 # low byte of the stored rnd info
15442 andi.b &0x30,%d1 # extract rnd mode
15443 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
15444 beq.b fsub_zero_2_rm # yes
15445 fmov.s &0x00000000,%fp0 # no; return +ZERO
15446 mov.b &z_bmask,FPSR_CC(%a6) # set Z
rts # done; do not fall into the -ZERO case
15449 fsub_zero_2_rm:
15450 fmov.s &0x80000000,%fp0 # return -ZERO
15451 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
rts
15455 # one operand is a ZERO and the other is a DENORM or a NORM.
15456 # scale the DENORM or NORM and jump to the regular fsub routine.
# fsub_zero_dst: the dst is the ZERO. Copy the src operand (a0) to FP_SCR0,
# scale it, and clear all of FP_SCR1 (sign/exponent word and both mantissa
# longwords) before entering the common code at fsub_zero_entry.
15458 fsub_zero_dst:
15459 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15460 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15461 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15462 bsr.l scale_to_zero_src # scale the operand
15463 clr.w FP_SCR1_EX(%a6) # dst scratch = all-zero operand
15464 clr.l FP_SCR1_HI(%a6)
15465 clr.l FP_SCR1_LO(%a6)
15466 bra.w fsub_zero_entry # go execute fsub
# fsub_zero_src: the src is the ZERO. Same as above with the roles swapped:
# the dst operand (a1) is copied to FP_SCR1 and scaled, FP_SCR0 is cleared.
15468 fsub_zero_src:
15469 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
15470 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15471 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15472 bsr.l scale_to_zero_dst # scale the operand
15473 clr.w FP_SCR0_EX(%a6) # src scratch = all-zero operand
15474 clr.l FP_SCR0_HI(%a6)
15475 clr.l FP_SCR0_LO(%a6)
15476 bra.w fsub_zero_entry # go execute fsub
15479 # both operands are INFs. an OPERR will result if the INFs have the
15480 # same signs. else, the result is handled like a src INF (negated src).
# in : a0 = src operand, a1 = dst operand
# out: fp0 = INF result, FPSR_CC(a6) = INF (plus NEG if the result is negative)
15482 fsub_inf_2:
15483 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
15484 mov.b DST_EX(%a1),%d1
15485 eor.b %d1,%d0
15486 bpl.l res_operr # same signs: INF - INF is an OPERR
15488 # ok, so it's not an OPERR. but we do have to remember to return
15489 # the src INF since that's where the 881/882 gets the j-bit.
# src is INF: the result is the src INF with its sign inverted.
15491 fsub_inf_src:
15492 fmovm.x SRC(%a0),&0x80 # return src INF
15493 fneg.x %fp0 # invert sign
15494 fbge.w fsub_inf_done # sign is now positive
15495 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts # done; must not fall into fsub_inf_dst and reload fp0
# dst is INF (src is not): the result is the dst INF unchanged.
15498 fsub_inf_dst:
15499 fmovm.x DST(%a1),&0x80 # return dst INF
15500 tst.b DST_EX(%a1) # is INF negative?
15501 bpl.b fsub_inf_done # no
15502 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
rts # done; falling through would drop the NEG ccode
15505 fsub_inf_done:
15506 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
rts
15509 #########################################################################
15510 # XDEF **************************************************************** #
15511 # fsqrt(): emulates the fsqrt instruction #
15512 # fssqrt(): emulates the fssqrt instruction #
15513 # fdsqrt(): emulates the fdsqrt instruction #
15515 # XREF **************************************************************** #
15516 # scale_sqrt() - scale the source operand #
15517 # unf_res() - return default underflow result #
15518 # ovf_res() - return default overflow result #
15519 # res_qnan_1op() - return QNAN result #
15520 # res_snan_1op() - return SNAN result #
15522 # INPUT *************************************************************** #
15523 # a0 = pointer to extended precision source operand #
15524 # d0 rnd prec,mode #
15526 # OUTPUT ************************************************************** #
15527 # fp0 = result #
15528 # fp1 = EXOP (if exception occurred) #
15530 # ALGORITHM *********************************************************** #
15531 # Handle NANs, infinities, and zeroes as special cases. Divide #
15532 # norms/denorms into ext/sgl/dbl precision. #
15533 # For norms/denorms, scale the exponents such that a sqrt #
15534 # instruction won't cause an exception. Use the regular fsqrt to #
15535 # compute a result. Check if the regular operands would have taken #
15536 # an exception. If so, return the default overflow/underflow result #
15537 # and return the EXOP if exceptions are enabled. Else, scale the #
15538 # result operand to the proper exponent. #
15540 #########################################################################
# Entry points. fssqrt/fdsqrt force the rounding precision in d0 to single or
# double (the 0x30 rounding mode field is kept) and then share the fsqrt code.
#   in : a0 = pointer to extended precision source operand
#        d0 = rnd prec,mode
#   out: fp0 = result
15542 global fssqrt
15543 fssqrt:
15544 andi.b &0x30,%d0 # clear rnd prec
15545 ori.b &s_mode*0x10,%d0 # insert sgl precision
15546 bra.b fsqrt
15548 global fdsqrt
15549 fdsqrt:
15550 andi.b &0x30,%d0 # clear rnd prec
15551 ori.b &d_mode*0x10,%d0 # insert dbl precision
# falls through into fsqrt
15553 global fsqrt
15554 fsqrt:
15555 mov.l %d0,L_SCR3(%a6) # store rnd info
15556 clr.w %d1
15557 mov.b STAG(%a6),%d1 # d1 = source operand tag
15558 bne.w fsqrt_not_norm # optimize on non-norm input
15561 # SQUARE ROOT: norms and denorms ONLY!
15563 fsqrt_norm:
15564 tst.b SRC_EX(%a0) # is operand negative?
15565 bmi.l res_operr # yes
15567 andi.b &0xc0,%d0 # is precision extended?
15568 bne.b fsqrt_not_ext # no; go handle sgl or dbl
# extended precision norm: no scaling needed, take the root directly
15570 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15571 fmov.l &0x0,%fpsr # clear FPSR
15573 fsqrt.x (%a0),%fp0 # execute square root
15575 fmov.l %fpsr,%d1
15576 or.l %d1,USER_FPSR(%a6) # set N,INEX
rts # done; must not fall into fsqrt_denorm
# fsqrt_denorm: source operand is a DENORM. Negative operands are an OPERR.
# For sgl/dbl precision the normal sgl/dbl path is used; for extended
# precision the operand is copied to FP_SCR0, scaled by scale_sqrt and then
# handled by the common fsqrt_sd_normal code.
15580 fsqrt_denorm:
15581 tst.b SRC_EX(%a0) # is operand negative?
15582 bmi.l res_operr # yes
15584 andi.b &0xc0,%d0 # is precision extended?
15585 bne.b fsqrt_not_ext # no; go handle sgl or dbl
15587 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15588 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15589 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15591 bsr.l scale_sqrt # calculate scale factor
15593 bra.w fsqrt_sd_normal
15596 # operand is either single or double
# d0 holds the precision bits (rnd info masked with 0xc0) on entry.
15598 fsqrt_not_ext:
15599 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
15600 bne.w fsqrt_dbl
15603 # operand is to be rounded to single precision
# Copy the operand to FP_SCR0, scale it, and use the scale factor returned in
# d0 to decide whether the single precision result can under/overflow.
15605 fsqrt_sgl:
15606 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15607 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15608 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15610 bsr.l scale_sqrt # calculate scale factor
15612 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
15613 beq.w fsqrt_sd_may_unfl # maybe; go check
15614 bgt.w fsqrt_sd_unfl # yes; go handle underflow
15615 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
15616 beq.w fsqrt_sd_may_ovfl # maybe; go check
15617 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15620 # operand will NOT overflow or underflow when moved in to the fp reg file
15622 fsqrt_sd_normal:
15623 fmov.l &0x0,%fpsr # clear FPSR
15624 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15626 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15628 fmov.l %fpsr,%d1 # save FPSR
15629 fmov.l &0x0,%fpcr # clear FPCR
15631 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# Undo the scaling: store the result, subtract the scale factor (d0) from its
# exponent while keeping the sign bit, and reload it into fp0.
15633 fsqrt_sd_normal_exit:
15634 mov.l %d2,-(%sp) # save d2
15635 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15636 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
15637 mov.l %d1,%d2 # make a copy
15638 andi.l &0x7fff,%d1 # strip sign
15639 sub.l %d0,%d1 # subtract scale factor
15640 andi.w &0x8000,%d2 # keep old sign
15641 or.w %d1,%d2 # concat old sign,new exp
15642 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
15643 mov.l (%sp)+,%d2 # restore d2
15644 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
rts # done; must not fall into fsqrt_dbl
15648 # operand is to be rounded to double precision
# Same flow as fsqrt_sgl, using the double precision exponent limits.
15650 fsqrt_dbl:
15651 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
15652 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15653 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15655 bsr.l scale_sqrt # calculate scale factor
15657 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
15658 beq.w fsqrt_sd_may_unfl # maybe; go check
15659 bgt.b fsqrt_sd_unfl # yes; go handle underflow
15660 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
15661 beq.w fsqrt_sd_may_ovfl # maybe; go check
15662 blt.w fsqrt_sd_ovfl # yes; go handle overflow
15663 bra.w fsqrt_sd_normal # no; go handle normalized op
15665 # we're on the line here and the distinguishing characteristic is whether
15666 # the scaled exponent is 3fff or 3ffe. if it's 3fff (low bit set), then
15667 # it's a safe number; otherwise fall through to underflow.
15668 fsqrt_sd_may_unfl:
15669 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15670 bne.w fsqrt_sd_normal # yes, so no underflow
15673 # operand WILL underflow when moved in to the fp register file
# Take the root of the scaled operand rounding toward zero, record the status,
# and let unf_res() build the default underflow result (after the EXOP has
# been created if UNFL or INEX is enabled).
15675 fsqrt_sd_unfl:
15676 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
15678 fmov.l &rz_mode*0x10,%fpcr # set FPCR
15679 fmov.l &0x0,%fpsr # clear FPSR
15681 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
15683 fmov.l %fpsr,%d1 # save status
15684 fmov.l &0x0,%fpcr # clear FPCR
15686 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15688 # if underflow or inexact is enabled, go calculate EXOP first.
15689 mov.b FPCR_ENABLE(%a6),%d1
15690 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
15691 bne.b fsqrt_sd_unfl_ena # yes
15693 fsqrt_sd_unfl_dis:
15694 fmovm.x &0x80,FP_SCR0(%a6) # store out result
15696 lea FP_SCR0(%a6),%a0 # pass: result addr
15697 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
15698 bsr.l unf_res # calculate default result
15699 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
15700 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
15701 rts
15704 # operand will underflow AND underflow is enabled.
15705 # therefore, we must return the result rounded to extended precision.
# The EXOP is assembled in FP_SCR1 from the mantissa and {sgn,exp} currently
# in FP_SCR0: the scale factor in d0 is subtracted from the exponent and
# 0x6000 is added, then the value is loaded into fp1. d2 is scratch here and
# is saved/restored around its use.
15707 fsqrt_sd_unfl_ena:
15708 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
15709 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
15710 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
15712 mov.l %d2,-(%sp) # save d2
15713 mov.l %d1,%d2 # make a copy
15714 andi.l &0x7fff,%d1 # strip sign
15715 andi.w &0x8000,%d2 # keep old sign
15716 sub.l %d0,%d1 # subtract scale factor
15717 addi.l &0x6000,%d1 # add new bias
15718 andi.w &0x7fff,%d1 # clear top bit
15719 or.w %d2,%d1 # concat new sign,new exp
15720 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
15721 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
15722 mov.l (%sp)+,%d2 # restore d2
15723 bra.b fsqrt_sd_unfl_dis # now create the default result
15726 # operand WILL overflow.
# Take the root of the scaled operand with the user's rounding info so that
# INEX2/N are recorded, then flag the overflow. If OVFL or INEX is enabled the
# EXOP is built first (fsqrt_sd_ovfl_ena); either way ovf_res() supplies the
# default result returned in fp0.
15728 fsqrt_sd_ovfl:
15729 fmov.l &0x0,%fpsr # clear FPSR
15730 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15732 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15734 fmov.l &0x0,%fpcr # clear FPCR
15735 fmov.l %fpsr,%d1 # save FPSR
15737 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15739 fsqrt_sd_ovfl_tst:
15740 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
15742 mov.b FPCR_ENABLE(%a6),%d1
15743 andi.b &0x13,%d1 # is OVFL or INEX enabled?
15744 bne.b fsqrt_sd_ovfl_ena # yes
15747 # OVFL is not enabled; therefore, we must create the default result by
15748 # calling ovf_res().
15750 fsqrt_sd_ovfl_dis:
15751 btst &neg_bit,FPSR_CC(%a6) # is result negative?
15752 sne %d1 # set sign param accordingly
15753 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
15754 bsr.l ovf_res # calculate default result
15755 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
15756 fmovm.x (%a0),&0x80 # return default result in fp0 (a0 as left by ovf_res)
# The return is required: without it execution runs into fsqrt_sd_ovfl_ena,
# which branches straight back to fsqrt_sd_ovfl_dis.
rts
15760 # OVFL is enabled.
15761 # the INEX2 bit has already been updated by the round to the correct precision.
15762 # now, round to extended(and don't alter the FPSR).
# The EXOP is built in place in FP_SCR0: the scale factor in d0 and a 0x6000
# bias are both subtracted from the stored exponent, and the value is loaded
# into fp1. d2 is scratch and is saved/restored around its use.
15764 fsqrt_sd_ovfl_ena:
15765 mov.l %d2,-(%sp) # save d2
15766 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
15767 mov.l %d1,%d2 # make a copy
15768 andi.l &0x7fff,%d1 # strip sign
15769 andi.w &0x8000,%d2 # keep old sign
15770 sub.l %d0,%d1 # subtract scale factor
15771 subi.l &0x6000,%d1 # subtract bias
15772 andi.w &0x7fff,%d1 # clear top bit
15773 or.w %d2,%d1 # concat sign,exp
15774 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
15775 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
15776 mov.l (%sp)+,%d2 # restore d2
15777 bra.b fsqrt_sd_ovfl_dis # now create the default result
15780 # the move in MAY overflow. so...
# A scaled exponent of 0x3fff (low bit set) is treated as a certain overflow.
# Otherwise the root is taken and the still-scaled result is compared with 1:
# a result >= 1 means rounding pushed it over the limit.
15782 fsqrt_sd_may_ovfl:
15783 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
15784 bne.w fsqrt_sd_ovfl # yes, so overflow
15786 fmov.l &0x0,%fpsr # clear FPSR
15787 fmov.l L_SCR3(%a6),%fpcr # set FPCR
15789 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
15791 fmov.l %fpsr,%d1 # save status
15792 fmov.l &0x0,%fpcr # clear FPCR
15794 or.l %d1,USER_FPSR(%a6) # save INEX2,N
15796 fmov.x %fp0,%fp1 # make a copy of result
15797 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
15798 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
15800 # no, it didn't overflow; we have correct result
15801 bra.w fsqrt_sd_normal_exit
15803 ##########################################################################
15806 # input is not normalized; what is it?
# d1 = source operand tag (loaded from STAG in fsqrt). Anything that is not a
# DENORM, ZERO, INF or SNAN is handed to res_qnan_1op.
15808 fsqrt_not_norm:
15809 cmpi.b %d1,&DENORM # weed out DENORM
15810 beq.w fsqrt_denorm
15811 cmpi.b %d1,&ZERO # weed out ZERO
15812 beq.b fsqrt_zero
15813 cmpi.b %d1,&INF # weed out INF
15814 beq.b fsqrt_inf
15815 cmpi.b %d1,&SNAN # weed out SNAN
15816 beq.l res_snan_1op
15817 bra.l res_qnan_1op # all that is left is QNAN
15820 # fsqrt(+0) = +0
15821 # fsqrt(-0) = -0
15822 # fsqrt(+INF) = +INF
15823 # fsqrt(-INF) = OPERR
# in : a0 = pointer to the source operand
# out: fp0 = result, FPSR_CC(a6) = condition codes for the result
15825 fsqrt_zero:
15826 tst.b SRC_EX(%a0) # is ZERO positive or negative?
15827 bmi.b fsqrt_zero_m # negative
15828 fsqrt_zero_p:
15829 fmov.s &0x00000000,%fp0 # return +ZERO
15830 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
rts # done; must not fall into the -ZERO case
15832 fsqrt_zero_m:
15833 fmov.s &0x80000000,%fp0 # return -ZERO
15834 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
rts # done; must not fall into fsqrt_inf
15837 fsqrt_inf:
15838 tst.b SRC_EX(%a0) # is INF positive or negative?
15839 bmi.l res_operr # negative
15840 fsqrt_inf_p:
15841 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
15842 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
rts
15845 ##########################################################################
15847 #########################################################################
15848 # XDEF **************************************************************** #
15849 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
15850 # OVFL/UNFL exceptions will result #
15852 # XREF **************************************************************** #
15853 # norm() - normalize mantissa after adjusting exponent #
15855 # INPUT *************************************************************** #
15856 # FP_SRC(a6) = fp op1(src) #
15857 # FP_DST(a6) = fp op2(dst) #
15859 # OUTPUT ************************************************************** #
15860 # FP_SRC(a6) = fp op1 scaled(src) #
15861 # FP_DST(a6) = fp op2 scaled(dst) #
15862 # d0 = scale amount #
15864 # ALGORITHM *********************************************************** #
15865 # If the DST exponent is > the SRC exponent, set the DST exponent #
15866 # equal to 0x3fff and scale the SRC exponent by the value that the #
15867 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
15868 # do the opposite. Return this scale factor in d0. #
15869 # If the two exponents differ by > the number of mantissa bits #
15870 # plus two, then set the smallest exponent to a very small value as a #
15871 # quick shortcut. #
15873 #########################################################################
# addsub_scaler2: copy the src (a0) and dst (a1) operands to FP_SCR0/FP_SCR1
# and scale them for fadd/fsub.
#   in : a0 = src operand, a1 = dst operand, STAG/DTAG(a6) = operand tags
#   out: FP_SCR0(a6) = scaled src, FP_SCR1(a6) = scaled dst, d0 = scale factor
#   uses: d1, L_SCR1(a6) (word 0 = src exponent, word 1 = dst exponent)
# The operand with the larger exponent is scaled to 0x3fff by
# scale_to_zero_src/dst; the other exponent is adjusted by the same amount,
# or forced to 1 when it is at least mantissalen+2 below the larger one.
15875 global addsub_scaler2
15876 addsub_scaler2:
15877 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
15878 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
15879 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
15880 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
15881 mov.w SRC_EX(%a0),%d0
15882 mov.w DST_EX(%a1),%d1
15883 mov.w %d0,FP_SCR0_EX(%a6)
15884 mov.w %d1,FP_SCR1_EX(%a6)
15886 andi.w &0x7fff,%d0 # strip src sign
15887 andi.w &0x7fff,%d1 # strip dst sign
15888 mov.w %d0,L_SCR1(%a6) # store src exponent
15889 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
15891 cmp.w %d0, %d1 # is src exp >= dst exp?
15892 bge.l src_exp_ge2
15894 # dst exp is > src exp; scale dst to exp = 0x3fff
15895 dst_exp_gt2:
15896 bsr.l scale_to_zero_dst
15897 mov.l %d0,-(%sp) # save scale factor
15899 cmpi.b STAG(%a6),&DENORM # is src denormalized?
15900 bne.b cmpexp12
15902 lea FP_SCR0(%a6),%a0
15903 bsr.l norm # normalize the denorm; result is new exp
15904 neg.w %d0 # new exp = -(shft val)
15905 mov.w %d0,L_SCR1(%a6) # insert new src exp
15907 cmpexp12:
15908 mov.w 2+L_SCR1(%a6),%d0
15909 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15911 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
15912 bge.b quick_scale12
15914 mov.w L_SCR1(%a6),%d0
15915 add.w 0x2(%sp),%d0 # scale src exponent by scale factor (low word of saved value)
15916 mov.w FP_SCR0_EX(%a6),%d1
15917 and.w &0x8000,%d1
15918 or.w %d1,%d0 # concat {sgn,new exp}
15919 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
15921 mov.l (%sp)+,%d0 # return SCALE factor
rts # done; must not fall into quick_scale12
15924 quick_scale12:
15925 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
15926 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
15928 mov.l (%sp)+,%d0 # return SCALE factor
rts # done; must not fall into src_exp_ge2
15931 # src exp is >= dst exp; scale src to exp = 0x3fff
15932 src_exp_ge2:
15933 bsr.l scale_to_zero_src
15934 mov.l %d0,-(%sp) # save scale factor
15936 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
15937 bne.b cmpexp22
15938 lea FP_SCR1(%a6),%a0
15939 bsr.l norm # normalize the denorm; result is new exp
15940 neg.w %d0 # new exp = -(shft val)
15941 mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
15943 cmpexp22:
15944 mov.w L_SCR1(%a6),%d0
15945 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
15947 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
15948 bge.b quick_scale22
15950 mov.w 2+L_SCR1(%a6),%d0
15951 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor (low word of saved value)
15952 mov.w FP_SCR1_EX(%a6),%d1
15953 andi.w &0x8000,%d1
15954 or.w %d1,%d0 # concat {sgn,new exp}
15955 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
15957 mov.l (%sp)+,%d0 # return SCALE factor
rts # done; must not fall into quick_scale22
15960 quick_scale22:
15961 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
15962 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
15964 mov.l (%sp)+,%d0 # return SCALE factor
rts
15967 ##########################################################################
15969 #########################################################################
15970 # XDEF **************************************************************** #
15971 # scale_to_zero_src(): scale the exponent of extended precision #
15972 # value at FP_SCR0(a6). #
15974 # XREF **************************************************************** #
15975 # norm() - normalize the mantissa if the operand was a DENORM #
15977 # INPUT *************************************************************** #
15978 # FP_SCR0(a6) = extended precision operand to be scaled #
15980 # OUTPUT ************************************************************** #
15981 # FP_SCR0(a6) = scaled extended precision operand #
15982 # d0 = scale value #
15984 # ALGORITHM *********************************************************** #
15985 # Set the exponent of the input operand to 0x3fff. Save the value #
15986 # of the difference between the original and new exponent. Then, #
15987 # normalize the operand if it was a DENORM. Add this normalization #
15988 # value to the previous value. Return the result. #
15990 #########################################################################
# scale_to_zero_src: force the exponent of the operand in FP_SCR0 to 0x3fff.
#   in : FP_SCR0(a6) = operand, STAG(a6) = operand tag
#   out: FP_SCR0(a6) = scaled operand, d0 = 0x3fff - (original exponent)
#   uses: d1; a0 as well when the operand is a DENORM (norm() is called)
15992 global scale_to_zero_src
15993 scale_to_zero_src:
15994 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
15995 mov.w %d1,%d0 # make a copy
15997 andi.l &0x7fff,%d1 # extract operand's exponent
15999 andi.w &0x8000,%d0 # extract operand's sgn
16000 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16002 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
16004 cmpi.b STAG(%a6),&DENORM # is operand normalized?
16005 beq.b stzs_denorm # normalize the DENORM
16007 stzs_norm:
16008 mov.l &0x3fff,%d0
16009 sub.l %d1,%d0 # scale = BIAS + (-exp)
# The return is required: without it execution runs into stzs_denorm, which
# branches back to stzs_norm.
rts
# DENORM: normalize the mantissa and use -(shift count) as the exponent.
16013 stzs_denorm:
16014 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16015 bsr.l norm # normalize denorm
16016 neg.l %d0 # new exponent = -(shft val)
16017 mov.l %d0,%d1 # d1 = exponent used by stzs_norm
16018 bra.b stzs_norm # finish scaling
16022 #########################################################################
16023 # XDEF **************************************************************** #
16024 # scale_sqrt(): scale the input operand exponent so a subsequent #
16025 # fsqrt operation won't take an exception. #
16027 # XREF **************************************************************** #
16028 # norm() - normalize the mantissa if the operand was a DENORM #
16030 # INPUT *************************************************************** #
16031 # FP_SCR0(a6) = extended precision operand to be scaled #
16033 # OUTPUT ************************************************************** #
16034 # FP_SCR0(a6) = scaled extended precision operand #
16035 # d0 = scale value #
16037 # ALGORITHM *********************************************************** #
16038 # If the input operand is a DENORM, normalize it. #
16039 # If the exponent of the input operand is even, set the exponent #
16040 # to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the #
16041 # exponent of the input operand is odd, set the exponent to 0x3fff and #
16042 # return a scale factor of "(0x3fff-exp)/2". #
16044 #########################################################################
# scale_sqrt: scale the operand in FP_SCR0 so that fsqrt cannot take an
# exception, keeping the parity of the exponent.
#   in : FP_SCR0(a6) = operand, STAG(a6) = operand tag
#   out: FP_SCR0(a6) = operand with exponent 0x3fff (odd) or 0x3ffe (even)
#        d0 = (new exponent - original exponent) / 2
#   uses: d1; a0 as well when the operand is a DENORM (norm() is called)
16046 global scale_sqrt
16047 scale_sqrt:
16048 cmpi.b STAG(%a6),&DENORM # is operand normalized?
16049 beq.b ss_denorm # normalize the DENORM
16051 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
16052 andi.l &0x7fff,%d1 # extract operand's exponent
16054 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
16056 btst &0x0,%d1 # is exp even or odd?
16057 beq.b ss_norm_even
16059 ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: insert new exponent 0x3fff
16061 mov.l &0x3fff,%d0
16062 sub.l %d1,%d0 # scale = BIAS + (-exp)
16063 asr.l &0x1,%d0 # divide scale factor by 2
rts # done; must not fall into ss_norm_even
16066 ss_norm_even:
16067 ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: insert new exponent 0x3ffe
16069 mov.l &0x3ffe,%d0
16070 sub.l %d1,%d0 # scale = 0x3ffe + (-exp)
16071 asr.l &0x1,%d0 # divide scale factor by 2
rts # done; must not fall into ss_denorm
# DENORM: after norm() d0 holds the shift count, i.e. the exponent is -d0, so
# the parity test and the additions below work on the shift count directly.
16074 ss_denorm:
16075 lea FP_SCR0(%a6),%a0 # pass ptr to src op
16076 bsr.l norm # normalize denorm
16078 btst &0x0,%d0 # is exp even or odd?
16079 beq.b ss_denorm_even
16081 ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: insert new exponent 0x3fff
16083 add.l &0x3fff,%d0 # scale = 0x3fff + shft val
16084 asr.l &0x1,%d0 # divide scale factor by 2
rts # done; must not fall into ss_denorm_even
16087 ss_denorm_even:
16088 ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: insert new exponent 0x3ffe
16090 add.l &0x3ffe,%d0 # scale = 0x3ffe + shft val
16091 asr.l &0x1,%d0 # divide scale factor by 2
rts
16096 #########################################################################
16097 # XDEF **************************************************************** #
16098 # scale_to_zero_dst(): scale the exponent of extended precision #
16099 # value at FP_SCR1(a6). #
16101 # XREF **************************************************************** #
16102 # norm() - normalize the mantissa if the operand was a DENORM #
16104 # INPUT *************************************************************** #
16105 # FP_SCR1(a6) = extended precision operand to be scaled #
16107 # OUTPUT ************************************************************** #
16108 # FP_SCR1(a6) = scaled extended precision operand #
16109 # d0 = scale value #
16111 # ALGORITHM *********************************************************** #
16112 # Set the exponent of the input operand to 0x3fff. Save the value #
16113 # of the difference between the original and new exponent. Then, #
16114 # normalize the operand if it was a DENORM. Add this normalization #
16115 # value to the previous value. Return the result. #
16117 #########################################################################
# scale_to_zero_dst: force the exponent of the operand in FP_SCR1 to 0x3fff.
#   in : FP_SCR1(a6) = operand, DTAG(a6) = operand tag
#   out: FP_SCR1(a6) = scaled operand, d0 = 0x3fff - (original exponent)
#   uses: d1; a0 as well when the operand is a DENORM (norm() is called)
16119 global scale_to_zero_dst
16120 scale_to_zero_dst:
16121 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
16122 mov.w %d1,%d0 # make a copy
16124 andi.l &0x7fff,%d1 # extract operand's exponent
16126 andi.w &0x8000,%d0 # extract operand's sgn
16127 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
16129 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
16131 cmpi.b DTAG(%a6),&DENORM # is operand normalized?
16132 beq.b stzd_denorm # normalize the DENORM
16134 stzd_norm:
16135 mov.l &0x3fff,%d0
16136 sub.l %d1,%d0 # scale = BIAS + (-exp)
# The return is required: without it execution runs into stzd_denorm, which
# branches back to stzd_norm.
rts
# DENORM: normalize the mantissa and use -(shift count) as the exponent.
16139 stzd_denorm:
16140 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
16141 bsr.l norm # normalize denorm
16142 neg.l %d0 # new exponent = -(shft val)
16143 mov.l %d0,%d1 # d1 = exponent used by stzd_norm
16144 bra.b stzd_norm # finish scaling
16146 ##########################################################################
16148 #########################################################################
16149 # XDEF **************************************************************** #
16150 # res_qnan(): return default result w/ QNAN operand for dyadic #
16151 # res_snan(): return default result w/ SNAN operand for dyadic #
16152 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
16153 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
16155 # XREF **************************************************************** #
16156 # None #
16158 # INPUT *************************************************************** #
16159 # FP_SRC(a6) = pointer to extended precision src operand #
16160 # FP_DST(a6) = pointer to extended precision dst operand #
16162 # OUTPUT ************************************************************** #
16163 # fp0 = default result #
16165 # ALGORITHM *********************************************************** #
16166 # If either operand (but not both operands) of an operation is a #
16167 # nonsignalling NAN, then that NAN is returned as the result. If both #
16168 # operands are nonsignalling NANs, then the destination operand #
16169 # nonsignalling NAN is returned as the result. #
16170 # If either operand to an operation is a signalling NAN (SNAN), #
16171 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
16172 # enable bit is set in the FPCR, then the trap is taken and the #
16173 # destination is not modified. If the SNAN trap enable bit is not set, #
16174 # then the SNAN is converted to a nonsignalling NAN (by setting the #
16175 # SNAN bit in the operand to one), and the operation continues as #
16176 # described in the preceding paragraph, for nonsignalling NANs. #
16177 # Make sure the appropriate FPSR bits are set before exiting. #
16179 #########################################################################
# res_qnan/res_snan (dyadic) and res_qnan_1op/res_snan_1op (monadic):
# pick the NAN operand to return, update USER_FPSR and load the NAN into fp0.
#   in : FP_SRC(a6)/FP_DST(a6) = operands, STAG(a6)/DTAG(a6) = operand tags
#   out: fp0 = NAN result; USER_FPSR gets NAN, plus SNAN/AIOP when a
#        signalling NAN was involved, plus N when the returned NAN is negative
# Selection order as coded: dst SNAN, dst QNAN, src QNAN, otherwise src SNAN.
16181 global res_qnan
16182 global res_snan
16183 res_qnan:
16184 res_snan:
16185 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
16186 beq.b dst_snan2
16187 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
16188 beq.b dst_qnan2
16189 src_nan:
16190 cmp.b STAG(%a6), &QNAN # is the src a QNAN?
16191 beq.b src_qnan2
16192 global res_snan_1op
16193 res_snan_1op:
16194 src_snan2:
16195 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
16196 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16197 lea FP_SRC(%a6), %a0 # return the src NAN
16198 bra.b nan_comp
16199 global res_qnan_1op
16200 res_qnan_1op:
16201 src_qnan2:
16202 or.l &nan_mask, USER_FPSR(%a6)
16203 lea FP_SRC(%a6), %a0 # return the src NAN
16204 bra.b nan_comp
16205 dst_snan2:
16206 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
16207 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
16208 lea FP_DST(%a6), %a0 # return the dst NAN
16209 bra.b nan_comp
# dst QNAN is returned, but a src SNAN must still be signalled
16210 dst_qnan2:
16211 lea FP_DST(%a6), %a0 # return the dst NAN
16212 cmp.b STAG(%a6), &SNAN # is the src an SNAN?
16213 bne nan_done # no
16214 or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
16215 nan_done:
16216 or.l &nan_mask, USER_FPSR(%a6)
# a0 = address of the NAN to return
16217 nan_comp:
16218 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
16219 beq.b nan_not_neg
16220 or.l &neg_mask, USER_FPSR(%a6)
16221 nan_not_neg:
16222 fmovm.x (%a0), &0x80 # return NAN in fp0
rts
16225 #########################################################################
16226 # XDEF **************************************************************** #
16227 # res_operr(): return default result during operand error #
16229 # XREF **************************************************************** #
16230 # None #
16232 # INPUT *************************************************************** #
16233 # None #
16235 # OUTPUT ************************************************************** #
16236 # fp0 = default operand error result #
16238 # ALGORITHM *********************************************************** #
16239 # A nonsignalling NAN is returned as the default result when #
16240 # an operand error occurs for the following cases: #
16242 # Multiply: (Infinity x Zero) #
16243 # Divide : (Zero / Zero) || (Infinity / Infinity) #
16245 #########################################################################
16247 global res_operr
16248 res_operr: # return the default NAN for an operand error
16249 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # NAN cc + OPERR + AIOP
16250 fmovm.x nan_return(%pc), &0x80 # fp0 = default operand error result
16251 rts # return before the constant below; it is data, not code
16253 nan_return: # extended-precision default NAN (three longwords)
16254 long 0x7fff0000, 0xffffffff, 0xffffffff
16256 #########################################################################
16257 # fdbcc(): routine to emulate the fdbcc instruction #
16259 # XDEF **************************************************************** #
16260 # _fdbcc() #
16262 # XREF **************************************************************** #
16263 # fetch_dreg() - fetch Dn value #
16264 # store_dreg_l() - store updated Dn value #
16266 # INPUT *************************************************************** #
16267 # d0 = displacement #
16269 # OUTPUT ************************************************************** #
16270 # none #
16272 # ALGORITHM *********************************************************** #
16273 # This routine checks which conditional predicate is specified by #
16274 # the stacked fdbcc instruction opcode and then branches to a routine #
16275 # for that predicate. The corresponding fbcc instruction is then used #
16276 # to see whether the condition (specified by the stacked FPSR) is true #
16277 # or false. #
16278 # If a BSUN exception should be indicated, the BSUN and AIOP #
16279 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16280 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16281 # enabled BSUN should not be flagged and the predicate is false, then #
16282 # Dn is fetched and decremented by one. If Dn is not equal to -1, add #
16283 # the displacement value to the stacked PC so that when an "rte" is #
16284 # finally executed, the branch occurs. #
16286 #########################################################################
16287 global _fdbcc
16288 _fdbcc: # in: d0 = displacement; dispatches on the predicate
16289 mov.l %d0,L_SCR1(%a6) # save displacement
16291 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16293 clr.l %d1 # clear scratch reg
16294 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16295 ror.l &0x8,%d1 # rotate to top byte
16296 fmov.l %d1,%fpsr # insert into FPSR so the fbcc tests below see them
16298 mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load table entry (offset from tbl_fdbcc)
16299 jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
16301 tbl_fdbcc: # 16-bit offsets from tbl_fdbcc, indexed by predicate 0-31
16302 short fdbcc_f - tbl_fdbcc # 00
16303 short fdbcc_eq - tbl_fdbcc # 01
16304 short fdbcc_ogt - tbl_fdbcc # 02
16305 short fdbcc_oge - tbl_fdbcc # 03
16306 short fdbcc_olt - tbl_fdbcc # 04
16307 short fdbcc_ole - tbl_fdbcc # 05
16308 short fdbcc_ogl - tbl_fdbcc # 06
16309 short fdbcc_or - tbl_fdbcc # 07
16310 short fdbcc_un - tbl_fdbcc # 08
16311 short fdbcc_ueq - tbl_fdbcc # 09
16312 short fdbcc_ugt - tbl_fdbcc # 10
16313 short fdbcc_uge - tbl_fdbcc # 11
16314 short fdbcc_ult - tbl_fdbcc # 12
16315 short fdbcc_ule - tbl_fdbcc # 13
16316 short fdbcc_neq - tbl_fdbcc # 14
16317 short fdbcc_t - tbl_fdbcc # 15
16318 short fdbcc_sf - tbl_fdbcc # 16 (16-31: handlers that may flag BSUN)
16319 short fdbcc_seq - tbl_fdbcc # 17
16320 short fdbcc_gt - tbl_fdbcc # 18
16321 short fdbcc_ge - tbl_fdbcc # 19
16322 short fdbcc_lt - tbl_fdbcc # 20
16323 short fdbcc_le - tbl_fdbcc # 21
16324 short fdbcc_gl - tbl_fdbcc # 22
16325 short fdbcc_gle - tbl_fdbcc # 23
16326 short fdbcc_ngle - tbl_fdbcc # 24
16327 short fdbcc_ngl - tbl_fdbcc # 25
16328 short fdbcc_nle - tbl_fdbcc # 26
16329 short fdbcc_nlt - tbl_fdbcc # 27
16330 short fdbcc_nge - tbl_fdbcc # 28
16331 short fdbcc_ngt - tbl_fdbcc # 29
16332 short fdbcc_sneq - tbl_fdbcc # 30
16333 short fdbcc_st - tbl_fdbcc # 31
16335 #########################################################################
16337 # IEEE Nonaware tests #
16339 # For the IEEE nonaware tests, only the false branch changes the #
16340 # counter. However, the true branch may set bsun so we check to see #
16341 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16343 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16344 # and are incapable of setting the BSUN exception bit. #
16346 # Typically, only one of the two possible branch directions could #
16347 # have the NAN bit set. #
16348 # (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
16349 # is preserved.) #
16351 #########################################################################
16354 # equal:
16358 fdbcc_eq: # no BSUN possible; counter is touched only when false
16359 fbeq.w fdbcc_eq_yes # equal?
16360 fdbcc_eq_no:
16361 bra.w fdbcc_false # no; go handle counter
16362 fdbcc_eq_yes:
16363 rts # yes; do nothing (must not fall into fdbcc_neq)
16366 # not equal:
16370 fdbcc_neq: # no BSUN possible; counter is touched only when false
16371 fbneq.w fdbcc_neq_yes # not equal?
16372 fdbcc_neq_no:
16373 bra.w fdbcc_false # no; go handle counter
16374 fdbcc_neq_yes:
16375 rts # yes; do nothing (must not fall into fdbcc_gt)
16378 # greater than:
16379 # _______
16380 # NANvZvN
16382 fdbcc_gt: # true: return; false: BSUN check, then counter
16383 fbgt.w fdbcc_gt_yes # greater than?
16384 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16385 beq.w fdbcc_false # no; go handle counter
16386 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16387 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16388 bne.w fdbcc_bsun # yes; we have an exception
16389 bra.w fdbcc_false # no; go handle counter
16390 fdbcc_gt_yes:
16391 rts # do nothing
16394 # not greater than:
16396 # NANvZvN
16398 fdbcc_ngt: # false: counter; true: BSUN check, then return
16399 fbngt.w fdbcc_ngt_yes # not greater than?
16400 fdbcc_ngt_no:
16401 bra.w fdbcc_false # no; go handle counter
16402 fdbcc_ngt_yes:
16403 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16404 beq.b fdbcc_ngt_done # no; go finish
16405 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16406 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16407 bne.w fdbcc_bsun # yes; we have an exception
16408 fdbcc_ngt_done:
16409 rts # no; do nothing
16412 # greater than or equal:
16413 # _____
16414 # Zv(NANvN)
16416 fdbcc_ge: # both outcomes check NAN; only the false one touches Dn
16417 fbge.w fdbcc_ge_yes # greater than or equal?
16418 fdbcc_ge_no:
16419 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16420 beq.w fdbcc_false # no; go handle counter
16421 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16422 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16423 bne.w fdbcc_bsun # yes; we have an exception
16424 bra.w fdbcc_false # no; go handle counter
16425 fdbcc_ge_yes:
16426 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16427 beq.b fdbcc_ge_yes_done # no; go do nothing
16428 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16429 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16430 bne.w fdbcc_bsun # yes; we have an exception
16431 fdbcc_ge_yes_done:
16432 rts # do nothing
16435 # not (greater than or equal):
16437 # NANv(N^Z)
16439 fdbcc_nge: # false: counter; true: BSUN check, then return
16440 fbnge.w fdbcc_nge_yes # not (greater than or equal)?
16441 fdbcc_nge_no:
16442 bra.w fdbcc_false # no; go handle counter
16443 fdbcc_nge_yes:
16444 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16445 beq.b fdbcc_nge_done # no; go finish
16446 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16447 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16448 bne.w fdbcc_bsun # yes; we have an exception
16449 fdbcc_nge_done:
16450 rts # no; do nothing
16453 # less than:
16454 # _____
16455 # N^(NANvZ)
16457 fdbcc_lt: # true: return; false: BSUN check, then counter
16458 fblt.w fdbcc_lt_yes # less than?
16459 fdbcc_lt_no:
16460 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16461 beq.w fdbcc_false # no; go handle counter
16462 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16463 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16464 bne.w fdbcc_bsun # yes; we have an exception
16465 bra.w fdbcc_false # no; go handle counter
16466 fdbcc_lt_yes:
16467 rts # do nothing
16470 # not less than:
16472 # NANv(ZvN)
16474 fdbcc_nlt: # false: counter; true: BSUN check, then return
16475 fbnlt.w fdbcc_nlt_yes # not less than?
16476 fdbcc_nlt_no:
16477 bra.w fdbcc_false # no; go handle counter
16478 fdbcc_nlt_yes:
16479 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16480 beq.b fdbcc_nlt_done # no; go finish
16481 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16482 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16483 bne.w fdbcc_bsun # yes; we have an exception
16484 fdbcc_nlt_done:
16485 rts # no; do nothing
16488 # less than or equal:
16489 # ___
16490 # Zv(N^NAN)
16492 fdbcc_le: # both outcomes check NAN; only the false one touches Dn
16493 fble.w fdbcc_le_yes # less than or equal?
16494 fdbcc_le_no:
16495 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16496 beq.w fdbcc_false # no; go handle counter
16497 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16498 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16499 bne.w fdbcc_bsun # yes; we have an exception
16500 bra.w fdbcc_false # no; go handle counter
16501 fdbcc_le_yes:
16502 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16503 beq.b fdbcc_le_yes_done # no; go do nothing
16504 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16505 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16506 bne.w fdbcc_bsun # yes; we have an exception
16507 fdbcc_le_yes_done:
16508 rts # do nothing
16511 # not (less than or equal):
16512 # ___
16513 # NANv(NvZ)
16515 fdbcc_nle: # false: counter; true: BSUN check, then return
16516 fbnle.w fdbcc_nle_yes # not (less than or equal)?
16517 fdbcc_nle_no:
16518 bra.w fdbcc_false # no; go handle counter
16519 fdbcc_nle_yes:
16520 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16521 beq.w fdbcc_nle_done # no; go finish
16522 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16523 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16524 bne.w fdbcc_bsun # yes; we have an exception
16525 fdbcc_nle_done:
16526 rts # no; do nothing
16529 # greater or less than:
16530 # _____
16531 # NANvZ
16533 fdbcc_gl: # true: return; false: BSUN check, then counter
16534 fbgl.w fdbcc_gl_yes # greater or less than?
16535 fdbcc_gl_no:
16536 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16537 beq.w fdbcc_false # no; handle counter
16538 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16539 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16540 bne.w fdbcc_bsun # yes; we have an exception
16541 bra.w fdbcc_false # no; go handle counter
16542 fdbcc_gl_yes:
16543 rts # do nothing
16546 # not (greater or less than):
16548 # NANvZ
16550 fdbcc_ngl: # false: counter; true: BSUN check, then return
16551 fbngl.w fdbcc_ngl_yes # not (greater or less than)?
16552 fdbcc_ngl_no:
16553 bra.w fdbcc_false # no; go handle counter
16554 fdbcc_ngl_yes:
16555 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
16556 beq.b fdbcc_ngl_done # no; go finish
16557 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16558 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16559 bne.w fdbcc_bsun # yes; we have an exception
16560 fdbcc_ngl_done:
16561 rts # no; do nothing
16564 # greater, less, or equal:
16565 # ___
16566 # NAN
16568 fdbcc_gle: # false path flags BSUN without a separate NAN test
16569 fbgle.w fdbcc_gle_yes # greater, less, or equal?
16570 fdbcc_gle_no:
16571 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16572 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16573 bne.w fdbcc_bsun # yes; we have an exception
16574 bra.w fdbcc_false # no; go handle counter
16575 fdbcc_gle_yes:
16576 rts # do nothing
16579 # not (greater, less, or equal):
16581 # NAN
16583 fdbcc_ngle: # true path flags BSUN without a separate NAN test
16584 fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
16585 fdbcc_ngle_no:
16586 bra.w fdbcc_false # no; go handle counter
16587 fdbcc_ngle_yes:
16588 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16589 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16590 bne.w fdbcc_bsun # yes; we have an exception
16591 rts # no; do nothing
16593 #########################################################################
16595 # Miscellaneous tests #
16597 # For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
16599 #########################################################################
16602 # false:
16604 # False
16606 fdbcc_f: # no bsun possible; predicate is always false
16607 bra.w fdbcc_false # go handle counter
16610 # true:
16612 # True
16614 fdbcc_t: # no bsun possible; predicate is always true
16615 rts # do nothing
16618 # signalling false:
16620 # False
16622 fdbcc_sf: # always false; flags BSUN first when cc has NAN
16623 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16624 beq.w fdbcc_false # no; go handle counter
16625 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16626 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16627 bne.w fdbcc_bsun # yes; we have an exception
16628 bra.w fdbcc_false # go handle counter
16631 # signalling true:
16633 # True
16635 fdbcc_st: # always true; flags BSUN first when cc has NAN
16636 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16637 beq.b fdbcc_st_done # no; go finish
16638 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16639 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16640 bne.w fdbcc_bsun # yes; we have an exception
16641 fdbcc_st_done:
16642 rts # predicate true: do nothing (must not fall into fdbcc_seq)
16645 # signalling equal:
16649 fdbcc_seq: # both outcomes check NAN; only the false one touches Dn
16650 fbseq.w fdbcc_seq_yes # signalling equal?
16651 fdbcc_seq_no:
16652 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16653 beq.w fdbcc_false # no; go handle counter
16654 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16655 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16656 bne.w fdbcc_bsun # yes; we have an exception
16657 bra.w fdbcc_false # go handle counter
16658 fdbcc_seq_yes:
16659 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16660 beq.b fdbcc_seq_yes_done # no; go do nothing
16661 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16662 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16663 bne.w fdbcc_bsun # yes; we have an exception
16664 fdbcc_seq_yes_done:
16665 rts # yes; do nothing
16668 # signalling not equal:
16672 fdbcc_sneq: # both outcomes check NAN; only the false one touches Dn
16673 fbsneq.w fdbcc_sneq_yes # signalling not equal?
16674 fdbcc_sneq_no:
16675 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16676 beq.w fdbcc_false # no; go handle counter
16677 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16678 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16679 bne.w fdbcc_bsun # yes; we have an exception
16680 bra.w fdbcc_false # go handle counter
16681 fdbcc_sneq_yes:
16682 btst &nan_bit, FPSR_CC(%a6) # is NAN set?
16683 beq.w fdbcc_sneq_done # no; go finish
16684 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
16685 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
16686 bne.w fdbcc_bsun # yes; we have an exception
16687 fdbcc_sneq_done:
16688 rts # predicate true: do nothing (must not fall into fdbcc_ogt)
16690 #########################################################################
16692 # IEEE Aware tests #
16694 # For the IEEE aware tests, action is only taken if the result is false.#
16695 # Therefore, the opposite branch type is used to jump to the decrement #
16696 # routine. #
16697 # The BSUN exception will not be set for any of these tests. #
16699 #########################################################################
16702 # ordered greater than:
16703 # _______
16704 # NANvZvN
16706 fdbcc_ogt: # never flags BSUN; counter is touched only when false
16707 fbogt.w fdbcc_ogt_yes # ordered greater than?
16708 fdbcc_ogt_no:
16709 bra.w fdbcc_false # no; go handle counter
16710 fdbcc_ogt_yes:
16711 rts # yes; do nothing
16714 # unordered or less or equal:
16715 # _______
16716 # NANvZvN
16718 fdbcc_ule: # never flags BSUN; counter is touched only when false
16719 fbule.w fdbcc_ule_yes # unordered or less or equal?
16720 fdbcc_ule_no:
16721 bra.w fdbcc_false # no; go handle counter
16722 fdbcc_ule_yes:
16723 rts # yes; do nothing
16726 # ordered greater than or equal:
16727 # _____
16728 # Zv(NANvN)
16730 fdbcc_oge: # never flags BSUN; counter is touched only when false
16731 fboge.w fdbcc_oge_yes # ordered greater than or equal?
16732 fdbcc_oge_no:
16733 bra.w fdbcc_false # no; go handle counter
16734 fdbcc_oge_yes:
16735 rts # yes; do nothing
16738 # unordered or less than:
16740 # NANv(N^Z)
16742 fdbcc_ult: # never flags BSUN; counter is touched only when false
16743 fbult.w fdbcc_ult_yes # unordered or less than?
16744 fdbcc_ult_no:
16745 bra.w fdbcc_false # no; go handle counter
16746 fdbcc_ult_yes:
16747 rts # yes; do nothing
16750 # ordered less than:
16751 # _____
16752 # N^(NANvZ)
16754 fdbcc_olt: # never flags BSUN; counter is touched only when false
16755 fbolt.w fdbcc_olt_yes # ordered less than?
16756 fdbcc_olt_no:
16757 bra.w fdbcc_false # no; go handle counter
16758 fdbcc_olt_yes:
16759 rts # yes; do nothing
16762 # unordered or greater or equal:
16764 # NANvZvN
16766 fdbcc_uge: # never flags BSUN; counter is touched only when false
16767 fbuge.w fdbcc_uge_yes # unordered or greater or equal?
16768 fdbcc_uge_no:
16769 bra.w fdbcc_false # no; go handle counter
16770 fdbcc_uge_yes:
16771 rts # yes; do nothing
16774 # ordered less than or equal:
16775 # ___
16776 # Zv(N^NAN)
16778 fdbcc_ole: # never flags BSUN; counter is touched only when false
16779 fbole.w fdbcc_ole_yes # ordered less than or equal?
16780 fdbcc_ole_no:
16781 bra.w fdbcc_false # no; go handle counter
16782 fdbcc_ole_yes:
16783 rts # yes; do nothing
16786 # unordered or greater than:
16787 # ___
16788 # NANv(NvZ)
16790 fdbcc_ugt: # never flags BSUN; counter is touched only when false
16791 fbugt.w fdbcc_ugt_yes # unordered or greater than?
16792 fdbcc_ugt_no:
16793 bra.w fdbcc_false # no; go handle counter
16794 fdbcc_ugt_yes:
16795 rts # yes; do nothing
16798 # ordered greater or less than:
16799 # _____
16800 # NANvZ
16802 fdbcc_ogl: # never flags BSUN; counter is touched only when false
16803 fbogl.w fdbcc_ogl_yes # ordered greater or less than?
16804 fdbcc_ogl_no:
16805 bra.w fdbcc_false # no; go handle counter
16806 fdbcc_ogl_yes:
16807 rts # yes; do nothing
16810 # unordered or equal:
16812 # NANvZ
16814 fdbcc_ueq: # never flags BSUN; counter is touched only when false
16815 fbueq.w fdbcc_ueq_yes # unordered or equal?
16816 fdbcc_ueq_no:
16817 bra.w fdbcc_false # no; go handle counter
16818 fdbcc_ueq_yes:
16819 rts # yes; do nothing
16822 # ordered:
16823 # ___
16824 # NAN
16826 fdbcc_or: # never flags BSUN; counter is touched only when false
16827 fbor.w fdbcc_or_yes # ordered?
16828 fdbcc_or_no:
16829 bra.w fdbcc_false # no; go handle counter
16830 fdbcc_or_yes:
16831 rts # yes; do nothing
16834 # unordered:
16836 # NAN
16838 fdbcc_un: # never flags BSUN; counter is touched only when false
16839 fbun.w fdbcc_un_yes # unordered?
16840 fdbcc_un_no:
16841 bra.w fdbcc_false # no; go handle counter
16842 fdbcc_un_yes:
16843 rts # yes; do nothing
16845 #######################################################################
16848 # the bsun exception bit was not set.
16850 # (1) subtract 1 from the count register
16851 # (2) if (cr == -1) then
16852 # pc = pc of next instruction
16853 # else
16854 # pc += sign_ext(16-bit displacement)
16856 fdbcc_false: # predicate false: decrement Dn, branch unless Dn == -1
16857 mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword
16858 andi.w &0x7, %d1 # extract count register
16860 bsr.l fetch_dreg # fetch count value
16861 # make sure that d0 isn't corrupted between calls...
16863 subq.w &0x1, %d0 # Dn - 1 -> Dn
16865 bsr.l store_dreg_l # store new count value
16867 cmpi.w %d0, &-0x1 # is (Dn == -1)?
16868 bne.b fdbcc_false_cont # no; take the branch
16869 rts # yes; counter expired, stacked PC is left unchanged
16871 fdbcc_false_cont:
16872 mov.l L_SCR1(%a6),%d0 # fetch displacement
16873 add.l USER_FPIAR(%a6),%d0 # add instruction PC
16874 addq.l &0x4,%d0 # add instruction length
16875 mov.l %d0,EXC_PC(%a6) # set new PC
16876 rts # return here; fdbcc_bsun follows and must not be entered
16878 # the emulation routine set bsun and BSUN was enabled. have to
16879 # fix stack and jump to the bsun handler.
16880 # let the caller of this routine shift the stack frame up to
16881 # eliminate the effective address field.
16882 fdbcc_bsun: # enabled BSUN: record it for the caller and return
16883 mov.b &fbsun_flg,SPCOND_FLG(%a6) # caller checks SPCOND_FLG and fixes the stack
16884 rts # return to the caller rather than running into the next routine
16886 #########################################################################
16887 # ftrapcc(): routine to emulate the ftrapcc instruction #
16889 # XDEF **************************************************************** #
16890 # _ftrapcc() #
16892 # XREF **************************************************************** #
16893 # none #
16895 # INPUT *************************************************************** #
16896 # none #
16898 # OUTPUT ************************************************************** #
16899 # none #
16901 # ALGORITHM *********************************************************** #
16902 # This routine checks which conditional predicate is specified by #
16903 # the stacked ftrapcc instruction opcode and then branches to a routine #
16904 # for that predicate. The corresponding fbcc instruction is then used #
16905 # to see whether the condition (specified by the stacked FPSR) is true #
16906 # or false. #
16907 # If a BSUN exception should be indicated, the BSUN and AIOP #
16908 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
16909 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
16910 # enabled BSUN should not be flagged and the predicate is true, then #
16911 # the ftrapcc_flg is set in the SPCOND_FLG location. These special #
16912 # flags indicate to the calling routine to emulate the exceptional #
16913 # condition. #
16915 #########################################################################
16917 global _ftrapcc
16918 _ftrapcc: # dispatches on the predicate of the stacked ftrapcc
16919 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
16921 clr.l %d1 # clear scratch reg
16922 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
16923 ror.l &0x8,%d1 # rotate to top byte
16924 fmov.l %d1,%fpsr # insert into FPSR so the fbcc tests below see them
16926 mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load table entry (offset from tbl_ftrapcc)
16927 jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine
16929 tbl_ftrapcc: # 16-bit offsets from tbl_ftrapcc, indexed by predicate 0-31
16930 short ftrapcc_f - tbl_ftrapcc # 00
16931 short ftrapcc_eq - tbl_ftrapcc # 01
16932 short ftrapcc_ogt - tbl_ftrapcc # 02
16933 short ftrapcc_oge - tbl_ftrapcc # 03
16934 short ftrapcc_olt - tbl_ftrapcc # 04
16935 short ftrapcc_ole - tbl_ftrapcc # 05
16936 short ftrapcc_ogl - tbl_ftrapcc # 06
16937 short ftrapcc_or - tbl_ftrapcc # 07
16938 short ftrapcc_un - tbl_ftrapcc # 08
16939 short ftrapcc_ueq - tbl_ftrapcc # 09
16940 short ftrapcc_ugt - tbl_ftrapcc # 10
16941 short ftrapcc_uge - tbl_ftrapcc # 11
16942 short ftrapcc_ult - tbl_ftrapcc # 12
16943 short ftrapcc_ule - tbl_ftrapcc # 13
16944 short ftrapcc_neq - tbl_ftrapcc # 14
16945 short ftrapcc_t - tbl_ftrapcc # 15
16946 short ftrapcc_sf - tbl_ftrapcc # 16 (16-31: handlers that may flag BSUN)
16947 short ftrapcc_seq - tbl_ftrapcc # 17
16948 short ftrapcc_gt - tbl_ftrapcc # 18
16949 short ftrapcc_ge - tbl_ftrapcc # 19
16950 short ftrapcc_lt - tbl_ftrapcc # 20
16951 short ftrapcc_le - tbl_ftrapcc # 21
16952 short ftrapcc_gl - tbl_ftrapcc # 22
16953 short ftrapcc_gle - tbl_ftrapcc # 23
16954 short ftrapcc_ngle - tbl_ftrapcc # 24
16955 short ftrapcc_ngl - tbl_ftrapcc # 25
16956 short ftrapcc_nle - tbl_ftrapcc # 26
16957 short ftrapcc_nlt - tbl_ftrapcc # 27
16958 short ftrapcc_nge - tbl_ftrapcc # 28
16959 short ftrapcc_ngt - tbl_ftrapcc # 29
16960 short ftrapcc_sneq - tbl_ftrapcc # 30
16961 short ftrapcc_st - tbl_ftrapcc # 31
16963 #########################################################################
16965 # IEEE Nonaware tests #
16967 # For the IEEE nonaware tests, we set the result based on the #
16968 # floating point condition codes. In addition, we check to see #
16969 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
16971 # The cases EQ and NE are shared by the Aware and Nonaware groups #
16972 # and are incapable of setting the BSUN exception bit. #
16974 # Typically, only one of the two possible branch directions could #
16975 # have the NAN bit set. #
16977 #########################################################################
16980 # equal:
16984 ftrapcc_eq: # no BSUN check; true goes to ftrapcc_trap
16985 fbeq.w ftrapcc_trap # equal?
16986 ftrapcc_eq_no:
16987 rts # do nothing
16990 # not equal:
16994 ftrapcc_neq: # no BSUN check; true goes to ftrapcc_trap
16995 fbneq.w ftrapcc_trap # not equal?
16996 ftrapcc_neq_no:
16997 rts # do nothing
17000 # greater than:
17001 # _______
17002 # NANvZvN
17004 ftrapcc_gt: # true: trap; false: BSUN check, then return
17005 fbgt.w ftrapcc_trap # greater than?
17006 ftrapcc_gt_no:
17007 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17008 beq.b ftrapcc_gt_done # no
17009 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17010 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17011 bne.w ftrapcc_bsun # yes
17012 ftrapcc_gt_done:
17013 rts # no; do nothing
17016 # not greater than:
17018 # NANvZvN
17020 ftrapcc_ngt: # false: return; true: BSUN check, then trap
17021 fbngt.w ftrapcc_ngt_yes # not greater than?
17022 ftrapcc_ngt_no:
17023 rts # do nothing
17024 ftrapcc_ngt_yes:
17025 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17026 beq.w ftrapcc_trap # no; go take trap
17027 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17028 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17029 bne.w ftrapcc_bsun # yes
17030 bra.w ftrapcc_trap # no; go take trap
17033 # greater than or equal:
17034 # _____
17035 # Zv(NANvN)
17037 ftrapcc_ge: # both outcomes check NAN; only the true one traps
17038 fbge.w ftrapcc_ge_yes # greater than or equal?
17039 ftrapcc_ge_no:
17040 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17041 beq.b ftrapcc_ge_done # no; go finish
17042 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17043 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17044 bne.w ftrapcc_bsun # yes
17045 ftrapcc_ge_done:
17046 rts # no; do nothing
17047 ftrapcc_ge_yes:
17048 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17049 beq.w ftrapcc_trap # no; go take trap
17050 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17051 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17052 bne.w ftrapcc_bsun # yes
17053 bra.w ftrapcc_trap # no; go take trap
17056 # not (greater than or equal):
17058 # NANv(N^Z)
17060 ftrapcc_nge: # false: return; true: BSUN check, then trap
17061 fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
17062 ftrapcc_nge_no:
17063 rts # do nothing
17064 ftrapcc_nge_yes:
17065 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17066 beq.w ftrapcc_trap # no; go take trap
17067 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17068 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17069 bne.w ftrapcc_bsun # yes
17070 bra.w ftrapcc_trap # no; go take trap
17073 # less than:
17074 # _____
17075 # N^(NANvZ)
17077 ftrapcc_lt: # true: trap; false: BSUN check, then return
17078 fblt.w ftrapcc_trap # less than?
17079 ftrapcc_lt_no:
17080 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17081 beq.b ftrapcc_lt_done # no; go finish
17082 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17083 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17084 bne.w ftrapcc_bsun # yes
17085 ftrapcc_lt_done:
17086 rts # no; do nothing
17089 # not less than:
17091 # NANv(ZvN)
17093 ftrapcc_nlt: # false: return; true: BSUN check, then trap
17094 fbnlt.w ftrapcc_nlt_yes # not less than?
17095 ftrapcc_nlt_no:
17096 rts # do nothing
17097 ftrapcc_nlt_yes:
17098 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17099 beq.w ftrapcc_trap # no; go take trap
17100 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17101 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17102 bne.w ftrapcc_bsun # yes
17103 bra.w ftrapcc_trap # no; go take trap
17106 # less than or equal:
17107 # ___
17108 # Zv(N^NAN)
17110 ftrapcc_le: # both outcomes check NAN; only the true one traps
17111 fble.w ftrapcc_le_yes # less than or equal?
17112 ftrapcc_le_no:
17113 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17114 beq.b ftrapcc_le_done # no; go finish
17115 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17116 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17117 bne.w ftrapcc_bsun # yes
17118 ftrapcc_le_done:
17119 rts # no; do nothing
17120 ftrapcc_le_yes:
17121 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17122 beq.w ftrapcc_trap # no; go take trap
17123 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17124 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17125 bne.w ftrapcc_bsun # yes
17126 bra.w ftrapcc_trap # no; go take trap
17129 # not (less than or equal):
17130 # ___
17131 # NANv(NvZ)
17133 ftrapcc_nle: # false: return; true: BSUN check, then trap
17134 fbnle.w ftrapcc_nle_yes # not (less than or equal)?
17135 ftrapcc_nle_no:
17136 rts # do nothing
17137 ftrapcc_nle_yes:
17138 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17139 beq.w ftrapcc_trap # no; go take trap
17140 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17141 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17142 bne.w ftrapcc_bsun # yes
17143 bra.w ftrapcc_trap # no; go take trap
17146 # greater or less than:
17147 # _____
17148 # NANvZ
17150 ftrapcc_gl: # true: trap; false: BSUN check, then return
17151 fbgl.w ftrapcc_trap # greater or less than?
17152 ftrapcc_gl_no:
17153 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17154 beq.b ftrapcc_gl_done # no; go finish
17155 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17156 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17157 bne.w ftrapcc_bsun # yes
17158 ftrapcc_gl_done:
17159 rts # no; do nothing
17162 # not (greater or less than):
17164 # NANvZ
17166 ftrapcc_ngl: # false: return; true: BSUN check, then trap
17167 fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
17168 ftrapcc_ngl_no:
17169 rts # do nothing
17170 ftrapcc_ngl_yes:
17171 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17172 beq.w ftrapcc_trap # no; go take trap
17173 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17174 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17175 bne.w ftrapcc_bsun # yes
17176 bra.w ftrapcc_trap # no; go take trap
17179 # greater, less, or equal:
17180 # ___
17181 # NAN
17183 ftrapcc_gle: # false path flags BSUN without a separate NAN test
17184 fbgle.w ftrapcc_trap # greater, less, or equal?
17185 ftrapcc_gle_no:
17186 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17187 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17188 bne.w ftrapcc_bsun # yes
17189 rts # no; do nothing
17192 # not (greater, less, or equal):
17194 # NAN
17196 ftrapcc_ngle: # true path flags BSUN without a separate NAN test
17197 fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
17198 ftrapcc_ngle_no:
17199 rts # do nothing
17200 ftrapcc_ngle_yes:
17201 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17202 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17203 bne.w ftrapcc_bsun # yes
17204 bra.w ftrapcc_trap # no; go take trap
17206 #########################################################################
17208 # Miscellaneous tests #
17210 # For the miscellaneous tests, the result is based on the floating #
17211 # point condition codes. All but ftrapcc_f and ftrapcc_t can set the #
17212 # BSUN exception bit. #
17214 #########################################################################
17217 # false:
17219 # False
17221 ftrapcc_f: # predicate is always false; no BSUN check
17222 rts # do nothing
17225 # true:
17227 # True
17229 ftrapcc_t: # predicate is always true; no BSUN check
17230 bra.w ftrapcc_trap # go take trap
17233 # signalling false:
17235 # False
17237 ftrapcc_sf: # always false; flags BSUN first when cc has NAN
17238 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17239 beq.b ftrapcc_sf_done # no; go finish
17240 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17241 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17242 bne.w ftrapcc_bsun # yes
17243 ftrapcc_sf_done:
17244 rts # no; do nothing
17247 # signalling true:
17249 # True
17251 ftrapcc_st: # always true; flags BSUN first when cc has NAN
17252 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17253 beq.w ftrapcc_trap # no; go take trap
17254 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17255 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17256 bne.w ftrapcc_bsun # yes
17257 bra.w ftrapcc_trap # no; go take trap
17260 # signalling equal:
17264 ftrapcc_seq: # both outcomes check NAN; only the true one traps
17265 fbseq.w ftrapcc_seq_yes # signalling equal?
17266 ftrapcc_seq_no:
17267 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17268 beq.w ftrapcc_seq_done # no; go finish
17269 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17270 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17271 bne.w ftrapcc_bsun # yes
17272 ftrapcc_seq_done:
17273 rts # no; do nothing
17274 ftrapcc_seq_yes:
17275 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17276 beq.w ftrapcc_trap # no; go take trap
17277 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17278 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17279 bne.w ftrapcc_bsun # yes
17280 bra.w ftrapcc_trap # no; go take trap
17283 # signalling not equal:
17287 ftrapcc_sneq: # both outcomes check NAN; only the true one traps
17288 fbsneq.w ftrapcc_sneq_yes # signalling not equal?
17289 ftrapcc_sneq_no:
17290 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17291 beq.w ftrapcc_sneq_no_done # no; go finish
17292 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17293 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17294 bne.w ftrapcc_bsun # yes
17295 ftrapcc_sneq_no_done:
17296 rts # do nothing
17297 ftrapcc_sneq_yes:
17298 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17299 beq.w ftrapcc_trap # no; go take trap
17300 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # flag BSUN + AIOP in USER_FPSR
17301 btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
17302 bne.w ftrapcc_bsun # yes
17303 bra.w ftrapcc_trap # no; go take trap
17305 #########################################################################
17307 # IEEE Aware tests #
17309 # For the IEEE aware tests, we only have to decide whether to trap based #
17310 # on the floating point condition codes. The BSUN exception will not be #
17311 # set for any of these tests. #
17313 #########################################################################
# Every stub below has the same shape: the matching fbcc branches to
# ftrapcc_trap when the predicate is true; otherwise the stub returns
# without touching any state.
17316 # ordered greater than:
17317 # _______
17318 # NANvZvN
17320 ftrapcc_ogt:
17321 fbogt.w ftrapcc_trap # ordered greater than?
17322 ftrapcc_ogt_no:
17323 rts # do nothing
17326 # unordered or less or equal:
17327 # _______
17328 # NANvZvN
17330 ftrapcc_ule:
17331 fbule.w ftrapcc_trap # unordered or less or equal?
17332 ftrapcc_ule_no:
17333 rts # do nothing
17336 # ordered greater than or equal:
17337 # _____
17338 # Zv(NANvN)
17340 ftrapcc_oge:
17341 fboge.w ftrapcc_trap # ordered greater than or equal?
17342 ftrapcc_oge_no:
17343 rts # do nothing
17346 # unordered or less than:
17348 # NANv(N^Z)
17350 ftrapcc_ult:
17351 fbult.w ftrapcc_trap # unordered or less than?
17352 ftrapcc_ult_no:
17353 rts # do nothing
17356 # ordered less than:
17357 # _____
17358 # N^(NANvZ)
17360 ftrapcc_olt:
17361 fbolt.w ftrapcc_trap # ordered less than?
17362 ftrapcc_olt_no:
17363 rts # do nothing
17366 # unordered or greater or equal:
17368 # NANvZvN
17370 ftrapcc_uge:
17371 fbuge.w ftrapcc_trap # unordered or greater or equal?
17372 ftrapcc_uge_no:
17373 rts # do nothing
17376 # ordered less than or equal:
17377 # ___
17378 # Zv(N^NAN)
17380 ftrapcc_ole:
17381 fbole.w ftrapcc_trap # ordered less than or equal?
17382 ftrapcc_ole_no:
17383 rts # do nothing
17386 # unordered or greater than:
17387 # ___
17388 # NANv(NvZ)
17390 ftrapcc_ugt:
17391 fbugt.w ftrapcc_trap # unordered or greater than?
17392 ftrapcc_ugt_no:
17393 rts # do nothing
17396 # ordered greater or less than:
17397 # _____
17398 # NANvZ
17400 ftrapcc_ogl:
17401 fbogl.w ftrapcc_trap # ordered greater or less than?
17402 ftrapcc_ogl_no:
17403 rts # do nothing
17406 # unordered or equal:
17408 # NANvZ
17410 ftrapcc_ueq:
17411 fbueq.w ftrapcc_trap # unordered or equal?
17412 ftrapcc_ueq_no:
17413 rts # do nothing
17416 # ordered:
17417 # ___
17418 # NAN
17420 ftrapcc_or:
17421 fbor.w ftrapcc_trap # ordered?
17422 ftrapcc_or_no:
17423 rts # do nothing
17426 # unordered:
17428 # NAN
17430 ftrapcc_un:
17431 fbun.w ftrapcc_trap # unordered?
17432 ftrapcc_un_no:
17433 rts # do nothing
17435 #######################################################################
17437 # the bsun exception bit was not set.
17438 # we will need to jump to the ftrapcc vector. the stack frame
17439 # is the same size as that of the fp unimp instruction. the
17440 # only difference is that the <ea> field should hold the PC
17441 # of the ftrapcc instruction and the vector offset field
17442 # should denote the ftrapcc trap.
# This routine only records the decision: it stores ftrapcc_flg in
# SPCOND_FLG and returns; the stack frame itself is not modified here.
17443 ftrapcc_trap:
17444 mov.b &ftrapcc_flg,SPCOND_FLG(%a6)
# The return below is required. Without it control runs straight into
# ftrapcc_bsun, which overwrites SPCOND_FLG with fbsun_flg, so every taken
# trap would be reported as a BSUN exception.
rts # flag is set; return to caller
17447 # the emulation routine set bsun and BSUN was enabled. have to
17448 # fix stack and jump to the bsun handler.
17449 # let the caller of this routine shift the stack frame up to
17450 # eliminate the effective address field.
# This routine only records the condition: it stores fbsun_flg in
# SPCOND_FLG and returns.
17451 ftrapcc_bsun:
17452 mov.b &fbsun_flg,SPCOND_FLG(%a6)
# The return below is required. Without it control falls through into
# _fscc and the fscc emulation would run on a ftrapcc instruction.
rts # flag is set; return to caller
17455 #########################################################################
17456 # fscc(): routine to emulate the fscc instruction #
17458 # XDEF **************************************************************** #
17459 # _fscc() #
17461 # XREF **************************************************************** #
17462 # store_dreg_b() - store result to data register file #
17463 # dec_areg() - decrement an areg for -(an) mode #
17464 # inc_areg() - increment an areg for (an)+ mode #
17465 # _dmem_write_byte() - store result to memory #
17467 # INPUT *************************************************************** #
17468 # none #
17470 # OUTPUT ************************************************************** #
17471 # none #
17473 # ALGORITHM *********************************************************** #
17474 # This routine checks which conditional predicate is specified by #
17475 # the stacked fscc instruction opcode and then branches to a routine #
17476 # for that predicate. The corresponding fbcc instruction is then used #
17477 # to see whether the condition (specified by the stacked FPSR) is true #
17478 # or false. #
17479 # If a BSUN exception should be indicated, the BSUN and AIOP #
17480 # bits are set in the stacked FPSR. If the BSUN exception is enabled, #
17481 # the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
17482 # enabled BSUN should not be flagged, then the result byte (all ones #
17483 # if true, zero if false) is stored to the data register or memory. #
17485 #########################################################################
17487 global _fscc
17488 _fscc:
17489 mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
# NOTE(review): d0.w is used unmasked as the table index below and
# tbl_fscc has 32 entries; this presumably relies on the stacked command
# word holding only a predicate in 0..31 -- confirm against the caller.
# Load the stacked condition-code byte into the top byte of the live
# FPSR so that the fbcc instructions in the predicate routines test it.
17491 clr.l %d1 # clear scratch reg
17492 mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
17493 ror.l &0x8,%d1 # rotate to top byte
17494 fmov.l %d1,%fpsr # insert into FPSR
17496 mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load routine offset from table
17497 jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine
# Dispatch table: one 16-bit offset (relative to tbl_fscc) per predicate.
# The trailing comment on each entry is the predicate number in decimal.
17499 tbl_fscc:
17500 short fscc_f - tbl_fscc # 00
17501 short fscc_eq - tbl_fscc # 01
17502 short fscc_ogt - tbl_fscc # 02
17503 short fscc_oge - tbl_fscc # 03
17504 short fscc_olt - tbl_fscc # 04
17505 short fscc_ole - tbl_fscc # 05
17506 short fscc_ogl - tbl_fscc # 06
17507 short fscc_or - tbl_fscc # 07
17508 short fscc_un - tbl_fscc # 08
17509 short fscc_ueq - tbl_fscc # 09
17510 short fscc_ugt - tbl_fscc # 10
17511 short fscc_uge - tbl_fscc # 11
17512 short fscc_ult - tbl_fscc # 12
17513 short fscc_ule - tbl_fscc # 13
17514 short fscc_neq - tbl_fscc # 14
17515 short fscc_t - tbl_fscc # 15
17516 short fscc_sf - tbl_fscc # 16
17517 short fscc_seq - tbl_fscc # 17
17518 short fscc_gt - tbl_fscc # 18
17519 short fscc_ge - tbl_fscc # 19
17520 short fscc_lt - tbl_fscc # 20
17521 short fscc_le - tbl_fscc # 21
17522 short fscc_gl - tbl_fscc # 22
17523 short fscc_gle - tbl_fscc # 23
17524 short fscc_ngle - tbl_fscc # 24
17525 short fscc_ngl - tbl_fscc # 25
17526 short fscc_nle - tbl_fscc # 26
17527 short fscc_nlt - tbl_fscc # 27
17528 short fscc_nge - tbl_fscc # 28
17529 short fscc_ngt - tbl_fscc # 29
17530 short fscc_sneq - tbl_fscc # 30
17531 short fscc_st - tbl_fscc # 31
17533 #########################################################################
17535 # IEEE Nonaware tests #
17537 # For the IEEE nonaware tests, we set the result based on the #
17538 # floating point condition codes. In addition, we check to see #
17539 # if the NAN bit is set, in which case BSUN and AIOP will be set. #
17541 # The cases EQ and NE are shared by the Aware and Nonaware groups #
17542 # and are incapable of setting the BSUN exception bit. #
17544 # Typically, only one of the two possible branch directions could #
17545 # have the NAN bit set. #
17547 #########################################################################
# Each routine leaves the result byte in d0 (zero = false, all ones =
# true) and continues at fscc_done, or at fscc_chk_bsun once BSUN and AIOP
# have been recorded in the saved user FPSR.
17550 # equal:
17554 fscc_eq:
17555 fbeq.w fscc_eq_yes # equal?
17556 fscc_eq_no:
17557 clr.b %d0 # set false
17558 bra.w fscc_done # go finish
17559 fscc_eq_yes:
17560 st %d0 # set true
17561 bra.w fscc_done # go finish
17564 # not equal:
17568 fscc_neq:
17569 fbneq.w fscc_neq_yes # not equal?
17570 fscc_neq_no:
17571 clr.b %d0 # set false
17572 bra.w fscc_done # go finish
17573 fscc_neq_yes:
17574 st %d0 # set true
17575 bra.w fscc_done # go finish
17578 # greater than:
17579 # _______
17580 # NANvZvN
# Result byte goes in d0. Only the false path can have NAN set, so only
# that path checks it and records BSUN and AIOP.
17582 fscc_gt:
17583 fbgt.w fscc_gt_yes # greater than?
17584 fscc_gt_no:
17585 clr.b %d0 # set false
17586 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17587 beq.w fscc_done # no;go finish
17588 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17589 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17590 fscc_gt_yes:
17591 st %d0 # set true
17592 bra.w fscc_done # go finish
17595 # not greater than:
17597 # NANvZvN
# Result byte goes in d0. Only the true path can have NAN set, so only
# that path checks it and records BSUN and AIOP.
17599 fscc_ngt:
17600 fbngt.w fscc_ngt_yes # not greater than?
17601 fscc_ngt_no:
17602 clr.b %d0 # set false
17603 bra.w fscc_done # go finish
17604 fscc_ngt_yes:
17605 st %d0 # set true
17606 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17607 beq.w fscc_done # no;go finish
17608 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17609 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17612 # greater than or equal:
17613 # _____
17614 # Zv(NANvN)
# Result byte goes in d0. Both outcomes check the NAN bit here, and
# record BSUN and AIOP when it is set.
17616 fscc_ge:
17617 fbge.w fscc_ge_yes # greater than or equal?
17618 fscc_ge_no:
17619 clr.b %d0 # set false
17620 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17621 beq.w fscc_done # no;go finish
17622 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17623 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17624 fscc_ge_yes:
17625 st %d0 # set true
17626 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17627 beq.w fscc_done # no;go finish
17628 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17629 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17632 # not (greater than or equal):
17634 # NANv(N^Z)
# Result byte goes in d0. Only the true path checks the NAN bit.
17636 fscc_nge:
17637 fbnge.w fscc_nge_yes # not (greater than or equal)?
17638 fscc_nge_no:
17639 clr.b %d0 # set false
17640 bra.w fscc_done # go finish
17641 fscc_nge_yes:
17642 st %d0 # set true
17643 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17644 beq.w fscc_done # no;go finish
17645 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17646 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17649 # less than:
17650 # _____
17651 # N^(NANvZ)
# Result byte goes in d0. Only the false path checks the NAN bit.
17653 fscc_lt:
17654 fblt.w fscc_lt_yes # less than?
17655 fscc_lt_no:
17656 clr.b %d0 # set false
17657 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17658 beq.w fscc_done # no;go finish
17659 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17660 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17661 fscc_lt_yes:
17662 st %d0 # set true
17663 bra.w fscc_done # go finish
17666 # not less than:
17668 # NANv(ZvN)
# Result byte goes in d0. Only the true path checks the NAN bit.
17670 fscc_nlt:
17671 fbnlt.w fscc_nlt_yes # not less than?
17672 fscc_nlt_no:
17673 clr.b %d0 # set false
17674 bra.w fscc_done # go finish
17675 fscc_nlt_yes:
17676 st %d0 # set true
17677 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17678 beq.w fscc_done # no;go finish
17679 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17680 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17683 # less than or equal:
17684 # ___
17685 # Zv(N^NAN)
# Result byte goes in d0. Both outcomes check the NAN bit here, and
# record BSUN and AIOP when it is set.
17687 fscc_le:
17688 fble.w fscc_le_yes # less than or equal?
17689 fscc_le_no:
17690 clr.b %d0 # set false
17691 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17692 beq.w fscc_done # no;go finish
17693 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17694 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17695 fscc_le_yes:
17696 st %d0 # set true
17697 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17698 beq.w fscc_done # no;go finish
17699 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17700 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17703 # not (less than or equal):
17704 # ___
17705 # NANv(NvZ)
# Result byte goes in d0. Only the true path checks the NAN bit.
17707 fscc_nle:
17708 fbnle.w fscc_nle_yes # not (less than or equal)?
17709 fscc_nle_no:
17710 clr.b %d0 # set false
17711 bra.w fscc_done # go finish
17712 fscc_nle_yes:
17713 st %d0 # set true
17714 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17715 beq.w fscc_done # no;go finish
17716 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17717 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17720 # greater or less than:
17721 # _____
17722 # NANvZ
# Result byte goes in d0. Only the false path checks the NAN bit.
17724 fscc_gl:
17725 fbgl.w fscc_gl_yes # greater or less than?
17726 fscc_gl_no:
17727 clr.b %d0 # set false
17728 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17729 beq.w fscc_done # no;go finish
17730 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17731 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17732 fscc_gl_yes:
17733 st %d0 # set true
17734 bra.w fscc_done # go finish
17737 # not (greater or less than):
17739 # NANvZ
# Result byte goes in d0. Only the true path checks the NAN bit.
17741 fscc_ngl:
17742 fbngl.w fscc_ngl_yes # not (greater or less than)?
17743 fscc_ngl_no:
17744 clr.b %d0 # set false
17745 bra.w fscc_done # go finish
17746 fscc_ngl_yes:
17747 st %d0 # set true
17748 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17749 beq.w fscc_done # no;go finish
17750 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17751 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17754 # greater, less, or equal:
17755 # ___
17756 # NAN
# Result byte goes in d0. The predicate is "not NAN", so the false path
# means NAN is set; BSUN and AIOP are recorded there without a further
# test of the NAN bit.
17758 fscc_gle:
17759 fbgle.w fscc_gle_yes # greater, less, or equal?
17760 fscc_gle_no:
17761 clr.b %d0 # set false
17762 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17763 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17764 fscc_gle_yes:
17765 st %d0 # set true
17766 bra.w fscc_done # go finish
17769 # not (greater, less, or equal):
17771 # NAN
# Result byte goes in d0. The predicate is "NAN", so the true path means
# NAN is set; BSUN and AIOP are recorded there without a further test.
17773 fscc_ngle:
17774 fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
17775 fscc_ngle_no:
17776 clr.b %d0 # set false
17777 bra.w fscc_done # go finish
17778 fscc_ngle_yes:
17779 st %d0 # set true
17780 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17781 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17783 #########################################################################
17785 # Miscellaneous tests #
17787 # The F and T tests produce a constant result and never set BSUN. #
17788 # The signalling tests (SF, ST, SEQ, SNEQ) also set BSUN and AIOP in #
17789 # the stacked FPSR whenever the NAN condition code bit is set. #
17791 #########################################################################
17794 # false:
17796 # False
17798 fscc_f:
17799 clr.b %d0 # set false
17800 bra.w fscc_done # go finish
17803 # true:
17805 # True
17807 fscc_t:
17808 st %d0 # set true
17809 bra.w fscc_done # go finish
17812 # signalling false:
17814 # False
17816 fscc_sf:
17817 clr.b %d0 # set false
17818 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17819 beq.w fscc_done # no;go finish
17820 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17821 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17824 # signalling true:
17826 # True
17828 fscc_st:
17829 st %d0 # set true
17830 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17831 beq.w fscc_done # no;go finish
17832 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17833 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17836 # signalling equal:
# Result byte goes in d0. Both outcomes check the NAN bit and record BSUN
# and AIOP in the saved user FPSR when it is set.
17840 fscc_seq:
17841 fbseq.w fscc_seq_yes # signalling equal?
17842 fscc_seq_no:
17843 clr.b %d0 # set false
17844 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17845 beq.w fscc_done # no;go finish
17846 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17847 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17848 fscc_seq_yes:
17849 st %d0 # set true
17850 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17851 beq.w fscc_done # no;go finish
17852 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17853 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17856 # signalling not equal:
# Result byte goes in d0. Both outcomes check the NAN bit and record BSUN
# and AIOP in the saved user FPSR when it is set.
17860 fscc_sneq:
17861 fbsneq.w fscc_sneq_yes # signalling not equal?
17862 fscc_sneq_no:
17863 clr.b %d0 # set false
17864 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17865 beq.w fscc_done # no;go finish
17866 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17867 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17868 fscc_sneq_yes:
17869 st %d0 # set true
17870 btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
17871 beq.w fscc_done # no;go finish
17872 ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP exc bits
17873 bra.w fscc_chk_bsun # go check whether BSUN is enabled
17875 #########################################################################
17877 # IEEE Aware tests #
17879 # For the IEEE aware tests, we only have to set the result based on the #
17880 # floating point condition codes. The BSUN exception will not be #
17881 # set for any of these tests. #
17883 #########################################################################
# Every routine in this group has the same shape: the matching fbcc picks
# the true or false path, the result byte is placed in d0 (all ones =
# true, zero = false), and control continues at fscc_done.
17886 # ordered greater than:
17887 # _______
17888 # NANvZvN
17890 fscc_ogt:
17891 fbogt.w fscc_ogt_yes # ordered greater than?
17892 fscc_ogt_no:
17893 clr.b %d0 # set false
17894 bra.w fscc_done # go finish
17895 fscc_ogt_yes:
17896 st %d0 # set true
17897 bra.w fscc_done # go finish
17900 # unordered or less or equal:
17901 # _______
17902 # NANvZvN
17904 fscc_ule:
17905 fbule.w fscc_ule_yes # unordered or less or equal?
17906 fscc_ule_no:
17907 clr.b %d0 # set false
17908 bra.w fscc_done # go finish
17909 fscc_ule_yes:
17910 st %d0 # set true
17911 bra.w fscc_done # go finish
17914 # ordered greater than or equal:
17915 # _____
17916 # Zv(NANvN)
17918 fscc_oge:
17919 fboge.w fscc_oge_yes # ordered greater than or equal?
17920 fscc_oge_no:
17921 clr.b %d0 # set false
17922 bra.w fscc_done # go finish
17923 fscc_oge_yes:
17924 st %d0 # set true
17925 bra.w fscc_done # go finish
17928 # unordered or less than:
17930 # NANv(N^Z)
17932 fscc_ult:
17933 fbult.w fscc_ult_yes # unordered or less than?
17934 fscc_ult_no:
17935 clr.b %d0 # set false
17936 bra.w fscc_done # go finish
17937 fscc_ult_yes:
17938 st %d0 # set true
17939 bra.w fscc_done # go finish
17942 # ordered less than:
17943 # _____
17944 # N^(NANvZ)
17946 fscc_olt:
17947 fbolt.w fscc_olt_yes # ordered less than?
17948 fscc_olt_no:
17949 clr.b %d0 # set false
17950 bra.w fscc_done # go finish
17951 fscc_olt_yes:
17952 st %d0 # set true
17953 bra.w fscc_done # go finish
17956 # unordered or greater or equal:
17958 # NANvZvN
17960 fscc_uge:
17961 fbuge.w fscc_uge_yes # unordered or greater or equal?
17962 fscc_uge_no:
17963 clr.b %d0 # set false
17964 bra.w fscc_done # go finish
17965 fscc_uge_yes:
17966 st %d0 # set true
17967 bra.w fscc_done # go finish
17970 # ordered less than or equal:
17971 # ___
17972 # Zv(N^NAN)
# As with the other IEEE aware tests, the result byte is placed in d0 and
# BSUN is never set.
17974 fscc_ole:
17975 fbole.w fscc_ole_yes # ordered less than or equal?
17976 fscc_ole_no:
17977 clr.b %d0 # set false
17978 bra.w fscc_done # go finish
17979 fscc_ole_yes:
17980 st %d0 # set true
17981 bra.w fscc_done # go finish
17984 # unordered or greater than:
17985 # ___
17986 # NANv(NvZ)
17988 fscc_ugt:
17989 fbugt.w fscc_ugt_yes # unordered or greater than?
17990 fscc_ugt_no:
17991 clr.b %d0 # set false
17992 bra.w fscc_done # go finish
17993 fscc_ugt_yes:
17994 st %d0 # set true
17995 bra.w fscc_done # go finish
17998 # ordered greater or less than:
17999 # _____
18000 # NANvZ
18002 fscc_ogl:
18003 fbogl.w fscc_ogl_yes # ordered greater or less than?
18004 fscc_ogl_no:
18005 clr.b %d0 # set false
18006 bra.w fscc_done # go finish
18007 fscc_ogl_yes:
18008 st %d0 # set true
18009 bra.w fscc_done # go finish
18012 # unordered or equal:
18014 # NANvZ
18016 fscc_ueq:
18017 fbueq.w fscc_ueq_yes # unordered or equal?
18018 fscc_ueq_no:
18019 clr.b %d0 # set false
18020 bra.w fscc_done # go finish
18021 fscc_ueq_yes:
18022 st %d0 # set true
18023 bra.w fscc_done # go finish
18026 # ordered:
18027 # ___
18028 # NAN
18030 fscc_or:
18031 fbor.w fscc_or_yes # ordered?
18032 fscc_or_no:
18033 clr.b %d0 # set false
18034 bra.w fscc_done # go finish
18035 fscc_or_yes:
18036 st %d0 # set true
18037 bra.w fscc_done # go finish
18040 # unordered:
18042 # NAN
18044 fscc_un:
18045 fbun.w fscc_un_yes # unordered?
18046 fscc_un_no:
18047 clr.b %d0 # set false
18048 bra.w fscc_done # go finish
18049 fscc_un_yes:
18050 st %d0 # set true
18051 bra.w fscc_done # go finish
18053 #######################################################################
18056 # the bsun exception bit was set. now, check to see if BSUN
18057 # is enabled. if so, don't store result and correct stack frame
18058 # for a bsun exception.
# If BSUN is not enabled, execution deliberately continues into the code
# that follows this routine, which stores the result held in d0.
18060 fscc_chk_bsun:
18061 btst &bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
18062 bne.w fscc_bsun # yes; flag a bsun exception
18065 # the bsun exception bit was not set.
18066 # the result has been selected.
18067 # now, check to see if the result is to be stored in the data register
18068 # file or in memory.
# On entry d0 holds the result byte chosen by the predicate routine. The
# low byte of the stacked opword is examined: a zero <ea> mode field
# (bits 5-3) means the destination is Dn; anything else goes to
# fscc_mem_op.
18070 fscc_done:
18071 mov.l %d0,%a0 # save result for a moment
18073 mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
18074 mov.l %d1,%d0 # make a copy
18075 andi.b &0x38,%d1 # extract <ea> mode field
18077 bne.b fscc_mem_op # it's a memory operation
18079 mov.l %d0,%d1
18080 andi.w &0x7,%d1 # pass index in d1
18081 mov.l %a0,%d0 # pass result in d0
18082 bsr.l store_dreg_b # save result in regfile
# The return below is required. Without it the register-destination case
# falls through into fscc_mem_op and also writes the result byte through
# the stacked <ea>, which is not valid for a Dn destination.
rts # Dn updated; return to caller
18086 # the stacked <ea> is correct with the exception of:
18087 # -> Dn : <ea> is garbage
18089 # if the addressing mode is post-increment or pre-decrement,
18090 # then the address registers have not been updated.
# On entry d1.b holds the <ea> mode field (opword bits 5-3, still in
# place) and a0 holds the result byte. (An)+ and -(An) are handed to their
# own routines; every other mode just writes the byte at the stacked <ea>.
18092 fscc_mem_op:
18093 cmpi.b %d1,&0x18 # is <ea> (An)+ ?
18094 beq.b fscc_mem_inc # yes
18095 cmpi.b %d1,&0x20 # is <ea> -(An) ?
18096 beq.b fscc_mem_dec # yes
18098 mov.l %a0,%d0 # pass result in d0
18099 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18100 bsr.l _dmem_write_byte # write result byte
18102 tst.l %d1 # did dstore fail?
18103 bne.w fscc_err # yes
# The return below is required. Without it a successful write falls
# through into fscc_mem_inc, which writes the byte a second time and then
# increments an address register.
rts # byte stored; return to caller
18107 # addressing mode is post-increment. write the result byte. if the write
18108 # fails then don't update the address register. if write passes then
18109 # call inc_areg() to update the address register.
# On entry a0 holds the result byte.
18110 fscc_mem_inc:
18111 mov.l %a0,%d0 # pass result in d0
18112 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18113 bsr.l _dmem_write_byte # write result byte
18115 tst.l %d1 # did dstore fail?
18116 bne.w fscc_err # yes
18118 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18119 andi.w &0x7,%d1 # pass index in d1
18120 movq.l &0x1,%d0 # pass amt to inc by
18121 bsr.l inc_areg # increment address register
# The return below is required. Without it control falls through into
# fscc_mem_dec, which performs another write and then decrements the
# same address register.
rts # An updated; return to caller
18125 # addressing mode is pre-decrement. write the result byte. if the write
18126 # fails then don't update the address register. if the write passes then
18127 # call dec_areg() to update the address register.
# On entry a0 holds the result byte.
18128 fscc_mem_dec:
18129 mov.l %a0,%d0 # pass result in d0
18130 mov.l EXC_EA(%a6),%a0 # fetch <ea>
18131 bsr.l _dmem_write_byte # write result byte
18133 tst.l %d1 # did dstore fail?
18134 bne.w fscc_err # yes
18136 mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch opword
18137 andi.w &0x7,%d1 # pass index in d1
18138 movq.l &0x1,%d0 # pass amt to dec by
18139 bsr.l dec_areg # decrement address register
# The return below is required. Without it a successful store falls
# through into fscc_bsun and is reported as a BSUN exception.
rts # An updated; return to caller
18143 # the emulation routine set bsun and BSUN was enabled. have to
18144 # fix stack and jump to the bsun handler.
18145 # let the caller of this routine shift the stack frame up to
18146 # eliminate the effective address field.
# This routine only records the condition: it stores fbsun_flg in
# SPCOND_FLG and returns without storing the fscc result.
18147 fscc_bsun:
18148 mov.b &fbsun_flg,SPCOND_FLG(%a6)
# The return below is required. Without it control falls through into
# fscc_err and the BSUN case is handled as a failed memory write.
rts # flag is set; return to caller
18151 # the byte write to memory has failed. pass the failing effective address
18152 # and a FSLW to funimp_dacc().
# NOTE(review): the code here only stores 0x00a1 into EXC_VOFF and
# branches to facc_finish; how that value and the failing address reach
# funimp_dacc() is not visible in this routine -- confirm at facc_finish.
18153 fscc_err:
18154 mov.w &0x00a1,EXC_VOFF(%a6)
18155 bra.l facc_finish
18157 #########################################################################
18158 # XDEF **************************************************************** #
18159 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
18161 # XREF **************************************************************** #
18162 # fetch_dreg() - fetch data register #
18163 # {i,d,}mem_read() - fetch data from memory #
18164 # _mem_write() - write data to memory #
18165 # iea_iacc() - instruction memory access error occurred #
18166 # iea_dacc() - data memory access error occurred #
18167 # restore() - restore An index regs if access error occurred #
18169 # INPUT *************************************************************** #
18170 # None #
18172 # OUTPUT ************************************************************** #
18173 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
18174 # d0 = size of dump #
18175 # d1 = Dn #
18176 # Else if instruction access error, #
18177 # d0 = FSLW #
18178 # Else if data access error, #
18179 # d0 = FSLW #
18180 # a0 = address of fault #
18181 # Else #
18182 # none. #
18184 # ALGORITHM *********************************************************** #
18185 # The effective address must be calculated since this is entered #
18186 # from an "Unimplemented Effective Address" exception handler. So, we #
18187 # have our own fcalc_ea() routine here. If an access error is flagged #
18188 # by a _{i,d,}mem_read() call, we must exit through the special #
18189 # handler. #
18190 # The data register is determined and its value loaded to get the #
18191 # string of FP registers affected. This value is used as an index into #
18192 # a lookup table such that we can determine the number of bytes #
18193 # involved. #
18194 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
18195 # to read in all FP values. Again, _mem_read() may fail and require a #
18196 # special exit. #
18197 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
18198 # to write all FP values. _mem_write() may also fail. #
18199 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
18200 # then we return the size of the dump and the string to the caller #
18201 # so that the move can occur outside of this routine. This special #
18202 # case is required so that moves to the system stack are handled #
18203 # correctly. #
18205 # DYNAMIC: #
18206 # fmovm.x dn, <ea> #
18207 # fmovm.x <ea>, dn #
18209 # <WORD 1> <WORD2> #
18210 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
18212 # & = (0): predecrement addressing mode #
18213 # (1): postincrement or control addressing mode #
18214 # @ = (0): move listed regs from memory to the FPU #
18215 # (1): move listed regs from the FPU to memory #
18216 # $$$ : index of data register holding reg select mask #
18218 # NOTES: #
18219 # If the data register holds a zero, then the #
18220 # instruction is a nop. #
18222 #########################################################################
18224 global fmovm_dynamic
18225 fmovm_dynamic:
18227 # extract the data register in which the bit string resides...
18228 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
18229 andi.w &0x70,%d1 # extract reg bits
18230 lsr.b &0x4,%d1 # shift into lo bits
18232 # fetch the bit string into d0...
18233 bsr.l fetch_dreg # fetch reg string
18235 andi.l &0x000000ff,%d0 # keep only lo byte
# the string and its byte count are kept on the stack across the call to
# fmovm_calc_ea and popped back afterwards as d1 = strg, d0 = size.
18237 mov.l %d0,-(%sp) # save strg
18238 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 # size = 12 bytes per selected fpreg
18239 mov.l %d0,-(%sp) # save size
18240 bsr.l fmovm_calc_ea # calculate <ea>
18241 mov.l (%sp)+,%d0 # restore size
18242 mov.l (%sp)+,%d1 # restore strg
18244 # if the bit string is a zero, then the operation is a no-op
18245 # but, make sure that we've calculated ea and advanced the opword pointer
# (the beq below tests the flags set by the mov.l that restored strg)
18246 beq.w fmovm_data_done
18248 # separate move ins from move outs...
18249 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
18250 beq.w fmovm_data_in # it's a move in
18252 #############
18253 # MOVE OUT: #
18254 #############
# On entry d0 = size of the dump in bytes and d1 = register bit string.
18255 fmovm_data_out:
18256 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
18257 bne.w fmovm_out_ctrl # control
18259 ############################
18260 fmovm_out_predec:
18261 # for predecrement mode, the bit string is the opposite of both control
18262 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
18263 # here, we convert it to be just like the others...
18264 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
18266 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
18267 beq.b fmovm_out_ctrl # user
18269 fmovm_out_predec_s:
18270 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
18271 bne.b fmovm_out_ctrl
18273 # the operation was unfortunately an: fmovm.x dn,-(sp)
18274 # called from supervisor mode.
18275 # we're also passing "size" and "strg" back to the calling routine
# The return below is required. d0 (size) and d1 (strg) are handed back to
# the caller, which performs the move itself; without the return this case
# falls through into fmovm_out_ctrl and the dump is attempted here anyway.
rts # return size in d0, strg in d1
18278 ############################
# Build the register image in a scratch area carved out of the stack,
# then copy it to the destination with _dmem_write.
# On entry: a0 = destination <ea>, d0 = size in bytes, d1.b = bit string
# with bit 7 selecting FP0 down to bit 0 selecting FP7. Each "lsl.b"
# moves the next register's select bit into the sign position for "bpl".
# FP0 and FP1 are copied from their saved images in the exception frame;
# FP2-FP7 are stored directly from the FPU with fmovm.x.
18279 fmovm_out_ctrl:
18280 mov.l %a0,%a1 # move <ea> to a1
18282 sub.l %d0,%sp # subtract size of dump
18283 lea (%sp),%a0
18285 tst.b %d1 # should FP0 be moved?
18286 bpl.b fmovm_out_ctrl_fp1 # no
18288 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
18289 mov.l 0x4+EXC_FP0(%a6),(%a0)+
18290 mov.l 0x8+EXC_FP0(%a6),(%a0)+
18292 fmovm_out_ctrl_fp1:
18293 lsl.b &0x1,%d1 # should FP1 be moved?
18294 bpl.b fmovm_out_ctrl_fp2 # no
18296 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
18297 mov.l 0x4+EXC_FP1(%a6),(%a0)+
18298 mov.l 0x8+EXC_FP1(%a6),(%a0)+
18300 fmovm_out_ctrl_fp2:
18301 lsl.b &0x1,%d1 # should FP2 be moved?
18302 bpl.b fmovm_out_ctrl_fp3 # no
18304 fmovm.x &0x20,(%a0) # yes
18305 add.l &0xc,%a0
18307 fmovm_out_ctrl_fp3:
18308 lsl.b &0x1,%d1 # should FP3 be moved?
18309 bpl.b fmovm_out_ctrl_fp4 # no
18311 fmovm.x &0x10,(%a0) # yes
18312 add.l &0xc,%a0
18314 fmovm_out_ctrl_fp4:
18315 lsl.b &0x1,%d1 # should FP4 be moved?
18316 bpl.b fmovm_out_ctrl_fp5 # no
18318 fmovm.x &0x08,(%a0) # yes
18319 add.l &0xc,%a0
18321 fmovm_out_ctrl_fp5:
18322 lsl.b &0x1,%d1 # should FP5 be moved?
18323 bpl.b fmovm_out_ctrl_fp6 # no
18325 fmovm.x &0x04,(%a0) # yes
18326 add.l &0xc,%a0
18328 fmovm_out_ctrl_fp6:
18329 lsl.b &0x1,%d1 # should FP6 be moved?
18330 bpl.b fmovm_out_ctrl_fp7 # no
18332 fmovm.x &0x02,(%a0) # yes
18333 add.l &0xc,%a0
18335 fmovm_out_ctrl_fp7:
18336 lsl.b &0x1,%d1 # should FP7 be moved?
18337 bpl.b fmovm_out_ctrl_done # no
18339 fmovm.x &0x01,(%a0) # yes
18340 add.l &0xc,%a0
18342 fmovm_out_ctrl_done:
18343 mov.l %a1,L_SCR1(%a6) # remember destination <ea>
18345 lea (%sp),%a0 # pass: supervisor src
18346 mov.l %d0,-(%sp) # save size
18347 bsr.l _dmem_write # copy data to user mem
18349 mov.l (%sp)+,%d0
18350 add.l %d0,%sp # clear fpreg data from stack
18352 tst.l %d1 # did dstore err?
18353 bne.w fmovm_out_err # yes
# The return below is required. Without it a successful move out falls
# through into fmovm_data_in and a move in is performed as well.
rts # move out complete; return
18357 ############
18358 # MOVE IN: #
18359 ############
# Read the register image from the source into a scratch area carved out
# of the stack, then load each selected register from it.
# On entry: a0 = source <ea>, d0 = size in bytes, d1.b = bit string with
# bit 7 selecting FP0 down to bit 0 selecting FP7.
# FP0 and FP1 are written to their saved images in the exception frame;
# FP2-FP7 are loaded directly into the FPU with fmovm.x.
18360 fmovm_data_in:
18361 mov.l %a0,L_SCR1(%a6) # remember source <ea>
18363 sub.l %d0,%sp # make room for fpregs
18364 lea (%sp),%a1
18366 mov.l %d1,-(%sp) # save bit string for later
18367 mov.l %d0,-(%sp) # save # of bytes
18369 bsr.l _dmem_read # copy data from user mem
18371 mov.l (%sp)+,%d0 # retrieve # of bytes
# note: on the error branch below the bit string and the scratch area are
# still on the stack.
18373 tst.l %d1 # did dfetch fail?
18374 bne.w fmovm_in_err # yes
18376 mov.l (%sp)+,%d1 # load bit string
18378 lea (%sp),%a0 # addr of stack
18380 tst.b %d1 # should FP0 be moved?
18381 bpl.b fmovm_data_in_fp1 # no
18383 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
18384 mov.l (%a0)+,0x4+EXC_FP0(%a6)
18385 mov.l (%a0)+,0x8+EXC_FP0(%a6)
18387 fmovm_data_in_fp1:
18388 lsl.b &0x1,%d1 # should FP1 be moved?
18389 bpl.b fmovm_data_in_fp2 # no
18391 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
18392 mov.l (%a0)+,0x4+EXC_FP1(%a6)
18393 mov.l (%a0)+,0x8+EXC_FP1(%a6)
18395 fmovm_data_in_fp2:
18396 lsl.b &0x1,%d1 # should FP2 be moved?
18397 bpl.b fmovm_data_in_fp3 # no
18399 fmovm.x (%a0)+,&0x20 # yes
18401 fmovm_data_in_fp3:
18402 lsl.b &0x1,%d1 # should FP3 be moved?
18403 bpl.b fmovm_data_in_fp4 # no
18405 fmovm.x (%a0)+,&0x10 # yes
18407 fmovm_data_in_fp4:
18408 lsl.b &0x1,%d1 # should FP4 be moved?
18409 bpl.b fmovm_data_in_fp5 # no
18411 fmovm.x (%a0)+,&0x08 # yes
18413 fmovm_data_in_fp5:
18414 lsl.b &0x1,%d1 # should FP5 be moved?
18415 bpl.b fmovm_data_in_fp6 # no
18417 fmovm.x (%a0)+,&0x04 # yes
18419 fmovm_data_in_fp6:
18420 lsl.b &0x1,%d1 # should FP6 be moved?
18421 bpl.b fmovm_data_in_fp7 # no
18423 fmovm.x (%a0)+,&0x02 # yes
18425 fmovm_data_in_fp7:
18426 lsl.b &0x1,%d1 # should FP7 be moved?
18427 bpl.b fmovm_data_in_done # no
18429 fmovm.x (%a0)+,&0x01 # yes
18431 fmovm_data_in_done:
18432 add.l %d0,%sp # remove fpregs from stack
# Explicit return: the move in is complete once the scratch area has been
# released, and nothing after this point belongs to this routine.
rts # move in complete; return
18435 #####################################
# Exit used when the register bit string is zero (the instruction is a
# no-op once <ea> has been calculated).
18437 fmovm_data_done:
# The return below is required. The bytes that follow this label are the
# tbl_fmovm_size lookup table; without a return the processor would
# execute table data as instructions.
rts # nothing (more) to move; return
18440 ##############################################################################
18443 # table indexed by the operation's bit string that gives the number
18444 # of bytes that will be moved.
18446 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
# 256 one-byte entries, eight per row; the row starting at index 8*r covers
# bit strings 8*r .. 8*r+7. Examples: index 0x00 -> 0x00 (nothing to move),
# index 0x03 -> 0x18 (two registers), index 0xff -> 0x60 (all eight).
18448 tbl_fmovm_size:
18449 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
18450 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18451 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18452 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18453 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18454 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18455 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18456 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18457 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18458 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18459 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18460 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18461 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18462 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18463 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18464 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18465 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
18466 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18467 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18468 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18469 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18470 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18471 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18472 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18473 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
18474 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18475 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18476 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18477 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
18478 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18479 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
18480 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
# tbl_fmovm_convert: maps a pre-decrement register bit string onto the
# equivalent post-increment / control bit string.  Each entry is its own
# index with the eight bits in reverse order:
#
#	0x00 ==> 0x00
#	0x01 ==> 0x80
#	0x02 ==> 0x40
#	  ...
#	0xfd ==> 0xbf
#	0xfe ==> 0x7f
#	0xff ==> 0xff
#
# 256 one-byte entries, eight per row; the trailing comment on each row
# is the range of input values that row covers.
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0	# 0x00-0x07
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0	# 0x08-0x0f
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8	# 0x10-0x17
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8	# 0x18-0x1f
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4	# 0x20-0x27
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4	# 0x28-0x2f
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec	# 0x30-0x37
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc	# 0x38-0x3f
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2	# 0x40-0x47
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2	# 0x48-0x4f
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea	# 0x50-0x57
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa	# 0x58-0x5f
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6	# 0x60-0x67
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6	# 0x68-0x6f
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee	# 0x70-0x77
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe	# 0x78-0x7f
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1	# 0x80-0x87
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1	# 0x88-0x8f
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9	# 0x90-0x97
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9	# 0x98-0x9f
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5	# 0xa0-0xa7
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5	# 0xa8-0xaf
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed	# 0xb0-0xb7
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd	# 0xb8-0xbf
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3	# 0xc0-0xc7
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3	# 0xc8-0xcf
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb	# 0xd0-0xd7
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb	# 0xd8-0xdf
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7	# 0xe0-0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7	# 0xe8-0xef
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef	# 0xf0-0xf7
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff	# 0xf8-0xff
18528 global fmovm_calc_ea
18529 ###############################################
18530 # _fmovm_calc_ea: calculate effective address #
18531 ###############################################
18532 fmovm_calc_ea:
18533 mov.l %d0,%a0 # move # bytes to a0
18535 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
18536 # easily changed if they were inputs passed in registers.
18537 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
18538 mov.w %d0,%d1 # make a copy
18540 andi.w &0x3f,%d0 # extract mode field
18541 andi.l &0x7,%d1 # extract reg field
18543 # jump to the corresponding function for each {MODE,REG} pair.
18544 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
18545 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
18547 swbeg &64
18548 tbl_fea_mode:
18549 short tbl_fea_mode - tbl_fea_mode
18550 short tbl_fea_mode - tbl_fea_mode
18551 short tbl_fea_mode - tbl_fea_mode
18552 short tbl_fea_mode - tbl_fea_mode
18553 short tbl_fea_mode - tbl_fea_mode
18554 short tbl_fea_mode - tbl_fea_mode
18555 short tbl_fea_mode - tbl_fea_mode
18556 short tbl_fea_mode - tbl_fea_mode
18558 short tbl_fea_mode - tbl_fea_mode
18559 short tbl_fea_mode - tbl_fea_mode
18560 short tbl_fea_mode - tbl_fea_mode
18561 short tbl_fea_mode - tbl_fea_mode
18562 short tbl_fea_mode - tbl_fea_mode
18563 short tbl_fea_mode - tbl_fea_mode
18564 short tbl_fea_mode - tbl_fea_mode
18565 short tbl_fea_mode - tbl_fea_mode
18567 short faddr_ind_a0 - tbl_fea_mode
18568 short faddr_ind_a1 - tbl_fea_mode
18569 short faddr_ind_a2 - tbl_fea_mode
18570 short faddr_ind_a3 - tbl_fea_mode
18571 short faddr_ind_a4 - tbl_fea_mode
18572 short faddr_ind_a5 - tbl_fea_mode
18573 short faddr_ind_a6 - tbl_fea_mode
18574 short faddr_ind_a7 - tbl_fea_mode
18576 short faddr_ind_p_a0 - tbl_fea_mode
18577 short faddr_ind_p_a1 - tbl_fea_mode
18578 short faddr_ind_p_a2 - tbl_fea_mode
18579 short faddr_ind_p_a3 - tbl_fea_mode
18580 short faddr_ind_p_a4 - tbl_fea_mode
18581 short faddr_ind_p_a5 - tbl_fea_mode
18582 short faddr_ind_p_a6 - tbl_fea_mode
18583 short faddr_ind_p_a7 - tbl_fea_mode
18585 short faddr_ind_m_a0 - tbl_fea_mode
18586 short faddr_ind_m_a1 - tbl_fea_mode
18587 short faddr_ind_m_a2 - tbl_fea_mode
18588 short faddr_ind_m_a3 - tbl_fea_mode
18589 short faddr_ind_m_a4 - tbl_fea_mode
18590 short faddr_ind_m_a5 - tbl_fea_mode
18591 short faddr_ind_m_a6 - tbl_fea_mode
18592 short faddr_ind_m_a7 - tbl_fea_mode
18594 short faddr_ind_disp_a0 - tbl_fea_mode
18595 short faddr_ind_disp_a1 - tbl_fea_mode
18596 short faddr_ind_disp_a2 - tbl_fea_mode
18597 short faddr_ind_disp_a3 - tbl_fea_mode
18598 short faddr_ind_disp_a4 - tbl_fea_mode
18599 short faddr_ind_disp_a5 - tbl_fea_mode
18600 short faddr_ind_disp_a6 - tbl_fea_mode
18601 short faddr_ind_disp_a7 - tbl_fea_mode
18603 short faddr_ind_ext - tbl_fea_mode
18604 short faddr_ind_ext - tbl_fea_mode
18605 short faddr_ind_ext - tbl_fea_mode
18606 short faddr_ind_ext - tbl_fea_mode
18607 short faddr_ind_ext - tbl_fea_mode
18608 short faddr_ind_ext - tbl_fea_mode
18609 short faddr_ind_ext - tbl_fea_mode
18610 short faddr_ind_ext - tbl_fea_mode
18612 short fabs_short - tbl_fea_mode
18613 short fabs_long - tbl_fea_mode
18614 short fpc_ind - tbl_fea_mode
18615 short fpc_ind_ext - tbl_fea_mode
18616 short tbl_fea_mode - tbl_fea_mode
18617 short tbl_fea_mode - tbl_fea_mode
18618 short tbl_fea_mode - tbl_fea_mode
18619 short tbl_fea_mode - tbl_fea_mode
###################################
# Address register indirect: (An) #
###################################
# One entry point per address register, reached through tbl_fea_mode.
# Each returns the current value of its register in a0 as the <ea>:
# a0, a1 and a7 are read from the exception frame, a6 from (%a6), and
# a2-a5 straight from the live registers.
#
# Every entry ends in its own rts.  Without it a handler would run on
# into the ones below and a0 would always come back holding a7.
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts

faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts

#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
# In:	a0 = number of bytes to advance the register by
# Out:	a0 = <ea> = value of An before the increment
#	An (saved copy or live register) = old An + byte count
#
# d0 holds the old value and d1 the incremented one.  The a7 variant
# also records mia7_flg in SPCOND_FLG(%a6).
#
# Every entry ends in its own rts; falling through would increment each
# following register as well and end in the a7 special case.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

faddr_ind_p_a7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0			# <ea> = pre-increment value
	rts

####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
# In:	a0 = number of bytes to back the register up by
# Out:	a0 = <ea> = An after the decrement
#	An (saved copy or live register) = old An - byte count
#
# The a7 variant also records mda7_flg in SPCOND_FLG(%a6).
#
# Every entry ends in its own rts; falling through would decrement each
# following register as well and end in the a7 special case.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

faddr_ind_m_a7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0			# <ea> = decremented value
	rts

########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
# Each entry fetches the 16-bit displacement word at EXC_EXTWPTR(%a6)
# with _imem_read_word (word in d0, d1 != 0 on failure), advances the
# pointer by 2, and returns a0 = An + sign-extended d16.  A failed
# instruction fetch leaves through iea_iacc.
#
# "mov.w %d0,%a0" sign-extends because a word move into an address
# register always extends to 32 bits.
#
# Every entry ends in its own rts; falling through would consume further
# extension words and return the a7-relative address.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts

########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
# Memory indirect postindexed: ([bd, An], Xn, od)		       #
# Memory indirect preindexed: ([bd, An, Xn], od)		       #
########################################################################
# In:	d1 = REG field of the opword (base address register number)
# Out:	a0 = <ea>
#
# The base register is fetched through fetch_dreg using index d1+8, and
# is kept on the stack across the extension-word fetch.  If bit 8 of the
# extension word is set, the full-format decoder fcalc_mem_ind takes
# over with a0 = base and d0 = extension word.  Otherwise this is the
# brief format:
#	<ea> = An + (index << scale) + sign-extended d8
# Ends in rts so that control does not run on into fabs_short.
faddr_ind_ext:
	addq.l		&0x8,%d1		# select An rather than Dn
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)		# park base across the ifetch

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0		# a0 = base

	btst		&0x8,%d0		# full extension word?
	bne.w		fcalc_mem_ind		# yes

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts

###########################
# Absolute short: (XXX).W #
###########################
# Reads the 16-bit address word at EXC_EXTWPTR(%a6), advances the
# pointer by 2, and returns it in a0.  The word move into an address
# register sign-extends it to 32 bits.  A failed instruction fetch
# leaves through iea_iacc.
# Ends in rts so that control does not run on into fabs_long.
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0
	rts

##########################
# Absolute long: (XXX).L #
##########################
# Reads the 32-bit address at EXC_EXTWPTR(%a6), advances the pointer by
# 4, and returns it in a0.  A failed instruction fetch leaves through
# iea_iacc.
# Ends in rts so that control does not run on into fpc_ind.
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts

#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
# Returns a0 = (address of the displacement word) + sign-extended d16.
# A failed instruction fetch leaves through iea_iacc.
# Ends in rts so that control does not run on into fpc_ind_ext.
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# EXC_EXTWPTR was already advanced past the displacement word above, so
# back up by 2 to make the base the address of that word.
	subq.l		&0x2,%a0		# adjust <ea>
	rts

##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
# "     "     w/   "  (base displacement): (bd, PC, An)  #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
# Out:	a0 = <ea>
#
# The base is the address of the extension word (EXC_EXTWPTR after the
# fetch, minus 2).  If bit 8 of the extension word is set, the
# full-format decoder fcalc_mem_ind takes over with a0 = base and
# d0 = extension word.  Otherwise this is the brief format:
#	<ea> = base + (index << scale) + sign-extended d8
# Ends in rts so that control does not run on into fcalc_mem_ind.
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base

	btst		&0x8,%d0		# full extension word?
	bne.w		fcalc_mem_ind		# yes; calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# base + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts

# fcalc_mem_ind: decode a full-format extension word (base displacement
# and memory indirect forms).
#
# In:	d0 = extension word
#	a0 = base (An, or the PC-relative base)
# Out:	a0 = <ea>
#
# Working registers, saved on entry and restored on every exit:
#	d2 = index
#	d3 = base
#	d4 = od
#	d5 = extword
#
# Failed instruction fetches leave through fcea_iacc and failed data
# reads through fcea_err; both restore d2-d5 themselves.  The normal
# path ends in rts after its own restore, so it must not run on into
# fcea_err, which would pop d2-d5 a second time.
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2

fno_ext:
	bfextu		%d5{&21:&2},%d0		# d0 = scale
	lsl.l		%d0,%d2			# index <<= scale

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the base register suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b		%d0,&0x2
	blt.b		fno_bd			# size < 2: no bd
	beq.b		fget_word_bd		# size = 2: word bd

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?
	beq.w		faii_bd			# 0: no memory indirection

	cmpi.b		%d0,&0x2
	blt.b		fnull_od		# 1: null od
	beq.b		fword_od		# 2: word od

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4			# d4 = od

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

	mov.l		%d3,%a0			# post: fetch pointer at base
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0			# return <ea> in a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts

#########################################################
# Failure exits for fcalc_mem_ind.  Both first pop the d2-d5 that
# fcalc_mem_ind saved, then branch to the shared access-error handlers.

# data read (_dmem_read_long) failed: hand iea_dacc the address that was
# being read in a0 and the code 0x0101 in d0.
fcea_err:
	mov.l		%d3,%a0			# a0 = address of failed read

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0		# code passed to iea_dacc
	bra.l		iea_dacc

# instruction fetch failed.
fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc

# fmovm data access failure exits.  Each calls restore(), loads a code
# into d0 (0x00e1 for the "out" direction, 0x0161 for "in"), and then
# shares fmovm_err, which passes the value held in L_SCR1(%a6) to
# iea_dacc in a0.
# NOTE(review): L_SCR1 is presumably the faulting address stored by the
# fmovm code before the access -- confirm against the caller.
fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0		# code for failed fmovm out
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0		# code for failed fmovm in
						# falls into fmovm_err
fmovm_err:
	mov.l		L_SCR1(%a6),%a0
	bra.l		iea_dacc

#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#	Only the registers named by the instruction are written.	#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().			#
#	Each register combination has its own straight-line variant	#
# that ends in rts, so only the selected registers are loaded and only	#
# their immediate longwords are consumed.				#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
#									#
# XREF ****************************************************************	#
#	inc_areg() - increment an address register			#
#	dec_areg() - decrement an address register			#
#									#
# INPUT ***************************************************************	#
#	d0 = number of bytes to adjust <ea> by				#
#									#
# OUTPUT **************************************************************	#
#	a0 = effective address						#
#	(An)+ / -(An): the address register is updated.			#
#	-(An), 12-byte operand: EXC_EA(a6) is corrected as well.	#
#	#<data>: immed_flg is stored in SPCOND_FLG(a6).			#
#									#
# ALGORITHM ***********************************************************	#
#	"Dummy" CALCulate Effective Address:				#
#	The stacked <ea> for FP unimplemented instructions and opclass	#
#	two packed instructions is correct with the exception of...	#
#									#
#	1) -(An)   : The register is not updated regardless of size.	#
#		     Also, for extended precision and packed, the	#
#		     stacked <ea> value is 8 bytes too big		#
#	2) (An)+   : The register is not updated.			#
#	3) #<data> : The upper longword of the immediate operand is	#
#		     stacked b,w,l and s sizes are completely stacked.	#
#		     d,x, and p are not.				#
#									#
#	Every path ends in its own rts. In particular the -(An) path	#
# returns before dcea_pd2 unless the operand size is 12 bytes, so the	#
# 8-byte correction is applied to extended and packed operands only.	#
#									#
#########################################################################

	global		_dcalc_ea
_dcalc_ea:
	mov.l		%d0,%a0			# move # bytes to %a0

	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opword
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		dcea_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.b		dcea_pd			# yes

	or.w		%d1,%d0			# concat mode,reg
	cmpi.b		%d0,&0x3c		# is mode #<data>?

	beq.b		dcea_imm		# yes

	mov.l		EXC_EA(%a6),%a0		# return <ea>
	rts

# need to set immediate data flag here since we'll need to do
# an imem_read to fetch this later.
dcea_imm:
	mov.b		&immed_flg,SPCOND_FLG(%a6)
	lea		([USER_FPIAR,%a6],0x4),%a0 # <ea> = 4 past addr in USER_FPIAR
	rts

# here, the <ea> is stacked correctly. however, we must update the
# address register...
dcea_pi:
	mov.l		%a0,%d0			# pass amt to inc by
	bsr.l		inc_areg		# inc addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# the <ea> is stacked correctly for all but extended and packed which
# the <ea>s are 8 bytes too large.
# it would make no sense to have a pre-decrement to a7 in supervisor
# mode so we don't even worry about this tricky case here : )
dcea_pd:
	mov.l		%a0,%d0			# pass amt to dec by
	bsr.l		dec_areg		# dec addr register

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct

# NOTE(review): relies on dec_areg() returning with d0 still holding the
# byte count -- confirm.
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
	beq.b		dcea_pd2		# yes
	rts

dcea_pd2:
	sub.l		&0x8,%a0		# correct <ea>
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
	rts

#########################################################################
# XDEF ****************************************************************	#
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
#			 and packed data opclass 3 operations.		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	a0 = return correct effective address				#
#									#
# ALGORITHM ***********************************************************	#
#	For opclass 3 extended and packed data operations, the <ea>	#
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
# modes. Also, while we're at it, the index register itself must get	#
# updated.								#
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
# and return that value as the correct <ea> and store that value in An.	#
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
#	Each per-register handler ends in its own rts so that exactly	#
# one register is updated.						#
#									#
#########################################################################

# This calc_ea is currently used to retrieve the correct <ea>
# for fmove outs of type extended and packed.
	global		_calc_ea_fout
_calc_ea_fout:
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch low byte of opword
	mov.l		%d0,%d1			# make a copy

	andi.w		&0x38,%d0		# extract mode field
	andi.l		&0x7,%d1		# extract reg  field

	cmpi.b		%d0,&0x18		# is mode (An)+ ?
	beq.b		ceaf_pi			# yes

	cmpi.b		%d0,&0x20		# is mode -(An) ?
	beq.w		ceaf_pd			# yes

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	rts

# (An)+ : extended and packed fmove out
#	: stacked <ea> is correct
#	: "An" not updated
ceaf_pi:
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	mov.l		EXC_EA(%a6),%a0		# a0 = <ea> to return
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pi:
	short		ceaf_pi0 - tbl_ceaf_pi
	short		ceaf_pi1 - tbl_ceaf_pi
	short		ceaf_pi2 - tbl_ceaf_pi
	short		ceaf_pi3 - tbl_ceaf_pi
	short		ceaf_pi4 - tbl_ceaf_pi
	short		ceaf_pi5 - tbl_ceaf_pi
	short		ceaf_pi6 - tbl_ceaf_pi
	short		ceaf_pi7 - tbl_ceaf_pi

ceaf_pi0:
	addi.l		&0xc,EXC_DREGS+0x8(%a6)	# a0 += 12
	rts
ceaf_pi1:
	addi.l		&0xc,EXC_DREGS+0xc(%a6)	# a1 += 12
	rts
ceaf_pi2:
	add.l		&0xc,%a2		# a2 += 12
	rts
ceaf_pi3:
	add.l		&0xc,%a3		# a3 += 12
	rts
ceaf_pi4:
	add.l		&0xc,%a4		# a4 += 12
	rts
ceaf_pi5:
	add.l		&0xc,%a5		# a5 += 12
	rts
ceaf_pi6:
	addi.l		&0xc,EXC_A6(%a6)	# a6 += 12
	rts
ceaf_pi7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
	addi.l		&0xc,EXC_A7(%a6)	# a7 += 12
	rts

# -(An) : extended and packed fmove out
#	: stacked <ea> = actual <ea> + 8
#	: "An" not updated
ceaf_pd:
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1	# d1 = handler offset
	mov.l		EXC_EA(%a6),%a0
	sub.l		&0x8,%a0		# a0 = corrected <ea>
	sub.l		&0x8,EXC_EA(%a6)	# correct the stacked <ea> too
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)

	swbeg		&0x8
tbl_ceaf_pd:
	short		ceaf_pd0 - tbl_ceaf_pd
	short		ceaf_pd1 - tbl_ceaf_pd
	short		ceaf_pd2 - tbl_ceaf_pd
	short		ceaf_pd3 - tbl_ceaf_pd
	short		ceaf_pd4 - tbl_ceaf_pd
	short		ceaf_pd5 - tbl_ceaf_pd
	short		ceaf_pd6 - tbl_ceaf_pd
	short		ceaf_pd7 - tbl_ceaf_pd

ceaf_pd0:
	mov.l		%a0,EXC_DREGS+0x8(%a6)	# a0 = corrected <ea>
	rts
ceaf_pd1:
	mov.l		%a0,EXC_DREGS+0xc(%a6)	# a1 = corrected <ea>
	rts
ceaf_pd2:
	mov.l		%a0,%a2			# a2 = corrected <ea>
	rts
ceaf_pd3:
	mov.l		%a0,%a3			# a3 = corrected <ea>
	rts
ceaf_pd4:
	mov.l		%a0,%a4			# a4 = corrected <ea>
	rts
ceaf_pd5:
	mov.l		%a0,%a5			# a5 = corrected <ea>
	rts
ceaf_pd6:
	mov.l		%a0,EXC_A6(%a6)		# a6 = corrected <ea>
	rts
ceaf_pd7:
	mov.l		%a0,EXC_A7(%a6)		# a7 = corrected <ea>
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
	rts

19565 #########################################################################
19566 # XDEF **************************************************************** #
19567 # _load_fop(): load operand for unimplemented FP exception #
19569 # XREF **************************************************************** #
19570 # set_tag_x() - determine ext prec optype tag #
19571 # set_tag_s() - determine sgl prec optype tag #
19572 # set_tag_d() - determine dbl prec optype tag #
19573 # unnorm_fix() - convert normalized number to denorm or zero #
19574 # norm() - normalize a denormalized number #
19575 # get_packed() - fetch a packed operand from memory #
19576 # _dcalc_ea() - calculate <ea>, fixing An in process #
19578 # _imem_read_{word,long}() - read from instruction memory #
19579 # _dmem_read() - read from data memory #
19580 # _dmem_read_{byte,word,long}() - read from data memory #
19582 # facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
19584 # INPUT *************************************************************** #
19585 # None #
19587 # OUTPUT ************************************************************** #
19588 # If memory access doesn't fail: #
19589 # FP_SRC(a6) = source operand in extended precision #
19590 # FP_DST(a6) = destination operand in extended precision #
19592 # ALGORITHM *********************************************************** #
19593 # This is called from the Unimplemented FP exception handler in #
19594 # order to load the source and maybe destination operand into #
19595 # FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
19596 # the source and destination from the FP register file. Set the optype #
19597 # tags for both if dyadic, one for monadic. If a number is an UNNORM, #
19598 # convert it to a DENORM or a ZERO. #
19599 # If the instruction is opclass two (memory->reg), then fetch #
19600 # the destination from the register file and the source operand from #
19601 # memory. Tag and fix both as above w/ opclass zero instructions. #
19602 # If the source operand is byte,word,long, or single, it may be #
19603 # in the data register file. If it's actually out in memory, use one of #
19604 # the mem_read() routines to fetch it. If the mem_read() access returns #
19605 # a failing value, exit through the special facc_in() routine which #
19606 # will create an access error exception frame from the current exception #
19607 # frame. #
19608 # Immediate data and regular data accesses are separated because #
19609 # if an immediate data access fails, the resulting fault status #
19610 # longword stacked for the access error exception must have the #
19611 # instruction bit set. #
19613 #########################################################################
# _load_fop: entry point. Bit 6 of the first byte of EXC_CMDREG (bit 14 of
# the command word, i.e. the middle opclass bit) selects the <ea> -> reg
# path (op010); otherwise both operands come from the FP register file.
# The reg -> reg path must end in an rts after storing STAG; without it
# control falls into op000_dst_unnorm, which branches back to
# op000_dst_cont and never returns.
19615 global _load_fop
19616 _load_fop:
19618 # 15 13 12 10 9 7 6 0
19619 # / \ / \ / \ / \
19620 # ---------------------------------
19621 # | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction)
19622 # ---------------------------------
19625 # bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
19626 # cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
19627 # beq.w op010 # handle <ea> -> fpn
19628 # bgt.w op011 # handle fpn -> <ea>
19630 # we're not using op011 for now...
19631 btst &0x6,EXC_CMDREG(%a6) # opclass '010?
19632 bne.b op010 # yes; source is an <ea>
19634 ############################
19635 # OPCLASS '000: reg -> reg #
19636 ############################
19637 op000:
19638 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
19639 btst &0x5,%d0 # testing extension bits
19640 beq.b op000_src # (bit 5 == 0) => monadic
19641 btst &0x4,%d0 # (bit 5 == 1)
19642 beq.b op000_dst # (bit 4 == 0) => dyadic
19643 and.w &0x007f,%d0 # extract extension bits {6:0}
19644 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19645 bne.b op000_src # no; treat as monadic (src only)
19647 op000_dst:
19648 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19649 bsr.l load_fpn2 # fetch dst fpreg into FP_DST
19651 bsr.l set_tag_x # get dst optype tag
19653 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19654 beq.b op000_dst_unnorm # yes
19655 op000_dst_cont:
19656 mov.b %d0, DTAG(%a6) # store the dst optype tag
19658 op000_src:
19659 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
19660 bsr.l load_fpn1 # fetch src fpreg into FP_SRC
19662 bsr.l set_tag_x # get src optype tag
19664 cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
19665 beq.b op000_src_unnorm # yes
19666 op000_src_cont:
19667 mov.b %d0, STAG(%a6) # store the src optype tag
19668 rts
19670 op000_dst_unnorm:
19671 bsr.l unnorm_fix # fix the dst UNNORM
19672 bra.b op000_dst_cont
19673 op000_src_unnorm:
19674 bsr.l unnorm_fix # fix the src UNNORM
19675 bra.b op000_src_cont
19677 #############################
19678 # OPCLASS '010: <ea> -> reg #
19679 #############################
# op010: <ea> -> reg. For dyadic operations (and fcmp) the destination FP
# register is loaded and tagged first. The source is then taken either
# from a data register (<ea> mode field == 0, dispatched by source format
# through tbl_op010_dreg) or from memory (fetch_from_mem).
19680 op010:
19681 mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
19682 btst &0x5,%d0 # testing extension bits
19683 beq.b op010_src # (bit 5 == 0) => monadic
19684 btst &0x4,%d0 # (bit 5 == 1)
19685 beq.b op010_dst # (bit 4 == 0) => dyadic
19686 and.w &0x007f,%d0 # extract extension bits {6:0}
19687 cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
19688 bne.b op010_src # no; treat as monadic (src only)
19690 op010_dst:
19691 bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
19692 bsr.l load_fpn2 # fetch dst fpreg ptr
19694 bsr.l set_tag_x # get dst type tag
19696 cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
19697 beq.b op010_dst_unnorm # yes
19698 op010_dst_cont:
19699 mov.b %d0, DTAG(%a6) # store the dst optype tag
19701 op010_src:
19702 bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
19704 bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
19705 bne.w fetch_from_mem # src op is in memory
19707 op010_dreg:
19708 clr.b STAG(%a6) # either NORM or ZERO
19709 bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
19711 mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
19712 jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
19714 op010_dst_unnorm:
19715 bsr.l unnorm_fix # fix the dst UNNORM
19716 bra.b op010_dst_cont
19718 swbeg &0x8
# Indexed by the 3-bit source type field. Only long, single, word and byte
# have handlers; the remaining slots hold a zero offset (the table itself).
19719 tbl_op010_dreg:
19720 short opd_long - tbl_op010_dreg
19721 short opd_sgl - tbl_op010_dreg
19722 short tbl_op010_dreg - tbl_op010_dreg
19723 short tbl_op010_dreg - tbl_op010_dreg
19724 short opd_word - tbl_op010_dreg
19725 short tbl_op010_dreg - tbl_op010_dreg
19726 short opd_byte - tbl_op010_dreg
19727 short tbl_op010_dreg - tbl_op010_dreg
19730 # LONG: can be either NORM or ZERO...
# Reached by jmp from op010_dreg, which has already cleared STAG.
# Converts the data-register long to extended precision in FP_SRC and
# tags it ZERO only when the converted value is zero. The rts after the
# fbeq keeps the non-zero case from falling into opd_long_zero.
19732 opd_long:
19733 bsr.l fetch_dreg # fetch long in d0
19734 fmov.l %d0, %fp0 # load a long
19735 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19736 fbeq.w opd_long_zero # long is a ZERO
19737 rts
19738 opd_long_zero:
19739 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19740 rts
19743 # WORD: can be either NORM or ZERO...
# Reached by jmp from op010_dreg, which has already cleared STAG.
# Converts the data-register word to extended precision in FP_SRC and
# tags it ZERO only when the converted value is zero. The rts after the
# fbeq keeps the non-zero case from falling into opd_word_zero.
19745 opd_word:
19746 bsr.l fetch_dreg # fetch word in d0
19747 fmov.w %d0, %fp0 # load a word
19748 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19749 fbeq.w opd_word_zero # WORD is a ZERO
19750 rts
19751 opd_word_zero:
19752 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19753 rts
19756 # BYTE: can be either NORM or ZERO...
# Reached by jmp from op010_dreg, which has already cleared STAG.
# Converts the data-register byte to extended precision in FP_SRC and
# tags it ZERO only when the converted value is zero. The rts after the
# fbeq keeps the non-zero case from falling into opd_byte_zero.
19758 opd_byte:
19759 bsr.l fetch_dreg # fetch byte in d0
19760 fmov.b %d0, %fp0 # load a byte
19761 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19762 fbeq.w opd_byte_zero # byte is a ZERO
19763 rts
19764 opd_byte_zero:
19765 mov.b &ZERO, STAG(%a6) # set ZERO optype flag
19766 rts
19769 # SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
19771 # separate SNANs and DENORMs so they can be loaded w/ special care.
19772 # all others can simply be moved "in" using fmove.
# The single is copied from the data register to L_SCR1 so that it can be
# classified and converted through a memory pointer in %a0. The final rts
# ends the ordinary path once FP_SRC has been written.
19774 opd_sgl:
19775 bsr.l fetch_dreg # fetch sgl in d0
19776 mov.l %d0,L_SCR1(%a6) # park it in scratch memory
19778 lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
19779 bsr.l set_tag_s # determine sgl type
19780 mov.b %d0, STAG(%a6) # save the src tag
19782 cmpi.b %d0, &SNAN # is it an SNAN?
19783 beq.w get_sgl_snan # yes
19785 cmpi.b %d0, &DENORM # is it a DENORM?
19786 beq.w get_sgl_denorm # yes
19788 fmov.s (%a0), %fp0 # no, so can load it regular
19789 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19790 rts
19792 ##############################################################################
19794 #########################################################################
19795 # fetch_from_mem(): #
19796 # - src is out in memory. must: #
19797 # (1) calc ea - must read AFTER you know the src type since #
19798 # if the ea is -() or ()+, need to know # of bytes. #
19799 # (2) read it in from either user or supervisor space #
19800 # (3) if (b || w || l) then simply read in #
19801 # if (s || d || x) then check for SNAN,UNNORM,DENORM #
19802 # if (packed) then punt for now #
19803 # INPUT: #
19804 # %d0 : src type field #
19805 #########################################################################
# fetch_from_mem: source operand is not in a data register. Clears STAG
# and dispatches on the source type field in %d0 through tbl_fp_type.
19806 fetch_from_mem:
19807 clr.b STAG(%a6) # either NORM or ZERO
19809 mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
19810 jmp (tbl_fp_type.b,%pc,%d0.w*1)
19812 swbeg &0x8
# Indexed by the 3-bit source type field: long, single, extended, packed,
# word, double, byte. The last slot holds a zero offset (the table itself).
19813 tbl_fp_type:
19814 short load_long - tbl_fp_type
19815 short load_sgl - tbl_fp_type
19816 short load_ext - tbl_fp_type
19817 short load_packed - tbl_fp_type
19818 short load_word - tbl_fp_type
19819 short load_dbl - tbl_fp_type
19820 short load_byte - tbl_fp_type
19821 short tbl_fp_type - tbl_fp_type
19823 #########################################
19824 # load a LONG into %fp0: #
19825 # -number can't fault #
19826 # (1) calc ea #
19827 # (2) read 4 bytes into L_SCR1 #
19828 # (3) fmov.l into %fp0 #
19829 #########################################
# load_long: fetch a long-integer source operand from data memory or, when
# SPCOND_FLG holds immed_flg, from the instruction stream; convert it to
# extended precision in FP_SRC. STAG (cleared by fetch_from_mem) is set to
# ZERO only when the converted value is zero. The rts after the fbeq keeps
# the non-zero case from falling into load_long_zero, and the rts after
# load_long_zero keeps it from running into load_long_immed.
19830 load_long:
19831 movq.l &0x4, %d0 # pass: 4 (bytes)
19832 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19834 cmpi.b SPCOND_FLG(%a6),&immed_flg # immediate operand?
19835 beq.b load_long_immed # yes; use the instruction read
19837 bsr.l _dmem_read_long # fetch src operand from memory
19839 tst.l %d1 # did dfetch fail?
19840 bne.l facc_in_l # yes
19842 load_long_cont:
19843 fmov.l %d0, %fp0 # read into %fp0;convert to xprec
19844 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19846 fbeq.w load_long_zero # src op is a ZERO
19847 rts
19848 load_long_zero:
19849 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19850 rts
19852 load_long_immed:
19853 bsr.l _imem_read_long # fetch src operand immed data
19855 tst.l %d1 # did ifetch fail?
19856 bne.l funimp_iacc # yes
19857 bra.b load_long_cont
19859 #########################################
19860 # load a WORD into %fp0: #
19861 # -number can't fault #
19862 # (1) calc ea #
19863 # (2) read 2 bytes into L_SCR1 #
19864 # (3) fmov.w into %fp0 #
19865 #########################################
# load_word: fetch a word-integer source operand from data memory or, when
# SPCOND_FLG holds immed_flg, from the instruction stream; convert it to
# extended precision in FP_SRC. STAG (cleared by fetch_from_mem) is set to
# ZERO only when the converted value is zero. The rts after the fbeq keeps
# the non-zero case from falling into load_word_zero, and the rts after
# load_word_zero keeps it from running into load_word_immed.
19866 load_word:
19867 movq.l &0x2, %d0 # pass: 2 (bytes)
19868 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19870 cmpi.b SPCOND_FLG(%a6),&immed_flg # immediate operand?
19871 beq.b load_word_immed # yes; use the instruction read
19873 bsr.l _dmem_read_word # fetch src operand from memory
19875 tst.l %d1 # did dfetch fail?
19876 bne.l facc_in_w # yes
19878 load_word_cont:
19879 fmov.w %d0, %fp0 # read into %fp0;convert to xprec
19880 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19882 fbeq.w load_word_zero # src op is a ZERO
19883 rts
19884 load_word_zero:
19885 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19886 rts
19888 load_word_immed:
19889 bsr.l _imem_read_word # fetch src operand immed data
19891 tst.l %d1 # did ifetch fail?
19892 bne.l funimp_iacc # yes
19893 bra.b load_word_cont
19895 #########################################
19896 # load a BYTE into %fp0: #
19897 # -number can't fault #
19898 # (1) calc ea #
19899 # (2) read 1 byte into L_SCR1 #
19900 # (3) fmov.b into %fp0 #
19901 #########################################
# load_byte: fetch a byte-integer source operand from data memory or, when
# SPCOND_FLG holds immed_flg, from the instruction stream; convert it to
# extended precision in FP_SRC. STAG (cleared by fetch_from_mem) is set to
# ZERO only when the converted value is zero. The rts after the fbeq keeps
# the non-zero case from falling into load_byte_zero, and the rts after
# load_byte_zero keeps it from running into load_byte_immed.
19902 load_byte:
19903 movq.l &0x1, %d0 # pass: 1 (byte)
19904 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19906 cmpi.b SPCOND_FLG(%a6),&immed_flg # immediate operand?
19907 beq.b load_byte_immed # yes; use the instruction read
19909 bsr.l _dmem_read_byte # fetch src operand from memory
19911 tst.l %d1 # did dfetch fail?
19912 bne.l facc_in_b # yes
19914 load_byte_cont:
19915 fmov.b %d0, %fp0 # read into %fp0;convert to xprec
19916 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19918 fbeq.w load_byte_zero # src op is a ZERO
19919 rts
19920 load_byte_zero:
19921 mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
19922 rts
19924 load_byte_immed:
# a whole word is read here; fmov.b above then uses only its low byte
19925 bsr.l _imem_read_word # fetch src operand immed data
19927 tst.l %d1 # did ifetch fail?
19928 bne.l funimp_iacc # yes
19929 bra.b load_byte_cont
19931 #########################################
19932 # load a SGL into %fp0: #
19933 # -number can't fault #
19934 # (1) calc ea #
19935 # (2) read 4 bytes into L_SCR1 #
19936 # (3) fmov.s into %fp0 #
19937 #########################################
# load_sgl: fetch a single-precision source operand from data memory or,
# when SPCOND_FLG holds immed_flg, from the instruction stream. The raw
# 32 bits are kept in L_SCR1 so that set_tag_s and the DENORM/SNAN
# converters can work through a pointer. Ordinary values are converted
# with fmov.s and stored in FP_SRC, after which the routine returns.
#
# Two fixes relative to the previous text:
#  - the ordinary path now ends in rts instead of running on into
#    load_sgl_immed;
#  - load_sgl_immed stores the fetched immediate into L_SCR1. Previously
#    only the memory path did, so load_sgl_cont classified and converted
#    whatever was left in L_SCR1 rather than the immediate operand.
19938 load_sgl:
19939 movq.l &0x4, %d0 # pass: 4 (bytes)
19940 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
19942 cmpi.b SPCOND_FLG(%a6),&immed_flg # immediate operand?
19943 beq.b load_sgl_immed # yes; use the instruction read
19945 bsr.l _dmem_read_long # fetch src operand from memory
19946 mov.l %d0, L_SCR1(%a6) # store src op on stack
19948 tst.l %d1 # did dfetch fail?
19949 bne.l facc_in_l # yes
19951 load_sgl_cont:
19952 lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
19953 bsr.l set_tag_s # determine src type tag
19954 mov.b %d0, STAG(%a6) # save src optype tag on stack
19956 cmpi.b %d0, &DENORM # is it a sgl DENORM?
19957 beq.w get_sgl_denorm # yes
19959 cmpi.b %d0, &SNAN # is it a sgl SNAN?
19960 beq.w get_sgl_snan # yes
19962 fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
19963 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
19964 rts
19966 load_sgl_immed:
19967 bsr.l _imem_read_long # fetch src operand immed data
19969 tst.l %d1 # did ifetch fail?
19970 bne.l funimp_iacc # yes
mov.l %d0, L_SCR1(%a6) # store immed src op on stack
19971 bra.b load_sgl_cont
19973 # must convert sgl denorm format to an Xprec denorm fmt suitable for
19974 # normalization...
19975 # %a0 : points to sgl denorm
# get_sgl_denorm: build an extended-precision value in FP_SRC from the
# single-precision DENORM at (%a0). The 23 fraction bits are placed just
# below the integer bit of the mantissa, the sign is copied, norm()
# normalizes the mantissa (shift count returned in %d0) and the exponent
# becomes 0x3f81 minus that shift. STAG is changed to NORM. The closing
# rts keeps control from running on into get_sgl_snan, which would
# overwrite FP_SRC.
19976 get_sgl_denorm:
19977 clr.w FP_SRC_EX(%a6) # start with sign/exp = 0
19978 bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa)
19979 lsl.l &0x8, %d0 # align below the integer bit
19980 mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
19981 clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
19983 clr.w FP_SRC_EX(%a6) # (already zero; kept as in original)
19984 btst &0x7, (%a0) # is sgn bit set?
19985 beq.b sgl_dnrm_norm # no
19986 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
19988 sgl_dnrm_norm:
19989 lea FP_SRC(%a6), %a0 # pass: ptr to the xprec value
19990 bsr.l norm # normalize number
19991 mov.w &0x3f81, %d1 # xprec exp = 0x3f81
19992 sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
19993 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
19995 mov.b &NORM, STAG(%a6) # fix src type tag
19996 rts
19998 # convert sgl to ext SNAN
19999 # %a0 : points to sgl SNAN
# get_sgl_snan: build an extended-precision SNAN in FP_SRC from the
# single-precision SNAN at (%a0): exponent 0x7fff, the 23 fraction bits
# shifted up by 8 into the high mantissa longword, low longword cleared,
# sign copied. The closing rts keeps control from running on into
# load_dbl.
20000 get_sgl_snan:
20001 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20002 bfextu (%a0){&9:&23}, %d0 # fetch the sgl fraction
20003 lsl.l &0x8, %d0 # extract and insert hi(man)
20004 mov.l %d0, FP_SRC_HI(%a6)
20005 clr.l FP_SRC_LO(%a6) # lo(man) = 0
20007 btst &0x7, (%a0) # see if sign of SNAN is set
20008 beq.b no_sgl_snan_sgn # no
20009 bset &0x7, FP_SRC_EX(%a6) # copy the sign
20010 no_sgl_snan_sgn:
20011 rts
20013 #########################################
20014 # load a DBL into %fp0: #
20015 # -number can't fault #
20016 # (1) calc ea #
20017 # (2) read 8 bytes into L_SCR(1,2)#
20018 # (3) fmov.d into %fp0 #
20019 #########################################
# load_dbl: fetch a double-precision source operand (8 bytes) into the
# scratch area starting at L_SCR1, from data memory or, when SPCOND_FLG
# holds immed_flg, from the instruction stream. DENORMs and SNANs are
# handed to dedicated converters; everything else is converted with
# fmov.d and stored in FP_SRC. The rts after that store ends the ordinary
# path instead of letting it run on into load_dbl_immed.
20020 load_dbl:
20021 movq.l &0x8, %d0 # pass: 8 (bytes)
20022 bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
20024 cmpi.b SPCOND_FLG(%a6),&immed_flg # immediate operand?
20025 beq.b load_dbl_immed # yes; use the instruction read
20027 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20028 movq.l &0x8, %d0 # pass: # bytes to read
20029 bsr.l _dmem_read # fetch src operand from memory
20031 tst.l %d1 # did dfetch fail?
20032 bne.l facc_in_d # yes
20034 load_dbl_cont:
20035 lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
20036 bsr.l set_tag_d # determine src type tag
20037 mov.b %d0, STAG(%a6) # set src optype tag
20039 cmpi.b %d0, &DENORM # is it a dbl DENORM?
20040 beq.w get_dbl_denorm # yes
20042 cmpi.b %d0, &SNAN # is it a dbl SNAN?
20043 beq.w get_dbl_snan # yes
20045 fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
20046 fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
20047 rts
20049 load_dbl_immed:
20050 lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
20051 movq.l &0x8, %d0 # pass: # bytes to read
20052 bsr.l _imem_read # fetch src operand immed data
20054 tst.l %d1 # did ifetch fail?
20055 bne.l funimp_iacc # yes
20056 bra.b load_dbl_cont
20058 # must convert dbl denorm format to an Xprec denorm fmt suitable for
20059 # normalization...
20060 # %a0 : loc. of dbl denorm
# get_dbl_denorm: build an extended-precision value in FP_SRC from the
# double-precision DENORM at (%a0). The first 31 fraction bits go to the
# high mantissa longword, the remaining 21 bits are shifted up by 11 into
# the low longword, the sign is copied, norm() normalizes the mantissa
# (shift count returned in %d0) and the exponent becomes 0x3c01 minus
# that shift. STAG is changed to NORM. The closing rts keeps control from
# running on into get_dbl_snan, which would overwrite FP_SRC.
20061 get_dbl_denorm:
20062 clr.w FP_SRC_EX(%a6) # start with sign/exp = 0
20063 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20064 mov.l %d0, FP_SRC_HI(%a6)
20065 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20066 mov.l &0xb, %d1 # shift count = 11
20067 lsl.l %d1, %d0 # left-justify the low bits
20068 mov.l %d0, FP_SRC_LO(%a6)
20070 btst &0x7, (%a0) # is sgn bit set?
20071 beq.b dbl_dnrm_norm # no
20072 bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
20074 dbl_dnrm_norm:
20075 lea FP_SRC(%a6), %a0 # pass: ptr to the xprec value
20076 bsr.l norm # normalize number
20077 mov.w &0x3c01, %d1 # xprec exp = 0x3c01
20078 sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
20079 or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
20081 mov.b &NORM, STAG(%a6) # fix src type tag
20082 rts
20084 # convert dbl to ext SNAN
20085 # %a0 : points to dbl SNAN
# get_dbl_snan: build an extended-precision SNAN in FP_SRC from the
# double-precision SNAN at (%a0): exponent 0x7fff, fraction bits copied
# into the mantissa in the same layout get_dbl_denorm uses, sign copied.
# The closing rts keeps control from running on into load_ext.
20086 get_dbl_snan:
20087 mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN
20089 bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa)
20090 mov.l %d0, FP_SRC_HI(%a6)
20091 bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa)
20092 mov.l &0xb, %d1 # shift count = 11
20093 lsl.l %d1, %d0 # left-justify the low bits
20094 mov.l %d0, FP_SRC_LO(%a6)
20096 btst &0x7, (%a0) # see if sign of SNAN is set
20097 beq.b no_dbl_snan_sgn # no
20098 bset &0x7, FP_SRC_EX(%a6) # copy the sign
20099 no_dbl_snan_sgn:
20100 rts
20102 #################################################
20103 # load a Xprec into %fp0: #
20104 # -number can't fault #
20105 # (1) calc ea #
20106 # (2) read 12 bytes into L_SCR(1,2) #
20107 # (3) fmov.x into %fp0 #
20108 #################################################
# load_ext: read a 12-byte extended-precision source operand from data
# memory directly into FP_SRC, tag it with set_tag_x and store the tag in
# STAG. An UNNORM is first passed through unnorm_fix and the tag it
# returns is stored instead. The rts after the ordinary tag store keeps
# non-UNNORM operands from falling into load_ext_unnorm, and the rts at
# the end keeps control from running on into load_packed.
20109 load_ext:
20110 mov.l &0xc, %d0 # pass: 12 (bytes)
20111 bsr.l _dcalc_ea # calc <ea>
20113 lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
20114 mov.l &0xc, %d0 # pass: # of bytes to read
20115 bsr.l _dmem_read # fetch src operand from memory
20117 tst.l %d1 # did dfetch fail?
20118 bne.l facc_in_x # yes
20120 lea FP_SRC(%a6), %a0 # pass: ptr to src op
20121 bsr.l set_tag_x # determine src type tag
20123 cmpi.b %d0, &UNNORM # is the src op an UNNORM?
20124 beq.b load_ext_unnorm # yes
20126 mov.b %d0, STAG(%a6) # store the src optype tag
20127 rts
20129 load_ext_unnorm:
20130 bsr.l unnorm_fix # fix the src UNNORM
20131 mov.b %d0, STAG(%a6) # store the src optype tag
20132 rts
20134 #################################################
20135 # load a packed into %fp0: #
20136 # -number can't fault #
20137 # (1) calc ea #
20138 # (2) read 12 bytes into L_SCR(1,2,3) #
20139 # (3) fmov.x into %fp0 #
20140 #################################################
# load_packed: let get_packed() fetch the packed source operand, then tag
# the value in FP_SRC with set_tag_x and store the tag in STAG. An UNNORM
# is first passed through unnorm_fix and the tag it returns is stored
# instead. The rts after the ordinary tag store keeps non-UNNORM operands
# from falling into load_packed_unnorm.
20141 load_packed:
20142 bsr.l get_packed # fetch the packed operand
20144 lea FP_SRC(%a6),%a0 # pass ptr to src op
20145 bsr.l set_tag_x # determine src type tag
20146 cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
20147 beq.b load_packed_unnorm # yes
20149 mov.b %d0,STAG(%a6) # store the src optype tag
20150 rts
20152 load_packed_unnorm:
20153 bsr.l unnorm_fix # fix the UNNORM ZERO
20154 mov.b %d0,STAG(%a6) # store the src optype tag
20155 rts
20157 #########################################################################
20158 # XDEF **************************************************************** #
20159 # fout(): move from fp register to memory or data register #
20161 # XREF **************************************************************** #
20162 # _round() - needed to create EXOP for sgl/dbl precision #
20163 # norm() - needed to create EXOP for extended precision #
20164 # ovf_res() - create default overflow result for sgl/dbl precision#
20165 # unf_res() - create default underflow result for sgl/dbl prec. #
20166 # dst_dbl() - create rounded dbl precision result. #
20167 # dst_sgl() - create rounded sgl precision result. #
20168 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
20169 # bindec() - convert FP binary number to packed number. #
20170 # _mem_write() - write data to memory. #
20171 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
20172 # _dmem_write_{byte,word,long}() - write data to memory. #
20173 # store_dreg_{b,w,l}() - store data to data register file. #
20174 # facc_out_{b,w,l,d,x}() - data access error occurred. #
20176 # INPUT *************************************************************** #
20177 # a0 = pointer to extended precision source operand #
20178 # d0 = round prec,mode #
20180 # OUTPUT ************************************************************** #
20181 # fp0 : intermediate underflow or overflow result if #
20182 # OVFL/UNFL occurred for a sgl or dbl operand #
20184 # ALGORITHM *********************************************************** #
20185 # This routine is accessed by many handlers that need to do an #
20186 # opclass three move of an operand out to memory. #
20187 # Decode an fmove out (opclass 3) instruction to determine if #
20188 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
20189 # register or memory. The algorithm uses a standard "fmove" to create #
20190 # the rounded result. Also, since exceptions are disabled, this also #
20191 # creates the correct OPERR default result if appropriate. #
20192 # For sgl or dbl precision, overflow or underflow can occur. If #
20193 # either occurs and is enabled, the EXOP must be created. #
20194 # For extended precision, the stacked <ea> must be fixed along #
20195 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
20196 # the source is a denorm and if underflow is enabled, an EXOP must be #
20197 # created. #
20198 # For packed, the k-factor must be fetched from the instruction #
20199 # word or a data register. The <ea> must be fixed as w/ extended #
20200 # precision. Then, bindec() is called to create the appropriate #
20201 # packed result. #
20202 # If at any time an access error is flagged by one of the move- #
20203 # to-memory routines, then a special exit must be made so that the #
20204 # access error can be handled properly. #
20206 #########################################################################
# fout: entry point. Extracts the 3-bit destination format field from
# EXC_CMDREG and jumps to the matching fout_* routine through tbl_fout.
# %a0 (source operand ptr) and %d0 (rnd prec,mode) are left untouched for
# the routine that is reached; %d1 and %a1 are used as scratch here.
20208 global fout
20209 fout:
20210 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
20211 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
20212 jmp (tbl_fout.b,%pc,%a1) # jump to routine
20214 swbeg &0x8
# Indexed by destination format. Slots 3 and 7 both go to fout_pack.
20215 tbl_fout:
20216 short fout_long - tbl_fout
20217 short fout_sgl - tbl_fout
20218 short fout_ext - tbl_fout
20219 short fout_pack - tbl_fout
20220 short fout_word - tbl_fout
20221 short fout_dbl - tbl_fout
20222 short fout_byte - tbl_fout
20223 short fout_pack - tbl_fout
20225 #################################################################
20226 # fmove.b out ###################################################
20227 #################################################################
20229 # Only "Unimplemented Data Type" exceptions enter here. The operand
20230 # is either a DENORM or a NORM.
# fout_byte: fmove.b out. Loads the operand into fp0 (a DENORM is replaced
# by the smallest normalized single with the same sign), converts it with
# the caller's rounding mode, ORs the resulting FPSR bits into USER_FPSR,
# and writes the byte either to memory at EXC_EA or to data register Dn.
# In:  a0 = ptr to source operand, d0 = rnd prec,mode.
# The rts after the memory write and after store_dreg_b end the two store
# paths; without them a memory store would also be written to a data
# register and then loop through fout_byte_denorm.
20231 fout_byte:
20232 tst.b STAG(%a6) # is operand normalized?
20233 bne.b fout_byte_denorm # no
20235 fmovm.x SRC(%a0),&0x80 # load value
20237 fout_byte_norm:
20238 fmov.l %d0,%fpcr # insert rnd prec,mode
20240 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
20242 fmov.l &0x0,%fpcr # clear FPCR
20243 fmov.l %fpsr,%d1 # fetch FPSR
20244 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20246 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20247 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20248 beq.b fout_byte_dn # must save to integer regfile
20250 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20251 bsr.l _dmem_write_byte # write byte
20253 tst.l %d1 # did dstore fail?
20254 bne.l facc_out_b # yes
20256 rts
20258 fout_byte_dn:
20259 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20260 andi.w &0x7,%d1
20261 bsr.l store_dreg_b
20262 rts
20264 fout_byte_denorm:
20265 mov.l SRC_EX(%a0),%d1
20266 andi.l &0x80000000,%d1 # keep DENORM sign
20267 ori.l &0x00800000,%d1 # make smallest sgl
20268 fmov.s %d1,%fp0
20269 bra.b fout_byte_norm
20271 #################################################################
20272 # fmove.w out ###################################################
20273 #################################################################
20275 # Only "Unimplemented Data Type" exceptions enter here. The operand
20276 # is either a DENORM or a NORM.
# fout_word: fmove.w out. Loads the operand into fp0 (a DENORM is replaced
# by the smallest normalized single with the same sign), converts it with
# the caller's rounding mode, ORs the resulting FPSR bits into USER_FPSR,
# and writes the word either to memory at EXC_EA or to data register Dn.
# In:  a0 = ptr to source operand, d0 = rnd prec,mode.
# The rts after the memory write and after store_dreg_w end the two store
# paths; without them a memory store would also be written to a data
# register and then loop through fout_word_denorm.
20277 fout_word:
20278 tst.b STAG(%a6) # is operand normalized?
20279 bne.b fout_word_denorm # no
20281 fmovm.x SRC(%a0),&0x80 # load value
20283 fout_word_norm:
20284 fmov.l %d0,%fpcr # insert rnd prec:mode
20286 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
20288 fmov.l &0x0,%fpcr # clear FPCR
20289 fmov.l %fpsr,%d1 # fetch FPSR
20290 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20292 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20293 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20294 beq.b fout_word_dn # must save to integer regfile
20296 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20297 bsr.l _dmem_write_word # write word
20299 tst.l %d1 # did dstore fail?
20300 bne.l facc_out_w # yes
20302 rts
20304 fout_word_dn:
20305 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20306 andi.w &0x7,%d1
20307 bsr.l store_dreg_w
20308 rts
20310 fout_word_denorm:
20311 mov.l SRC_EX(%a0),%d1
20312 andi.l &0x80000000,%d1 # keep DENORM sign
20313 ori.l &0x00800000,%d1 # make smallest sgl
20314 fmov.s %d1,%fp0
20315 bra.b fout_word_norm
20317 #################################################################
20318 # fmove.l out ###################################################
20319 #################################################################
20321 # Only "Unimplemented Data Type" exceptions enter here. The operand
20322 # is either a DENORM or a NORM.
# fout_long: fmove.l out. Loads the operand into fp0 (a DENORM is replaced
# by the smallest normalized single with the same sign), converts it with
# the caller's rounding mode, ORs the resulting FPSR bits into USER_FPSR,
# and writes the long either to memory at EXC_EA or to data register Dn.
# In:  a0 = ptr to source operand, d0 = rnd prec,mode.
# The rts after the memory write and after store_dreg_l end the two store
# paths; without them a memory store would also be written to a data
# register and then loop through fout_long_denorm.
20323 fout_long:
20324 tst.b STAG(%a6) # is operand normalized?
20325 bne.b fout_long_denorm # no
20327 fmovm.x SRC(%a0),&0x80 # load value
20329 fout_long_norm:
20330 fmov.l %d0,%fpcr # insert rnd prec:mode
20332 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
20334 fmov.l &0x0,%fpcr # clear FPCR
20335 fmov.l %fpsr,%d1 # fetch FPSR
20336 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
20338 fout_long_write:
20339 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20340 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20341 beq.b fout_long_dn # must save to integer regfile
20343 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20344 bsr.l _dmem_write_long # write long
20346 tst.l %d1 # did dstore fail?
20347 bne.l facc_out_l # yes
20349 rts
20351 fout_long_dn:
20352 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20353 andi.w &0x7,%d1
20354 bsr.l store_dreg_l
20355 rts
20357 fout_long_denorm:
20358 mov.l SRC_EX(%a0),%d1
20359 andi.l &0x80000000,%d1 # keep DENORM sign
20360 ori.l &0x00800000,%d1 # make smallest sgl
20361 fmov.s %d1,%fp0
20362 bra.b fout_long_norm
20364 #################################################################
20365 # fmove.x out ###################################################
20366 #################################################################
20368 # Only "Unimplemented Data Type" exceptions enter here. The operand
20369 # is either a DENORM or a NORM.
20370 # The DENORM causes an Underflow exception.
# fout_ext: fmove.x out. Copies the operand to FP_SCR0 (zeroing the
# reserved word), loads it into fp0, fixes the stacked <ea> through
# _calc_ea_fout and writes the 12 bytes to memory. For a DENORM operand
# the underflow bit is set and, if UNFL or INEX is enabled, an EXOP is
# built in fp1.
# In:  a0 = ptr to source operand.
# Each of the four paths (normal write, denorm without enabled exception,
# -(a7) write, EXOP creation) ends in its own rts. Without them a NORM
# operand would be treated as a DENORM, the data would be written a
# second time through _mem_write2, and every path would end in
# fout_ext_err.
20371 fout_ext:
20373 # we copy the extended precision result to FP_SCR0 so that the reserved
20374 # 16-bit field gets zeroed. we do this since we promise not to disturb
20375 # what's at SRC(a0).
20376 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20377 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
20378 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20379 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20381 fmovm.x SRC(%a0),&0x80 # return result
20383 bsr.l _calc_ea_fout # fix stacked <ea>
20385 mov.l %a0,%a1 # pass: dst addr
20386 lea FP_SCR0(%a6),%a0 # pass: src addr
20387 mov.l &0xc,%d0 # pass: opsize is 12 bytes
20389 # we must not yet write the extended precision data to the stack
20390 # in the pre-decrement case from supervisor mode or else we'll corrupt
20391 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
20392 cmpi.b SPCOND_FLG(%a6),&mda7_flg
20393 beq.b fout_ext_a7
20395 bsr.l _dmem_write # write ext prec number to memory
20397 tst.l %d1 # did dstore fail?
20398 bne.w fout_ext_err # yes
20400 tst.b STAG(%a6) # is operand normalized?
20401 bne.b fout_ext_denorm # no
20402 rts
20404 # the number is a DENORM. must set the underflow exception bit
20405 fout_ext_denorm:
20406 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
20408 mov.b FPCR_ENABLE(%a6),%d0
20409 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
20410 bne.b fout_ext_exc # yes
20411 rts
20413 # we don't want to do the write if the exception occurred in supervisor mode
20414 # so _mem_write2() handles this for us.
20415 fout_ext_a7:
20416 bsr.l _mem_write2 # write ext prec number to memory
20418 tst.l %d1 # did dstore fail?
20419 bne.w fout_ext_err # yes
20421 tst.b STAG(%a6) # is operand normalized?
20422 bne.b fout_ext_denorm # no
20423 rts
20425 fout_ext_exc:
20426 lea FP_SCR0(%a6),%a0
20427 bsr.l norm # normalize the mantissa
20428 neg.w %d0 # new exp = -(shft amt)
20429 andi.w &0x7fff,%d0
20430 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
20431 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
20432 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
20433 rts
20435 fout_ext_err:
20436 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
20437 bra.l facc_out_x
20439 #########################################################################
20440 # fmove.s out ###########################################################
20441 #########################################################################
# fout_sgl: fmove.s out. Forces single precision into the rounding
# control saved in L_SCR3, then classifies the operand by exponent:
# above SGL_HI -> fout_sgl_ovfl, equal -> fout_sgl_may_ovfl, below
# SGL_LO -> fout_sgl_unfl, otherwise a plain fmov.s does the conversion.
# In:  a0 = ptr to source operand, d0 = rnd prec,mode.
# The in-range path writes the result to memory at EXC_EA or to Dn and
# returns; the rts after each store keeps it from also performing the
# other store and then running on into fout_sgl_unfl.
20442 fout_sgl:
20443 andi.b &0x30,%d0 # clear rnd prec
20444 ori.b &s_mode*0x10,%d0 # insert sgl prec
20445 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20448 # operand is a normalized number. first, we check to see if the move out
20449 # would cause either an underflow or overflow. these cases are handled
20450 # separately. otherwise, set the FPCR to the proper rounding mode and
20451 # execute the move.
20453 mov.w SRC_EX(%a0),%d0 # extract exponent
20454 andi.w &0x7fff,%d0 # strip sign
20456 cmpi.w %d0,&SGL_HI # will operand overflow?
20457 bgt.w fout_sgl_ovfl # yes; go handle OVFL
20458 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
20459 cmpi.w %d0,&SGL_LO # will operand underflow?
20460 blt.w fout_sgl_unfl # yes; go handle underflow
20463 # NORMs(in range) can be stored out by a simple "fmov.s"
20464 # Unnormalized inputs can come through this point.
20466 fout_sgl_exg:
20467 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20469 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20470 fmov.l &0x0,%fpsr # clear FPSR
20472 fmov.s %fp0,%d0 # store does convert and round
20474 fmov.l &0x0,%fpcr # clear FPCR
20475 fmov.l %fpsr,%d1 # save FPSR
20477 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
20479 fout_sgl_exg_write:
20480 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20481 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20482 beq.b fout_sgl_exg_write_dn # must save to integer regfile
20484 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20485 bsr.l _dmem_write_long # write long
20487 tst.l %d1 # did dstore fail?
20488 bne.l facc_out_l # yes
20490 rts
20492 fout_sgl_exg_write_dn:
20493 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20494 andi.w &0x7,%d1
20495 bsr.l store_dreg_l
20496 rts
20499 # here, we know that the operand would UNFL if moved out to single prec,
20500 # so, denorm and round and then use generic store single routine to
20501 # write the value to memory.
# fout_sgl_unfl: the operand underflows in single precision. Sets UNFL,
# copies the operand to FP_SCR0 (normalizing it first if it is a DENORM),
# lets unf_res() compute the default underflow result, converts it with
# dst_sgl() and stores the long to memory at EXC_EA or to Dn.
# The source pointer is pushed on the stack on entry. If UNFL or INEX is
# enabled, control goes to fout_sd_exc_unfl with that pointer still
# pushed; otherwise the pointer is dropped and the routine returns. The
# rts is required so this path does not run on into fout_sgl_ovfl.
20503 fout_sgl_unfl:
20504 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
20506 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20507 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20508 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20509 mov.l %a0,-(%sp) # save ptr to the original operand
20511 clr.l %d0 # pass: S.F. = 0
20513 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
20514 bne.b fout_sgl_unfl_cont # let DENORMs fall through
20516 lea FP_SCR0(%a6),%a0
20517 bsr.l norm # normalize the DENORM
20519 fout_sgl_unfl_cont:
20520 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
20521 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
20522 bsr.l unf_res # calc default underflow result
20524 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
20525 bsr.l dst_sgl # convert to single prec
20527 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
20528 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
20529 beq.b fout_sgl_unfl_dn # must save to integer regfile
20531 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
20532 bsr.l _dmem_write_long # write long
20534 tst.l %d1 # did dstore fail?
20535 bne.l facc_out_l # yes
20537 bra.b fout_sgl_unfl_chkexc
20539 fout_sgl_unfl_dn:
20540 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
20541 andi.w &0x7,%d1
20542 bsr.l store_dreg_l
20544 fout_sgl_unfl_chkexc:
20545 mov.b FPCR_ENABLE(%a6),%d1
20546 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
20547 bne.w fout_sd_exc_unfl # yes
20548 addq.l &0x4,%sp # drop the saved operand ptr
20549 rts
20552 # it's definitely an overflow so call ovf_res to get the correct answer
# fout_sgl_ovfl: the operand definitely overflows single precision.
# Records ovfl/aovfl/ainex (plus inex2 when the low mantissa byte of SRC_HI
# or any bit of SRC_LO is set), asks ovf_res for the default overflow
# result, converts it with an fmov.s into d0 and stores it to memory or a
# data register. The operand pointer is pushed so fout_sd_exc_ovfl can pop
# it when an exception is enabled; otherwise it is discarded before rts.
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)		# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save operand ptr for EXOP path

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.s		%fp0,%d0		# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

fout_sgl_ovfl_chkexc:
# NOTE(review): this is the same 0x0a mask (commented "UNFL or INEX") that
# the underflow path uses, although this is the overflow path -- confirm
# against the FPCR enable-byte bit layout.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes; it pops the saved ptr
	addq.l		&0x4,%sp		# no; drop the saved operand ptr
	rts					# must not run on into fout_sgl_may_ovfl
20602 # move out MAY overflow:
20603 # (1) force the exp to 0x3fff
20604 # (2) do a move w/ appropriate rnd mode
20605 # (3) if exp still equals zero, then insert original exponent
20606 # for the correct result.
20607 # if exp now equals one, then it overflowed so call ovf_res.
# fout_sgl_may_ovfl: borderline case where rounding decides whether the
# move out overflows. A copy of the operand is built in FP_SCR0 with the
# original sign and mantissa but a biased exponent of 0x3fff, rounded under
# the precision/mode saved in L_SCR3, and its magnitude compared with 2:
# below 2 the exponent did not grow and the normal store path is taken,
# otherwise the overflow path is taken.
fout_sgl_may_ovfl:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # mantissa, upper longword
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # mantissa, lower longword
	mov.w		SRC_EX(%a0),%d1		# sign + exponent word
	andi.w		&0x8000,%d1		# retain the sign only
	ori.w		&0x3fff,%d1		# scaled exponent 0x3fff
	mov.w		%d1,FP_SCR0_EX(%a6)	# finish the scaled copy

	fmov.l		L_SCR3(%a6),%fpcr	# round with the user's prec,mode
	fmov.x		FP_SCR0(%a6),%fp0	# loading performs the rounding
	fmov.l		&0x0,%fpcr		# back to a cleared FPCR

	fabs.x		%fp0			# compare on magnitude
	fcmp.b		%fp0,&0x2		# did rounding carry into the exponent?
	fblt.w		fout_sgl_exg		# no; finish as an in-range NORM
	bra.w		fout_sgl_ovfl		# yes; produce the overflow result
20627 ################
# fout_sd_exc_unfl: reached from the single and double underflow paths when
# their enable-byte test is non-zero. Pops the operand pointer those paths
# pushed, copies the operand into FP_SCR0 and, if the source was tagged
# DENORM, normalizes it and rewrites the 15-bit exponent field from the
# negated value left in d0. Continues at fout_sd_exc_cont.
20629 fout_sd_exc_unfl:
20630 mov.l (%sp)+,%a0 # pop operand ptr pushed by the underflow path
20632 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
20633 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
20634 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
20636 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
20637 bne.b fout_sd_exc_cont # no
20639 lea FP_SCR0(%a6),%a0
20640 bsr.l norm
20641 neg.l %d0 # NOTE(review): d0 is presumably norm's shift count -- confirm
20642 andi.w &0x7fff,%d0 # keep 15 bits for the exponent field
20643 bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert just below the sign bit
20644 bra.b fout_sd_exc_cont
# fout_sd_exc / fout_sd_exc_ovfl: build the exceptional operand (EXOP) for
# an enabled exception on a single or double move out. Pops the operand
# pointer pushed by the overflow path and copies the operand to FP_SCR0.
#
# fout_sd_exc_cont (also entered from fout_sd_exc_unfl): strips the sign
# into the internal sign byte at 2+FP_SCR0_EX, rounds FP_SCR0 with _round
# using fields derived from the low byte of L_SCR3, restores the sign and
# returns the EXOP in fp1. The final rts keeps execution from running on
# into fout_dbl.
fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# clear sign bit
	sne.b		2+FP_SCR0_EX(%a6)	# set internal sign bit (if it was set)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to DENORM

	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1		# field for the upper word of d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1		# field for the lower word of d1
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round the DENORM

	tst.b		2+FP_SCR0_EX(%a6)	# is EXOP negative?
	beq.b		fout_sd_exc_done	# no
	bset		&0x7,FP_SCR0_EX(%a6)	# yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
20677 #################################################################
20678 # fmove.d out ###################################################
20679 #################################################################
# fout_dbl: entry point for an fmove.d out. The rounding byte arriving in
# d0 has its precision field replaced with double and is saved in L_SCR3.
# The biased exponent of the operand at (a0) is then compared with DBL_HI
# and DBL_LO: above DBL_HI is a certain overflow, equal to DBL_HI may
# overflow after rounding, below DBL_LO underflows. Anything else falls
# through to fout_dbl_exg.
20680 fout_dbl:
20681 andi.b &0x30,%d0 # clear rnd prec
20682 ori.b &d_mode*0x10,%d0 # insert dbl prec
20683 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
20686 # operand is a normalized number. first, we check to see if the move out
20687 # would cause either an underflow or overflow. these cases are handled
20688 # separately. otherwise, set the FPCR to the proper rounding mode and
20689 # execute the move.
20691 mov.w SRC_EX(%a0),%d0 # extract exponent
20692 andi.w &0x7fff,%d0 # strip sign
20694 cmpi.w %d0,&DBL_HI # will operand overflow?
20695 bgt.w fout_dbl_ovfl # yes; go handle OVFL
20696 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
20697 cmpi.w %d0,&DBL_LO # will operand underflow?
20698 blt.w fout_dbl_unfl # yes; go handle underflow
20701 # NORMs(in range) can be stored out by a simple "fmov.d"
20702 # Unnormalized inputs can come through this point.
# fout_dbl_exg: in-range double store. Loads the operand into fp0, installs
# the saved rounding control from L_SCR3, clears the FPSR, and lets fmov.d
# convert and round into the L_SCR1 scratch area. The resulting FPSR bits
# are OR-ed into USER_FPSR, then the 8-byte image is copied to the
# destination address in EXC_EA with _dmem_write. A non-zero d1 after the
# write diverts to facc_out_d.
20704 fout_dbl_exg:
20705 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
20707 fmov.l L_SCR3(%a6),%fpcr # set FPCR
20708 fmov.l &0x0,%fpsr # clear FPSR
20710 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
20712 fmov.l &0x0,%fpcr # clear FPCR
20713 fmov.l %fpsr,%d0 # save FPSR
20715 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
20717 mov.l EXC_EA(%a6),%a1 # pass: dst addr
20718 lea L_SCR1(%a6),%a0 # pass: src addr
20719 movq.l &0x8,%d0 # pass: opsize is 8 bytes
20720 bsr.l _dmem_write # store dbl fop to memory
20722 tst.l %d1 # did dstore fail?
20723 bne.l facc_out_d # yes
20725 rts # no; so we're finished
20728 # here, we know that the operand would UNFL if moved out to double prec,
20729 # so, denorm and round and then use generic store double routine to
20730 # write the value to memory.
# fout_dbl_unfl: the operand underflows when moved out to double precision.
# Sets UNFL, copies the operand into FP_SCR0 (normalizing it first when the
# source tag is DENORM), has unf_res build the default underflow result,
# converts it with dst_dbl and writes the two result longwords to the
# destination via the L_SCR1/L_SCR2 scratch area. The operand pointer is
# pushed so fout_sd_exc_unfl can pop it when UNFL or INEX is enabled;
# otherwise it is dropped here before returning.
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save operand ptr for EXOP path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# fetch src optype tag
	bne.b		fout_dbl_unfl_cont	# let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calc default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# convert to double prec
	mov.l		%d0,L_SCR1(%a6)		# hi(double) into scratch
	mov.l		%d1,L_SCR2(%a6)		# lo(double) into scratch

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes; it pops the saved ptr
	addq.l		&0x4,%sp		# no; drop the saved operand ptr
	rts					# must not run on into fout_dbl_ovfl
20773 # it's definitely an overflow so call ovf_res to get the correct answer
# fout_dbl_ovfl: the operand definitely overflows double precision.
# Records ovfl/aovfl/ainex, adding inex2 when any of the low 11 bits of the
# mantissa (mask 0x7ff on the low word of SRC_LO) is set. ovf_res supplies
# the default overflow result, which is converted with fmov.d into the
# L_SCR1 scratch area and written to the destination. The operand pointer
# is pushed so fout_sd_exc_ovfl can pop it when an exception is enabled;
# otherwise it is dropped here before returning.
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0
	andi.w		&0x7ff,%d0		# any of the low 11 bits set?
	bne.b		fout_dbl_ovfl_inex2

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save operand ptr for EXOP path

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# calc OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

# NOTE(review): this is the same 0x0a mask (commented "UNFL or INEX") that
# the underflow path uses, although this is the overflow path -- confirm
# against the FPCR enable-byte bit layout.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1		# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes; it pops the saved ptr
	addq.l		&0x4,%sp		# no; drop the saved operand ptr
	rts					# must not run on into fout_dbl_may_ovfl
20813 # move out MAY overflow:
20814 # (1) force the exp to 0x3fff
20815 # (2) do a move w/ appropriate rnd mode
20816 # (3) if exp still equals zero, then insert original exponent
20817 # for the correct result.
20818 # if exp now equals one, then it overflowed so call ovf_res.
# fout_dbl_may_ovfl: entered from fout_dbl when the exponent equals DBL_HI,
# so rounding decides whether the move out overflows. A copy of the operand
# is built in FP_SCR0 with the original sign and mantissa but a biased
# exponent of 0x3fff, rounded under the precision/mode saved in L_SCR3, and
# its magnitude compared with 2: below 2 the exponent did not grow and the
# normal store path is taken, otherwise the overflow path is taken.
fout_dbl_may_ovfl:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # mantissa, upper longword
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # mantissa, lower longword
	mov.w		SRC_EX(%a0),%d1		# sign + exponent word
	andi.w		&0x8000,%d1		# retain the sign only
	ori.w		&0x3fff,%d1		# scaled exponent 0x3fff
	mov.w		%d1,FP_SCR0_EX(%a6)	# finish the scaled copy

	fmov.l		L_SCR3(%a6),%fpcr	# round with the user's prec,mode
	fmov.x		FP_SCR0(%a6),%fp0	# loading performs the rounding
	fmov.l		&0x0,%fpcr		# back to a cleared FPCR

	fabs.x		%fp0			# compare on magnitude
	fcmp.b		%fp0,&0x2		# did rounding carry into the exponent?
	fblt.w		fout_dbl_exg		# no; finish as an in-range NORM
	bra.w		fout_dbl_ovfl		# yes; produce the overflow result
20838 #########################################################################
20839 # XDEF **************************************************************** #
20840 # dst_dbl(): create double precision value from extended prec. #
20842 # XREF **************************************************************** #
20843 # None #
20845 # INPUT *************************************************************** #
20846 # a0 = pointer to source operand in extended precision #
20848 # OUTPUT ************************************************************** #
20849 # d0 = hi(double precision result) #
20850 # d1 = lo(double precision result) #
20852 # ALGORITHM *********************************************************** #
20854 # Changes extended precision to double precision. #
20855 # Note: no attempt is made to round the extended value to double. #
20856 # dbl_sign = ext_sign #
20857 # dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
20858 # get rid of ext integer bit #
20859 # dbl_mant = ext_mant{62:11} #
20861 # --------------- --------------- --------------- #
20862 # extended -> |s| exp | |1| ms mant | | ls mant | #
20863 # --------------- --------------- --------------- #
20864 # 95 64 63 62 32 31 11 0 #
20865 # | | #
20866 # | | #
20867 # | | #
20868 # v v #
20869 # --------------- --------------- #
20870 # double -> |s|exp| mant | | mant | #
20871 # --------------- --------------- #
20872 # 63 51 32 31 0 #
20874 #########################################################################
# dst_dbl: repack the extended precision value at (a0) as a double.
#   in:  a0 = pointer to the extended precision operand
#   out: d0 = hi longword of the double, d1 = lo longword of the double
# Uses L_SCR1/L_SCR2 as scratch while the two halves are assembled. No
# rounding is performed; mantissa bits below the double's range are dropped.
dst_dbl:
	clr.l		%d0			# clear d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0		# add double precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_dupper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# d0 now in upper word
	lsl.l		&0x4,%d0		# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_dman		# if positive, go process mantissa
	bset		&0x1f,%d0		# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	bfextu		%d1{&1:&20},%d1		# get upper 20 bits of ms
	or.l		%d1,%d0			# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)		# park the finished hi longword
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	mov.l		&21,%d0			# load shift count
	lsl.l		%d0,%d1			# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)		# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1	# get ls mantissa
	bfextu		%d1{&0:&21},%d0		# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# put them in double result
	mov.l		L_SCR1(%a6),%d0		# reload hi longword for return
	rts					# must not run on into dst_sgl
20906 #########################################################################
20907 # XDEF **************************************************************** #
20908 # dst_sgl(): create single precision value from extended prec #
20910 # XREF **************************************************************** #
20912 # INPUT *************************************************************** #
20913 # a0 = pointer to source operand in extended precision #
20915 # OUTPUT ************************************************************** #
20916 # d0 = single precision result #
20918 # ALGORITHM *********************************************************** #
20920 # Changes extended precision to single precision. #
20921 # sgl_sign = ext_sign #
20922 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
20923 # get rid of ext integer bit #
20924 # sgl_mant = ext_mant{62:40} #
20926 # --------------- --------------- --------------- #
20927 # extended -> |s| exp | |1| ms mant | | ls mant | #
20928 # --------------- --------------- --------------- #
20929 # 95 64 63 62 40 32 31 12 0 #
20930 # | | #
20931 # | | #
20932 # | | #
20933 # v v #
20934 # --------------- #
20935 # single -> |s|exp| mant | #
20936 # --------------- #
20937 # 31 22 0 #
20939 #########################################################################
# dst_sgl: repack the extended precision value at (a0) as a single.
#   in:  a0 = pointer to the extended precision operand
#   out: d0 = single precision result
# d1 is used as scratch. No rounding is performed; only the 23 mantissa
# bits directly below the integer bit are kept.
dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0	# get exponent
	subi.w		&EXT_BIAS,%d0		# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0		# add single precision bias
	tst.b		FTEMP_HI(%a0)		# is number a denorm?
	bmi.b		dst_get_supper		# no
	subq.w		&0x1,%d0		# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# put exp in upper word of d0
	lsl.l		&0x7,%d0		# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)		# test sign
	bpl.b		dst_get_sman		# if positive, continue
	bset		&0x1f,%d0		# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# get ms mantissa
	andi.l		&0x7fffff00,%d1		# get upper 23 bits of ms
	lsr.l		&0x8,%d1		# and put them flush right
	or.l		%d1,%d0			# put these bits in ms word of single
	rts					# must not run on into fout_pack
20962 ##############################################################################
# fout_pack: move out to packed decimal.
# The destination address from _calc_ea_fout is pushed on the stack. NORM
# and DENORM inputs are converted by bindec using a k-factor taken either
# from the static field of the command word or from the data register it
# names; other input types are written from FP_SRC (an SNAN additionally
# sets SNAN/AIOP and the snan bit). The 12-byte result is written with
# _dmem_write, or with _mem_write2 when SPCOND_FLG equals mda7_flg. Each
# write path returns on its own; a non-zero d1 after the write diverts to
# fout_ext_err.
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor
	mov.l		%d0,-(%sp)		# save k-factor across bindec

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0		# recover k-factor

	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write 12-byte result to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts					# no; must not also take the a7 path

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write 12-byte result to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts					# no; must not run on into not_norm

fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write
21060 #########################################################################
21061 # XDEF **************************************************************** #
21062 # fetch_dreg(): fetch register according to index in d1 #
21064 # XREF **************************************************************** #
21065 # None #
21067 # INPUT *************************************************************** #
21068 # d1 = index of register to fetch from #
21070 # OUTPUT ************************************************************** #
21071 # d0 = value of register fetched #
21073 # ALGORITHM *********************************************************** #
21074 # According to the index value in d1 which can range from zero #
21075 # to fifteen, load the corresponding register file value (where #
21076 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
21077 # stack. The rest should still be in their original places. #
21079 #########################################################################
21081 # this routine leaves d1 intact for subsequent store_dreg calls.
# fetch_dreg: d0 = value of the integer register selected by d1 (0-15).
# Indexes 0-7 are d0-d7 and 8-15 are a0-a7. d0, d1, a0 and a1 are read from
# the EXC_DREGS save area, a6 from (%a6) and a7 from EXC_A7; the others are
# read directly. d1 is not modified. Dispatch goes through a table of word
# offsets relative to tbl_fdreg, and every case returns on its own so that
# only the selected register is loaded.
	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)

tbl_fdreg:
	short		fdreg0 - tbl_fdreg
	short		fdreg1 - tbl_fdreg
	short		fdreg2 - tbl_fdreg
	short		fdreg3 - tbl_fdreg
	short		fdreg4 - tbl_fdreg
	short		fdreg5 - tbl_fdreg
	short		fdreg6 - tbl_fdreg
	short		fdreg7 - tbl_fdreg
	short		fdreg8 - tbl_fdreg
	short		fdreg9 - tbl_fdreg
	short		fdrega - tbl_fdreg
	short		fdregb - tbl_fdreg
	short		fdregc - tbl_fdreg
	short		fdregd - tbl_fdreg
	short		fdrege - tbl_fdreg
	short		fdregf - tbl_fdreg

fdreg0:
	mov.l		EXC_DREGS+0x0(%a6),%d0
	rts
fdreg1:
	mov.l		EXC_DREGS+0x4(%a6),%d0
	rts
fdreg2:
	mov.l		%d2,%d0
	rts
fdreg3:
	mov.l		%d3,%d0
	rts
fdreg4:
	mov.l		%d4,%d0
	rts
fdreg5:
	mov.l		%d5,%d0
	rts
fdreg6:
	mov.l		%d6,%d0
	rts
fdreg7:
	mov.l		%d7,%d0
	rts
fdreg8:
	mov.l		EXC_DREGS+0x8(%a6),%d0
	rts
fdreg9:
	mov.l		EXC_DREGS+0xc(%a6),%d0
	rts
fdrega:
	mov.l		%a2,%d0
	rts
fdregb:
	mov.l		%a3,%d0
	rts
fdregc:
	mov.l		%a4,%d0
	rts
fdregd:
	mov.l		%a5,%d0
	rts
fdrege:
	mov.l		(%a6),%d0
	rts
fdregf:
	mov.l		EXC_A7(%a6),%d0
	rts
21154 #########################################################################
21155 # XDEF **************************************************************** #
21156 # store_dreg_l(): store longword to data register specified by d1 #
21158 # XREF **************************************************************** #
21159 # None #
21161 # INPUT *************************************************************** #
21162 # d0 = longword value to store #
21163 # d1 = index of register to store to #
21165 # OUTPUT ************************************************************** #
21166 # (data register is updated) #
21168 # ALGORITHM *********************************************************** #
21169 # According to the index value in d1, store the longword value #
21170 # in d0 to the corresponding data register. D0/D1 are on the stack #
21171 # while the rest are in their initial places. #
21173 #########################################################################
# store_dreg_l: write the longword in d0 to data register Dn, n = d1 (0-7).
# d0 and d1 are written to their slots in the EXC_DREGS save area; d2-d7
# are written directly. d1 is overwritten by the table lookup. Every case
# returns on its own so that only the selected register is modified.
	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)

tbl_sdregl:
	short		sdregl0 - tbl_sdregl
	short		sdregl1 - tbl_sdregl
	short		sdregl2 - tbl_sdregl
	short		sdregl3 - tbl_sdregl
	short		sdregl4 - tbl_sdregl
	short		sdregl5 - tbl_sdregl
	short		sdregl6 - tbl_sdregl
	short		sdregl7 - tbl_sdregl

sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts
21215 #########################################################################
21216 # XDEF **************************************************************** #
21217 # store_dreg_w(): store word to data register specified by d1 #
21219 # XREF **************************************************************** #
21220 # None #
21222 # INPUT *************************************************************** #
21223 # d0 = word value to store #
21224 # d1 = index of register to store to #
21226 # OUTPUT ************************************************************** #
21227 # (data register is updated) #
21229 # ALGORITHM *********************************************************** #
21230 # According to the index value in d1, store the word value #
21231 # in d0 to the corresponding data register. D0/D1 are on the stack #
21232 # while the rest are in their initial places. #
21234 #########################################################################
# store_dreg_w: write the low word of d0 to the low word of data register
# Dn, n = d1 (0-7). d0 and d1 are updated in the EXC_DREGS save area (at
# byte offset 2 within the saved longword); d2-d7 are written directly.
# d1 is overwritten by the table lookup. Every case returns on its own so
# that only the selected register is modified.
	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)

tbl_sdregw:
	short		sdregw0 - tbl_sdregw
	short		sdregw1 - tbl_sdregw
	short		sdregw2 - tbl_sdregw
	short		sdregw3 - tbl_sdregw
	short		sdregw4 - tbl_sdregw
	short		sdregw5 - tbl_sdregw
	short		sdregw6 - tbl_sdregw
	short		sdregw7 - tbl_sdregw

sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
21276 #########################################################################
21277 # XDEF **************************************************************** #
21278 # store_dreg_b(): store byte to data register specified by d1 #
21280 # XREF **************************************************************** #
21281 # None #
21283 # INPUT *************************************************************** #
21284 # d0 = byte value to store #
21285 # d1 = index of register to store to #
21287 # OUTPUT ************************************************************** #
21288 # (data register is updated) #
21290 # ALGORITHM *********************************************************** #
21291 # According to the index value in d1, store the byte value #
21292 # in d0 to the corresponding data register. D0/D1 are on the stack #
21293 # while the rest are in their initial places. #
21295 #########################################################################
# store_dreg_b: write the low byte of d0 to the low byte of data register
# Dn, n = d1 (0-7). d0 and d1 are updated in the EXC_DREGS save area (at
# byte offset 3 within the saved longword); d2-d7 are written directly.
# d1 is overwritten by the table lookup. Every case returns on its own so
# that only the selected register is modified.
	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)

tbl_sdregb:
	short		sdregb0 - tbl_sdregb
	short		sdregb1 - tbl_sdregb
	short		sdregb2 - tbl_sdregb
	short		sdregb3 - tbl_sdregb
	short		sdregb4 - tbl_sdregb
	short		sdregb5 - tbl_sdregb
	short		sdregb6 - tbl_sdregb
	short		sdregb7 - tbl_sdregb

sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
21337 #########################################################################
21338 # XDEF **************************************************************** #
21339 # inc_areg(): increment an address register by the value in d0 #
21341 # XREF **************************************************************** #
21342 # None #
21344 # INPUT *************************************************************** #
21345 # d0 = amount to increment by #
21346 # d1 = index of address register to increment #
21348 # OUTPUT ************************************************************** #
21349 # (address register is updated) #
21351 # ALGORITHM *********************************************************** #
21352 # Typically used for an instruction w/ a post-increment <ea>, #
21353 # this routine adds the increment value in d0 to the address register #
21354 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21355 # in their original places. #
21356 # For a7, if the increment amount is one, then we have to #
21357 # increment by two. For any a7 update, set the mia7_flag so that if #
21358 # an access error exception occurs later in emulation, this address #
21359 # register update can be undone. #
21361 #########################################################################
# inc_areg: add d0 to address register An, n = d1 (0-7).
# a0 and a1 are updated in the EXC_DREGS save area, a6 at (%a6) and a7 in
# EXC_A7; a2-a5 are updated directly. d1 is overwritten by the table
# lookup. For a7, SPCOND_FLG is set to mia7_flg and an increment of one is
# applied as two. Every case returns on its own so that exactly one
# register receives exactly one adjustment.
	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1
	jmp		(tbl_iareg.b,%pc,%d1.w*1)

tbl_iareg:
	short		iareg0 - tbl_iareg
	short		iareg1 - tbl_iareg
	short		iareg2 - tbl_iareg
	short		iareg3 - tbl_iareg
	short		iareg4 - tbl_iareg
	short		iareg5 - tbl_iareg
	short		iareg6 - tbl_iareg
	short		iareg7 - tbl_iareg

iareg0:	add.l		%d0,EXC_DREGS+0x8(%a6)
	rts
iareg1:	add.l		%d0,EXC_DREGS+0xc(%a6)
	rts
iareg2:	add.l		%d0,%a2
	rts
iareg3:	add.l		%d0,%a3
	rts
iareg4:	add.l		%d0,%a4
	rts
iareg5:	add.l		%d0,%a5
	rts
iareg6:	add.l		%d0,(%a6)
	rts
iareg7:	mov.b		&mia7_flg,SPCOND_FLG(%a6) # remember the a7 update
	cmpi.b		%d0,&0x1		# byte-sized increment?
	beq.b		iareg7b			# yes; a7 moves by two instead
	add.l		%d0,EXC_A7(%a6)
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)
	rts
21401 #########################################################################
21402 # XDEF **************************************************************** #
21403 # dec_areg(): decrement an address register by the value in d0 #
21405 # XREF **************************************************************** #
21406 # None #
21408 # INPUT *************************************************************** #
21409 # d0 = amount to decrement by #
21410 # d1 = index of address register to decrement #
21412 # OUTPUT ************************************************************** #
21413 # (address register is updated) #
21415 # ALGORITHM *********************************************************** #
21416 # Typically used for an instruction w/ a pre-decrement <ea>, #
21417 # this routine subtracts the decrement value in d0 from the address register #
21418 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
21419 # in their original places. #
21420 # For a7, if the decrement amount is one, then we have to #
21421 # decrement by two. For any a7 update, set the mda7_flag so that if #
21422 # an access error exception occurs later in emulation, this address #
21423 # register update can be undone. #
21425 #########################################################################
# dec_areg: subtract d0 from address register An, n = d1 (0-7).
# a0 and a1 are updated in the EXC_DREGS save area, a6 at (%a6) and a7 in
# EXC_A7; a2-a5 are updated directly. d1 is overwritten by the table
# lookup. For a7, SPCOND_FLG is set to mda7_flg and a decrement of one is
# applied as two. Every case returns on its own so that exactly one
# register receives exactly one adjustment.
	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1
	jmp		(tbl_dareg.b,%pc,%d1.w*1)

tbl_dareg:
	short		dareg0 - tbl_dareg
	short		dareg1 - tbl_dareg
	short		dareg2 - tbl_dareg
	short		dareg3 - tbl_dareg
	short		dareg4 - tbl_dareg
	short		dareg5 - tbl_dareg
	short		dareg6 - tbl_dareg
	short		dareg7 - tbl_dareg

dareg0:	sub.l		%d0,EXC_DREGS+0x8(%a6)
	rts
dareg1:	sub.l		%d0,EXC_DREGS+0xc(%a6)
	rts
dareg2:	sub.l		%d0,%a2
	rts
dareg3:	sub.l		%d0,%a3
	rts
dareg4:	sub.l		%d0,%a4
	rts
dareg5:	sub.l		%d0,%a5
	rts
dareg6:	sub.l		%d0,(%a6)
	rts
dareg7:	mov.b		&mda7_flg,SPCOND_FLG(%a6) # remember the a7 update
	cmpi.b		%d0,&0x1		# byte-sized decrement?
	beq.b		dareg7b			# yes; a7 moves by two instead
	sub.l		%d0,EXC_A7(%a6)
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)
	rts
21465 ##############################################################################
21467 #########################################################################
21468 # XDEF **************************************************************** #
21469 # load_fpn1(): load FP register value into FP_SRC(a6). #
21471 # XREF **************************************************************** #
21472 # None #
21474 # INPUT *************************************************************** #
21475 # d0 = index of FP register to load #
21477 # OUTPUT ************************************************************** #
21478 # FP_SRC(a6) = value loaded from FP register file #
21480 # ALGORITHM *********************************************************** #
21481 # Using the index in d0, load FP_SRC(a6) with a number from the #
21482 # FP register file. #
21484 #########################################################################
# load_fpn1: copy floating-point register FPn, n = d0 (0-7), into
# FP_SRC(a6) and return a0 = address of FP_SRC. fp0 and fp1 are copied from
# their EXC_FP0/EXC_FP1 save slots; fp2-fp7 are stored directly with
# fmovm.x. d0 is overwritten by the table lookup. Every case returns on its
# own so that FP_SRC holds the selected register and nothing else.
	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)

tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_SRC(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_SRC(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_2:
	fmovm.x		&0x20, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_3:
	fmovm.x		&0x10, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_4:
	fmovm.x		&0x08, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_5:
	fmovm.x		&0x04, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_6:
	fmovm.x		&0x02, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
load_fpn1_7:
	fmovm.x		&0x01, FP_SRC(%a6)
	lea		FP_SRC(%a6), %a0
	rts
21538 #############################################################################
21540 #########################################################################
21541 # XDEF **************************************************************** #
21542 # load_fpn2(): load FP register value into FP_DST(a6). #
21544 # XREF **************************************************************** #
21545 # None #
21547 # INPUT *************************************************************** #
21548 # d0 = index of FP register to load #
21550 # OUTPUT ************************************************************** #
21551 # FP_DST(a6) = value loaded from FP register file #
21553 # ALGORITHM *********************************************************** #
21554 # Using the index in d0, load FP_DST(a6) with a number from the #
21555 # FP register file. #
21557 #########################################################################
# load_fpn2: copy floating-point register FPn, n = d0 (0-7), into
# FP_DST(a6) and return a0 = address of FP_DST. fp0 and fp1 are copied from
# their EXC_FP0/EXC_FP1 save slots; fp2-fp7 are stored directly with
# fmovm.x. d0 is overwritten by the table lookup. Every case returns on its
# own so that FP_DST holds the selected register and nothing else.
	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)

tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	mov.l		0+EXC_FP0(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP0(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP0(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_1:
	mov.l		0+EXC_FP1(%a6), 0+FP_DST(%a6)
	mov.l		4+EXC_FP1(%a6), 4+FP_DST(%a6)
	mov.l		8+EXC_FP1(%a6), 8+FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_2:
	fmovm.x		&0x20, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_3:
	fmovm.x		&0x10, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_4:
	fmovm.x		&0x08, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_5:
	fmovm.x		&0x04, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_6:
	fmovm.x		&0x02, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
load_fpn2_7:
	fmovm.x		&0x01, FP_DST(%a6)
	lea		FP_DST(%a6), %a0
	rts
21611 #############################################################################
21613 #########################################################################
21614 # XDEF **************************************************************** #
21615 # store_fpreg(): store an fp value to the fpreg designated d0. #
21617 # XREF **************************************************************** #
21618 # None #
21620 # INPUT *************************************************************** #
21621 # fp0 = extended precision value to store #
21622 # d0 = index of floating-point register #
21624 # OUTPUT ************************************************************** #
21625 # None #
21627 # ALGORITHM *********************************************************** #
21628 # Store the value in fp0 to the FP register designated by the #
21629 # value in d0. The FP number can be DENORM or SNAN so we have to be #
21630 # careful that we don't take an exception here. #
21632 #########################################################################
21634 global store_fpreg
21635 store_fpreg:
21636 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = table[index]: handler offset from tbl_store_fpreg
21637 jmp (tbl_store_fpreg.b,%pc,%d0.w*1) # dispatch to store_fpreg_<index>
21639 tbl_store_fpreg:
21640 short store_fpreg_0 - tbl_store_fpreg
21641 short store_fpreg_1 - tbl_store_fpreg
21642 short store_fpreg_2 - tbl_store_fpreg
21643 short store_fpreg_3 - tbl_store_fpreg
21644 short store_fpreg_4 - tbl_store_fpreg
21645 short store_fpreg_5 - tbl_store_fpreg
21646 short store_fpreg_6 - tbl_store_fpreg
21647 short store_fpreg_7 - tbl_store_fpreg
21649 store_fpreg_0:
21650 fmovm.x &0x80, EXC_FP0(%a6) # fp0 -> EXC_FP0 slot (fp0 is kept in memory, not in the register)
21651 rts # return; must not fall into the next case
21652 store_fpreg_1:
21653 fmovm.x &0x80, EXC_FP1(%a6) # fp0 -> EXC_FP1 slot
21654 rts
21655 store_fpreg_2:
21656 fmovm.x &0x01, -(%sp) # push fp0 (predecrement form uses the reversed mask)
21657 fmovm.x (%sp)+, &0x20 # pop it into fp2
21658 rts
21659 store_fpreg_3:
21660 fmovm.x &0x01, -(%sp) # push fp0
21661 fmovm.x (%sp)+, &0x10 # pop it into fp3
21662 rts
21663 store_fpreg_4:
21664 fmovm.x &0x01, -(%sp) # push fp0
21665 fmovm.x (%sp)+, &0x08 # pop it into fp4
21666 rts
21667 store_fpreg_5:
21668 fmovm.x &0x01, -(%sp) # push fp0
21669 fmovm.x (%sp)+, &0x04 # pop it into fp5
21670 rts
21671 store_fpreg_6:
21672 fmovm.x &0x01, -(%sp) # push fp0
21673 fmovm.x (%sp)+, &0x02 # pop it into fp6
21674 rts
21675 store_fpreg_7:
21676 fmovm.x &0x01, -(%sp) # push fp0
21677 fmovm.x (%sp)+, &0x01 # pop it into fp7
21678 rts
21680 #########################################################################
21681 # XDEF **************************************************************** #
21682 # _denorm(): denormalize an intermediate result #
21684 # XREF **************************************************************** #
21685 # None #
21687 # INPUT *************************************************************** #
21688 # a0 = points to the operand to be denormalized #
21689 # (in the internal extended format) #
21691 # d0 = rounding precision #
21693 # OUTPUT ************************************************************** #
21694 # a0 = pointer to the denormalized result #
21695 # (in the internal extended format) #
21697 # d0 = guard,round,sticky #
21699 # ALGORITHM *********************************************************** #
21700 # According to the exponent underflow threshold for the given #
21701 # precision, shift the mantissa bits to the right in order to raise the #
21702 # exponent of the operand to the threshold value. While shifting the #
21703 # mantissa bits right, maintain the value of the guard, round, and #
21704 # sticky bits. #
21705 # other notes: #
21706 # (1) _denorm() is called by the underflow routines #
21707 # (2) _denorm() does NOT affect the status register #
21709 #########################################################################
21712 # table of exponent threshold values for each precision
21714 tbl_thresh:
21715 short 0x0 # entry 0 (presumably ext precision -- confirm)
21716 short sgl_thresh # entry 1
21717 short dbl_thresh # entry 2
21719 global _denorm
21720 _denorm:
21722 # Load the exponent threshold for the precision selected and check
21723 # to see if (threshold - exponent) is > 65 in which case we can
21724 # simply calculate the sticky bit and zero the mantissa. otherwise
21725 # we have to call the denormalization routine.
21727 lsr.b &0x2, %d0 # shift prec to lo bits
21728 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
21729 mov.w %d1, %d0 # copy d1 into d0
21730 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
21731 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
21732 bpl.b denorm_set_stky # yes; just calc sticky
21734 clr.l %d0 # clear g,r,s
21735 btst &inex2_bit, FPSR_EXCEPT(%a6) # no; was INEX2 already set?
21736 beq.b denorm_call # no; don't change anything
21737 bset &29, %d0 # yes; set sticky bit
21739 denorm_call:
21740 bsr.l dnrm_lp # denormalize the number
21741 rts # d0 = g,r,s returned by dnrm_lp
21744 # all bit would have been shifted off during the denorm so simply
21745 # calculate if the sticky should be set and clear the entire mantissa.
21747 denorm_set_stky:
21748 mov.l &0x20000000, %d0 # set sticky bit in return value
21749 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
21750 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21751 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21752 rts # must not fall into dnrm_lp
21755 # dnrm_lp(): normalize exponent/mantissa to specified threshold #
21757 # INPUT: #
21758 # %a0 : points to the operand to be denormalized #
21759 # %d0{31:29} : initial guard,round,sticky #
21760 # %d1{15:0} : denormalization threshold #
21761 # OUTPUT: #
21762 # %a0 : points to the denormalized operand #
21763 # %d0{31:29} : final guard,round,sticky #
21766 # *** Local Equates *** #
21767 set GRS, L_SCR2 # g,r,s temp storage
21768 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
21770 global dnrm_lp
21771 dnrm_lp:
21774 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
21775 # in memory so as to make the bitfield extraction for denormalization easier.
21777 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
21778 mov.l %d0, GRS(%a6) # place g,r,s after it
21781 # check to see how much less than the underflow threshold the operand
21782 # exponent is.
21784 mov.l %d1, %d0 # copy the denorm threshold
21785 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
21786 ble.b dnrm_no_lp # d1 <= 0
21787 cmpi.w %d1, &0x20 # is ( 0 < d1 < 32) ?
21788 blt.b case_1 # yes
21789 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
21790 blt.b case_2 # yes
21791 bra.w case_3 # (d1 >= 64)
21794 # No normalization necessary
21796 dnrm_no_lp:
21797 mov.l GRS(%a6), %d0 # restore original g,r,s
21798 rts # operand untouched; must not fall into case_1
21801 # case (0<d1<32)
21803 # %d0 = denorm threshold
21804 # %d1 = "n" = amt to shift
21806 # ---------------------------------------------------------
21807 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21808 # ---------------------------------------------------------
21809 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21810 # \ \ \ \
21811 # \ \ \ \
21812 # \ \ \ \
21813 # \ \ \ \
21814 # \ \ \ \
21815 # \ \ \ \
21816 # \ \ \ \
21817 # \ \ \ \
21818 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
21819 # ---------------------------------------------------------
21820 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
21821 # ---------------------------------------------------------
21823 case_1:
21824 mov.l %d2, -(%sp) # create temp storage
21826 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21827 mov.l &32, %d0
21828 sub.w %d1, %d0 # %d0 = 32 - %d1
21830 cmpi.w %d1, &29 # is shft amt >= 29
21831 blt.b case1_extract # no; no fix needed
21832 mov.b GRS(%a6), %d2 # yes; fold the incoming g,r,s byte into
21833 or.b %d2, 3+FTEMP_LO2(%a6) # the low byte of the FTEMP_LO copy
21835 case1_extract:
21836 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
21837 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
21838 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
21840 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
21841 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
21843 bftst %d0{&2:&30} # were bits shifted off?
21844 beq.b case1_sticky_clear # no; go finish
21845 bset &rnd_stky_bit, %d0 # yes; set sticky bit
21847 case1_sticky_clear:
21848 and.l &0xe0000000, %d0 # clear all but G,R,S
21849 mov.l (%sp)+, %d2 # restore temp register
21850 rts # d0{31:29} = g,r,s; must not fall into case_2
21853 # case (32<=d1<64)
21855 # %d0 = denorm threshold
21856 # %d1 = "n" = amt to shift
21858 # ---------------------------------------------------------
21859 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21860 # ---------------------------------------------------------
21861 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
21862 # \ \ \
21863 # \ \ \
21864 # \ \ -------------------
21865 # \ -------------------- \
21866 # ------------------- \ \
21867 # \ \ \
21868 # \ \ \
21869 # \ \ \
21870 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
21871 # ---------------------------------------------------------
21872 # |0...............0|0....0| NEW_LO |grs |
21873 # ---------------------------------------------------------
21875 case_2:
21876 mov.l %d2, -(%sp) # create temp storage
21878 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
21879 subi.w &0x20, %d1 # %d1 now between 0 and 31
21880 mov.l &0x20, %d0
21881 sub.w %d1, %d0 # %d0 = 32 - %d1
21883 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
21884 # the number of bits to check for the sticky detect.
21885 # it only plays a role in shift amounts of 61-63.
21886 mov.b GRS(%a6), %d2
21887 or.b %d2, 3+FTEMP_LO2(%a6)
21889 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
21890 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
21892 bftst %d1{&2:&30} # were any bits shifted off?
21893 bne.b case2_set_sticky # yes; set sticky bit
21894 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
21895 bne.b case2_set_sticky # yes; set sticky bit
21897 mov.l %d1, %d0 # move new G,R,S to %d0
21898 bra.b case2_end
21900 case2_set_sticky:
21901 mov.l %d1, %d0 # move new G,R,S to %d0
21902 bset &rnd_stky_bit, %d0 # set sticky bit
21904 case2_end:
21905 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
21906 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
21907 and.l &0xe0000000, %d0 # clear all but G,R,S
21909 mov.l (%sp)+,%d2 # restore temp register
21910 rts # d0{31:29} = g,r,s; must not fall into case_3
21913 # case (d1>=64)
21915 # %d0 = denorm threshold
21916 # %d1 = amt to shift
21918 case_3:
21919 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
21921 cmpi.w %d1, &65 # is shift amt > 65?
21922 blt.b case3_64 # no; it's == 64
21923 beq.b case3_65 # no; it's == 65
21926 # case (d1>65)
21928 # Shift value is > 65 and out of range. All bits are shifted off.
21929 # Return a zero mantissa with the sticky bit set
21931 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
21932 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
21933 mov.l &0x20000000, %d0 # set sticky bit
21934 rts # must not fall into case3_64
21937 # case (d1 == 64)
21939 # ---------------------------------------------------------
21940 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21941 # ---------------------------------------------------------
21942 # <-------(32)------>
21943 # \ \
21944 # \ \
21945 # \ \
21946 # \ ------------------------------
21947 # ------------------------------- \
21948 # \ \
21949 # \ \
21950 # \ \
21951 # <-------(32)------>
21952 # ---------------------------------------------------------
21953 # |0...............0|0................0|grs |
21954 # ---------------------------------------------------------
21956 case3_64:
21957 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
21958 mov.l %d0, %d1 # make a copy
21959 and.l &0xc0000000, %d0 # extract G,R
21960 and.l &0x3fffffff, %d1 # extract other bits
21962 bra.b case3_complete
21965 # case (d1 == 65)
21967 # ---------------------------------------------------------
21968 # | FTEMP_HI | FTEMP_LO |grs000.........000|
21969 # ---------------------------------------------------------
21970 # <-------(32)------>
21971 # \ \
21972 # \ \
21973 # \ \
21974 # \ ------------------------------
21975 # -------------------------------- \
21976 # \ \
21977 # \ \
21978 # \ \
21979 # <-------(31)----->
21980 # ---------------------------------------------------------
21981 # |0...............0|0................0|0rs |
21982 # ---------------------------------------------------------
21984 case3_65:
21985 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
mov.l %d0, %d1 # make a copy; d1 still held the shift count (65) here, so masking it always looked non-zero
21986 and.l &0x80000000, %d0 # extract R bit
21987 lsr.l &0x1, %d0 # shift high bit into R bit
21988 and.l &0x7fffffff, %d1 # extract other bits
21990 case3_complete:
21991 # last operation done was an "and" of the bits shifted off so the condition
21992 # codes are already set so branch accordingly.
21993 bne.b case3_set_sticky # yes; go set new sticky
21994 tst.l FTEMP_LO(%a0) # were any bits shifted off?
21995 bne.b case3_set_sticky # yes; go set new sticky
21996 tst.b GRS(%a6) # were any bits shifted off?
21997 bne.b case3_set_sticky # yes; go set new sticky
22000 # no bits were shifted off so don't set the sticky bit.
22001 # d0 already holds the new guard/round bits and
22002 # the entire mantissa is zero.
22004 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22005 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22006 rts # must not fall into case3_set_sticky
22009 # some bits were shifted off so set the sticky bit.
22010 # the entire mantissa is zero.
22012 case3_set_sticky:
22013 bset &rnd_stky_bit,%d0 # set new sticky bit
22014 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
22015 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
22016 rts
22018 #########################################################################
22019 # XDEF **************************************************************** #
22020 # _round(): round result according to precision/mode #
22022 # XREF **************************************************************** #
22023 # None #
22025 # INPUT *************************************************************** #
22026 # a0 = ptr to input operand in internal extended format #
22027 # d1(hi) = contains rounding precision: #
22028 # ext = $0000xxxx #
22029 # sgl = $0004xxxx #
22030 # dbl = $0008xxxx #
22031 # d1(lo) = contains rounding mode: #
22032 # RN = $xxxx0000 #
22033 # RZ = $xxxx0001 #
22034 # RM = $xxxx0002 #
22035 # RP = $xxxx0003 #
22036 # d0{31:29} = contains the g,r,s bits (extended) #
22038 # OUTPUT ************************************************************** #
22039 # a0 = pointer to rounded result #
22041 # ALGORITHM *********************************************************** #
22042 # On return the value pointed to by a0 is correctly rounded, #
22043 # a0 is preserved and the g-r-s bits in d0 are cleared. #
22044 # The result is not typed - the tag field is invalid. The #
22045 # result is still in the internal extended format. #
22047 # The INEX bit of USER_FPSR will be set if the rounded result was #
22048 # inexact (i.e. if any of the g-r-s bits were set). #
22050 #########################################################################
22052 global _round
22053 _round:
22055 # ext_grs() looks at the rounding precision and sets the appropriate
22056 # G,R,S bits.
22057 # If (G,R,S == 0) then result is exact and round is done, else set
22058 # the inex flag in status reg and continue.
22060 bsr.l ext_grs # extract G,R,S
22062 tst.l %d0 # are G,R,S zero?
22063 beq.w truncate # yes; round is complete
22065 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
22068 # Use rounding mode as an index into a jump table for these modes.
22069 # All of the following assumes grs != 0.
22071 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset (a1 is clobbered)
22072 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
22074 tbl_mode:
22075 short rnd_near - tbl_mode
22076 short truncate - tbl_mode # RZ always truncates
22077 short rnd_mnus - tbl_mode
22078 short rnd_plus - tbl_mode
22080 #################################################################
22081 # ROUND PLUS INFINITY #
22083 # If sign of fp number = 0 (positive), then add 1 to l. #
22084 #################################################################
22085 rnd_plus:
22086 tst.b FTEMP_SGN(%a0) # check for sign
22087 bmi.w truncate # if negative then truncate
22089 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22090 swap %d1 # set up d1 for round prec.
22092 cmpi.b %d1, &s_mode # is prec = sgl?
22093 beq.w add_sgl # yes
22094 bgt.w add_dbl # no; it's dbl
22095 bra.w add_ext # no; it's ext
22097 #################################################################
22098 # ROUND MINUS INFINITY #
22100 # If sign of fp number = 1 (negative), then add 1 to l. #
22101 #################################################################
22102 rnd_mnus:
22103 tst.b FTEMP_SGN(%a0) # check for sign
22104 bpl.w truncate # if positive then truncate
22106 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
22107 swap %d1 # set up d1 for round prec.
22109 cmpi.b %d1, &s_mode # is prec = sgl?
22110 beq.w add_sgl # yes
22111 bgt.w add_dbl # no; it's dbl
22112 bra.w add_ext # no; it's ext
22114 #################################################################
22115 # ROUND NEAREST #
22117 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
22118 # Note that this will round to even in case of a tie. #
22119 #################################################################
22120 rnd_near:
22121 asl.l &0x1, %d0 # shift g-bit to c-bit; d0 now holds only r,s
22122 bcc.w truncate # if (g=0) then truncate
22124 swap %d1 # set up d1 for round prec.
22126 cmpi.b %d1, &s_mode # is prec = sgl?
22127 beq.w add_sgl # yes
22128 bgt.w add_dbl # no; it's dbl
22129 bra.w add_ext # no; it's ext
22131 # *** LOCAL EQUATES ***
22132 set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
22133 set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
22135 #########################
22136 # ADD SINGLE #
22137 #########################
22138 add_sgl:
22139 add.l &ad_1_sgl, FTEMP_HI(%a0)
22140 bcc.b scc_clr # no mantissa overflow
22141 roxr.w FTEMP_HI(%a0) # shift v-bit back in
22142 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
22143 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
22144 scc_clr:
22145 tst.l %d0 # test for rs = 0
22146 bne.b sgl_done
22147 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
22148 sgl_done:
22149 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
22150 clr.l FTEMP_LO(%a0) # clear d2
22151 rts # must not fall into add_ext
22153 #########################
22154 # ADD EXTENDED #
22155 #########################
22156 add_ext:
22157 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
22158 bcc.b xcc_clr # test for carry out
22159 addq.l &1,FTEMP_HI(%a0) # propagate carry
22160 bcc.b xcc_clr
22161 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22162 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22163 roxr.w FTEMP_LO(%a0)
22164 roxr.w FTEMP_LO+2(%a0)
22165 add.w &0x1,FTEMP_EX(%a0) # and inc exp
22166 xcc_clr:
22167 tst.l %d0 # test rs = 0
22168 bne.b add_ext_done
22169 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
22170 add_ext_done:
22171 rts # must not fall into add_dbl
22173 #########################
22174 # ADD DOUBLE #
22175 #########################
22176 add_dbl:
22177 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
22178 bcc.b dcc_clr # no carry
22179 addq.l &0x1, FTEMP_HI(%a0) # propagate carry
22180 bcc.b dcc_clr # no carry
22182 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
22183 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
22184 roxr.w FTEMP_LO(%a0)
22185 roxr.w FTEMP_LO+2(%a0)
22186 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
22187 dcc_clr:
22188 tst.l %d0 # test for rs = 0
22189 bne.b dbl_done
22190 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
22192 dbl_done:
22193 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
22194 rts # must not fall into truncate (it would swap d1 again)
22196 ###########################
22197 # Truncate all other bits #
22198 ###########################
22199 truncate:
22200 swap %d1 # select rnd prec
22202 cmpi.b %d1, &s_mode # is prec sgl?
22203 beq.w sgl_done # yes
22204 bgt.b dbl_done # no; it's dbl
22205 rts # no; it's ext
22209 # ext_grs(): extract guard, round and sticky bits according to
22210 # rounding precision.
22212 # INPUT
22213 # d0 = extended precision g,r,s (in d0{31:29})
22214 # d1 = {PREC,ROUND}
22215 # OUTPUT
22216 # d0{31:29} = guard, round, sticky
22218 # ext_grs extracts the guard/round/sticky bits according to the
22219 # selected rounding precision. It is called by the round subroutine
22220 # only. All registers except d0 are kept intact. d0 becomes an
22221 # updated guard,round,sticky in d0{31:29}
22223 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
22224 # prior to usage, and needs to restore d1 to original. this
22225 # routine is tightly tied to the round routine and not meant to
22226 # uphold standard subroutine calling practices.
22229 ext_grs:
22230 swap %d1 # have d1.w point to round precision
22231 tst.b %d1 # is rnd prec = extended?
22232 bne.b ext_grs_not_ext # no; go handle sgl or dbl
22235 # %d0 actually already hold g,r,s since _round() had it before calling
22236 # this function. so, as long as we don't disturb it, we are "returning" it.
22238 ext_grs_ext:
22239 swap %d1 # yes; return to correct positions
22240 rts # must not fall into the sgl/dbl path (it pushes d2/d3)
22242 ext_grs_not_ext:
22243 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
22245 cmpi.b %d1, &s_mode # is rnd prec = sgl?
22246 bne.b ext_grs_dbl # no; go handle dbl
22249 # sgl:
22250 # 96 64 40 32 0
22251 # -----------------------------------------------------
22252 # | EXP |XXXXXXX| |xx | |grs|
22253 # -----------------------------------------------------
22254 # <--(24)--->nn\ /
22255 # ee ---------------------
22256 # ww |
22258 # gr new sticky
22260 ext_grs_sgl:
22261 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
22262 mov.l &30, %d2 # of the sgl prec. limits
22263 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
22264 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
22265 and.l &0x0000003f, %d2 # s bit is the or of all other
22266 bne.b ext_grs_st_stky # bits to the right of g-r
22267 tst.l FTEMP_LO(%a0) # test lower mantissa
22268 bne.b ext_grs_st_stky # if any are set, set sticky
22269 tst.l %d0 # test original g,r,s
22270 bne.b ext_grs_st_stky # if any are set, set sticky
22271 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
22274 # dbl:
22275 # 96 64 32 11 0
22276 # -----------------------------------------------------
22277 # | EXP |XXXXXXX| | |xx |grs|
22278 # -----------------------------------------------------
22279 # nn\ /
22280 # ee -------
22281 # ww |
22283 # gr new sticky
22285 ext_grs_dbl:
22286 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
22287 mov.l &30, %d2 # of the dbl prec. limits
22288 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
22289 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
22290 and.l &0x000001ff, %d2 # s bit is the or-ing of all
22291 bne.b ext_grs_st_stky # other bits to the right of g-r
22292 tst.l %d0 # test word original g,r,s
22293 bne.b ext_grs_st_stky # if any are set, set sticky
22294 bra.b ext_grs_end_sd # if clear, exit
22296 ext_grs_st_stky:
22297 bset &rnd_stky_bit, %d3 # set sticky bit
22298 ext_grs_end_sd:
22299 mov.l %d3, %d0 # return grs to d0
22301 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
22303 swap %d1 # restore d1 to original
22304 rts
22306 #########################################################################
22307 # norm(): normalize the mantissa of an extended precision input. the #
22308 # input operand should not be normalized already. #
22310 # XDEF **************************************************************** #
22311 # norm() #
22313 # XREF **************************************************************** #
22314 # none #
22316 # INPUT *************************************************************** #
22317 # a0 = pointer fp extended precision operand to normalize #
22319 # OUTPUT ************************************************************** #
22320 # d0 = number of bit positions the mantissa was shifted #
22321 # a0 = the input operand's mantissa is normalized; the exponent #
22322 # is unchanged. #
22324 #########################################################################
22325 global norm
22326 norm:
22327 mov.l %d2, -(%sp) # create some temp regs
22328 mov.l %d3, -(%sp)
22330 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
22331 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa); d1 is scratch and is not restored
22333 bfffo %d0{&0:&32}, %d2 # how many places to shift?
22334 beq.b norm_lo # hi(man) is all zeroes!
22336 norm_hi:
22337 lsl.l %d2, %d0 # left shift hi(man)
22338 bfextu %d1{&0:%d2}, %d3 # extract lo bits (top d2 bits of lo)
22340 or.l %d3, %d0 # create hi(man)
22341 lsl.l %d2, %d1 # create lo(man)
22343 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22344 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
22346 mov.l %d2, %d0 # return shift amount
22348 mov.l (%sp)+, %d3 # restore temp regs
22349 mov.l (%sp)+, %d2
22351 rts # must not fall into norm_lo (stack is already popped)
22353 norm_lo:
22354 bfffo %d1{&0:&32}, %d2 # how many places to shift?
22355 lsl.l %d2, %d1 # shift lo(man)
22356 add.l &32, %d2 # add 32 to shft amount
22358 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
22359 clr.l FTEMP_LO(%a0) # lo(man) is now zero
22361 mov.l %d2, %d0 # return shift amount
22363 mov.l (%sp)+, %d3 # restore temp regs
22364 mov.l (%sp)+, %d2
22366 rts
22368 #########################################################################
22369 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
22370 # - returns corresponding optype tag #
22372 # XDEF **************************************************************** #
22373 # unnorm_fix() #
22375 # XREF **************************************************************** #
22376 # norm() - normalize the mantissa #
22378 # INPUT *************************************************************** #
22379 # a0 = pointer to unnormalized extended precision number #
22381 # OUTPUT ************************************************************** #
22382 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
22383 # a0 = input operand has been converted to a norm, denorm, or #
22384 # zero; both the exponent and mantissa are changed. #
22386 #########################################################################
22388 global unnorm_fix
22389 unnorm_fix:
22390 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
22391 bne.b unnorm_shift # hi(man) is not all zeroes
22394 # hi(man) is all zeroes so see if any bits in lo(man) are set
22396 unnorm_chk_lo:
22397 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
22398 beq.w unnorm_zero # yes
22400 add.w &32, %d0 # no; fix shift distance
22403 # d0 = # shifts needed for complete normalization
22405 unnorm_shift:
22406 clr.l %d1 # clear top word
22407 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22408 and.w &0x7fff, %d1 # strip off sgn
22410 cmp.w %d0, %d1 # will denorm push exp < 0?
22411 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
22414 # exponent would not go < 0. therefore, number stays normalized
22416 sub.w %d0, %d1 # shift exponent value
22417 mov.w FTEMP_EX(%a0), %d0 # load old exponent
22418 and.w &0x8000, %d0 # save old sign
22419 or.w %d0, %d1 # {sgn,new exp}
22420 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
22422 bsr.l norm # normalize UNNORM
22424 mov.b &NORM, %d0 # return new optype tag (in d0.b)
22425 rts # must not fall into unnorm_nrm_zero
22428 # exponent would go < 0, so only denormalize until exp = 0
22430 unnorm_nrm_zero:
22431 cmp.b %d1, &32 # is exp <= 32?
22432 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
22434 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
22435 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
22437 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22438 lsl.l %d1, %d0 # extract new lo(man)
22439 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
22441 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22443 mov.b &DENORM, %d0 # return new optype tag (in d0.b)
22444 rts # must not fall into unnorm_nrm_zero_lrg
22447 # only mantissa bits set are in lo(man)
22449 unnorm_nrm_zero_lrg:
22450 sub.w &32, %d1 # adjust shft amt by 32
22452 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
22453 lsl.l %d1, %d0 # left shift lo(man)
22455 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
22456 clr.l FTEMP_LO(%a0) # lo(man) = 0
22458 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
22460 mov.b &DENORM, %d0 # return new optype tag (in d0.b)
22461 rts # must not fall into unnorm_zero
22464 # whole mantissa is zero so this UNNORM is actually a zero
22466 unnorm_zero:
22467 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
22469 mov.b &ZERO, %d0 # fix optype tag (in d0.b)
22470 rts
22472 #########################################################################
22473 # XDEF **************************************************************** #
22474 # set_tag_x(): return the optype of the input ext fp number #
22476 # XREF **************************************************************** #
22477 # None #
22479 # INPUT *************************************************************** #
22480 # a0 = pointer to extended precision operand #
22482 # OUTPUT ************************************************************** #
22483 # d0 = value of type tag #
22484 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
22486 # ALGORITHM *********************************************************** #
22487 # Simply test the exponent, j-bit, and mantissa values to #
22488 # determine the type of operand. #
22489 # If it's an unnormalized zero, alter the operand and force it #
22490 # to be a normal zero. #
22492 #########################################################################
22494 global set_tag_x
22495 set_tag_x:
22496 mov.w FTEMP_EX(%a0), %d0 # extract exponent
22497 andi.w &0x7fff, %d0 # strip off sign
22498 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
22499 beq.b inf_or_nan_x
22500 not_inf_or_nan_x:
22501 btst &0x7,FTEMP_HI(%a0) # test msb of hi(mantissa) (the j-bit)
22502 beq.b not_norm_x # clear; not a NORM
22503 is_norm_x:
22504 mov.b &NORM, %d0
22505 rts # return tag in d0.b
22506 not_norm_x:
22507 tst.w %d0 # is exponent = 0?
22508 bne.b is_unnorm_x
22509 not_unnorm_x:
22510 tst.l FTEMP_HI(%a0) # exp = 0: any mantissa bit set means DENORM
22511 bne.b is_denorm_x
22512 tst.l FTEMP_LO(%a0)
22513 bne.b is_denorm_x
22514 is_zero_x:
22515 mov.b &ZERO, %d0
22516 rts
22517 is_denorm_x:
22518 mov.b &DENORM, %d0
22519 rts
22520 # must distinguish now "Unnormalized zeroes" which we
22521 # must convert to zero.
22522 is_unnorm_x:
22523 tst.l FTEMP_HI(%a0)
22524 bne.b is_unnorm_reg_x
22525 tst.l FTEMP_LO(%a0)
22526 bne.b is_unnorm_reg_x
22527 # it's an "unnormalized zero". let's convert it to an actual zero...
22528 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
22529 mov.b &ZERO, %d0
22530 rts
22531 is_unnorm_reg_x:
22532 mov.b &UNNORM, %d0
22533 rts
22534 inf_or_nan_x:
22535 tst.l FTEMP_LO(%a0) # any lo(mantissa) bit set means NAN
22536 bne.b is_nan_x
22537 mov.l FTEMP_HI(%a0), %d0
22538 and.l &0x7fffffff, %d0 # msb is a don't care!
22539 bne.b is_nan_x
22540 is_inf_x:
22541 mov.b &INF, %d0
22542 rts
22543 is_nan_x:
22544 btst &0x6, FTEMP_HI(%a0) # bit below the j-bit: set = QNAN, clear = SNAN
22545 beq.b is_snan_x
22546 mov.b &QNAN, %d0
22547 rts
22548 is_snan_x:
22549 mov.b &SNAN, %d0
22550 rts
22552 #########################################################################
22553 # XDEF **************************************************************** #
22554 # set_tag_d(): return the optype of the input dbl fp number #
22556 # XREF **************************************************************** #
22557 # None #
22559 # INPUT *************************************************************** #
22560 # a0 = points to double precision operand #
22562 # OUTPUT ************************************************************** #
22563 # d0 = value of type tag #
22564 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22566 # ALGORITHM *********************************************************** #
22567 # Simply test the exponent, j-bit, and mantissa values to #
22568 # determine the type of operand. #
22570 #########################################################################
22572 global set_tag_d
22573 set_tag_d:
22574 mov.l FTEMP(%a0), %d0 # fetch first longword of the operand
22575 mov.l %d0, %d1 # keep a copy for the fraction tests
22577 andi.l &0x7ff00000, %d0 # isolate the exponent field
22578 beq.b zero_or_denorm_d # exp = 0
22580 cmpi.l %d0, &0x7ff00000 # is exponent all ones?
22581 beq.b inf_or_nan_d # yes
22583 is_norm_d:
22584 mov.b &NORM, %d0
22585 rts # return tag in d0.b
22586 zero_or_denorm_d:
22587 and.l &0x000fffff, %d1 # any fraction bit in the first longword?
22588 bne is_denorm_d
22589 tst.l 4+FTEMP(%a0) # any fraction bit in the second longword?
22590 bne is_denorm_d
22591 is_zero_d:
22592 mov.b &ZERO, %d0
22593 rts
22594 is_denorm_d:
22595 mov.b &DENORM, %d0
22596 rts
22597 inf_or_nan_d:
22598 and.l &0x000fffff, %d1 # any fraction bit in the first longword?
22599 bne is_nan_d
22600 tst.l 4+FTEMP(%a0) # any fraction bit in the second longword?
22601 bne is_nan_d
22602 is_inf_d:
22603 mov.b &INF, %d0
22604 rts
22605 is_nan_d:
22606 btst &19, %d1 # top fraction bit: set = QNAN, clear = SNAN
22607 bne is_qnan_d
22608 is_snan_d:
22609 mov.b &SNAN, %d0
22610 rts
22611 is_qnan_d:
22612 mov.b &QNAN, %d0
22613 rts
22615 #########################################################################
22616 # XDEF **************************************************************** #
22617 # set_tag_s(): return the optype of the input sgl fp number #
22619 # XREF **************************************************************** #
22620 # None #
22622 # INPUT *************************************************************** #
22623 # a0 = pointer to single precision operand #
22625 # OUTPUT ************************************************************** #
22626 # d0 = value of type tag #
22627 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
22629 # ALGORITHM *********************************************************** #
22630 # Simply test the exponent, j-bit, and mantissa values to #
22631 # determine the type of operand. #
22633 #########################################################################
# set_tag_s(): classify the single precision operand that a0 points to.
#   in:  a0   = pointer to the operand
#   out: d0.b = NORM, ZERO, DENORM, INF, SNAN or QNAN
#   d1 is used as scratch for the fraction tests.
# Each classification leaf loads its tag and returns immediately.
22635 global set_tag_s
22636 set_tag_s:
22637 mov.l FTEMP(%a0), %d0 # fetch the whole single precision value
22638 mov.l %d0, %d1 # keep a copy for the fraction tests
22640 andi.l &0x7f800000, %d0 # isolate the exponent field
22641 beq.b zero_or_denorm_s # exponent is all zeroes
22643 cmpi.l %d0, &0x7f800000 # exponent all ones?
22644 beq.b inf_or_nan_s # yes
22646 is_norm_s:
22647 mov.b &NORM, %d0
22648 rts # return tag in d0
22649 zero_or_denorm_s:
22650 and.l &0x007fffff, %d1 # any fraction bits set?
22651 bne is_denorm_s # yes
22652 is_zero_s:
22653 mov.b &ZERO, %d0
22654 rts # return tag in d0
22655 is_denorm_s:
22656 mov.b &DENORM, %d0
22657 rts # return tag in d0
22658 inf_or_nan_s:
22659 and.l &0x007fffff, %d1 # any fraction bits set?
22660 bne is_nan_s # yes
22661 is_inf_s:
22662 mov.b &INF, %d0
22663 rts # return tag in d0
22664 is_nan_s:
22665 btst &22, %d1 # most significant fraction bit set?
22666 bne is_qnan_s # yes; quiet NAN
22667 is_snan_s:
22668 mov.b &SNAN, %d0
22669 rts # return tag in d0
22670 is_qnan_s:
22671 mov.b &QNAN, %d0
22672 rts # return tag in d0
22674 #########################################################################
22675 # XDEF **************************************************************** #
22676 # unf_res(): routine to produce default underflow result of a #
22677 # scaled extended precision number; this is used by #
22678 # fadd/fdiv/fmul/etc. emulation routines. #
22679 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
22680 # single round prec and extended prec mode. #
22682 # XREF **************************************************************** #
22683 # _denorm() - denormalize according to scale factor #
22684 # _round() - round denormalized number according to rnd prec #
22686 # INPUT *************************************************************** #
22687 # a0 = pointer to extended precision operand #
22688 # d0 = scale factor #
22689 # d1 = rounding precision/mode #
22691 # OUTPUT ************************************************************** #
22692 # a0 = pointer to default underflow result in extended precision #
22693 # d0.b = result FPSR_cc which caller may or may not want to save #
22695 # ALGORITHM *********************************************************** #
22696 # Convert the input operand to "internal format" which means the #
22697 # exponent is extended to 16 bits and the sign is stored in the unused #
22698 # portion of the extended precision operand. Denormalize the number #
22699 # according to the scale factor passed in d0. Then, round the #
22700 # denormalized result. #
22701 # Set the FPSR_exc bits as appropriate but return the cc bits in #
22702 # d0 in case the caller doesn't want to save them (as is the case for #
22703 # fmove out). #
22704 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
22705 # precision and the rounding mode to single. #
22707 #########################################################################
# unf_res(): build the default underflow result in place at (a0).
#   in:  a0 = operand ptr, d0 = scale factor, d1 = rnd prec/mode
#   out: a0 = operand ptr (restored from the stack), d0 = z_bit set if the
#        rounded result is zero, else 0
# Stack while _denorm/_round run: (sp) = saved a0, 4(sp) = saved d1.
22708 global unf_res
22709 unf_res:
22710 mov.l %d1, -(%sp) # save rnd prec,mode on stack
22712 btst &0x7, FTEMP_EX(%a0) # make "internal" format
22713 sne FTEMP_SGN(%a0) # sign is kept in its own byte
22715 mov.w FTEMP_EX(%a0), %d1 # extract exponent
22716 and.w &0x7fff, %d1 # strip the sign bit
22717 sub.w %d0, %d1 # remove the scale factor
22718 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
22720 mov.l %a0, -(%sp) # save operand ptr during calls
22722 mov.l 0x4(%sp),%d0 # pass rnd prec.
22723 andi.w &0x00c0,%d0 # keep only the prec bits
22724 lsr.w &0x4,%d0
22725 bsr.l _denorm # denorm result
22727 mov.l (%sp),%a0 # reload operand ptr
22728 mov.w 0x6(%sp),%d1 # load prec:mode into %d1
22729 andi.w &0xc0,%d1 # extract rnd prec
22730 lsr.w &0x4,%d1
22731 swap %d1 # rnd prec goes in the upper word
22732 mov.w 0x6(%sp),%d1 # load prec:mode again
22733 andi.w &0x30,%d1 # extract rnd mode
22734 lsr.w &0x4,%d1
22735 bsr.l _round # round the denorm
22737 mov.l (%sp)+, %a0 # pop operand ptr
22739 # result is now rounded properly. convert back to normal format
22740 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
22741 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22742 beq.b unf_res_chkifzero # no; result is positive
22743 bset &0x7, FTEMP_EX(%a0) # set result sgn
22744 clr.b FTEMP_SGN(%a0) # clear temp sign
22746 # the number may have become zero after rounding. set ccodes accordingly.
22747 unf_res_chkifzero:
22748 clr.l %d0 # start with no ccodes set
22749 tst.l FTEMP_HI(%a0) # is value now a zero?
22750 bne.b unf_res_cont # no
22751 tst.l FTEMP_LO(%a0)
22752 bne.b unf_res_cont # no
22753 # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
22754 bset &z_bit, %d0 # yes; set zero ccode bit
22756 unf_res_cont:
22759 # can inex1 also be set along with unfl and inex2???
22761 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22763 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
22764 beq.b unf_res_end # no
22765 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
22767 unf_res_end:
22768 add.l &0x4, %sp # clear stack
22769 rts # return; do not run on into unf_res4
22771 # unf_res() for fsglmul() and fsgldiv().
# Same contract as unf_res(), except that the denormalization always uses
# extended precision (d0 = 0) and the rounding always uses s_mode; only the
# rounding mode is taken from the caller's d1.
#   in:  a0 = operand ptr, d0 = scale factor, d1 = rnd prec/mode
#   out: a0 = operand ptr, d0 = z_bit set if the rounded result is zero
22772 global unf_res4
22773 unf_res4:
22774 mov.l %d1,-(%sp) # save rnd prec,mode on stack
22776 btst &0x7,FTEMP_EX(%a0) # make "internal" format
22777 sne FTEMP_SGN(%a0) # sign is kept in its own byte
22779 mov.w FTEMP_EX(%a0),%d1 # extract exponent
22780 and.w &0x7fff,%d1 # strip the sign bit
22781 sub.w %d0,%d1 # remove the scale factor
22782 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
22784 mov.l %a0,-(%sp) # save operand ptr during calls
22786 clr.l %d0 # force rnd prec = ext
22787 bsr.l _denorm # denorm result
22789 mov.l (%sp),%a0 # reload operand ptr
22790 mov.w &s_mode,%d1 # force rnd prec = sgl
22791 swap %d1 # rnd prec goes in the upper word
22792 mov.w 0x6(%sp),%d1 # load rnd mode
22793 andi.w &0x30,%d1 # extract rnd mode
22794 lsr.w &0x4,%d1
22795 bsr.l _round # round the denorm
22797 mov.l (%sp)+,%a0 # pop operand ptr
22799 # result is now rounded properly. convert back to normal format
22800 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
22801 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
22802 beq.b unf_res4_chkifzero # no; result is positive
22803 bset &0x7,FTEMP_EX(%a0) # set result sgn
22804 clr.b FTEMP_SGN(%a0) # clear temp sign
22806 # the number may have become zero after rounding. set ccodes accordingly.
22807 unf_res4_chkifzero:
22808 clr.l %d0 # start with no ccodes set
22809 tst.l FTEMP_HI(%a0) # is value now a zero?
22810 bne.b unf_res4_cont # no
22811 tst.l FTEMP_LO(%a0)
22812 bne.b unf_res4_cont # no
22813 # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
22814 bset &z_bit,%d0 # yes; set zero ccode bit
22816 unf_res4_cont:
22819 # can inex1 also be set along with unfl and inex2???
22821 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
22823 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
22824 beq.b unf_res4_end # no
22825 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
22827 unf_res4_end:
22828 add.l &0x4,%sp # clear stack
22829 rts # return to caller
22831 #########################################################################
22832 # XDEF **************************************************************** #
22833 # ovf_res(): routine to produce the default overflow result of #
22834 # an overflowing number. #
22835 # ovf_res2(): same as above but the rnd mode/prec are passed #
22836 # differently. #
22838 # XREF **************************************************************** #
22839 # none #
22841 # INPUT *************************************************************** #
22842 # d1.b = '-1' => (-); '0' => (+) #
22843 # ovf_res(): #
22844 # d0 = rnd mode/prec #
22845 # ovf_res2(): #
22846 # hi(d0) = rnd prec #
22847 # lo(d0) = rnd mode #
22849 # OUTPUT ************************************************************** #
22850 # a0 = points to extended precision result #
22851 # d0.b = condition code bits #
22853 # ALGORITHM *********************************************************** #
22854 # The default overflow result can be determined by the sign of #
22855 # the result and the rounding mode/prec in effect. These bits are #
22856 # concatenated together to create an index into the default result #
22857 # table. A pointer to the correct result is returned in a0. The #
22858 # resulting condition codes are returned in d0 in case the caller #
22859 # doesn't want FPSR_cc altered (as is the case for fmove out). #
22861 #########################################################################
# ovf_res()/ovf_res2(): select the default overflow result.
# Both entry points build the same 5-bit index in d1/d0:
#   bit 4 = result sign, bits 3:2 = rnd prec, bits 1:0 = rnd mode
# d0 then indexes tbl_ovfl_cc (1 byte per entry) and d1, doubled and scaled
# by 8, indexes tbl_ovfl_result (16 bytes per entry).
#   out: a0 = ptr to the table entry, d0.b = condition code bits
22863 global ovf_res
22864 ovf_res:
22865 andi.w &0x10,%d1 # keep result sign
22866 lsr.b &0x4,%d0 # shift prec/mode
22867 or.b %d0,%d1 # concat the two
22868 mov.w %d1,%d0 # make a copy
22869 lsl.b &0x1,%d1 # multiply d1 by 2
22870 bra.b ovf_res_load
22872 global ovf_res2
22873 ovf_res2:
22874 and.w &0x10, %d1 # keep result sign
22875 or.b %d0, %d1 # insert rnd mode
22876 swap %d0 # bring rnd prec into the low word
22877 or.b %d0, %d1 # insert rnd prec
22878 mov.w %d1, %d0 # make a copy
22879 lsl.b &0x1, %d1 # shift left by 1
22882 # use the rounding mode, precision, and result sign as an index into the
22883 # two tables below to fetch the default result and the result ccodes.
22885 ovf_res_load:
22886 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
22887 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
22889 rts # return before the table data that follows
# Lookup tables for ovf_res()/ovf_res2(). Both are indexed by the value
# those routines build: bit 4 = result sign, bits 3:2 = rnd prec,
# bits 1:0 = rnd mode. Each row below therefore holds the four rounding
# modes (RN, RZ, RM, RP per the tbl_ovfl_result annotations) of one
# sign/precision pair, in the order ext, sgl, dbl, then an unused filler
# row for the fourth precision encoding.
#
# tbl_ovfl_cc: one condition code byte per index. 0x2 appears exactly where
# tbl_ovfl_result holds an INF and 0x8 exactly on the negative entries
# (presumably the FPSR inf and neg ccode bits -- confirm against the
# ccode bit definitions elsewhere in the file).
22891 tbl_ovfl_cc:
22892 byte 0x2, 0x0, 0x0, 0x2
22893 byte 0x2, 0x0, 0x0, 0x2
22894 byte 0x2, 0x0, 0x0, 0x2
22895 byte 0x0, 0x0, 0x0, 0x0
22896 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22897 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
22898 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
# tbl_ovfl_result: one 16-byte entry per index (three longwords of data plus
# one longword of padding), which is why ovf_res doubles the index before
# applying the *8 scale.
22900 tbl_ovfl_result:
22901 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22902 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
22903 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
22904 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22906 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22907 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
22908 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
22909 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
22911 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
22912 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
22913 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
22914 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
# filler entries for the unused fourth precision encoding
22916 long 0x00000000,0x00000000,0x00000000,0x00000000
22917 long 0x00000000,0x00000000,0x00000000,0x00000000
22918 long 0x00000000,0x00000000,0x00000000,0x00000000
22919 long 0x00000000,0x00000000,0x00000000,0x00000000
22921 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22922 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
22923 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22924 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
22926 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22927 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
22928 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22929 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
22931 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
22932 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
22933 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
22934 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
22936 #########################################################################
22937 # XDEF **************************************************************** #
22938 # get_packed(): fetch a packed operand from memory and then #
22939 # convert it to a floating-point binary number. #
22941 # XREF **************************************************************** #
22942 # _dcalc_ea() - calculate the correct <ea> #
22943 # _dmem_read() - fetch the packed operand from memory #
22944 # facc_in_x() - the fetch failed so jump to special exit code #
22945 # decbin() - convert packed to binary extended precision #
22947 # INPUT *************************************************************** #
22948 # None #
22950 # OUTPUT ************************************************************** #
22951 # If no failure on _mem_read(): #
22952 # FP_SRC(a6) = packed operand now as a binary FP number #
22954 # ALGORITHM *********************************************************** #
22955 # Get the correct <ea> which is the value on the exception stack #
22956 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
22957 # Then, fetch the operand from memory. If the fetch fails, exit #
22958 # through facc_in_x(). #
22959 # If the packed operand is a ZERO,NAN, or INF, convert it to #
22960 # its binary representation here. Else, call decbin() which will #
22961 # convert the packed value to an extended precision binary value. #
22963 #########################################################################
22965 # the stacked <ea> for packed is correct except for -(An).
22966 # the base reg must be updated for both -(An) and (An)+.
# get_packed(): read a 12-byte packed operand into FP_SRC(a6) and, unless it
# is an INF, NAN or ZERO (left untouched in FP_SRC), replace it with the
# extended precision value produced by decbin().
# A failed read (d1 != 0 after _dmem_read) leaves through facc_in_x.
22967 global get_packed
22968 get_packed:
22969 mov.l &0xc,%d0 # packed is 12 bytes
22970 bsr.l _dcalc_ea # fetch <ea>; correct An
22972 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
22973 mov.l &0xc,%d0 # pass: 12 bytes
22974 bsr.l _dmem_read # read packed operand
22976 tst.l %d1 # did dfetch fail?
22977 bne.l facc_in_x # yes
22979 # The packed operand is an INF or a NAN if the exponent field is all ones.
22980 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
22981 cmpi.w %d0,&0x7fff # INF or NAN?
22982 bne.b gp_try_zero # no
22983 rts # operand is an INF or NAN
22985 # The packed operand is a zero if the mantissa is all zero, else it's
22986 # a normal packed op.
22987 gp_try_zero:
22988 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
22989 andi.b &0x0f,%d0 # clear all but last nybble
22990 bne.b gp_not_spec # not a zero
22991 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
22992 bne.b gp_not_spec # not a zero
22993 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
22994 bne.b gp_not_spec # not a zero
22995 rts # operand is a ZERO
22996 gp_not_spec:
22997 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
22998 bsr.l decbin # convert to extended
22999 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
23000 rts # done; the decbin tables follow
23002 #########################################################################
23003 # decbin(): Converts normalized packed bcd value pointed to by register #
23004 # a0 to extended-precision value in fp0. #
23006 # INPUT *************************************************************** #
23007 # a0 = pointer to normalized packed bcd value #
23009 # OUTPUT ************************************************************** #
23010 # fp0 = exact fp representation of the packed bcd value. #
23012 # ALGORITHM *********************************************************** #
23013 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
23014 # and NaN operands are dispatched without entering this routine) #
23015 # value in 68881/882 format at location (a0). #
23017 # A1. Convert the bcd exponent to binary by successive adds and #
23018 # muls. Set the sign according to SE. Subtract 16 to compensate #
23019 # for the mantissa which is to be interpreted as 17 integer #
23020 # digits, rather than 1 integer and 16 fraction digits. #
23021 # Note: this operation can never overflow. #
23023 # A2. Convert the bcd mantissa to binary by successive #
23024 # adds and muls in FP0. Set the sign according to SM. #
23025 # The mantissa digits will be converted with the decimal point #
23026 # assumed following the least-significant digit. #
23027 # Note: this operation can never overflow. #
23029 # A3. Count the number of leading/trailing zeros in the #
23030 # bcd string. If SE is positive, count the leading zeros; #
23031 # if negative, count the trailing zeros. Set the adjusted #
23032 # exponent equal to the exponent from A1 and the zero count #
23033 # added if SM = 1 and subtracted if SM = 0. Scale the #
23034 # mantissa the equivalent of forcing in the bcd value: #
23036 # SM = 0 a non-zero digit in the integer position #
23037 # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
23039 # this will insure that any value, regardless of its #
23040 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
23041 # consistently. #
23043 # A4. Calculate the factor 10^exp in FP1 using a table of #
23044 # 10^(2^n) values. To reduce the error in forming factors #
23045 # greater than 10^27, a directed rounding scheme is used with #
23046 # tables rounded to RN, RM, and RP, according to the table #
23047 # in the comments of the pwrten section. #
23049 # A5. Form the final binary number by scaling the mantissa by #
23050 # the exponent factor. This is done by multiplying the #
23051 # mantissa in FP0 by the factor in FP1 if the adjusted #
23052 # exponent sign is positive, and dividing FP0 by FP1 if #
23053 # it is negative. #
23055 # Clean up and return. Check if the final mul or div was inexact. #
23056 # If so, set INEX1 in USER_FPSR. #
23058 #########################################################################
23061 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
23062 # to nearest, minus, and plus, respectively. The tables include
23063 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
23064 # is required until the power is greater than 27, however, all
23065 # tables include the first 5 for ease of indexing.
# RTABLE maps the user's rounding mode and the signs of the packed operand to
# the rounding mode used while forming 10^exp in pwrten. pwrten indexes it
# with (rounding mode field of USER_FPCR * 4) + {SM,SE}, so each row is one
# user rounding mode and each column one SM/SE combination.
# Entry values as decoded by pwrten: bit 0 set selects PTENRP, else bit 1
# set selects PTENRM, else PTENRN. The same value is also written to the
# FPCR rounding mode field.
23067 RTABLE:
23068 byte 0,0,0,0
23069 byte 2,3,2,3
23070 byte 2,3,3,2
23071 byte 3,2,2,3
# Loop parameters for the digit extraction loops in decbin. The counts are
# dbf counts, i.e. one less than the number of digits processed.
23073 set FNIBS,7
23074 set FSTRT,0
23076 set ESTRT,4
23077 set EDIGITS,2
# decbin entry: copy the 12-byte packed operand to FP_SCR0(a6) and point a0
# at that working copy, then save the registers this routine uses.
23079 global decbin
23080 decbin:
23081 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
23082 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
23083 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
23085 lea FP_SCR0(%a6),%a0 # a0 = working copy from here on
23087 movm.l &0x3c00,-(%sp) # save d2-d5
# NOTE(review): the matching restore in end_dec uses mask &0x40 with (%sp)+.
# fmovm register masks are bit-reversed between -(An) and (An)+ addressing;
# confirm that &0x1 here really denotes fp1 and not fp0.
23088 fmovm.x &0x1,-(%sp) # save fp1
23090 # Calculate exponent:
23091 # 1. Copy bcd value in memory for use as a working copy.
23092 # 2. Calculate absolute value of exponent in d1 by mul and add.
23093 # 3. Correct for exponent sign.
23094 # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
23095 # (i.e., all digits assumed left of the decimal point.)
23097 # Register usage:
23099 # calc_e:
23100 # (*) d0: temp digit storage
23101 # (*) d1: accumulator for binary exponent
23102 # (*) d2: digit count
23103 # (*) d3: offset pointer
23104 # ( ) d4: first word of bcd
23105 # ( ) a0: pointer to working bcd value
23106 # ( ) a6: pointer to original bcd value
23107 # (*) FP_SCR1: working copy of original bcd value
23108 # (*) L_SCR1: copy of original exponent word
# calc_e: convert the BCD exponent digits of the first lword to binary in
# d1, apply the SE sign, subtract 16, and push the absolute value of the
# adjusted exponent on the stack (SE is set if it went negative).
23110 calc_e:
23111 mov.l &EDIGITS,%d2 # dbf count for the exponent digits (EDIGITS+1 digits)
23112 mov.l &ESTRT,%d3 # counter to pick up digits
23113 mov.l (%a0),%d4 # get first word of bcd
23114 clr.l %d1 # zero d1 for accumulator
23115 e_gd:
23116 mulu.l &0xa,%d1 # mul partial product by one digit place
23117 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
23118 add.l %d0,%d1 # d1 = d1 + d0
23119 addq.b &4,%d3 # advance d3 to the next digit
23120 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
23121 btst &30,%d4 # get SE
23122 beq.b e_pos # don't negate if pos
23123 neg.l %d1 # negate before subtracting
23124 e_pos:
23125 sub.l &16,%d1 # sub to compensate for shift of mant
23126 bge.b e_save # if still pos, do not neg
23127 neg.l %d1 # now negative, make pos and set SE
23128 or.l &0x40000000,%d4 # set SE in d4,
23129 or.l &0x40000000,(%a0) # and in working bcd
23130 e_save:
23131 mov.l %d1,-(%sp) # save exp on stack
23134 # Calculate mantissa:
23135 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
23136 # 2. Correct for mantissa sign.
23137 # (i.e., all digits assumed left of the decimal point.)
23139 # Register usage:
23141 # calc_m:
23142 # (*) d0: temp digit storage
23143 # (*) d1: lword counter
23144 # (*) d2: digit count
23145 # (*) d3: offset pointer
23146 # ( ) d4: words 2 and 3 of bcd
23147 # ( ) a0: pointer to working bcd value
23148 # ( ) a6: pointer to original bcd value
23149 # (*) fp0: mantissa accumulator
23150 # ( ) FP_SCR1: working copy of original bcd value
23151 # ( ) L_SCR1: copy of original exponent word
# calc_m: accumulate the 17 mantissa digits in fp0 as an integer (one digit
# from the end of lword 1, then eight each from lwords 2 and 3), then give
# fp0 the sign held in the top bit of the working copy.
23153 calc_m:
23154 mov.l &1,%d1 # lword index, init to 1
23155 fmov.s &0x00000000,%fp0 # accumulator
23158 # Since the packed number has a long word between the first & second parts,
23159 # get the integer digit then skip down & get the rest of the
23160 # mantissa. We will unroll the loop once.
23162 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
23163 fadd.b %d0,%fp0 # add digit to sum in fp0
23166 # Get the rest of the mantissa.
23168 loadlw:
23169 mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
23170 mov.l &FSTRT,%d3 # counter to pick up digits
23171 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
23172 md2b:
23173 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
23174 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
23175 fadd.b %d0,%fp0 # fp0 = fp0 + digit
23178 # If all the digits (8) in that long word have been converted (d2=0),
23179 # then inc d1 (=2) to point to the next long word and reset d3 to 0
23180 # to initialize the digit offset, and set d2 to 7 for the digit count;
23181 # else continue with this long word.
23183 addq.b &4,%d3 # advance d3 to the next digit
23184 dbf.w %d2,md2b # check for last digit in this lw
23185 nextlw:
23186 addq.l &1,%d1 # inc lw pointer in mantissa
23187 cmp.l %d1,&2 # test for last lw
23188 ble.b loadlw # if not, get last one
23190 # Check the sign of the mant and make the value in fp0 the same sign.
23192 m_sign:
23193 btst &31,(%a0) # test sign of the mantissa
23194 beq.b ap_st_z # if clear, go to append/strip zeros
23195 fneg.x %fp0 # if set, negate fp0
23197 # Append/strip zeros:
23199 # For adjusted exponents which have an absolute value greater than 27*,
23200 # this routine calculates the amount needed to normalize the mantissa
23201 # for the adjusted exponent. That number is subtracted from the exp
23202 # if the exp was positive, and added if it was negative. The purpose
23203 # of this is to reduce the value of the exponent and the possibility
23204 # of error in calculation of pwrten.
23206 # 1. Branch on the sign of the adjusted exponent.
23207 # 2p.(positive exp)
23208 # 2. Check M16 and the digits in lwords 2 and 3 in descending order.
23209 # 3. Add one for each zero encountered until a non-zero digit.
23210 # 4. Subtract the count from the exp.
23211 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
23212 # and set SE.
23213 # 6. Multiply the mantissa by 10**count.
23214 # 2n.(negative exp)
23215 # 2. Check the digits in lwords 3 and 2 in descending order.
23216 # 3. Add one for each zero encountered until a non-zero digit.
23217 # 4. Add the count to the exp.
23218 # 5. Check if the exp has crossed zero in #3 above; clear SE.
23219 # 6. Divide the mantissa by 10**count.
23221 # *Why 27? If the adjusted exponent is within -28 < expA < 28, then
23222 # any adjustment due to append/strip zeros will drive the resultant
23223 # exponent towards zero. Since all pwrten constants with a power
23224 # of 27 or less are exact, there is no need to use this routine to
23225 # attempt to lessen the resultant exponent.
23227 # Register usage:
23229 # ap_st_z:
23230 # (*) d0: temp digit storage
23231 # (*) d1: zero count
23232 # (*) d2: digit count
23233 # (*) d3: offset pointer
23234 # ( ) d4: first word of bcd
23235 # (*) d5: lword counter
23236 # ( ) a0: pointer to working bcd value
23237 # ( ) FP_SCR1: working copy of original bcd value
23238 # ( ) L_SCR1: copy of original exponent word
23241 # First check the absolute value of the exponent to see if this
23242 # routine is necessary. If so, then check the sign of the exponent
23243 # and do append (+) or strip (-) zeros accordingly.
23244 # This section handles a positive adjusted exponent.
# ap_st_z: when the adjusted exponent saved on the stack exceeds 27, count
# zero digits in the mantissa (leading ones for SE clear, trailing ones for
# SE set), subtract that count from the exponent and scale fp0 by
# 10**count to compensate. Leaves the adjusted exponent in d1 for pwrten.
23246 ap_st_z:
23247 mov.l (%sp),%d1 # load expA for range test
23248 cmp.l %d1,&27 # test is with 27
23249 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
23250 btst &30,(%a0) # check sign of exp
23251 bne.b ap_st_n # if neg, go to neg side
23252 clr.l %d1 # zero count reg
23253 mov.l (%a0),%d4 # load lword 1 to d4
23254 bfextu %d4{&28:&4},%d0 # get M16 in d0
23255 bne.b ap_p_fx # if M16 is non-zero, go fix exp
23256 addq.l &1,%d1 # inc zero count
23257 mov.l &1,%d5 # init lword counter
23258 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
23259 bne.b ap_p_cl # lw 2 is non-zero; go scan its digits
23260 addq.l &8,%d1 # lw 2 is all zero; inc count by 8
23261 addq.l &1,%d5 # inc lword counter
23262 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
23263 ap_p_cl:
23264 clr.l %d3 # init offset reg
23265 mov.l &7,%d2 # init digit counter
23266 ap_p_gd:
23267 bfextu %d4{%d3:&4},%d0 # get digit
23268 bne.b ap_p_fx # if non-zero, go to fix exp
23269 addq.l &4,%d3 # point to next digit
23270 addq.l &1,%d1 # inc digit counter
23271 dbf.w %d2,ap_p_gd # get next digit
23272 ap_p_fx:
23273 mov.l %d1,%d0 # copy counter to d0
23274 mov.l (%sp),%d1 # get adjusted exp from memory
23275 sub.l %d0,%d1 # subtract count from exp
23276 bge.b ap_p_fm # if still pos, go to pwrten
23277 neg.l %d1 # now its neg; get abs
23278 mov.l (%a0),%d4 # load lword 1 to d4
23279 or.l &0x40000000,%d4 # and set SE in d4
23280 or.l &0x40000000,(%a0) # and in memory
23282 # Calculate the mantissa multiplier to compensate for the stripping of
23283 # zeros from the mantissa.
23285 ap_p_fm:
23286 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23287 clr.l %d3 # init table index
23288 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23289 mov.l &3,%d2 # init d2 to count bits in counter
23290 ap_p_el:
23291 asr.l &1,%d0 # shift lsb into carry
23292 bcc.b ap_p_en # bit was 0; skip the mul
23293 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23294 ap_p_en:
23295 add.l &12,%d3 # inc d3 to next 12-byte table entry
23296 tst.l %d0 # check if d0 is zero
23297 bne.b ap_p_el # if not, get next bit
23298 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
23299 bra.b pwrten # go calc pwrten
23301 # This section handles a negative adjusted exponent.
23303 ap_st_n:
23304 clr.l %d1 # clr counter
23305 mov.l &2,%d5 # set up d5 to point to lword 3
23306 mov.l (%a0,%d5.L*4),%d4 # get lword 3
23307 bne.b ap_n_cl # if not zero, check digits
23308 sub.l &1,%d5 # dec d5 to point to lword 2
23309 addq.l &8,%d1 # inc counter by 8
23310 mov.l (%a0,%d5.L*4),%d4 # get lword 2
23311 ap_n_cl:
23312 mov.l &28,%d3 # point to last digit
23313 mov.l &7,%d2 # init digit counter
23314 ap_n_gd:
23315 bfextu %d4{%d3:&4},%d0 # get digit
23316 bne.b ap_n_fx # if non-zero, go to exp fix
23317 subq.l &4,%d3 # point to previous digit
23318 addq.l &1,%d1 # inc digit counter
23319 dbf.w %d2,ap_n_gd # get next digit
23320 ap_n_fx:
23321 mov.l %d1,%d0 # copy counter to d0
23322 mov.l (%sp),%d1 # get adjusted exp from memory
23323 sub.l %d0,%d1 # subtract count from exp
23324 bgt.b ap_n_fm # if still pos, go fix mantissa
23325 neg.l %d1 # take abs of exp and clr SE
23326 mov.l (%a0),%d4 # load lword 1 to d4
23327 and.l &0xbfffffff,%d4 # and clr SE in d4
23328 and.l &0xbfffffff,(%a0) # and in memory
23330 # Calculate the mantissa multiplier to compensate for the appending of
23331 # zeros to the mantissa.
23333 ap_n_fm:
23334 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
23335 clr.l %d3 # init table index
23336 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23337 mov.l &3,%d2 # init d2 to count bits in counter
23338 ap_n_el:
23339 asr.l &1,%d0 # shift lsb into carry
23340 bcc.b ap_n_en # bit was 0; skip the mul
23341 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23342 ap_n_en:
23343 add.l &12,%d3 # inc d3 to next 12-byte table entry
23344 tst.l %d0 # check if d0 is zero
23345 bne.b ap_n_el # if not, get next bit
23346 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
23349 # Calculate power-of-ten factor from adjusted and shifted exponent.
23351 # Register usage:
23353 # pwrten:
23354 # (*) d0: temp
23355 # ( ) d1: exponent
23356 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
23357 # (*) d3: FPCR work copy
23358 # ( ) d4: first word of bcd
23359 # (*) a1: RTABLE pointer
23360 # calc_p:
23361 # (*) d0: temp
23362 # ( ) d1: exponent
23363 # (*) d3: PWRTxx table index
23364 # ( ) a0: pointer to working copy of bcd
23365 # (*) a1: PWRTxx pointer
23366 # (*) fp1: power-of-ten accumulator
23368 # Pwrten calculates the exponent factor in the selected rounding mode
23369 # according to the following table:
23371 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
23373 # ANY ANY RN RN
23375 # + + RP RP
23376 # - + RP RM
23377 # + - RP RM
23378 # - - RP RP
23380 # + + RM RM
23381 # - + RM RP
23382 # + - RM RP
23383 # - - RM RM
23385 # + + RZ RM
23386 # - + RZ RM
23387 # + - RZ RP
23388 # - - RZ RP
# pwrten: choose the rounding mode for the power-of-ten computation from
# RTABLE (indexed by the user's rounding mode and the SM/SE bits of the
# working copy), load it into the FPCR, pick the matching PTENxx table and
# build 10**abs(d1) in fp1 by multiplying the table entries selected by the
# set bits of the exponent.
23391 pwrten:
23392 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
23393 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
23394 mov.l (%a0),%d4 # reload 1st bcd word to d4
23395 asl.l &2,%d2 # format d2 to be
23396 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
23397 add.l %d0,%d2 # in d2 as index into RTABLE
23398 lea.l RTABLE(%pc),%a1 # load rtable base
23399 mov.b (%a1,%d2),%d0 # load new rounding bits from table
23400 clr.l %d3 # clear d3 to force no exc and extended
23401 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
23402 fmov.l %d3,%fpcr # write new FPCR
23403 asr.l &1,%d0 # write correct PTENxx table
23404 bcc.b not_rp # to a1
23405 lea.l PTENRP(%pc),%a1 # it is RP
23406 bra.b calc_p # go to init section
23407 not_rp:
23408 asr.l &1,%d0 # keep checking
23409 bcc.b not_rm
23410 lea.l PTENRM(%pc),%a1 # it is RM
23411 bra.b calc_p # go to init section
23412 not_rm:
23413 lea.l PTENRN(%pc),%a1 # it is RN
23414 calc_p:
23415 mov.l %d1,%d0 # copy exp to d0;use d0
23416 bpl.b no_neg # if exp is negative,
23417 neg.l %d0 # invert it
23418 or.l &0x40000000,(%a0) # and set SE bit
23419 no_neg:
23420 clr.l %d3 # table index
23421 fmov.s &0x3f800000,%fp1 # init fp1 to 1
23422 e_loop:
23423 asr.l &1,%d0 # shift next bit into carry
23424 bcc.b e_next # if zero, skip the mul
23425 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23426 e_next:
23427 add.l &12,%d3 # inc d3 to next 12-byte table entry
23428 tst.l %d0 # check if d0 is zero
23429 bne.b e_loop # not zero, continue shifting
23432 # Check the sign of the adjusted exp and make the value in fp0 the
23433 # same sign. If the exp was pos then multiply fp1*fp0;
23434 # else divide fp0/fp1.
23436 # Register Usage:
23437 # norm:
23438 # ( ) a0: pointer to working bcd value
23439 # (*) fp0: mantissa accumulator
23440 # ( ) fp1: scaling factor - 10**(abs(exp))
# pnorm/end_dec: scale the mantissa in fp0 by the power of ten in fp1
# (divide when SE is set in the working copy, multiply otherwise), fold an
# inexact result into USER_FPSR, then undo the stack usage of decbin and
# return with the result in fp0.
23442 pnorm:
23443 btst &30,(%a0) # test the sign of the exponent
23444 beq.b mul # if clear, go to multiply
23445 div:
23446 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
23447 bra.b end_dec
23448 mul:
23449 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
23452 # Clean up and return with result in fp0.
23454 # If the final mul/div in decbin incurred an inex exception,
23455 # it will be inex2, but will be reported as inex1 by get_op.
23457 end_dec:
23458 fmov.l %fpsr,%d0 # get status register
23459 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
23460 beq.b no_exc # skip this if no exc
23461 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
23462 no_exc:
23463 add.l &0x4,%sp # clear 1 lw param
23464 fmovm.x (%sp)+,&0x40 # restore fp1
23465 movm.l (%sp)+,&0x3c # restore d2-d5
23466 fmov.l &0x0,%fpcr # leave with a cleared FPCR
23467 fmov.l &0x0,%fpsr # and a cleared FPSR
23468 rts # return to the caller of decbin
23470 #########################################################################
23471 # bindec(): Converts an input in extended precision format to bcd format#
23473 # INPUT *************************************************************** #
23474 # a0 = pointer to the input extended precision value in memory. #
23475 # the input may be either normalized, unnormalized, or #
23476 # denormalized. #
23477 # d0 = contains the k-factor sign-extended to 32-bits. #
23479 # OUTPUT ************************************************************** #
23480 # FP_SCR0(a6) = bcd format result on the stack. #
23482 # ALGORITHM *********************************************************** #
23484 # A1. Set RM and size ext; Set SIGMA = sign of input. #
23485 # The k-factor is saved for use in d7. Clear the #
23486 # BINDEC_FLG for separating normalized/denormalized #
23487 # input. If input is unnormalized or denormalized, #
23488 # normalize it. #
23490 # A2. Set X = abs(input). #
23492 # A3. Compute ILOG. #
23493 # ILOG is the log base 10 of the input value. It is #
23494 # approximated by adding e + 0.f when the original #
23495 # value is viewed as 2^^e * 1.f in extended precision. #
23496 # This value is stored in d6. #
23498 # A4. Clr INEX bit. #
23499 # The operation in A3 above may have set INEX2. #
23501 # A5. Set ICTR = 0; #
23502 # ICTR is a flag used in A13. It must be set before the #
23503 # loop entry A6. #
23505 # A6. Calculate LEN. #
23506 # LEN is the number of digits to be displayed. The #
23507 # k-factor can dictate either the total number of digits, #
23508 # if it is a positive number, or the number of digits #
23509 # after the decimal point which are to be included as #
23510 # significant. See the 68882 manual for examples. #
23511 # If LEN is computed to be greater than 17, set OPERR in #
23512 # USER_FPSR. LEN is stored in d4. #
23514 # A7. Calculate SCALE. #
23515 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
23516 # of decimal places needed to insure LEN integer digits #
23517 # in the output before conversion to bcd. LAMBDA is the #
23518 # sign of ISCALE, used in A9. Fp1 contains #
23519 # 10^^(abs(ISCALE)) using a rounding mode which is a #
23520 # function of the original rounding mode and the signs #
23521 # of ISCALE and X. A table is given in the code. #
23523 # A8. Clr INEX; Force RZ. #
23524 # The operation in A3 above may have set INEX2. #
23525 # RZ mode is forced for the scaling operation to insure #
23526 # only one rounding error. The grs bits are collected in #
23527 # the INEX flag for use in A10. #
23529 # A9. Scale X -> Y. #
23530 # The mantissa is scaled to the desired number of #
23531 # significant digits. The excess digits are collected #
23532 # in INEX2. #
23534 # A10. Or in INEX. #
23535 # If INEX is set, round error occurred. This is #
23536 # compensated for by 'or-ing' in the INEX2 flag to #
23537 # the lsb of Y. #
23539 # A11. Restore original FPCR; set size ext. #
23540 # Perform FINT operation in the user's rounding mode. #
23541 # Keep the size to extended. #
23543 # A12. Calculate YINT = FINT(Y) according to user's rounding #
23544 # mode. fint.x is executed inline (the call to the FPSP #
23545 # routine sintdo is commented out). The output is in fp0. #
23547 # A13. Check for LEN digits. #
23548 # If the int operation results in more than LEN digits, #
23549 # or less than LEN -1 digits, adjust ILOG and repeat from #
23550 # A6. This test occurs only on the first pass. If the #
23551 # result is exactly 10^LEN, decrement ILOG and divide #
23552 # the mantissa by 10. #
23554 # A14. Convert the mantissa to bcd. #
23555 # The binstr routine is used to convert the LEN digit #
23556 # mantissa to bcd in memory. The input to binstr is #
23557 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
23558 # such that the decimal point is to the left of bit 63. #
23559 # The bcd digits are stored in the correct position in #
23560 # the final string area in memory. #
23562 # A15. Convert the exponent to bcd. #
23563 # As in A14 above, the exp is converted to bcd and the #
23564 # digits are stored in the final string. #
23565 # Test the length of the final exponent string. If the #
23566 # length is 4, set operr. #
23568 # A16. Write sign bits to final string. #
23570 #########################################################################
# Data used by bindec. BINDEC_FLG aliases EXC_TEMP; bindec accesses it as a
# byte (clr.b in A1, st when the exponent produced by norm_loop is <= 0).
23572 set BINDEC_FLG, EXC_TEMP # DENORM flag
23574 # Constants in extended precision (three data longwords + one pad longword)
23575 PLOG2: # log10(2) ~= 0.30103; A3 multiplier when the estimate is >= 0
23576 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
23577 PLOG2UP1: # same value with the mantissa lsb bumped by one; A3 multiplier when < 0
23578 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
23580 # Constants in single precision (first longword is the value; the rest is padding)
23581 FONE: # 1.0
23582 long 0x3F800000,0x00000000,0x00000000,0x00000000
23583 FTWO: # 2.0 (not referenced by the bindec code shown here)
23584 long 0x40000000,0x00000000,0x00000000,0x00000000
23585 FTEN: # 10.0
23586 long 0x41200000,0x00000000,0x00000000,0x00000000
23587 F4933: # 4933.0
23588 long 0x459A2800,0x00000000,0x00000000,0x00000000
# RBDTBL: rounding mode to use while building 10^|ISCALE| in A7.
# Byte index = (user rmode << 2) | (LAMBDA << 1) | sign(X); one row per
# user rmode in the order RN, RZ, RM, RP. Entry values are FPCR rmode
# codes: 0 = RN, 2 = RM, 3 = RP (see the table in the A7 comments).
23590 RBDTBL:
23591 byte 0,0,0,0
23592 byte 3,3,2,2
23593 byte 3,2,2,3
23594 byte 2,3,3,2
23596 # Implementation Notes:
23598 # The registers are used as follows:
23600 # d0: scratch; LEN input to binstr
23601 # d1: scratch
23602 # d2: upper 32-bits of mantissa for binstr
23603 # d3: scratch;lower 32-bits of mantissa for binstr
23604 # d4: LEN
23605 # d5: LAMBDA/ICTR
23606 # d6: ILOG
23607 # d7: k-factor
23608 # a0: ptr for original operand/final result
23609 # a1: scratch pointer
23610 # a2: pointer to FP_X; abs(original value) in ext
23611 # fp0: scratch
23612 # fp1: scratch
23613 # fp2: scratch
23614 # F_SCR1:
23615 # F_SCR2:
23616 # L_SCR1:
23617 # L_SCR2:
# bindec(): entry point.
#   In:  a0 = pointer to the extended-precision operand in memory,
#        d0 = k-factor, STAG(a6) = operand tag (only compared with DENORM).
#   Out: packed-decimal result in FP_SCR0(a6) (written by A14-A16).
# d2-d7/a2 and fp0-fp2 are pushed here and popped at the end of A16.
# The operand at (a0) is modified in place when it is a DENORM.
23619 global bindec
23620 bindec:
23621 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
23622 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
23624 # A1. Set RM and size ext. Set SIGMA = sign input;
23625 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
23626 # separating normalized/denormalized input. If the input
23627 # is a denormalized number, set the BINDEC_FLG memory word
23628 # to signal denorm. If the input is unnormalized, normalize
23629 # the input and test for denormalized result.
23631 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
23632 mov.l (%a0),L_SCR2(%a6) # save first longword of X (sign/exp) for later sign tests
23633 mov.l %d0,%d7 # move k-factor to d7
23635 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
23636 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
23637 bne.w A2_str # no; input is a NORM
23640 # Normalize the denorm
23642 un_de_norm:
23643 mov.w (%a0),%d0 # d0.w = sign/exponent word of X
23644 and.w &0x7fff,%d0 # strip sign of normalized exp
23645 mov.l 4(%a0),%d1 # d1 = upper 32 bits of mantissa
23646 mov.l 8(%a0),%d2 # d2 = lower 32 bits of mantissa
23647 norm_loop:
23648 sub.w &1,%d0 # one less in the exponent per shift
23649 lsl.l &1,%d2 # shift d1:d2 left by one bit;
23650 roxl.l &1,%d1 # X carries the bit from d2 into d1
23651 tst.l %d1 # has the msb of the mantissa been reached?
23652 bge.b norm_loop # no (bit 31 of d1 clear); shift again
23654 # Test if the normalized input is denormalized
23656 tst.w %d0 # sign of the adjusted exponent
23657 bgt.b pos_exp # if greater than zero, it is a norm
23658 st BINDEC_FLG(%a6) # set flag for denorm
23659 pos_exp:
23660 and.w &0x7fff,%d0 # clear bit 15; a <= 0 exponent is stored without its sign
23661 mov.w %d0,(%a0) # write the adjusted exponent word back (sign bit now 0)
23662 mov.l %d1,4(%a0) # write back the shifted mantissa, upper half
23663 mov.l %d2,8(%a0) # and lower half
23665 # A2. Set X = abs(input).
# A2: copy the 12-byte operand at (a0) to FP_SCR1(a6) and clear bit 31 of
# its first longword, so FP_SCR1 holds abs(X).
23667 A2_str:
23668 mov.l (%a0),FP_SCR1(%a6) # sign/exponent longword
23669 mov.l 4(%a0),FP_SCR1+4(%a6) # upper 32 bits of mantissa
23670 mov.l 8(%a0),FP_SCR1+8(%a6) # lower 32 bits of mantissa
23671 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
23673 # A3. Compute ILOG.
23674 # ILOG is the log base 10 of the input value. It is approx-
23675 # imated by adding e + 0.f when the original value is viewed
23676 # as 2^^e * 1.f in extended precision. This value is stored
23677 # in d6.
23679 # Register usage:
23680 # Input/Output
23681 # d0: k-factor/exponent
23682 # d2: x/x
23683 # d3: x/x
23684 # d4: x/x
23685 # d5: x/x
23686 # d6: x/ILOG
23687 # d7: k-factor/Unchanged
23688 # a0: ptr for original operand/final result
23689 # a1: x/x
23690 # a2: x/x
23691 # fp0: x/float(ILOG)
23692 # fp1: x/x
23693 # fp2: x/x
23694 # F_SCR1:x/x
23695 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
23696 # L_SCR1:x/x
23697 # L_SCR2:first word of X packed/Unchanged
# A3: estimate ILOG into d6. With BINDEC_FLG set, ILOG is forced to -4933.
# Otherwise fp0 = (unbiased exponent + 1.f - 1.0) is multiplied by PLOG2
# (estimate >= 0) or PLOG2UP1 (estimate < 0) and converted to a longword;
# FPCR still holds the RM mode written in A1 at this point.
23699 tst.b BINDEC_FLG(%a6) # check for denorm
23700 beq.b A3_cont # if clr, continue with norm
23701 mov.l &-4933,%d6 # force ILOG = -4933
23702 bra.b A4_str
23703 A3_cont:
23704 mov.w FP_SCR1(%a6),%d0 # move exp to d0
23705 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
23706 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
23707 sub.w &0x3fff,%d0 # strip off bias
23708 fadd.w %d0,%fp0 # add in exp
23709 fsub.s FONE(%pc),%fp0 # subtract off 1.0
23710 fbge.w pos_res # if result >= 0, use PLOG2
23711 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
23712 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23713 bra.b A4_str # ILOG done; go clear FPSR
23714 pos_res:
23715 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
23716 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
23719 # A4. Clr INEX bit.
23720 # The operation in A3 above may have set INEX2.
# A4/A5: discard any FPSR status left by A3, then clear ICTR (low word of
# d5). Both run once; the A13 retry path re-enters at A6_str, below this.
23722 A4_str:
23723 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
23726 # A5. Set ICTR = 0;
23727 # ICTR is a flag used in A13. It must be set before the
23728 # loop entry A6. The lower word of d5 is used for ICTR.
23730 clr.w %d5 # clear ICTR
23732 # A6. Calculate LEN.
23733 # LEN is the number of digits to be displayed. The k-factor
23734 # can dictate either the total number of digits, if it is
23735 # a positive number, or the number of digits after the
23736 # original decimal point which are to be included as
23737 # significant. See the 68882 manual for examples.
23738 # If LEN is computed to be greater than 17, set OPERR in
23739 # USER_FPSR. LEN is stored in d4.
23741 # Register usage:
23742 # Input/Output
23743 # d0: exponent/Unchanged
23744 # d2: x/x/scratch
23745 # d3: x/x
23746 # d4: exc picture/LEN
23747 # d5: ICTR/Unchanged
23748 # d6: ILOG/Unchanged
23749 # d7: k-factor/Unchanged
23750 # a0: ptr for original operand/final result
23751 # a1: x/x
23752 # a2: x/x
23753 # fp0: float(ILOG)/Unchanged
23754 # fp1: x/x
23755 # fp2: x/x
23756 # F_SCR1:x/x
23757 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23758 # L_SCR1:x/x
23759 # L_SCR2:first word of X packed/Unchanged
# A6: compute LEN in d4 from k (d7) and ILOG (d6), clamped to 1..17.
#   k > 0 : LEN = k
#   k <= 0: LEN = ILOG + 1 - k
# OPERR/AIOP are set in USER_FPSR only when LEN > 17 and k > 0.
# Also the re-entry point for the single retry requested by A13.
23761 A6_str:
23762 tst.l %d7 # branch on sign of k
23763 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
23764 mov.l %d7,%d4 # if k > 0, LEN = k
23765 bra.b len_ck # skip to LEN check
23766 k_neg:
23767 mov.l %d6,%d4 # first load ILOG to d4
23768 sub.l %d7,%d4 # subtract off k
23769 addq.l &1,%d4 # add in the 1
23770 len_ck:
23771 tst.l %d4 # LEN check: branch on sign of LEN
23772 ble.b LEN_ng # if LEN <= 0, set LEN = 1
23773 cmp.l %d4,&17 # test if LEN > 17
23774 ble.b A7_str # LEN <= 17: nothing to clamp
23775 mov.l &17,%d4 # set max LEN = 17
23776 tst.l %d7 # OPERR is only reported for k > 0
23777 ble.b A7_str # k <= 0: clamp silently
23778 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
23779 bra.b A7_str # finished here
23780 LEN_ng:
23781 mov.l &1,%d4 # min LEN is 1
23784 # A7. Calculate SCALE.
23785 # SCALE is equal to 10^ISCALE, where ISCALE is the number
23786 # of decimal places needed to insure LEN integer digits
23787 # in the output before conversion to bcd. LAMBDA is the sign
23788 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
23789 # the rounding mode as given in the following table (see
23790 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
23791 # of opposite sign in bindec.sa from Coonen).
23793 # Initial USE
23794 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
23795 # ----------------------------------------------
23796 # RN 00 0 0 00/0 RN
23797 # RN 00 0 1 00/0 RN
23798 # RN 00 1 0 00/0 RN
23799 # RN 00 1 1 00/0 RN
23800 # RZ 01 0 0 11/3 RP
23801 # RZ 01 0 1 11/3 RP
23802 # RZ 01 1 0 10/2 RM
23803 # RZ 01 1 1 10/2 RM
23804 # RM 10 0 0 11/3 RP
23805 # RM 10 0 1 10/2 RM
23806 # RM 10 1 0 10/2 RM
23807 # RM 10 1 1 11/3 RP
23808 # RP 11 0 0 10/2 RM
23809 # RP 11 0 1 11/3 RP
23810 # RP 11 1 0 11/3 RP
23811 # RP 11 1 1 10/2 RM
23813 # Register usage:
23814 # Input/Output
23815 # d0: exponent/scratch - final is 0
23816 # d2: x/0 or 24 for A9
23817 # d3: x/scratch - offset ptr into PTENRM array
23818 # d4: LEN/Unchanged
23819 # d5: 0/ICTR:LAMBDA
23820 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
23821 # d7: k-factor/Unchanged
23822 # a0: ptr for original operand/final result
23823 # a1: x/ptr to PTENRM array
23824 # a2: x/x
23825 # fp0: float(ILOG)/Unchanged
23826 # fp1: x/10^ISCALE
23827 # fp2: x/x
23828 # F_SCR1:x/x
23829 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23830 # L_SCR1:x/x
23831 # L_SCR2:first word of X packed/Unchanged
# A7: compute ISCALE = ILOG + 1 - LEN, record its sign as LAMBDA (low word
# of d5; ICTR is swapped into the high word), choose the rounding mode and
# power-of-ten table from RBDTBL, and build fp1 = 10^|ISCALE| by walking
# the bits of |ISCALE| over 12-byte table entries (10^1, 10^2, 10^4, ...).
# When ISCALE <= -4908, 24 is added to ISCALE and d2 is set to 24 so that
# A9 can apply the remaining 10^24 separately.
23833 A7_str:
23834 tst.l %d7 # test sign of k
23835 bgt.b k_pos # if k > 0, skip this
23836 cmp.l %d7,%d6 # test k - ILOG
23837 blt.b k_pos # if k < ILOG, skip this
23838 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
23839 k_pos:
23840 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
23841 addq.l &1,%d0 # add the 1
23842 sub.l %d4,%d0 # sub off LEN
23843 swap %d5 # use upper word of d5 for LAMBDA
23844 clr.w %d5 # set it zero initially
23845 clr.w %d2 # set up d2 for very small case
23846 tst.l %d0 # test sign of ISCALE
23847 bge.b iscale # if pos, skip next inst
23848 addq.w &1,%d5 # if neg, set LAMBDA true
23849 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
23850 bgt.b no_inf # if false, skip rest
23851 add.l &24,%d0 # add in 24 to iscale
23852 mov.l &24,%d2 # put 24 in d2 for A9
23853 no_inf:
23854 neg.l %d0 # and take abs of ISCALE
23855 iscale:
23856 fmov.s FONE(%pc),%fp1 # init fp1 to 1
23857 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
23858 lsl.w &1,%d1 # put them in bits 2:1
23859 add.w %d5,%d1 # add in LAMBDA
23860 lsl.w &1,%d1 # rmode now in bits 3:2, LAMBDA in bit 1
23861 tst.l L_SCR2(%a6) # test sign of original x
23862 bge.b x_pos # if pos, don't set bit 0
23863 addq.l &1,%d1 # if neg, set bit 0
23864 x_pos:
23865 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
# NOTE(review): mov.b writes only the low byte of d3 and nothing earlier in
# bindec initializes d3 on the first pass, so the FPCR write below may carry
# stale upper bits until A8 rewrites FPCR - confirm this is harmless.
23866 mov.b (%a2,%d1),%d3 # load d3 with new rmode
23867 lsl.l &4,%d3 # put bits in proper position
23868 fmov.l %d3,%fpcr # load bits into fpu
23869 lsr.l &4,%d3 # move the rmode code back down to bits 1:0
23870 tst.b %d3 # decode new rmode for pten table
23871 bne.b not_rn # if zero, it is RN
23872 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
23873 bra.b rmode # exit decode
23874 not_rn:
23875 lsr.b &1,%d3 # get lsb in carry
23876 bcc.b not_rp2 # if carry clear, it is RM
23877 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
23878 bra.b rmode # exit decode
23879 not_rp2:
23880 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
23881 rmode:
23882 clr.l %d3 # clr table index
23883 e_loop2:
23884 lsr.l &1,%d0 # shift next bit into carry
23885 bcc.b e_next2 # if zero, skip the mul
23886 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
23887 e_next2:
23888 add.l &12,%d3 # inc d3 to next pwrten table entry
23889 tst.l %d0 # test if ISCALE is zero
23890 bne.b e_loop2 # if not, loop
23892 # A8. Clr INEX; Force RZ.
23893 # Earlier operations may have set INEX2. NOTE(review): this text
23893 # used to name A3, but A4 already clears FPSR after A3; the
23893 # fmul.x table multiplies in A7 are the likely source - confirm.
23894 # RZ mode is forced for the scaling operation to insure
23895 # only one rounding error. The grs bits are collected in
23896 # the INEX flag for use in A10.
23898 # Register usage:
23899 # Input/Output
23901 fmov.l &0,%fpsr # clr INEX
23902 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
23904 # A9. Scale X -> Y.
23905 # The mantissa is scaled to the desired number of significant
23906 # digits. The excess digits are collected in INEX2. If mul,
23907 # Check d2 for excess 10 exponential value. If not zero,
23908 # the iscale value would have caused the pwrten calculation
23909 # to overflow. Only a negative iscale can cause this, so
23910 # multiply by 10^(d2), which is now only allowed to be 24,
23911 # with a multiply by 10^8 and 10^16, which is exact since
23912 # 10^24 is exact. If the input was denormalized, we must
23913 # create a busy stack frame with the mul command and the
23914 # two operands, and allow the fpu to complete the multiply.
23916 # Register usage:
23917 # Input/Output
23918 # d0: FPCR with RZ mode/Unchanged
23919 # d2: 0 or 24/unchanged
23920 # d3: x/x
23921 # d4: LEN/Unchanged
23922 # d5: ICTR:LAMBDA
23923 # d6: ILOG/Unchanged
23924 # d7: k-factor/Unchanged
23925 # a0: ptr for original operand/final result
23926 # a1: ptr to PTENRM array/Unchanged
23927 # a2: x/x
23928 # fp0: float(ILOG)/X adjusted for SCALE (Y)
23929 # fp1: 10^ISCALE/Unchanged
23930 # fp2: x/x
23931 # F_SCR1:x/x
23932 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
23933 # L_SCR1:x/x
23934 # L_SCR2:first word of X packed/Unchanged
# A9: scale abs(X) by SCALE (fp1) into Y (fp0).
#   LAMBDA = 0: Y = abs(X) / SCALE.
#   LAMBDA = 1, norm:   Y = abs(X) [* 10^8 * 10^16 if d2 != 0] * SCALE.
#   LAMBDA = 1, denorm: exponents are summed by hand in d3 and only
#                       mantissas (exponent forced to 0x3fff) go through
#                       the FPU, to avoid intermediate underflow.
# 36(a1) and 48(a1) are table entries 3 and 4 (12 bytes each): 10^8, 10^16.
23936 A9_str:
23937 fmov.x (%a0),%fp0 # load X from memory
23938 fabs.x %fp0 # use abs(X)
23939 tst.w %d5 # LAMBDA is in lower word of d5
23940 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
23941 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
23942 bra.w A10_st # branch to A10
23944 sc_mul:
23945 tst.b BINDEC_FLG(%a6) # check for denorm
23946 beq.w A9_norm # if norm, continue with mul
23948 # for DENORM, we must calculate:
23949 # fp0 = input_op * 10^ISCALE * 10^24
23950 # since the input operand is a DENORM, we can't multiply it directly.
23951 # so, we do the multiplication of the exponents and mantissas separately.
23952 # in this way, we avoid underflow on intermediate stages of the
23953 # multiplication and guarantee a result without exception.
23954 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
23956 mov.w (%sp),%d3 # grab exponent
23957 andi.w &0x7fff,%d3 # clear sign
23958 ori.w &0x8000,(%a0) # set bit 15 again so the word stored by A1 reads as a negative exp
23959 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
23960 subi.w &0x3fff,%d3 # subtract BIAS
23961 add.w 36(%a1),%d3 # add exponent word of 10^8
23962 subi.w &0x3fff,%d3 # subtract BIAS
23963 add.w 48(%a1),%d3 # add exponent word of 10^16
23964 subi.w &0x3fff,%d3 # subtract BIAS
23966 bmi.w sc_mul_err # if result is DENORM, punt!!!
23968 andi.w &0x8000,(%sp) # keep sign
23969 or.w %d3,(%sp) # insert new exponent
23970 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
23971 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
23972 mov.l 0x4(%a0),-(%sp)
23973 mov.l &0x3fff0000,-(%sp) # biased exponent 0x3fff, i.e. true exponent zero
23974 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
23975 fmul.x (%sp)+,%fp0 # times 10^ISCALE carrying the combined exponent
23977 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23978 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23979 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
23980 mov.l 36+4(%a1),-(%sp)
23981 mov.l &0x3fff0000,-(%sp) # force exp to zero
23982 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
23983 mov.l 48+4(%a1),-(%sp)
23984 mov.l &0x3fff0000,-(%sp)# force exp to zero
23985 fmul.x (%sp)+,%fp0 # pops the 10^16 mantissa (pushed last)
23986 fmul.x (%sp)+,%fp0 # pops the 10^8 mantissa (pushed first)
23987 bra.b A10_st
# NOTE(review): sc_mul_err branches to itself, so a negative combined
# exponent hangs here with the fmovm frame still on the stack.
23989 sc_mul_err:
23990 bra.b sc_mul_err
23992 A9_norm:
23993 tst.w %d2 # test for small exp case
23994 beq.b A9_con # if zero, continue as normal
23995 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
23996 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
23997 A9_con:
23998 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
24000 # A10. Or in INEX.
24001 # If INEX is set, round error occurred. This is compensated
24002 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
24004 # Register usage:
24005 # Input/Output
24006 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
24007 # d2: x/x
24008 # d3: x/x
24009 # d4: LEN/Unchanged
24010 # d5: ICTR:LAMBDA
24011 # d6: ILOG/Unchanged
24012 # d7: k-factor/Unchanged
24013 # a0: ptr for original operand/final result
24014 # a1: ptr to PTENxx array/Unchanged
24015 # a2: x/ptr to FP_SCR1(a6)
24016 # fp0: Y/Y with lsb adjusted
24017 # fp1: 10^ISCALE/Unchanged
24018 # fp2: x/x
# A10: store Y to FP_SCR1(a6) and, if the scaling in A9 left INEX2 set
# (bit 9 of FPSR), OR a 1 into the lsb of Y's mantissa and reload fp0.
# a2 is left pointing at FP_SCR1(a6) for the later steps.
24020 A10_st:
24021 fmov.l %fpsr,%d0 # get FPSR
24022 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
24023 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
24024 btst &9,%d0 # check if INEX2 set
24025 beq.b A11_st # if clear, skip rest
24026 or.l &1,8(%a2) # or in 1 to lsb of mantissa
24027 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
24030 # A11. Restore original FPCR; set size ext.
24031 # Perform FINT operation in the user's rounding mode. Keep
24032 # the size to extended. The sintdo entry point in the sint
24033 # routine expects the FPCR value to be in USER_FPCR for
24034 # mode and precision. The original FPCR is saved in L_SCR1.
# A11: stash the caller's USER_FPCR in L_SCR1(a6), then reduce USER_FPCR to
# its rounding-mode bits (mask 0x30). A12 loads this masked value into FPCR
# and restores the original from L_SCR1 afterwards.
24036 A11_st:
24037 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
24038 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
24039 # ;block exceptions
24042 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
24043 # fint.x is executed inline (the bsr to sintdo is commented out). The output is in fp0.
24045 # Register usage:
24046 # Input/Output
24047 # d0: FPSR with AINEX cleared/FPCR with size set to ext
24048 # d2: x/x/scratch
24049 # d3: x/x
24050 # d4: LEN/Unchanged
24051 # d5: ICTR:LAMBDA/Unchanged
24052 # d6: ILOG/Unchanged
24053 # d7: k-factor/Unchanged
24054 # a0: ptr for original operand/src ptr for sintdo
24055 # a1: ptr to PTENxx array/Unchanged
24056 # a2: ptr to FP_SCR1(a6)/Unchanged
24057 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
24058 # fp0: Y/YINT
24059 # fp1: 10^ISCALE/Unchanged
24060 # fp2: x/x
24061 # F_SCR1:x/x
24062 # F_SCR2:Y adjusted for inex/Y with original exponent
24063 # L_SCR1:x/original USER_FPCR
24064 # L_SCR2:first word of X packed/Unchanged
# A12: YINT = fint(Y) under the masked USER_FPCR from A11.
# Y is written to FP_SCR1(a6) with the sign of the original operand, fint.x
# is executed directly (the old bsr to sintdo is commented out), and the low
# word of the resulting FPSR is ORed into FPSR_EXCEPT(a6).
# d0-d1/a0-a1, L_SCR1 and L_SCR2 are saved and restored around the
# operation; only the first byte of the saved USER_FPSR is written back.
24066 A12_st:
24067 movm.l &0xc0c0,-(%sp) # save {%d0-%d1/%a0-%a1} (originally for the sint call)
24068 mov.l L_SCR1(%a6),-(%sp) # save original USER_FPCR copy
24069 mov.l L_SCR2(%a6),-(%sp) # save first longword of X
24071 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
24072 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
24073 tst.l L_SCR2(%a6) # test sign of original operand
24074 bge.b do_fint12 # if pos, use Y
24075 or.l &0x80000000,(%a0) # if neg, use -Y
24076 do_fint12:
24077 mov.l USER_FPSR(%a6),-(%sp) # save USER_FPSR
24078 # bsr sintdo # sint routine returns int in fp0
24080 fmov.l USER_FPCR(%a6),%fpcr # user's rounding mode, masked in A11
24081 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
24082 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
24083 ## andi.l &0x00000030,%d0
24084 ## fmov.l %d0,%fpcr
24085 fint.x FP_SCR1(%a6),%fp0 # do fint()
24086 fmov.l %fpsr,%d0 # collect status from the fint
24087 or.w %d0,FPSR_EXCEPT(%a6) # accumulate its low word into FPSR_EXCEPT
24088 ## fmov.l &0x0,%fpcr
24089 ## fmov.l %fpsr,%d0 # don't keep ccodes
24090 ## or.w %d0,FPSR_EXCEPT(%a6)
24092 mov.b (%sp),USER_FPSR(%a6) # restore only the first byte of the saved USER_FPSR
24093 add.l &4,%sp # drop the saved USER_FPSR longword
24095 mov.l (%sp)+,L_SCR2(%a6)
24096 mov.l (%sp)+,L_SCR1(%a6)
24097 movm.l (%sp)+,&0x303 # restore {%d0-%d1/%a0-%a1}
24099 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
24100 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
24102 # A13. Check for LEN digits.
24103 # If the int operation results in more than LEN digits,
24104 # or less than LEN -1 digits, adjust ILOG and repeat from
24105 # A6. This test occurs only on the first pass. If the
24106 # result is exactly 10^LEN, decrement ILOG and divide
24107 # the mantissa by 10. The calculation of 10^LEN cannot
24108 # be inexact, since all powers of ten up to 10^27 are exact
24109 # in extended precision, so the use of a previous power-of-ten
24110 # table will introduce no error.
24113 # Register usage:
24114 # Input/Output
24115 # d0: FPCR with size set to ext/scratch final = 0
24116 # d2: x/x
24117 # d3: x/scratch final = x
24118 # d4: LEN/LEN adjusted
24119 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24120 # d6: ILOG/ILOG adjusted
24121 # d7: k-factor/Unchanged
24122 # a0: pointer into memory for packed bcd string formation
24123 # a1: ptr to PTENxx array/Unchanged
24124 # a2: ptr to FP_SCR1(a6)/Unchanged
24125 # fp0: int portion of Y/abs(YINT) adjusted
24126 # fp1: 10^ISCALE/Unchanged
24127 # fp2: x/10^LEN
24128 # F_SCR1:x/x
24129 # F_SCR2:Y with original exponent/Unchanged
24130 # L_SCR1:original USER_FPCR/Unchanged
24131 # L_SCR2:first word of X packed/Unchanged
# A13: check that abs(YINT) has LEN digits.
# First pass (ICTR = 0), fp2 = 10^(LEN-1) built from the table in a1:
#   abs(YINT) <  10^(LEN-1): ILOG--, ICTR = 1, FPCR = RM, retry from A6
#                            (this test is skipped when BINDEC_FLG is set)
#   abs(YINT) >  10^LEN    : ILOG++, ICTR = 1, FPCR = RM, retry from A6
#   abs(YINT) == 10^LEN    : YINT /= 10, ILOG++, continue at A14
#   otherwise              : continue at A14
# Second pass (ICTR != 0): only the == 10^LEN case is handled; there LEN
# is also incremented and fp2 is multiplied by 10 again.
# On exit d5 holds LAMBDA in its upper word and ICTR in its lower word.
24133 A13_st:
24134 swap %d5 # put ICTR in lower word of d5
24135 tst.w %d5 # check if ICTR = 0
24136 bne not_zr # if non-zero, go to second test
24138 # Compute 10^(LEN-1)
24140 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24141 mov.l %d4,%d0 # put LEN in d0
24142 subq.l &1,%d0 # d0 = LEN -1
24143 clr.l %d3 # clr table index
24144 l_loop:
24145 lsr.l &1,%d0 # shift next bit into carry
24146 bcc.b l_next # if zero, skip the mul
24147 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24148 l_next:
24149 add.l &12,%d3 # inc d3 to next pwrten table entry
24150 tst.l %d0 # test if LEN is zero
24151 bne.b l_loop # if not, loop
24153 # 10^LEN-1 is computed for this test and A14. If the input was
24154 # denormalized, check only the case in which YINT > 10^LEN.
24156 tst.b BINDEC_FLG(%a6) # check if input was norm
24157 beq.b A13_con # if norm, continue with checking
24158 fabs.x %fp0 # take abs of YINT
24159 bra test_2
24161 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
24163 A13_con:
24164 fabs.x %fp0 # take abs of YINT
24165 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
24166 fbge.w test_2 # if greater or equal, do next test
24167 subq.l &1,%d6 # subtract 1 from ILOG
24168 mov.w &1,%d5 # set ICTR
24169 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24170 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24171 bra.w A6_str # return to A6 and recompute YINT
24172 test_2:
24173 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
24174 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
24175 fblt.w A14_st # if less, all is ok, go to A14
24176 fbgt.w fix_ex # if greater, fix and redo
24177 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
24178 addq.l &1,%d6 # and inc ILOG
24179 bra.b A14_st # and continue elsewhere
24180 fix_ex:
24181 addq.l &1,%d6 # increment ILOG by 1
24182 mov.w &1,%d5 # set ICTR
24183 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
24184 bra.w A6_str # return to A6 and recompute YINT
24186 # Since ICTR <> 0, we have already been through one adjustment,
24187 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
24188 # 10^LEN is again computed using whatever table is in a1 since the
24189 # value calculated cannot be inexact.
24191 not_zr:
24192 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
24193 mov.l %d4,%d0 # put LEN in d0
24194 clr.l %d3 # clr table index
24195 z_loop:
24196 lsr.l &1,%d0 # shift next bit into carry
24197 bcc.b z_next # if zero, skip the mul
24198 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
24199 z_next:
24200 add.l &12,%d3 # inc d3 to next pwrten table entry
24201 tst.l %d0 # test if LEN is zero
24202 bne.b z_loop # if not, loop
24203 fabs.x %fp0 # get abs(YINT)
24204 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
24205 fbneq.w A14_st # if not, skip this
24206 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
24207 addq.l &1,%d6 # and inc ILOG by 1
24208 addq.l &1,%d4 # and inc LEN
24209 fmul.s FTEN(%pc),%fp2 # LEN was incremented, so update fp2 to the new 10^LEN
24211 # A14. Convert the mantissa to bcd.
24212 # The binstr routine is used to convert the LEN digit
24213 # mantissa to bcd in memory. The input to binstr is
24214 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
24215 # such that the decimal point is to the left of bit 63.
24216 # The bcd digits are stored in the correct position in
24217 # the final string area in memory.
24220 # Register usage:
24221 # Input/Output
24222 # d0: x/LEN call to binstr - final is 0
24223 # d1: x/0
24224 # d2: x/ms 32-bits of mant of abs(YINT)
24225 # d3: x/ls 32-bits of mant of abs(YINT)
24226 # d4: LEN/Unchanged
24227 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24228 # d6: ILOG
24229 # d7: k-factor/Unchanged
24230 # a0: pointer into memory for packed bcd string formation
24231 # /ptr to first mantissa byte in result string
24232 # a1: ptr to PTENxx array/Unchanged
24233 # a2: ptr to FP_SCR1(a6)/Unchanged
24234 # fp0: int portion of Y/abs(YINT) adjusted
24235 # fp1: 10^ISCALE/Unchanged
24236 # fp2: 10^LEN/Unchanged
24237 # F_SCR1:x/Work area for final result
24238 # F_SCR2:Y with original exponent/Unchanged
24239 # L_SCR1:original USER_FPCR/Unchanged
24240 # L_SCR2:first word of X packed/Unchanged
# A14: convert the mantissa digits.
# abs(YINT)/10^LEN (fp2 = 10^LEN) is computed under RZ and stored to
# FP_SCR0(a6); its 64-bit mantissa is shifted right in d2:d3 according to
# the exponent, incremented at bit 7, and the low 7 bits of d3 are cleared.
# binstr is then called with d0 = LEN and a0 = FP_SCR0 + 3.
24242 A14_st:
24243 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
24244 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
24245 lea.l FP_SCR0(%a6),%a0
24246 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
24247 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
24248 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
24249 clr.l 4(%a0) # zero word 2 of FP_RES
24250 clr.l 8(%a0) # zero word 3 of FP_RES
24251 mov.l (%a0),%d0 # move exponent to d0
24252 swap %d0 # put exponent in lower word
24253 beq.b no_sft # if zero, don't shift (Z reflects the whole swapped longword)
24254 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
24255 tst.l %d0 # check if > 1
24256 bgt.b no_sft # if so, don't shift
24257 neg.l %d0 # make exp positive
24258 m_loop:
24259 lsr.l &1,%d2 # shift d2:d3 right, add 0s
24260 roxr.l &1,%d3 # the number of places
24261 dbf.w %d0,m_loop # given in d0 (dbf runs the body d0+1 times)
24262 no_sft:
24263 tst.l %d2 # check for mantissa of zero
24264 bne.b no_zr # if not, go on
24265 tst.l %d3 # continue zero check
24266 beq.b zer_m # if zero, go directly to binstr
24267 no_zr:
24268 clr.l %d1 # put zero in d1 for addx
24269 add.l &0x00000080,%d3 # inc at bit 7
24270 addx.l %d1,%d2 # continue inc
24271 and.l &0xffffff80,%d3 # strip off lsb not used by 882
24272 zer_m:
24273 mov.l %d4,%d0 # put LEN in d0 for binstr call
24274 addq.l &3,%a0 # a0 points to M16 byte in result
24275 bsr binstr # call binstr to convert mant
24278 # A15. Convert the exponent to bcd.
24279 # As in A14 above, the exp is converted to bcd and the
24280 # digits are stored in the final string.
24282 # Digits are stored in L_SCR1(a6) on return from binstr as:
24284 # 32 16 15 0
24285 # -----------------------------------------
24286 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
24287 # -----------------------------------------
24289 # And are moved into their proper places in FP_SCR0. If digit e4
24290 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
24291 # written as specified in the 881/882 manual for packed decimal.
24293 # Register usage:
24294 # Input/Output
24295 # d0: x/LEN call to binstr - final is 0
24296 # d1: x/scratch (0);shift count for final exponent packing
24297 # d2: x/ms 32-bits of exp fraction/scratch
24298 # d3: x/ls 32-bits of exp fraction
24299 # d4: LEN/Unchanged
24300 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24301 # d6: ILOG
24302 # d7: k-factor/Unchanged
24303 # a0: ptr to result string/ptr to L_SCR1(a6)
24304 # a1: ptr to PTENxx array/Unchanged
24305 # a2: ptr to FP_SCR1(a6)/Unchanged
24306 # fp0: abs(YINT) adjusted/float(ILOG)
24307 # fp1: 10^ISCALE/Unchanged
24308 # fp2: 10^LEN/Unchanged
24309 # F_SCR1:Work area for final result/BCD result
24310 # F_SCR2:Y with original exponent/ILOG/10^4
24311 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
24312 # L_SCR2:first word of X packed/Unchanged
# A15: convert the exponent digits.
# fp0 is set to the exponent magnitude to print:
#   abs(ILOG) normally;
#   1.0 when the input was a norm and fp0 tests as zero;
#   4933.0 when the input was a denorm, fp0 tests as zero and k >= 0.
# It is divided by the third table entry (24(a1), 10^4) to form a fraction,
# shifted/rounded like the mantissa in A14, and passed to binstr with
# d0 = 4 and a0 = L_SCR1(a6). The digits are then packed into FP_SCR0;
# a non-zero e4 digit sets OPERR/AIOP in USER_FPSR.
# a2 still points at FP_SCR1(a6) (loaded in A10).
24314 A15_st:
24315 tst.b BINDEC_FLG(%a6) # check for denorm
24316 beq.b not_denorm
24317 ftest.x %fp0 # test for zero
24318 fbeq.w den_zero # if zero, use k-factor or 4933
24319 fmov.l %d6,%fp0 # float ILOG
24320 fabs.x %fp0 # get abs of ILOG
24321 bra.b convrt
24322 den_zero:
24323 tst.l %d7 # check sign of the k-factor
24324 blt.b use_ilog # if negative, use ILOG
24325 fmov.s F4933(%pc),%fp0 # force exponent to 4933
24326 bra.b convrt # do it
24327 use_ilog:
24328 fmov.l %d6,%fp0 # float ILOG
24329 fabs.x %fp0 # get abs of ILOG
24330 bra.b convrt
24331 not_denorm:
24332 ftest.x %fp0 # test for zero
24333 fbneq.w not_zero # if non-zero, use ILOG
24334 fmov.s FONE(%pc),%fp0 # force exponent to 1
24335 bra.b convrt # do it
24336 not_zero:
24337 fmov.l %d6,%fp0 # float ILOG
24338 fabs.x %fp0 # get abs of ILOG
24339 convrt:
24340 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
24341 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
24342 mov.l 4(%a2),%d2 # move word 2 to d2
24343 mov.l 8(%a2),%d3 # move word 3 to d3
24344 mov.w (%a2),%d0 # move exp to d0
24345 beq.b x_loop_fin # if zero, skip the shift
24346 sub.w &0x3ffd,%d0 # subtract off bias
24347 neg.w %d0 # make exp positive
24348 x_loop:
24349 lsr.l &1,%d2 # shift d2:d3 right
24350 roxr.l &1,%d3 # the number of places
24351 dbf.w %d0,x_loop # given in d0
24352 x_loop_fin:
24353 clr.l %d1 # put zero in d1 for addx
24354 add.l &0x00000080,%d3 # inc at bit 7
24355 addx.l %d1,%d2 # continue inc
24356 and.l &0xffffff80,%d3 # strip off lsb not used by 882
24357 mov.l &4,%d0 # put 4 in d0 for binstr call
24358 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
24359 bsr binstr # call binstr to convert exp
24360 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
24361 mov.l &12,%d1 # use d1 for shift count
24362 lsr.l %d1,%d0 # shift d0 right by 12
24363 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
24364 lsr.l %d1,%d0 # shift d0 right by 12
24365 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
24366 tst.b %d0 # check if e4 is zero
24367 beq.b A16_st # if zero, skip rest
24368 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
24371 # A16. Write sign bits to final string.
24372 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
24374 # Register usage:
24375 # Input/Output
24376 # d0: x/scratch - final is x
24377 # d2: x/x
24378 # d3: x/x
24379 # d4: LEN/Unchanged
24380 # d5: ICTR:LAMBDA/LAMBDA:ICTR
24381 # d6: ILOG/ILOG adjusted
24382 # d7: k-factor/Unchanged
24383 # a0: ptr to L_SCR1(a6)/Unchanged
24384 # a1: ptr to PTENxx array/Unchanged
24385 # a2: ptr to FP_SCR1(a6)/Unchanged
24386 # fp0: float(ILOG)/Unchanged
24387 # fp1: 10^ISCALE/Unchanged
24388 # fp2: 10^LEN/Unchanged
24389 # F_SCR1:BCD result with correct signs
24390 # F_SCR2:ILOG/10^4
24391 # L_SCR1:Exponent digits on return from binstr
24392 # L_SCR2:first word of X packed/Unchanged
# A16: write the two sign bits into the first nibble of the packed result
# in FP_SCR0, then restore the saved registers and return.
#   SM (value 2) is set when L_SCR2, the first word of the original X, is
#   negative; SE (value 1) is set when ILOG in d6 is negative.
A16_st:
	clr.l		%d0			# d0 collects SM:SE
	and.b		&0x0f,FP_SCR0(%a6)	# clear the first nibble of FP_SCR0
	tst.l		L_SCR2(%a6)		# sign of the original mantissa
	bge.b		mant_p			# non-negative: leave SM clear
	mov.l		&2,%d0			# negative: SM = 1
mant_p:
	tst.l		%d6			# sign of ILOG
	bge.b		wr_sgn			# non-negative: leave SE clear
	addq.l		&1,%d0			# negative: SE = 1
wr_sgn:
	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM:SE at field offset 0, width 2

# Clean up and restore all registers used.
	fmov.l		&0,%fpsr		# clear possible inex2/ainex bits
	fmovm.x		(%sp)+,&0xe0		# {%fp0-%fp2}
	movm.l		(%sp)+,&0x4fc		# {%d2-%d7/%a2}
	rts					# return; the PTENRN data table follows,
						# so execution must not fall through
# PTENRN: table of powers of ten, 10^(2^n) for n = 0..12.
# Each entry is three lwords (12 bytes), so entry n lives at offset 12*n.
# NOTE(review): the RN suffix presumably stands for round-to-nearest;
# confirm against the code that selects between PTENRN/PTENRP/PTENRM.
	global		PTENRN
PTENRN:
	long		0x40020000,0xa0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xc8000000,0x00000000	# 10 ^ 2
	long		0x400c0000,0x9c400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xbebc2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8e1bc9bf,0x04000000	# 10 ^ 16
	long		0x40690000,0x9dc5ada8,0x2b70b59e	# 10 ^ 32
	long		0x40d30000,0xc2781f49,0xffcfa6d5	# 10 ^ 64
	long		0x41a80000,0x93ba47c9,0x80e98ce0	# 10 ^ 128
	long		0x43510000,0xaa7eebfb,0x9df9de8e	# 10 ^ 256
	long		0x46a30000,0xe319a0ae,0xa60e91c7	# 10 ^ 512
	long		0x4d480000,0xc9767586,0x81750c17	# 10 ^ 1024
	long		0x5a920000,0x9e8b3b5d,0xc53d5de5	# 10 ^ 2048
	long		0x75250000,0xc4605202,0x8a20979b	# 10 ^ 4096
# PTENRP: table of powers of ten, 10^(2^n) for n = 0..12, three lwords
# (12 bytes) per entry. Compared with PTENRN only the last mantissa bit of
# the 10^64 and 10^1024 entries differs, and there it is one unit larger.
# NOTE(review): the RP suffix presumably stands for round-to-plus-infinity;
# confirm against the code that selects the table.
	global		PTENRP
PTENRP:
	long		0x40020000,0xa0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xc8000000,0x00000000	# 10 ^ 2
	long		0x400c0000,0x9c400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xbebc2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8e1bc9bf,0x04000000	# 10 ^ 16
	long		0x40690000,0x9dc5ada8,0x2b70b59e	# 10 ^ 32
	long		0x40d30000,0xc2781f49,0xffcfa6d6	# 10 ^ 64
	long		0x41a80000,0x93ba47c9,0x80e98ce0	# 10 ^ 128
	long		0x43510000,0xaa7eebfb,0x9df9de8e	# 10 ^ 256
	long		0x46a30000,0xe319a0ae,0xa60e91c7	# 10 ^ 512
	long		0x4d480000,0xc9767586,0x81750c18	# 10 ^ 1024
	long		0x5a920000,0x9e8b3b5d,0xc53d5de5	# 10 ^ 2048
	long		0x75250000,0xc4605202,0x8a20979b	# 10 ^ 4096
# PTENRM: table of powers of ten, 10^(2^n) for n = 0..12, three lwords
# (12 bytes) per entry. The entries up to 10^16, and those for 10^64 and
# 10^1024, equal the PTENRN values; the remaining ones are one unit
# smaller in the last mantissa bit.
# NOTE(review): the RM suffix presumably stands for round-to-minus-infinity;
# confirm against the code that selects the table.
	global		PTENRM
PTENRM:
	long		0x40020000,0xa0000000,0x00000000	# 10 ^ 1
	long		0x40050000,0xc8000000,0x00000000	# 10 ^ 2
	long		0x400c0000,0x9c400000,0x00000000	# 10 ^ 4
	long		0x40190000,0xbebc2000,0x00000000	# 10 ^ 8
	long		0x40340000,0x8e1bc9bf,0x04000000	# 10 ^ 16
	long		0x40690000,0x9dc5ada8,0x2b70b59d	# 10 ^ 32
	long		0x40d30000,0xc2781f49,0xffcfa6d5	# 10 ^ 64
	long		0x41a80000,0x93ba47c9,0x80e98cdf	# 10 ^ 128
	long		0x43510000,0xaa7eebfb,0x9df9de8d	# 10 ^ 256
	long		0x46a30000,0xe319a0ae,0xa60e91c6	# 10 ^ 512
	long		0x4d480000,0xc9767586,0x81750c17	# 10 ^ 1024
	long		0x5a920000,0x9e8b3b5d,0xc53d5de4	# 10 ^ 2048
	long		0x75250000,0xc4605202,0x8a20979a	# 10 ^ 4096
24462 #########################################################################
24463 # binstr(): Converts a 64-bit binary integer to bcd. #
24465 # INPUT *************************************************************** #
24466 # d2:d3 = 64-bit binary integer #
24467 # d0 = desired length (LEN) #
24468 # a0 = pointer to start in memory for bcd characters #
24469 # (This pointer must point to byte 4 of the first #
24470 # lword of the packed decimal memory string.) #
24472 # OUTPUT ************************************************************** #
24473 # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
24475 # ALGORITHM *********************************************************** #
24476 # The 64-bit binary is assumed to have a decimal point before #
24477 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
24478 # shift and a mul by 8 shift. The bits shifted out of the #
24479 # msb form a decimal digit. This process is iterated until #
24480 # LEN digits are formed. #
24482 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
24483 # digit formed will be assumed the least significant. This is #
24484 # to force the first byte formed to have a 0 in the upper 4 bits. #
24486 # A2. Beginning of the loop: #
24487 # Copy the fraction in d2:d3 to d4:d5. #
24489 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
24490 # extracts and shifts. The three msbs from d2 will go into d1. #
24492 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
24493 # will be collected by the carry. #
24495 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
24496 # into d2:d3. D1 will contain the bcd digit formed. #
24498 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
24499 # zero, it is the ls digit. Put the digit in its place in the #
24500 # upper word of d0. If it is the ls digit, write the word #
24501 # from d0 to memory. #
24503 # A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
24505 #########################################################################
24507 # Implementation Notes:
24509 # The registers are used as follows:
24511 # d0: LEN counter
24512 # d1: temp used to form the digit
24513 # d2: upper 32-bits of fraction for mul by 8
24514 # d3: lower 32-bits of fraction for mul by 8
24515 # d4: upper 32-bits of fraction for mul by 2
24516 # d5: lower 32-bits of fraction for mul by 2
24517 # d6: temp for bit-field extracts
24518 # d7: byte digit formation word;digit count {0,1}
24519 # a0: pointer into memory for packed bcd string formation
# binstr: convert the 64-bit binary fraction in d2:d3 into packed BCD.
#   In:  d2:d3 = fraction (binary point before bit 63)
#        d0    = number of digits to produce (LEN)
#        a0    = address of the first byte to write
#   Out: digits stored two per byte through (a0)+. The first byte written
#        has 0 in its upper nibble; an odd trailing digit is stored in the
#        upper nibble of a final byte.
#        d0-d7 are preserved. a0 is not saved, so it returns advanced past
#        the last byte written.
# d7 holds two things: its low word is a flag (non-zero = the next digit
# completes a byte) and its high word holds the pending first digit.
	global		binstr
binstr:
	movm.l		&0xff00,-(%sp)		# save {%d0-%d7}

# A1: Init d7
	mov.l		&1,%d7			# flag = 1: first digit is written at once
	subq.l		&1,%d0			# dbf runs count+1 passes, so use LEN-1

# A2. Copy d2:d3 to d4:d5. Start loop.
loop:
	mov.l		%d2,%d4			# d4:d5 = copy of the fraction
	mov.l		%d3,%d5

# A3. Multiply d2:d3 by 8; extract msbs into d1.
	bfextu		%d2{&0:&3},%d1		# d1 = three msbs shifted out of d2
	asl.l		&3,%d2			# d2 <<= 3
	bfextu		%d3{&0:&3},%d6		# d6 = three msbs of d3
	asl.l		&3,%d3			# d3 <<= 3
	or.l		%d6,%d2			# move d3's msbs into d2's low bits

# A4. Multiply d4:d5 by 2; add carry out to d1.
	asl.l		&1,%d5			# d5 <<= 1, msb -> X
	roxl.l		&1,%d4			# d4 = d4<<1 | X, msb -> X
	swap		%d6			# low word of d6 is now 0 (X untouched)
	addx.w		%d6,%d1			# d1 += X

# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
	add.l		%d5,%d3			# low lwords
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.l		%d4,%d2			# high lwords plus carry
	nop					# ERRATA FIX #13 (Rev. 1.2 6/6/90)
	addx.w		%d6,%d1			# d1 += final carry; d1 = digit
	swap		%d6			# undo the earlier swap

# A6. Test d7 and branch.
	tst.w		%d7			# flag clear -> this is a first digit
	beq.b		first_d
sec_d:
	swap		%d7			# low word = pending first digit
	asl.w		&4,%d7			# move it to the upper nibble
	add.w		%d1,%d7			# lower nibble = new digit
	mov.b		%d7,(%a0)+		# store the finished byte
	swap		%d7			# flag word back in the low half
	clr.w		%d7			# flag = 0: no digit pending
	dbf.w		%d0,loop		# next digit
	bra.b		end_bstr		# all digits done
first_d:
	swap		%d7			# expose the pending-digit word
	mov.w		%d1,%d7			# remember the new digit
	swap		%d7			# flag word back in the low half
	addq.w		&1,%d7			# flag = 1: a digit is pending
	dbf.w		%d0,loop		# next digit
	swap		%d7			# loop ended with a digit pending:
	lsl.w		&4,%d7			# put it in the upper nibble
	mov.b		%d7,(%a0)+		# and store that last byte

# Clean up and return; the result is the BCD string written through a0.
end_bstr:
	movm.l		(%sp)+,&0xff		# restore {%d0-%d7}
	rts					# return; must not fall into facc_in_b
24591 #########################################################################
24592 # XDEF **************************************************************** #
24593 # facc_in_b(): dmem_read_byte failed #
24594 # facc_in_w(): dmem_read_word failed #
24595 # facc_in_l(): dmem_read_long failed #
24596 # facc_in_d(): dmem_read of dbl prec failed #
24597 # facc_in_x(): dmem_read of ext prec failed #
24599 # facc_out_b(): dmem_write_byte failed #
24600 # facc_out_w(): dmem_write_word failed #
24601 # facc_out_l(): dmem_write_long failed #
24602 # facc_out_d(): dmem_write of dbl prec failed #
24603 # facc_out_x(): dmem_write of ext prec failed #
24605 # XREF **************************************************************** #
24606 # _real_access() - exit through access error handler #
24608 # INPUT *************************************************************** #
24609 # None #
24611 # OUTPUT ************************************************************** #
24612 # None #
24614 # ALGORITHM *********************************************************** #
24615 # Flow jumps here when an FP data fetch call gets an error #
24616 # result. This means the operating system wants an access error frame #
24617 # made out of the current exception stack frame. #
24618 # So, we first call restore() which makes sure that any updated #
24619 # -(an)+ register gets returned to its pre-exception value and then #
24620 # we change the stack to an access error stack frame. #
24622 #########################################################################
# facc_in_{b,w,l,d,x}: a data read of 1/2/4/8/12 bytes failed.
# Every entry does the same three things:
#   1. d0 = operand size in bytes, the amount restore() uses to undo an
#      (An)+ / -(An) update;
#   2. call restore();
#   3. stash the FSLW word for this access size in EXC_VOFF(%a6) and join
#      facc_finish.
# The word is stored only after restore() returns, because restore's a7
# case still reads EXC_VOFF(%a6).
facc_in_b:
	movq.l		&0x1,%d0			# size = 1 byte
	bsr.w		restore				# undo An update
	mov.w		&0x0121,EXC_VOFF(%a6)		# FSLW word: byte read
	bra.w		facc_finish			# .w form for the farthest entry

facc_in_w:
	movq.l		&0x2,%d0			# size = 2 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x0141,EXC_VOFF(%a6)		# FSLW word: word read
	bra.b		facc_finish

facc_in_l:
	movq.l		&0x4,%d0			# size = 4 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x0101,EXC_VOFF(%a6)		# FSLW word: long read
	bra.b		facc_finish

facc_in_d:
	movq.l		&0x8,%d0			# size = 8 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x0161,EXC_VOFF(%a6)		# FSLW word: double read
	bra.b		facc_finish

facc_in_x:
	movq.l		&0xc,%d0			# size = 12 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x0161,EXC_VOFF(%a6)		# FSLW word: same as double
	bra.b		facc_finish
24659 ################################################################
# facc_out_{b,w,l,d,x}: a data write of 1/2/4/8/12 bytes failed.
# Same pattern as the read-side entries: d0 = operand size for restore(),
# call restore(), then stash the FSLW word for this access in
# EXC_VOFF(%a6). The word is stored only after restore() returns, because
# restore's a7 case still reads EXC_VOFF(%a6).
# facc_out_x is last and simply falls through into facc_finish.
facc_out_b:
	movq.l		&0x1,%d0			# size = 1 byte
	bsr.w		restore				# undo An update
	mov.w		&0x00a1,EXC_VOFF(%a6)		# FSLW word: byte write
	bra.b		facc_finish

facc_out_w:
	movq.l		&0x2,%d0			# size = 2 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x00c1,EXC_VOFF(%a6)		# FSLW word: word write
	bra.b		facc_finish

facc_out_l:
	movq.l		&0x4,%d0			# size = 4 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x0081,EXC_VOFF(%a6)		# FSLW word: long write
	bra.b		facc_finish

facc_out_d:
	movq.l		&0x8,%d0			# size = 8 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x00e1,EXC_VOFF(%a6)		# FSLW word: double write
	bra.b		facc_finish

facc_out_x:
	mov.l		&0xc,%d0			# size = 12 bytes
	bsr.w		restore				# undo An update
	mov.w		&0x00e1,EXC_VOFF(%a6)		# FSLW word: same as double
							# (falls through into facc_finish)
24695 # here's where we actually create the access error frame from the
24696 # current exception stack frame.
# facc_finish: common tail of all facc_in_* / facc_out_* entries.
# Restores the saved register state, then rewrites the exception stack
# frame in place into an access error frame and exits through
# _real_access. The frame grows by one lword:
#     0x0(%sp)  SR, hi(PC)
#     0x4(%sp)  lo(PC), format/vector offset word (0x4008)
#     0x8(%sp)  EA
#     0xc(%sp)  FSLW
facc_finish:
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6)	# stacked PC = saved FPIAR

	fmovm.x		EXC_FPREGS(%a6),&0xc0		# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar	# restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303		# restore d0-d1/a0-a1

	unlk		%a6

	mov.l		(%sp),-(%sp)			# push a copy of SR, hi(PC)
	mov.l		0x8(%sp),0x4(%sp)		# move lo(PC) and the stashed word down
	mov.l		0xc(%sp),0x8(%sp)		# move EA down
	mov.l		&0x00000001,0xc(%sp)		# FSLW = 1 to start with
	mov.w		0x6(%sp),0xc(%sp)		# upper FSLW word = word stashed by the entry
	mov.w		&0x4008,0x6(%sp)		# format/vector offset word

	btst		&0x5,(%sp)			# S bit of the stacked SR
	beq.b		facc_out2			# clear: user mode
	bset		&0x2,0xd(%sp)			# set: mark supervisor in FSLW TM

facc_out2:
	bra.l		_real_access			# hand over to the access error handler
24720 ##################################################################
# restore: undo an address-register update made by the emulation.
#
# If the effective addressing mode was predecrement or postincrement, the
# emulation has already changed An to its post-instruction value. Since
# we are exiting to the access error handler, An must be returned to its
# pre-instruction value.
#
#   In:  d0 = operand size in bytes
#        EXC_OPWORD+0x1(%a6) = low byte of the opword (mode in bits 5-3,
#        register in bits 2-0)
#   Out: the affected An (live register, stacked copy or USP) is corrected.
#        Clobbers d1; negates d0 on the predecrement path; the a7 case
#        also uses a0 as scratch.
#
# Note on syntax: this file writes compare operands as "cmpi.b %d1,&imm";
# keep that order.
restore:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.b		&0x38,%d1		# isolate the mode field
	cmpi.b		%d1,&0x18		# postinc?
	beq.w		rest_inc
	cmpi.b		%d1,&0x20		# predec?
	beq.w		rest_dec
	rts					# any other mode: nothing to undo

# rest_inc: An -= d0 for the register named in the opword. Dispatches
# through a table of 16-bit offsets relative to tbl_rest_inc.
rest_inc:
	mov.b		EXC_OPWORD+0x1(%a6),%d1
	andi.w		&0x0007,%d1		# fetch An

	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1	# d1 = offset of handler
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)		# jump to ri_aN

tbl_rest_inc:
	short		ri_a0 - tbl_rest_inc
	short		ri_a1 - tbl_rest_inc
	short		ri_a2 - tbl_rest_inc
	short		ri_a3 - tbl_rest_inc
	short		ri_a4 - tbl_rest_inc
	short		ri_a5 - tbl_rest_inc
	short		ri_a6 - tbl_rest_inc
	short		ri_a7 - tbl_rest_inc

# Each handler fixes exactly one register and returns to restore's caller.
# a0/a1 are fixed in their stacked copies in EXC_DREGS and a6 in the copy
# at (%a6); a2-a5 are fixed in the live registers.
ri_a0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
	rts
ri_a1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
	rts
ri_a2:
	sub.l		%d0,%a2			# fix a2
	rts
ri_a3:
	sub.l		%d0,%a3			# fix a3
	rts
ri_a4:
	sub.l		%d0,%a4			# fix a4
	rts
ri_a5:
	sub.l		%d0,%a5			# fix a5
	rts
ri_a6:
	sub.l		%d0,(%a6)		# fix stacked a6
	rts

# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then a7 also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
	bne.b		ri_a7_done		# out

	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
	bne.b		ri_a7_done		# supervisor
	movc		%usp,%a0		# restore USP (a0 is scratch here)
	sub.l		%d0,%a0
	movc		%a0,%usp
ri_a7_done:
	rts

# need to invert adjustment value if the <ea> was predec
rest_dec:
	neg.l		%d0			# subtracting -size adds it back
	bra.b		rest_inc