make.tmpl: add missing compiler attribute to build_progs
[AROS.git] / arch / m68k-all / m680x0 / 060sp / dist / pfpsp.s
blobaf9e83ca681b3b506185df5ace6bbb9116b8c3eb
2 # $NetBSD: pfpsp.s,v 1.4 2005/12/11 12:17:52 christos Exp $
5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 # M68000 Hi-Performance Microprocessor Division
8 # M68060 Software Package Production Release
9 #
10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 # All rights reserved.
13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 # To the maximum extent permitted by applicable law,
15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 # and any accompanying written materials.
21 # To the maximum extent permitted by applicable law,
22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27 # Motorola assumes no responsibility for the maintenance and support
28 # of the SOFTWARE.
30 # You are hereby granted a copyright license to use, modify, and distribute the
31 # SOFTWARE so long as this entire notice is retained without alteration
32 # in any modified and/or redistributed versions, and that such modified
33 # versions are clearly identified as such.
34 # No licenses are granted by implication, estoppel or otherwise under any
35 # patents or trademarks of Motorola, Inc.
36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39 # freal.s:
40 # This file is appended to the top of the 060FPSP package
41 # and contains the entry points into the package. The user, in
42 # effect, branches to one of the branch table entries located
43 # after _060FPSP_TABLE.
44 # Also, subroutine stubs exist in this file (_fpsp_done for
45 # example) that are referenced by the FPSP package itself in order
46 # to call a given routine. The stub routine actually performs the
47 # callout. The FPSP code does a "bsr" to the stub routine. This
48 # extra layer of hierarchy adds a slight performance penalty but
49 # it makes the FPSP code easier to read and more maintainable.
# Byte offsets into the 0x80-byte callout table that the OS places
# immediately BEFORE _060FPSP_TABLE. Each slot holds the displacement
# (relative to _060FPSP_TABLE-0x80) of an OS-supplied routine; the
# stub routines below fetch these values to perform the "callouts".
52 set _off_bsun, 0x00 # bsun exception callout
53 set _off_snan, 0x04 # snan exception callout
54 set _off_operr, 0x08 # operr exception callout
55 set _off_ovfl, 0x0c # overflow exception callout
56 set _off_unfl, 0x10 # underflow exception callout
57 set _off_dz, 0x14 # divide-by-zero exception callout
58 set _off_inex, 0x18 # inexact exception callout
59 set _off_fline, 0x1c # F-line exception callout
60 set _off_fpu_dis, 0x20 # FPU-disabled exception callout
61 set _off_trap, 0x24 # trap exception callout
62 set _off_trace, 0x28 # trace exception callout
63 set _off_access, 0x2c # access-error callout
64 set _off_done, 0x30 # exception-handling-done callout
66 set _off_imr, 0x40 # instruction memory read callout
67 set _off_dmr, 0x44 # data memory read callout
68 set _off_dmw, 0x48 # data memory write callout
69 set _off_irw, 0x4c # instruction read (word) callout
70 set _off_irl, 0x50 # instruction read (longword) callout
71 set _off_drb, 0x54 # data read (byte) callout
72 set _off_drw, 0x58 # data read (word) callout
73 set _off_drl, 0x5c # data read (longword) callout
74 set _off_dwb, 0x60 # data write (byte) callout
75 set _off_dww, 0x64 # data write (word) callout
76 set _off_dwl, 0x68 # data write (longword) callout
78 _060FPSP_TABLE:
80 ###############################################################
82 # Here's the table of ENTRY POINTS for those linking the package.
# Each entry occupies 8 bytes: a "bra.l" (6 bytes) to the handler plus
# a 2-byte pad word, so the n-th entry point is _060FPSP_TABLE + n*8.
83 bra.l _fpsp_snan
84 short 0x0000
85 bra.l _fpsp_operr
86 short 0x0000
87 bra.l _fpsp_ovfl
88 short 0x0000
89 bra.l _fpsp_unfl
90 short 0x0000
91 bra.l _fpsp_dz
92 short 0x0000
93 bra.l _fpsp_inex
94 short 0x0000
95 bra.l _fpsp_fline
96 short 0x0000
97 bra.l _fpsp_unsupp
98 short 0x0000
99 bra.l _fpsp_effadd
100 short 0x0000
# pad the entry-point table out to 0x80 bytes total (9*8 + 56 = 128)
102 space 56
104 ###############################################################
105 global _fpsp_done
# Callout trampoline: jumps through the _off_done slot of the OS
# callout table without clobbering any registers. %d0 is saved, used
# to fetch the callout displacement, the target address is pushed
# with "pea", %d0 is restored from the stack, and "rtd &0x4" pops the
# target into the PC while discarding the saved %d0 slot.
106 _fpsp_done:
107 mov.l %d0,-(%sp) # preserve d0
108 mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = callout displacement
109 pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push callout address
110 mov.l 0x4(%sp),%d0 # restore d0
111 rtd &0x4 # jump to callout; pop saved d0
# The "_real_XXX()" routines below are callout stubs: the FPSP package
# branches to one of these to transfer control to the OS-supplied
# handler whose displacement is stored in the callout table residing
# in the 0x80 bytes before _060FPSP_TABLE. Each stub uses the same
# register-preserving "pea"/"rtd" dispatch sequence as _fpsp_done.
113 global _real_ovfl
114 _real_ovfl:
115 mov.l %d0,-(%sp)
116 mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
117 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
118 mov.l 0x4(%sp),%d0
119 rtd &0x4
121 global _real_unfl
122 _real_unfl:
123 mov.l %d0,-(%sp)
124 mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
125 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
126 mov.l 0x4(%sp),%d0
127 rtd &0x4
129 global _real_inex
130 _real_inex:
131 mov.l %d0,-(%sp)
132 mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
133 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
134 mov.l 0x4(%sp),%d0
135 rtd &0x4
137 global _real_bsun
138 _real_bsun:
139 mov.l %d0,-(%sp)
140 mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
141 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
142 mov.l 0x4(%sp),%d0
143 rtd &0x4
145 global _real_operr
146 _real_operr:
147 mov.l %d0,-(%sp)
148 mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
149 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
150 mov.l 0x4(%sp),%d0
151 rtd &0x4
153 global _real_snan
154 _real_snan:
155 mov.l %d0,-(%sp)
156 mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
157 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
158 mov.l 0x4(%sp),%d0
159 rtd &0x4
161 global _real_dz
162 _real_dz:
163 mov.l %d0,-(%sp)
164 mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
165 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
166 mov.l 0x4(%sp),%d0
167 rtd &0x4
169 global _real_fline
170 _real_fline:
171 mov.l %d0,-(%sp)
172 mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
173 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
174 mov.l 0x4(%sp),%d0
175 rtd &0x4
177 global _real_fpu_disabled
178 _real_fpu_disabled:
179 mov.l %d0,-(%sp)
180 mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
181 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
182 mov.l 0x4(%sp),%d0
183 rtd &0x4
185 global _real_trap
186 _real_trap:
187 mov.l %d0,-(%sp)
188 mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
189 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
190 mov.l 0x4(%sp),%d0
191 rtd &0x4
193 global _real_trace
194 _real_trace:
195 mov.l %d0,-(%sp)
196 mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
197 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
198 mov.l 0x4(%sp),%d0
199 rtd &0x4
201 global _real_access
202 _real_access:
203 mov.l %d0,-(%sp)
204 mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
205 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
206 mov.l 0x4(%sp),%d0
207 rtd &0x4
209 #######################################
# Memory-access callouts: the FPSP calls these stubs to read/write
# user memory through OS-supplied routines (same register-preserving
# "pea"/"rtd" dispatch sequence as the exception callouts above).
211 global _imem_read
212 _imem_read:
213 mov.l %d0,-(%sp)
214 mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
215 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
216 mov.l 0x4(%sp),%d0
217 rtd &0x4
219 global _dmem_read
220 _dmem_read:
221 mov.l %d0,-(%sp)
222 mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
223 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
224 mov.l 0x4(%sp),%d0
225 rtd &0x4
227 global _dmem_write
228 _dmem_write:
229 mov.l %d0,-(%sp)
230 mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
231 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
232 mov.l 0x4(%sp),%d0
233 rtd &0x4
235 global _imem_read_word
236 _imem_read_word:
237 mov.l %d0,-(%sp)
238 mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
239 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
240 mov.l 0x4(%sp),%d0
241 rtd &0x4
243 global _imem_read_long
244 _imem_read_long:
245 mov.l %d0,-(%sp)
246 mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
247 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
248 mov.l 0x4(%sp),%d0
249 rtd &0x4
251 global _dmem_read_byte
252 _dmem_read_byte:
253 mov.l %d0,-(%sp)
254 mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
255 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
256 mov.l 0x4(%sp),%d0
257 rtd &0x4
259 global _dmem_read_word
260 _dmem_read_word:
261 mov.l %d0,-(%sp)
262 mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
263 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
264 mov.l 0x4(%sp),%d0
265 rtd &0x4
267 global _dmem_read_long
268 _dmem_read_long:
269 mov.l %d0,-(%sp)
270 mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
271 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
272 mov.l 0x4(%sp),%d0
273 rtd &0x4
275 global _dmem_write_byte
276 _dmem_write_byte:
277 mov.l %d0,-(%sp)
278 mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
279 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
280 mov.l 0x4(%sp),%d0
281 rtd &0x4
283 global _dmem_write_word
284 _dmem_write_word:
285 mov.l %d0,-(%sp)
286 mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
287 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
288 mov.l 0x4(%sp),%d0
289 rtd &0x4
291 global _dmem_write_long
292 _dmem_write_long:
293 mov.l %d0,-(%sp)
294 mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
295 pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
296 mov.l 0x4(%sp),%d0
297 rtd &0x4
300 # This file contains a set of define statements for constants
301 # in order to promote readability within the corecode itself.
# Offsets are relative to %a6 after "link.w %a6,&-LOCAL_SIZE" in the
# handlers below: positive offsets address the hardware exception
# stack frame (SR/PC/vector-offset/<ea>), negative offsets (via LV)
# address the handler's local save area.
304 set LOCAL_SIZE, 192 # stack frame size(bytes)
305 set LV, -LOCAL_SIZE # stack offset
307 set EXC_SR, 0x4 # stack status register
308 set EXC_PC, 0x6 # stack pc
309 set EXC_VOFF, 0xa # stacked vector offset
310 set EXC_EA, 0xc # stacked <ea>
312 set EXC_FP, 0x0 # frame pointer
314 set EXC_AREGS, -68 # offset of all address regs
315 set EXC_DREGS, -100 # offset of all data regs
316 set EXC_FPREGS, -36 # offset of all fp regs
318 set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
319 set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
320 set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
321 set EXC_A5, EXC_AREGS+(5*4)
322 set EXC_A4, EXC_AREGS+(4*4)
323 set EXC_A3, EXC_AREGS+(3*4)
324 set EXC_A2, EXC_AREGS+(2*4)
325 set EXC_A1, EXC_AREGS+(1*4)
326 set EXC_A0, EXC_AREGS+(0*4)
327 set EXC_D7, EXC_DREGS+(7*4)
328 set EXC_D6, EXC_DREGS+(6*4)
329 set EXC_D5, EXC_DREGS+(5*4)
330 set EXC_D4, EXC_DREGS+(4*4)
331 set EXC_D3, EXC_DREGS+(3*4)
332 set EXC_D2, EXC_DREGS+(2*4)
333 set EXC_D1, EXC_DREGS+(1*4)
334 set EXC_D0, EXC_DREGS+(0*4)
336 set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
337 set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
338 set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
340 set FP_SCR1, LV+80 # fp scratch 1
341 set FP_SCR1_EX, FP_SCR1+0
342 set FP_SCR1_SGN, FP_SCR1+2
343 set FP_SCR1_HI, FP_SCR1+4
344 set FP_SCR1_LO, FP_SCR1+8
346 set FP_SCR0, LV+68 # fp scratch 0
347 set FP_SCR0_EX, FP_SCR0+0
348 set FP_SCR0_SGN, FP_SCR0+2
349 set FP_SCR0_HI, FP_SCR0+4
350 set FP_SCR0_LO, FP_SCR0+8
352 set FP_DST, LV+56 # fp destination operand
353 set FP_DST_EX, FP_DST+0
354 set FP_DST_SGN, FP_DST+2
355 set FP_DST_HI, FP_DST+4
356 set FP_DST_LO, FP_DST+8
358 set FP_SRC, LV+44 # fp source operand
359 set FP_SRC_EX, FP_SRC+0
360 set FP_SRC_SGN, FP_SRC+2
361 set FP_SRC_HI, FP_SRC+4
362 set FP_SRC_LO, FP_SRC+8
364 set USER_FPIAR, LV+40 # FP instr address register
366 set USER_FPSR, LV+36 # FP status register
367 set FPSR_CC, USER_FPSR+0 # FPSR condition codes
368 set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
369 set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
370 set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
372 set USER_FPCR, LV+32 # FP control register
373 set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
374 set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
376 set L_SCR3, LV+28 # integer scratch 3
377 set L_SCR2, LV+24 # integer scratch 2
378 set L_SCR1, LV+20 # integer scratch 1
380 set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
382 set EXC_TEMP2, LV+24 # temporary space
383 set EXC_TEMP, LV+16 # temporary space
385 set DTAG, LV+15 # destination operand type
386 set STAG, LV+14 # source operand type
388 set SPCOND_FLG, LV+10 # flag: special case (see below)
390 set EXC_CC, LV+8 # saved condition codes
391 set EXC_EXTWPTR, LV+4 # saved current PC (active)
392 set EXC_EXTWORD, LV+2 # saved extension word
393 set EXC_CMDREG, LV+2 # saved extension word
394 set EXC_OPWORD, LV+0 # saved operation word
396 ################################
398 # Helpful macros
# Field offsets within an extended-precision operand image held in
# memory: _EX exponent word at 0, _SGN word at 2, _HI/_LO mantissa
# longwords at 4/8. FTEMP/LOCAL additionally name offset 12
# (presumably a guard/round/sticky longword -- see rnd_stky_bit below).
400 set FTEMP, 0 # offsets within an
401 set FTEMP_EX, 0 # extended precision
402 set FTEMP_SGN, 2 # value saved in memory.
403 set FTEMP_HI, 4
404 set FTEMP_LO, 8
405 set FTEMP_GRS, 12
407 set LOCAL, 0 # offsets within an
408 set LOCAL_EX, 0 # extended precision
409 set LOCAL_SGN, 2 # value saved in memory.
410 set LOCAL_HI, 4
411 set LOCAL_LO, 8
412 set LOCAL_GRS, 12
414 set DST, 0 # offsets within an
415 set DST_EX, 0 # extended precision
416 set DST_HI, 4 # value saved in memory.
417 set DST_LO, 8
419 set SRC, 0 # offsets within an
420 set SRC_EX, 0 # extended precision
421 set SRC_HI, 4 # value saved in memory.
422 set SRC_LO, 8
# In-range (unbiased-representable) exponent limits per precision.
424 set SGL_LO, 0x3f81 # min sgl prec exponent
425 set SGL_HI, 0x407e # max sgl prec exponent
426 set DBL_LO, 0x3c01 # min dbl prec exponent
427 set DBL_HI, 0x43fe # max dbl prec exponent
428 set EXT_LO, 0x0 # min ext prec exponent
429 set EXT_HI, 0x7ffe # max ext prec exponent
431 set EXT_BIAS, 0x3fff # extended precision bias
432 set SGL_BIAS, 0x007f # single precision bias
433 set DBL_BIAS, 0x03ff # double precision bias
# Operand-type tags produced by set_tag_x() and stored in STAG/DTAG.
435 set NORM, 0x00 # operand type for STAG/DTAG
436 set ZERO, 0x01 # operand type for STAG/DTAG
437 set INF, 0x02 # operand type for STAG/DTAG
438 set QNAN, 0x03 # operand type for STAG/DTAG
439 set DENORM, 0x04 # operand type for STAG/DTAG
440 set SNAN, 0x05 # operand type for STAG/DTAG
441 set UNNORM, 0x06 # operand type for STAG/DTAG
443 ##################
444 # FPSR/FPCR bits #
445 ##################
# The *_bit values are bit positions within individual FPSR/FPCR
# bytes (used with btst); the *_mask/*_bmask values further down are
# the corresponding longword and byte masks.
446 set neg_bit, 0x3 # negative result
447 set z_bit, 0x2 # zero result
448 set inf_bit, 0x1 # infinite result
449 set nan_bit, 0x0 # NAN result
451 set q_sn_bit, 0x7 # sign bit of quotient byte
453 set bsun_bit, 7 # branch on unordered
454 set snan_bit, 6 # signalling NAN
455 set operr_bit, 5 # operand error
456 set ovfl_bit, 4 # overflow
457 set unfl_bit, 3 # underflow
458 set dz_bit, 2 # divide by zero
459 set inex2_bit, 1 # inexact result 2
460 set inex1_bit, 0 # inexact result 1
462 set aiop_bit, 7 # accrued inexact operation bit
463 set aovfl_bit, 6 # accrued overflow bit
464 set aunfl_bit, 5 # accrued underflow bit
465 set adz_bit, 4 # accrued dz bit
466 set ainex_bit, 3 # accrued inexact bit
468 #############################
469 # FPSR individual bit masks #
470 #############################
471 set neg_mask, 0x08000000 # negative bit mask (lw)
472 set inf_mask, 0x02000000 # infinity bit mask (lw)
473 set z_mask, 0x04000000 # zero bit mask (lw)
474 set nan_mask, 0x01000000 # nan bit mask (lw)
476 set neg_bmask, 0x08 # negative bit mask (byte)
477 set inf_bmask, 0x02 # infinity bit mask (byte)
478 set z_bmask, 0x04 # zero bit mask (byte)
479 set nan_bmask, 0x01 # nan bit mask (byte)
481 set bsun_mask, 0x00008000 # bsun exception mask
482 set snan_mask, 0x00004000 # snan exception mask
483 set operr_mask, 0x00002000 # operr exception mask
484 set ovfl_mask, 0x00001000 # overflow exception mask
485 set unfl_mask, 0x00000800 # underflow exception mask
486 set dz_mask, 0x00000400 # dz exception mask
487 set inex2_mask, 0x00000200 # inex2 exception mask
488 set inex1_mask, 0x00000100 # inex1 exception mask
490 set aiop_mask, 0x00000080 # accrued illegal operation
491 set aovfl_mask, 0x00000040 # accrued overflow
492 set aunfl_mask, 0x00000020 # accrued underflow
493 set adz_mask, 0x00000010 # accrued divide by zero
494 set ainex_mask, 0x00000008 # accrued inexact
496 ######################################
497 # FPSR combinations used in the FPSP #
498 ######################################
499 set dzinf_mask, inf_mask+dz_mask+adz_mask
500 set opnan_mask, nan_mask+operr_mask+aiop_mask
501 set nzi_mask, 0x01ffffff #clears N, Z, and I
502 set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
503 set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
504 set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
505 set inx1a_mask, inex1_mask+ainex_mask
506 set inx2a_mask, inex2_mask+ainex_mask
507 set snaniop_mask, nan_mask+snan_mask+aiop_mask
508 set snaniop2_mask, snan_mask+aiop_mask
509 set naniop_mask, nan_mask+aiop_mask
510 set neginf_mask, neg_mask+inf_mask
511 set infaiop_mask, inf_mask+aiop_mask
512 set negz_mask, neg_mask+z_mask
513 set opaop_mask, operr_mask+aiop_mask
514 set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
515 set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
517 #########
518 # misc. #
519 #########
520 set rnd_stky_bit, 29 # stky bit pos in longword
522 set sign_bit, 0x7 # sign bit
523 set signan_bit, 0x6 # signalling nan bit
525 set sgl_thresh, 0x3f81 # minimum sgl exponent
526 set dbl_thresh, 0x3c01 # minimum dbl exponent
# FPCR rounding precision / rounding mode field encodings.
528 set x_mode, 0x0 # extended precision
529 set s_mode, 0x4 # single precision
530 set d_mode, 0x8 # double precision
532 set rn_mode, 0x0 # round-to-nearest
533 set rz_mode, 0x1 # round-to-zero
534 set rm_mode, 0x2 # round-to-minus-infinity
535 set rp_mode, 0x3 # round-to-plus-infinity
537 set mantissalen, 64 # length of mantissa in bits
539 set BYTE, 1 # len(byte) == 1 byte
540 set WORD, 2 # len(word) == 2 bytes
541 set LONG, 4 # len(longword) == 4 bytes
# Exception vector offsets (vector number * 4).
543 set BSUN_VEC, 0xc0 # bsun vector offset
544 set INEX_VEC, 0xc4 # inexact vector offset
545 set DZ_VEC, 0xc8 # dz vector offset
546 set UNFL_VEC, 0xcc # unfl vector offset
547 set OPERR_VEC, 0xd0 # operr vector offset
548 set OVFL_VEC, 0xd4 # ovfl vector offset
549 set SNAN_VEC, 0xd8 # snan vector offset
551 ###########################
552 # SPecial CONDition FLaGs #
553 ###########################
# Values stored in SPCOND_FLG(%a6); *_flg are the flag-byte values,
# *_bit the corresponding bit positions.
554 set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
555 set fbsun_flg, 0x02 # flag bit: bsun exception
556 set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
557 set mda7_flg, 0x08 # flag bit: -(a7) <ea>
558 set fmovm_flg, 0x40 # flag bit: fmovm instruction
559 set immed_flg, 0x80 # flag bit: &<data> <ea>
561 set ftrapcc_bit, 0x0
562 set fbsun_bit, 0x1
563 set mia7_bit, 0x2
564 set mda7_bit, 0x3
565 set immed_bit, 0x7
567 ##################################
568 # TRANSCENDENTAL "LAST-OP" FLAGS #
569 ##################################
570 set FMUL_OP, 0x0 # fmul instr performed last
571 set FDIV_OP, 0x1 # fdiv performed last
572 set FADD_OP, 0x2 # fadd performed last
573 set FMOV_OP, 0x3 # fmov performed last
575 #############
576 # CONSTANTS #
577 #############
# 16381*log(2) split into lead and trail parts (double-precision pair).
578 T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
579 T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
# pi and pi/2 stored in the same 16-byte layout as FP_SRC/FP_DST
# operands (exponent/sign word, pad, two mantissa longwords, pad).
581 PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
582 PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
584 TWOBYPI:
# 8-byte constant; presumably 2/pi in double precision -- TODO confirm
585 long 0x3FE45F30,0x6DC9C883
587 #########################################################################
588 # XDEF **************************************************************** #
589 # _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
591 # This handler should be the first code executed upon taking the #
592 # FP Overflow exception in an operating system. #
594 # XREF **************************************************************** #
595 # _imem_read_long() - read instruction longword #
596 # fix_skewed_ops() - adjust src operand in fsave frame #
597 # set_tag_x() - determine optype of src/dst operands #
598 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
599 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
600 # load_fpn2() - load dst operand from FP regfile #
601 # fout() - emulate an opclass 3 instruction #
602 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
603 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
604 # _real_ovfl() - "callout" for Overflow exception enabled code #
605 # _real_inex() - "callout" for Inexact exception enabled code #
606 # _real_trace() - "callout" for Trace exception code #
608 # INPUT *************************************************************** #
609 # - The system stack contains the FP Ovfl exception stack frame #
610 # - The fsave frame contains the source operand #
612 # OUTPUT ************************************************************** #
613 # Overflow Exception enabled: #
614 # - The system stack is unchanged #
615 # - The fsave frame contains the adjusted src op for opclass 0,2 #
616 # Overflow Exception disabled: #
617 # - The system stack is unchanged #
618 # - The "exception present" flag in the fsave frame is cleared #
620 # ALGORITHM *********************************************************** #
621 # On the 060, if an FP overflow is present as the result of any #
622 # instruction, the 060 will take an overflow exception whether the #
623 # exception is enabled or disabled in the FPCR. For the disabled case, #
624 # This handler emulates the instruction to determine what the correct #
625 # default result should be for the operation. This default result is #
626 # then stored in either the FP regfile, data regfile, or memory. #
627 # Finally, the handler exits through the "callout" _fpsp_done() #
628 # denoting that no exceptional conditions exist within the machine. #
629 # If the exception is enabled, then this handler must create the #
630 # exceptional operand and place it in the fsave state frame, and store #
631 # the default result (only if the instruction is opclass 3). For #
632 # exceptions enabled, this handler must exit through the "callout" #
633 # _real_ovfl() so that the operating system enabled overflow handler #
634 # can handle this case. #
635 # Two other conditions exist. First, if overflow was disabled #
636 # but the inexact exception was enabled, this handler must exit #
637 # through the "callout" _real_inex() regardless of whether the result #
638 # was inexact. #
639 # Also, in the case of an opclass three instruction where #
640 # overflow was disabled and the trace exception was enabled, this #
641 # handler must exit through the "callout" _real_trace(). #
643 #########################################################################
645 global _fpsp_ovfl
# FP Overflow exception handler -- see the ALGORITHM block above.
# Saves the user FP context, decodes and re-emulates the faulting
# instruction, stores the default result, then exits through
# _fpsp_done, _real_ovfl, _real_inex, or _real_trace as appropriate.
646 _fpsp_ovfl:
648 #$# sub.l &24,%sp # make room for src/dst
650 link.w %a6,&-LOCAL_SIZE # init stack frame
652 fsave FP_SRC(%a6) # grab the "busy" frame
654 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
655 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
656 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
658 # the FPIAR holds the "current PC" of the faulting instruction
659 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
660 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
661 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
662 bsr.l _imem_read_long # fetch the instruction words
663 mov.l %d0,EXC_OPWORD(%a6) # stash opword+ext word for decoding
665 ##############################################################################
667 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
668 bne.w fovfl_out
671 lea FP_SRC(%a6),%a0 # pass: ptr to src op
672 bsr.l fix_skewed_ops # fix src op
674 # since, I believe, only NORMs and DENORMs can come through here,
675 # maybe we can avoid the subroutine call.
676 lea FP_SRC(%a6),%a0 # pass: ptr to src op
677 bsr.l set_tag_x # tag the operand type
678 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
680 # bit five of the fp extension word separates the monadic and dyadic operations
681 # that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
682 # will never take this exception.
683 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
684 beq.b fovfl_extract # monadic
686 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
687 bsr.l load_fpn2 # load dst into FP_DST
689 lea FP_DST(%a6),%a0 # pass: ptr to dst op
690 bsr.l set_tag_x # tag the operand type
691 cmpi.b %d0,&UNNORM # is operand an UNNORM?
692 bne.b fovfl_op2_done # no
693 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
694 fovfl_op2_done:
695 mov.b %d0,DTAG(%a6) # save dst optype tag
697 fovfl_extract:
699 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
700 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
701 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
702 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
703 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
704 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
706 clr.l %d0
707 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
709 mov.b 1+EXC_CMDREG(%a6),%d1
710 andi.w &0x007f,%d1 # extract extension
712 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
714 fmov.l &0x0,%fpcr # zero current control regs
715 fmov.l &0x0,%fpsr
717 lea FP_SRC(%a6),%a0 # pass: ptr to src operand
718 lea FP_DST(%a6),%a1 # pass: ptr to dst operand
720 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
721 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
722 jsr (tbl_unsupp.l,%pc,%d1.l*1) # emulate the instruction
724 # the operation has been emulated. the result is in fp0.
725 # the EXOP, if an exception occurred, is in fp1.
726 # we must save the default result regardless of whether
727 # traps are enabled or disabled.
728 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
729 bsr.l store_fpreg # store default result
731 # the exceptional possibilities we have left ourselves with are ONLY overflow
732 # and inexact. and, the inexact is such that overflow occurred and was disabled
733 # but inexact was enabled.
734 btst &ovfl_bit,FPCR_ENABLE(%a6)
735 bne.b fovfl_ovfl_on
737 btst &inex2_bit,FPCR_ENABLE(%a6)
738 bne.b fovfl_inex_on
740 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
741 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
742 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
744 unlk %a6
745 #$# add.l &24,%sp
746 bra.l _fpsp_done
748 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
749 # in fp1. now, simply jump to _real_ovfl()!
750 fovfl_ovfl_on:
751 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
753 mov.w &0xe005,2+FP_SRC(%a6) # save exc status
755 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
756 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
757 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
759 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
761 unlk %a6
763 bra.l _real_ovfl
765 # overflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
766 # we must jump to real_inex().
767 fovfl_inex_on:
769 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
771 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
772 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
774 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
775 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
776 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
778 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
780 unlk %a6
782 bra.l _real_inex
784 ########################################################################
# Opclass 3 (fmove out) path: emulate the store via fout(), then exit
# through _fpsp_done (possibly via _real_trace if trace is pending).
785 fovfl_out:
788 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
789 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
790 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
792 # the src operand is definitely a NORM(!), so tag it as such
793 mov.b &NORM,STAG(%a6) # set src optype tag
795 clr.l %d0
796 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
798 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
800 fmov.l &0x0,%fpcr # zero current control regs
801 fmov.l &0x0,%fpsr
803 lea FP_SRC(%a6),%a0 # pass ptr to src operand
805 bsr.l fout # emulate the opclass 3 store
807 btst &ovfl_bit,FPCR_ENABLE(%a6)
808 bne.w fovfl_ovfl_on
810 btst &inex2_bit,FPCR_ENABLE(%a6)
811 bne.w fovfl_inex_on
813 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
814 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
815 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
817 unlk %a6
818 #$# add.l &24,%sp
820 btst &0x7,(%sp) # is trace on?
821 beq.l _fpsp_done # no
823 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
824 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
825 bra.l _real_trace
827 #########################################################################
828 # XDEF **************************************************************** #
829 # _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
831 # This handler should be the first code executed upon taking the #
832 # FP Underflow exception in an operating system. #
834 # XREF **************************************************************** #
835 # _imem_read_long() - read instruction longword #
836 # fix_skewed_ops() - adjust src operand in fsave frame #
837 # set_tag_x() - determine optype of src/dst operands #
838 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
839 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
840 # load_fpn2() - load dst operand from FP regfile #
841 # fout() - emulate an opclass 3 instruction #
842 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
843 # _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
844 # _real_unfl() - "callout" for Underflow exception enabled code #
845 # _real_inex() - "callout" for Inexact exception enabled code #
846 # _real_trace() - "callout" for Trace exception code #
848 # INPUT *************************************************************** #
849 # - The system stack contains the FP Unfl exception stack frame #
850 # - The fsave frame contains the source operand #
852 # OUTPUT ************************************************************** #
853 # Underflow Exception enabled: #
854 # - The system stack is unchanged #
855 # - The fsave frame contains the adjusted src op for opclass 0,2 #
856 # Underflow Exception disabled: #
857 # - The system stack is unchanged #
858 # - The "exception present" flag in the fsave frame is cleared #
860 # ALGORITHM *********************************************************** #
861 # On the 060, if an FP underflow is present as the result of any #
862 # instruction, the 060 will take an underflow exception whether the #
863 # exception is enabled or disabled in the FPCR. For the disabled case, #
864 # This handler emulates the instruction to determine what the correct #
865 # default result should be for the operation. This default result is #
866 # then stored in either the FP regfile, data regfile, or memory. #
867 # Finally, the handler exits through the "callout" _fpsp_done() #
868 # denoting that no exceptional conditions exist within the machine. #
869 # If the exception is enabled, then this handler must create the #
870 # exceptional operand and place it in the fsave state frame, and store #
871 # the default result (only if the instruction is opclass 3). For #
872 # exceptions enabled, this handler must exit through the "callout" #
873 # _real_unfl() so that the operating system enabled underflow handler #
874 # can handle this case. #
875 # Two other conditions exist. First, if underflow was disabled #
876 # but the inexact exception was enabled and the result was inexact, #
877 # this handler must exit through the "callout" _real_inex(). #
878 # was inexact. #
879 # Also, in the case of an opclass three instruction where #
880 # underflow was disabled and the trace exception was enabled, this #
881 # handler must exit through the "callout" _real_trace(). #
883 #########################################################################
#########################################################################
# _fpsp_unfl(): 060FPSP Underflow exception handler entry.
# Builds a LOCAL_SIZE exception frame, saves d0-d1/a0-a1, fp0-fp1 and the
# FP control regs, re-fetches the faulting instruction (FPIAR holds its
# PC), then either branches to funfl_out for an opclass 3 "fmove out" or
# re-emulates the opclass 0/2 arithmetic op through tbl_unsupp and
# dispatches on which exceptions (UNFL/INEX2) the user has enabled.
885 global _fpsp_unfl
886 _fpsp_unfl:
888 #$# sub.l &24,%sp # make room for src/dst
890 link.w %a6,&-LOCAL_SIZE # init stack frame
892 fsave FP_SRC(%a6) # grab the "busy" frame
894 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
895 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
896 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
898 # the FPIAR holds the "current PC" of the faulting instruction
899 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
900 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
901 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
902 bsr.l _imem_read_long # fetch the instruction words
903 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
905 ##############################################################################
907 btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
908 bne.w funfl_out
911 lea FP_SRC(%a6),%a0 # pass: ptr to src op
912 bsr.l fix_skewed_ops # fix src op
914 lea FP_SRC(%a6),%a0 # pass: ptr to src op
915 bsr.l set_tag_x # tag the operand type
916 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
918 # bit five of the fp ext word separates the monadic and dyadic operations
919 # that can pass through fpsp_unfl(). remember that fcmp, and ftst
920 # will never take this exception.
921 btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
922 beq.b funfl_extract # monadic
924 # now, what's left that's not dyadic is fsincos. we can distinguish it
925 # from all dyadics by the '0110xxx pattern
926 btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
927 bne.b funfl_extract # yes
929 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
930 bsr.l load_fpn2 # load dst into FP_DST
932 lea FP_DST(%a6),%a0 # pass: ptr to dst op
933 bsr.l set_tag_x # tag the operand type
934 cmpi.b %d0,&UNNORM # is operand an UNNORM?
935 bne.b funfl_op2_done # no
936 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
937 funfl_op2_done:
938 mov.b %d0,DTAG(%a6) # save dst optype tag
940 funfl_extract:
942 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
943 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
944 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
945 #$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
946 #$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
947 #$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
949 clr.l %d0
950 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
952 mov.b 1+EXC_CMDREG(%a6),%d1
953 andi.w &0x007f,%d1 # extract extension
# clear ccodes and exception bits (the emulation routine recomputes them);
# the 0x00ff01ff mask keeps the quotient byte, INEX1, and the aexc byte
955 andi.l &0x00ff01ff,USER_FPSR(%a6)
957 fmov.l &0x0,%fpcr # zero current control regs
958 fmov.l &0x0,%fpsr
960 lea FP_SRC(%a6),%a0
961 lea FP_DST(%a6),%a1
963 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
964 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
965 jsr (tbl_unsupp.l,%pc,%d1.l*1)
967 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
968 bsr.l store_fpreg # store emulated result
970 # The `060 FPU multiplier hardware is such that if the result of a
971 # multiply operation is the smallest possible normalized number
972 # (0x00000000_80000000_00000000), then the machine will take an
973 # underflow exception. Since this is incorrect, we need to check
974 # if our emulation, after re-doing the operation, decided that
975 # no underflow was called for. We do these checks only in
976 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
977 # special case will simply exit gracefully with the correct result.
979 # the exceptional possibilities we have left ourselves with are ONLY underflow
980 # and inexact. and, the inexact is such that underflow occurred and was disabled
981 # but inexact was enabled.
982 btst &unfl_bit,FPCR_ENABLE(%a6)
983 bne.b funfl_unfl_on
985 funfl_chkinex:
986 btst &inex2_bit,FPCR_ENABLE(%a6)
987 bne.b funfl_inex_on
989 funfl_exit:
990 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
991 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
992 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
994 unlk %a6
995 #$# add.l &24,%sp
996 bra.l _fpsp_done
998 # underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
999 # in fp1 (don't forget to save fp0). what to do now?
1000 # well, we simply have to get to go to _real_unfl()!
1001 funfl_unfl_on:
1003 # The `060 FPU multiplier hardware is such that if the result of a
1004 # multiply operation is the smallest possible normalized number
1005 # (0x00000000_80000000_00000000), then the machine will take an
1006 # underflow exception. Since this is incorrect, we check here to see
1007 # if our emulation, after re-doing the operation, decided that
1008 # no underflow was called for.
1009 btst &unfl_bit,FPSR_EXCEPT(%a6) # did emulation really underflow?
1010 beq.w funfl_chkinex # no; check for inexact instead
1012 funfl_unfl_on2:
1013 fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
# 0xe003 = fsave frame status word for a pending UNFL exception
# (same encoding as tbl_except below)
1015 mov.w &0xe003,2+FP_SRC(%a6) # save exc status
1017 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1018 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1019 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1021 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1023 unlk %a6
1025 bra.l _real_unfl
1027 # underflow occurred but is disabled. meanwhile, inexact is enabled. therefore,
1028 # we must jump to real_inex().
1029 funfl_inex_on:
1031 # The `060 FPU multiplier hardware is such that if the result of a
1032 # multiply operation is the smallest possible normalized number
1033 # (0x00000000_80000000_00000000), then the machine will take an
1034 # underflow exception.
1035 # But, whether bogus or not, if inexact is enabled AND it occurred,
1036 # then we have to branch to real_inex.
1038 btst &inex2_bit,FPSR_EXCEPT(%a6) # did emulation set inexact?
1039 beq.w funfl_exit # no; exit normally
1041 funfl_inex_on2:
1043 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
1045 mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
# 0xe001 = fsave frame status word for a pending INEX exception
1046 mov.w &0xe001,2+FP_SRC(%a6) # save exc status
1048 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1049 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1050 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1052 frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
1054 unlk %a6
1056 bra.l _real_inex
1058 #######################################################################
# opclass 3 (fmove out) underflow: emulate the move-out via fout(), then
# dispatch on enabled UNFL/INEX2, or exit (through _real_trace() if the
# caller had trace enabled).
1059 funfl_out:
1062 #$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1063 #$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1064 #$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1066 # the src operand is definitely a NORM(!), so tag it as such
1067 mov.b &NORM,STAG(%a6) # set src optype tag
1069 clr.l %d0
1070 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
1072 and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
1074 fmov.l &0x0,%fpcr # zero current control regs
1075 fmov.l &0x0,%fpsr
1077 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1079 bsr.l fout # emulate the move-out
1081 btst &unfl_bit,FPCR_ENABLE(%a6) # underflow enabled?
1082 bne.w funfl_unfl_on2 # yes; hand off to _real_unfl
1084 btst &inex2_bit,FPCR_ENABLE(%a6) # inexact enabled?
1085 bne.w funfl_inex_on2 # yes; hand off to _real_inex
1087 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
1088 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1089 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1091 unlk %a6
1092 #$# add.l &24,%sp
1094 btst &0x7,(%sp) # is trace on?
1095 beq.l _fpsp_done # no
# trace was on: convert this frame to a Trace frame and call out
1097 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
1098 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
1099 bra.l _real_trace
1101 #########################################################################
1102 # XDEF **************************************************************** #
1103 # _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1104 # Data Type" exception. #
1106 # This handler should be the first code executed upon taking the #
1107 # FP Unimplemented Data Type exception in an operating system. #
1109 # XREF **************************************************************** #
1110 # _imem_read_{word,long}() - read instruction word/longword #
1111 # fix_skewed_ops() - adjust src operand in fsave frame #
1112 # set_tag_x() - determine optype of src/dst operands #
1113 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
1114 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
1115 # load_fpn2() - load dst operand from FP regfile #
1116 # load_fpn1() - load src operand from FP regfile #
1117 # fout() - emulate an opclass 3 instruction #
1118 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1119 # _real_inex() - "callout" to operating system inexact handler #
1120 # _fpsp_done() - "callout" for exit; work all done #
1121 # _real_trace() - "callout" for Trace enabled exception #
1122 # funimp_skew() - adjust fsave src ops to "incorrect" value #
1123 # _real_snan() - "callout" for SNAN exception #
1124 # _real_operr() - "callout" for OPERR exception #
1125 # _real_ovfl() - "callout" for OVFL exception #
1126 # _real_unfl() - "callout" for UNFL exception #
1127 # get_packed() - fetch packed operand from memory #
1129 # INPUT *************************************************************** #
1130 # - The system stack contains the "Unimp Data Type" stk frame #
1131 # - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
1133 # OUTPUT ************************************************************** #
1134 # If Inexact exception (opclass 3): #
1135 # - The system stack is changed to an Inexact exception stk frame #
1136 # If SNAN exception (opclass 3): #
1137 # - The system stack is changed to an SNAN exception stk frame #
1138 # If OPERR exception (opclass 3): #
1139 # - The system stack is changed to an OPERR exception stk frame #
1140 # If OVFL exception (opclass 3): #
1141 # - The system stack is changed to an OVFL exception stk frame #
1142 # If UNFL exception (opclass 3): #
1143 # - The system stack is changed to an UNFL exception stack frame #
1144 # If Trace exception enabled: #
1145 # - The system stack is changed to a Trace exception stack frame #
1146 # Else: (normal case) #
1147 # - Correct result has been stored as appropriate #
1149 # ALGORITHM *********************************************************** #
1150 # Two main instruction types can enter here: (1) DENORM or UNNORM #
1151 # unimplemented data types. These can be either opclass 0,2 or 3 #
1152 # instructions, and (2) PACKED unimplemented data format instructions #
1153 # also of opclasses 0,2, or 3. #
1154 # For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1155 # operand from the fsave state frame and the dst operand (if dyadic) #
1156 # from the FP register file. The instruction is then emulated by #
1157 # choosing an emulation routine from a table of routines indexed by #
1158 # instruction type. Once the instruction has been emulated and result #
1159 # saved, then we check to see if any enabled exceptions resulted from #
1160 # instruction emulation. If none, then we exit through the "callout" #
1161 # _fpsp_done(). If there is an enabled FP exception, then we insert #
1162 # this exception into the FPU in the fsave state frame and then exit #
1163 # through _fpsp_done(). #
1164 # PACKED opclass 0 and 2 is similar in how the instruction is #
1165 # emulated and exceptions handled. The differences occur in how the #
1166 # handler loads the packed op (by calling get_packed() routine) and #
1167 # by the fact that a Trace exception could be pending for PACKED ops. #
1168 # If a Trace exception is pending, then the current exception stack #
1169 # frame is changed to a Trace exception stack frame and an exit is #
1170 # made through _real_trace(). #
1171 # For UNNORM/DENORM opclass 3, the actual move out to memory is #
1172 # performed by calling the routine fout(). If no exception should occur #
1173 # as the result of emulation, then an exit either occurs through #
1174 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1175 # (a Trace stack frame must be created here, too). If an FP exception #
1176 # should occur, then we must create an exception stack frame of that #
1177 # type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1178 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1179 # emulation is performed in a similar manner. #
1181 #########################################################################
1184 # (1) DENORM and UNNORM (unimplemented) data types:
1186 # post-instruction
1187 # *****************
1188 # * EA *
1189 # pre-instruction * *
1190 # ***************** *****************
1191 # * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1192 # ***************** *****************
1193 # * Next * * Next *
1194 # * PC * * PC *
1195 # ***************** *****************
1196 # * SR * * SR *
1197 # ***************** *****************
1199 # (2) PACKED format (unsupported) opclasses two and three:
1200 # *****************
1201 # * EA *
1202 # * *
1203 # *****************
1204 # * 0x2 * 0x0dc *
1205 # *****************
1206 # * Next *
1207 # * PC *
1208 # *****************
1209 # * SR *
1210 # *****************
1212 global _fpsp_unsupp
1213 _fpsp_unsupp:
1215 link.w %a6,&-LOCAL_SIZE # init stack frame
1217 fsave FP_SRC(%a6) # save fp state
1219 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1220 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1221 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
1223 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
1224 bne.b fu_s
1225 fu_u:
1226 mov.l %usp,%a0 # fetch user stack pointer
1227 mov.l %a0,EXC_A7(%a6) # save on stack
1228 bra.b fu_cont
1229 # if the exception is an opclass zero or two unimplemented data type
1230 # exception, then the a7' calculated here is wrong since it doesn't
1231 # stack an ea. however, we don't need an a7' for this case anyways.
1232 fu_s:
1233 lea 0x4+EXC_EA(%a6),%a0 # load old a7'
1234 mov.l %a0,EXC_A7(%a6) # save on stack
1236 fu_cont:
1238 # the FPIAR holds the "current PC" of the faulting instruction
1239 # the FPIAR should be set correctly for ALL exceptions passing through
1240 # this point.
1241 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1242 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
1243 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
1244 bsr.l _imem_read_long # fetch the instruction words
1245 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
1247 ############################
1249 clr.b SPCOND_FLG(%a6) # clear special condition flag
1251 # Separate opclass three (fpn-to-mem) ops since they have a different
1252 # stack frame and protocol.
1253 btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
1254 bne.w fu_out # yes
1256 # Separate packed opclass two instructions.
1257 bfextu EXC_CMDREG(%a6){&0:&6},%d0
1258 cmpi.b %d0,&0x13
1259 beq.w fu_in_pack
1262 # I'm not sure at this point what FPSR bits are valid for this instruction.
1263 # so, since the emulation routines re-create them anyways, zero exception field
1264 andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
1266 fmov.l &0x0,%fpcr # zero current control regs
1267 fmov.l &0x0,%fpsr
1269 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1270 # precision format if the src format was single or double and the
1271 # source data type was an INF, NAN, DENORM, or UNNORM
1272 lea FP_SRC(%a6),%a0 # pass ptr to input
1273 bsr.l fix_skewed_ops
1275 # we don't know whether the src operand or the dst operand (or both) is the
1276 # UNNORM or DENORM. call the function that tags the operand type. if the
1277 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1278 lea FP_SRC(%a6),%a0 # pass: ptr to src op
1279 bsr.l set_tag_x # tag the operand type
1280 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1281 bne.b fu_op2 # no
1282 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1284 fu_op2:
1285 mov.b %d0,STAG(%a6) # save src optype tag
1287 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1289 # bit five of the fp extension word separates the monadic and dyadic operations
1290 # at this point
1291 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1292 beq.b fu_extract # monadic
1293 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1294 beq.b fu_extract # yes, so it's monadic, too
1296 bsr.l load_fpn2 # load dst into FP_DST
1298 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1299 bsr.l set_tag_x # tag the operand type
1300 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1301 bne.b fu_op2_done # no
1302 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1303 fu_op2_done:
1304 mov.b %d0,DTAG(%a6) # save dst optype tag
1306 fu_extract:
1307 clr.l %d0
1308 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1310 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1312 lea FP_SRC(%a6),%a0
1313 lea FP_DST(%a6),%a1
1315 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1316 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1319 # Exceptions in order of precedence:
1320 # BSUN : none
1321 # SNAN : all dyadic ops
1322 # OPERR : fsqrt(-NORM)
1323 # OVFL : all except ftst,fcmp
1324 # UNFL : all except ftst,fcmp
1325 # DZ : fdiv
1326 # INEX2 : all except ftst,fcmp
1327 # INEX1 : none (packed doesn't go through here)
1330 # we determine the highest priority exception(if any) set by the
1331 # emulation routine that has also been enabled by the user.
1332 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
1333 bne.b fu_in_ena # some are enabled
1335 fu_in_cont:
1336 # fcmp and ftst do not store any result.
1337 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1338 andi.b &0x38,%d0 # extract bits 3-5
1339 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1340 beq.b fu_in_exit # yes
1342 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1343 bsr.l store_fpreg # store the result
1345 fu_in_exit:
1347 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1348 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1349 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1351 unlk %a6
1353 bra.l _fpsp_done
1355 fu_in_ena:
1356 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1357 bfffo %d0{&24:&8},%d0 # find highest priority exception
1358 bne.b fu_in_exc # there is at least one set
1361 # No exceptions occurred that were also enabled. Now:
1363 # if (OVFL && ovfl_disabled && inexact_enabled) {
1364 # branch to _real_inex() (even if the result was exact!);
1365 # } else {
1366 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1367 # return;
1370 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1371 beq.b fu_in_cont # no
1373 fu_in_ovflchk:
1374 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1375 beq.b fu_in_cont # no
1376 bra.w fu_in_exc_ovfl # go insert overflow frame
1379 # An exception occurred and that exception was enabled:
1381 # shift enabled exception field into lo byte of d0;
1382 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1383 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1384 # /*
1385 # * this is the case where we must call _real_inex() now or else
1386 # * there will be no other way to pass it the exceptional operand
1387 # */
1388 # call _real_inex();
1389 # } else {
1390 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1393 fu_in_exc:
1394 subi.l &24,%d0 # fix offset to be 0-8
1395 cmpi.b %d0,&0x6 # is exception INEX? (6)
1396 bne.b fu_in_exc_exit # no
1398 # the enabled exception was inexact
1399 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1400 bne.w fu_in_exc_unfl # yes
1401 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1402 bne.w fu_in_exc_ovfl # yes
1404 # here, we insert the correct fsave status value into the fsave frame for the
1405 # corresponding exception. the operand in the fsave frame should be the original
1406 # src operand.
1407 fu_in_exc_exit:
1408 mov.l %d0,-(%sp) # save d0
1409 bsr.l funimp_skew # skew sgl or dbl inputs
1410 mov.l (%sp)+,%d0 # restore d0
1412 mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1414 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1415 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1416 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1418 frestore FP_SRC(%a6) # restore src op
1420 unlk %a6
1422 bra.l _fpsp_done
1424 tbl_except:
1425 short 0xe000,0xe006,0xe004,0xe005
1426 short 0xe003,0xe002,0xe001,0xe001
1428 fu_in_exc_unfl:
1429 mov.w &0x4,%d0
1430 bra.b fu_in_exc_exit
1431 fu_in_exc_ovfl:
1432 mov.w &0x03,%d0
1433 bra.b fu_in_exc_exit
1435 # If the input operand to this operation was opclass two and a single
1436 # or double precision denorm, inf, or nan, the operand needs to be
1437 # "corrected" in order to have the proper equivalent extended precision
1438 # number.
#
# fix_skewed_ops():
# In:  a0 = ptr to extended-precision operand image (LOCAL_EX/HI/LO)
# Out: operand fixed in place; d0 clobbered (exponent scratch)
# Only opclass 2 w/ sgl (0x11) or dbl (0x15) src formats are touched;
# all other encodings return immediately.
#
# FIX: each terminal case below must end in "rts".  The rts instructions
# were missing here, so fso_sgl_dnrm fell through into fso_zero (wiping
# the exponent it had just computed) and onward into fso_infnan.
# Restored to match the distributed 060SP source.
1439 global fix_skewed_ops
1440 fix_skewed_ops:
1441 bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1442 cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
1443 beq.b fso_sgl # yes
1444 cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
1445 beq.b fso_dbl # yes
1446 rts # no
1448 fso_sgl:
1449 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1450 andi.w &0x7fff,%d0 # strip sign
1451 cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
1452 beq.b fso_sgl_dnrm_zero # yes
1453 cmpi.w %d0,&0x407f # no; is |exp| == $407f?
1454 beq.b fso_infnan # yes
1455 rts # no
1457 fso_sgl_dnrm_zero:
1458 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1459 beq.b fso_zero # it's a skewed zero
1460 fso_sgl_dnrm:
1461 # here, we count on norm not to alter a0...
1462 bsr.l norm # normalize mantissa
1463 neg.w %d0 # -shft amt
1464 addi.w &0x3f81,%d0 # adjust new exponent
1465 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1466 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1467 rts
1469 fso_zero:
1470 andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
1471 rts
1473 fso_infnan:
1474 andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
1475 ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
1476 rts
1478 fso_dbl:
1479 mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
1480 andi.w &0x7fff,%d0 # strip sign
1481 cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
1482 beq.b fso_dbl_dnrm_zero # yes
1483 cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
1484 beq.b fso_infnan # yes
1485 rts # no
1487 fso_dbl_dnrm_zero:
1488 andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1489 bne.b fso_dbl_dnrm # it's a skewed denorm
1490 tst.l LOCAL_LO(%a0) # is it a zero?
1491 beq.b fso_zero # yes
1492 fso_dbl_dnrm:
1493 # here, we count on norm not to alter a0...
1494 bsr.l norm # normalize mantissa
1495 neg.w %d0 # -shft amt
1496 addi.w &0x3c01,%d0 # adjust new exponent
1497 andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
1498 or.w %d0,LOCAL_EX(%a0) # insert new exponent
1499 rts
1501 #################################################################
1503 # fmove out took an unimplemented data type exception.
1504 # the src operand is in FP_SRC. Call _fout() to write out the result and
1505 # to determine which exceptions, if any, to take.
1506 fu_out:
1508 # Separate packed move outs from the UNNORM and DENORM move outs.
1509 bfextu EXC_CMDREG(%a6){&3:&3},%d0
1510 cmpi.b %d0,&0x3
1511 beq.w fu_out_pack
1512 cmpi.b %d0,&0x7
1513 beq.w fu_out_pack
1516 # I'm not sure at this point what FPSR bits are valid for this instruction.
1517 # so, since the emulation routines re-create them anyways, zero exception field.
1518 # fmove out doesn't affect ccodes.
1519 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
1521 fmov.l &0x0,%fpcr # zero current control regs
1522 fmov.l &0x0,%fpsr
1524 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1525 # call here. just figure out what it is...
1526 mov.w FP_SRC_EX(%a6),%d0 # get exponent
1527 andi.w &0x7fff,%d0 # strip sign
1528 beq.b fu_out_denorm # it's a DENORM
1530 lea FP_SRC(%a6),%a0
1531 bsr.l unnorm_fix # yes; fix it
1533 mov.b %d0,STAG(%a6)
1535 bra.b fu_out_cont
1536 fu_out_denorm:
1537 mov.b &DENORM,STAG(%a6)
1538 fu_out_cont:
1540 clr.l %d0
1541 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1543 lea FP_SRC(%a6),%a0 # pass ptr to src operand
1545 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
1546 bsr.l fout # call fmove out routine
1548 # Exceptions in order of precedence:
1549 # BSUN : none
1550 # SNAN : none
1551 # OPERR : fmove.{b,w,l} out of large UNNORM
1552 # OVFL : fmove.{s,d}
1553 # UNFL : fmove.{s,d,x}
1554 # DZ : none
1555 # INEX2 : all
1556 # INEX1 : none (packed doesn't travel through here)
1558 # determine the highest priority exception(if any) set by the
1559 # emulation routine that has also been enabled by the user.
1560 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1561 bne.w fu_out_ena # some are enabled
1563 fu_out_done:
1565 mov.l EXC_A6(%a6),(%a6) # in case a6 changed
1567 # on extended precision opclass three instructions using pre-decrement or
1568 # post-increment addressing mode, the address register is not updated. if the
1569 # address register was the stack pointer used from user mode, then let's update
1570 # it here. if it was used from supervisor mode, then we have to handle this
1571 # as a special case.
1572 btst &0x5,EXC_SR(%a6)
1573 bne.b fu_out_done_s
1575 mov.l EXC_A7(%a6),%a0 # restore a7
1576 mov.l %a0,%usp
1578 fu_out_done_cont:
1579 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1580 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1581 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1583 unlk %a6
1585 btst &0x7,(%sp) # is trace on?
1586 bne.b fu_out_trace # yes
1588 bra.l _fpsp_done
1590 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1591 # ("fmov.x fpm,-(a7)") if so,
1592 fu_out_done_s:
1593 cmpi.b SPCOND_FLG(%a6),&mda7_flg
1594 bne.b fu_out_done_cont
1596 # the extended precision result is still in fp0. but, we need to save it
1597 # somewhere on the stack until we can copy it to its final resting place.
1598 # here, we're counting on the top of the stack to be the old place-holders
1599 # for fp0/fp1 which have already been restored. that way, we can write
1600 # over those destinations with the shifted stack frame.
1601 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1603 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1604 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1605 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1607 mov.l (%a6),%a6 # restore frame pointer
1609 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1610 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1612 # now, copy the result to the proper place on the stack
1613 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1614 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1615 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1617 add.l &LOCAL_SIZE-0x8,%sp
1619 btst &0x7,(%sp)
1620 bne.b fu_out_trace
1622 bra.l _fpsp_done
1624 fu_out_ena:
1625 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
1626 bfffo %d0{&24:&8},%d0 # find highest priority exception
1627 bne.b fu_out_exc # there is at least one set
1629 # no exceptions were set.
1630 # if a disabled overflow occurred and inexact was enabled but the result
1631 # was exact, then a branch to _real_inex() is made.
1632 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1633 beq.w fu_out_done # no
1635 fu_out_ovflchk:
1636 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1637 beq.w fu_out_done # no
1638 bra.w fu_inex # yes
1641 # The fp move out that took the "Unimplemented Data Type" exception was
1642 # being traced. Since the stack frames are similar, get the "current" PC
1643 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1645 # UNSUPP FRAME TRACE FRAME
1646 # ***************** *****************
1647 # * EA * * Current *
1648 # * * * PC *
1649 # ***************** *****************
1650 # * 0x3 * 0x0dc * * 0x2 * 0x024 *
1651 # ***************** *****************
1652 # * Next * * Next *
1653 # * PC * * PC *
1654 # ***************** *****************
1655 # * SR * * SR *
1656 # ***************** *****************
1658 fu_out_trace:
1659 mov.w &0x2024,0x6(%sp)
1660 fmov.l %fpiar,0x8(%sp)
1661 bra.l _real_trace
1663 # an exception occurred and that exception was enabled.
1664 fu_out_exc:
1665 subi.l &24,%d0 # fix offset to be 0-8
1667 # we don't mess with the existing fsave frame. just re-insert it and
1668 # jump to the "_real_{}()" handler...
1669 mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0
1670 jmp (tbl_fu_out.b,%pc,%d0.w*1)
1672 swbeg &0x8
1673 tbl_fu_out:
1674 short tbl_fu_out - tbl_fu_out # BSUN can't happen
1675 short tbl_fu_out - tbl_fu_out # SNAN can't happen
1676 short fu_operr - tbl_fu_out # OPERR
1677 short fu_ovfl - tbl_fu_out # OVFL
1678 short fu_unfl - tbl_fu_out # UNFL
1679 short tbl_fu_out - tbl_fu_out # DZ can't happen
1680 short fu_inex - tbl_fu_out # INEX2
1681 short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1683 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1684 # frestore it.
1685 fu_snan:
1686 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1687 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1688 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1690 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
1691 mov.w &0xe006,2+FP_SRC(%a6)
1693 frestore FP_SRC(%a6)
1695 unlk %a6
1698 bra.l _real_snan
1700 fu_operr:
1701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1705 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
1706 mov.w &0xe004,2+FP_SRC(%a6)
1708 frestore FP_SRC(%a6)
1710 unlk %a6
1713 bra.l _real_operr
1715 fu_ovfl:
1716 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1718 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1719 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1720 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1722 mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
1723 mov.w &0xe005,2+FP_SRC(%a6)
1725 frestore FP_SRC(%a6) # restore EXOP
1727 unlk %a6
1729 bra.l _real_ovfl
1731 # underflow can happen for extended precision. extended precision opclass
1732 # three instruction exceptions don't update the stack pointer. so, if the
1733 # exception occurred from user mode, then simply update a7 and exit normally.
1734 # if the exception occurred from supervisor mode, check if the <ea> mode
# was -(a7); if so, the exception stack frame must be shifted (see fu_unfl_s).
1735 fu_unfl:
1736 mov.l EXC_A6(%a6),(%a6) # restore a6
1738 btst &0x5,EXC_SR(%a6)
1739 bne.w fu_unfl_s
1741 mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
1742 mov.l %a0,%usp # to or not...
1744 fu_unfl_cont:
1745 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1747 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1748 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1749 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1751 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1752 mov.w &0xe003,2+FP_SRC(%a6)
1754 frestore FP_SRC(%a6) # restore EXOP
1756 unlk %a6
1758 bra.l _real_unfl
1760 fu_unfl_s:
1761 cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1762 bne.b fu_unfl_cont
1764 # the extended precision result is still in fp0. but, we need to save it
1765 # somewhere on the stack until we can copy it to its final resting place
1766 # (where the exc frame is currently). make sure it's not at the top of the
1767 # frame or it will get overwritten when the exc stack frame is shifted "down".
1768 fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
1769 fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
1771 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1772 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1773 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1775 mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
1776 mov.w &0xe003,2+FP_DST(%a6)
1778 frestore FP_DST(%a6) # restore EXOP
1780 mov.l (%a6),%a6 # restore frame pointer
1782 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1783 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1784 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1786 # now, copy the result to the proper place on the stack
1787 mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1788 mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1789 mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1791 add.l &LOCAL_SIZE-0x8,%sp
1793 bra.l _real_unfl
1795 # fmove in and out enter here.
1796 fu_inex:
1797 fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
1799 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1800 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1801 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1803 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
1804 mov.w &0xe001,2+FP_SRC(%a6)
1806 frestore FP_SRC(%a6) # restore EXOP
1808 unlk %a6
1811 bra.l _real_inex
1813 #########################################################################
1814 #########################################################################
# Unimplemented data type, opclass 0/2, PACKED source operand:
# convert the packed operand to extended, tag src (and dst for dyadic
# ops), then dispatch to the emulation routine through tbl_unsupp.
1815 fu_in_pack:
1818 # I'm not sure at this point what FPSR bits are valid for this instruction.
1819 # so, since the emulation routines re-create them anyways, zero exception field
1820 andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1822 fmov.l &0x0,%fpcr # zero current control regs
1823 fmov.l &0x0,%fpsr
1825 bsr.l get_packed # fetch packed src operand
1827 lea FP_SRC(%a6),%a0 # pass ptr to src
1828 bsr.l set_tag_x # set src optype tag
1830 mov.b %d0,STAG(%a6) # save src optype tag
1832 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1834 # bit five of the fp extension word separates the monadic and dyadic operations
1835 # at this point
1836 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1837 beq.b fu_extract_p # monadic
# ftst has bit 5 set but takes no destination operand, so treat it
# as monadic, too
1838 cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1839 beq.b fu_extract_p # yes, so it's monadic, too
1841 bsr.l load_fpn2 # load dst into FP_DST
1843 lea FP_DST(%a6),%a0 # pass: ptr to dst op
1844 bsr.l set_tag_x # tag the operand type
1845 cmpi.b %d0,&UNNORM # is operand an UNNORM?
1846 bne.b fu_op2_done_p # no
1847 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1848 fu_op2_done_p:
1849 mov.b %d0,DTAG(%a6) # save dst optype tag
1852 fu_extract_p:
1852 clr.l %d0
1853 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
# d1 = instruction extension bits; used as index into the emulation
# routine table (entries are 4 bytes, hence the *4 scale below)
1855 bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1857 lea FP_SRC(%a6),%a0
1858 lea FP_DST(%a6),%a1
1860 mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1861 jsr (tbl_unsupp.l,%pc,%d1.l*1)
1864 # Exceptions in order of precedence:
1865 # BSUN : none
1866 # SNAN : all dyadic ops
1867 # OPERR : fsqrt(-NORM)
1868 # OVFL : all except ftst,fcmp
1869 # UNFL : all except ftst,fcmp
1870 # DZ : fdiv
1871 # INEX2 : all except ftst,fcmp
1872 # INEX1 : all
1875 # we determine the highest priority exception(if any) set by the
1876 # emulation routine that has also been enabled by the user.
1877 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1878 bne.w fu_in_ena_p # some are enabled
# No exceptions enabled: store the emulated result (unless the op is
# fcmp/ftst, which store nothing) and exit normally.
1880 fu_in_cont_p:
1881 # fcmp and ftst do not store any result.
1882 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
# fcmp/ftst share extension-bit pattern 0b111000 in bits 3-5
1883 andi.b &0x38,%d0 # extract bits 3-5
1884 cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1885 beq.b fu_in_exit_p # yes
1887 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1888 bsr.l store_fpreg # store the result
# Normal (no enabled exception) exit path. User mode must write back a
# possibly-updated a7 to the USP; supervisor mode with (a7)+ source
# addressing must instead compact the exception stack frame.
1890 fu_in_exit_p:
1892 btst &0x5,EXC_SR(%a6) # user or supervisor?
1893 bne.w fu_in_exit_s_p # supervisor
1895 mov.l EXC_A7(%a6),%a0 # update user a7
1896 mov.l %a0,%usp
1898 fu_in_exit_cont_p:
1899 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1900 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1901 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1903 unlk %a6 # unravel stack frame
1905 btst &0x7,(%sp) # is trace on?
1906 bne.w fu_trace_p # yes
1908 bra.l _fpsp_done # exit to os
1910 # the exception occurred in supervisor mode. check to see if the
1911 # addressing mode was (a7)+. if so, we'll need to shift the
1912 # stack frame "up".
1913 fu_in_exit_s_p:
1914 btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1915 beq.b fu_in_exit_cont_p # no
1917 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1918 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1919 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1921 unlk %a6 # unravel stack frame
1923 # shift the stack frame "up". we don't really care about the <ea> field.
# move SR/PC up by 0xc bytes: the (a7)+ read consumed 12 bytes of
# supervisor stack, so the frame must follow it
1924 mov.l 0x4(%sp),0x10(%sp)
1925 mov.l 0x0(%sp),0xc(%sp)
1926 add.l &0xc,%sp
1928 btst &0x7,(%sp) # is trace on?
1929 bne.w fu_trace_p # yes
1931 bra.l _fpsp_done # exit to os
# Some exceptions are enabled in the FPCR: intersect with the ones the
# emulation routine actually set and pick the highest priority one.
1933 fu_in_ena_p:
1934 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1935 bfffo %d0{&24:&8},%d0 # find highest priority exception
1936 bne.b fu_in_exc_p # at least one was set
1939 # No exceptions occurred that were also enabled. Now:
1941 # if (OVFL && ovfl_disabled && inexact_enabled) {
1942 # branch to _real_inex() (even if the result was exact!);
1943 # } else {
1944 # save the result in the proper fp reg (unless the op is fcmp or ftst);
1945 # return;
1948 btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1949 beq.w fu_in_cont_p # no
1951 fu_in_ovflchk_p:
1952 btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1953 beq.w fu_in_cont_p # no
1954 bra.w fu_in_exc_ovfl_p # do _real_inex() now
1957 # An exception occurred and that exception was enabled:
1959 # shift enabled exception field into lo byte of d0;
1960 # if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1961 # ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1962 # /*
1963 # * this is the case where we must call _real_inex() now or else
1964 # * there will be no other way to pass it the exceptional operand
1965 # */
1966 # call _real_inex();
1967 # } else {
1968 # restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1971 fu_in_exc_p:
# bfffo on {24:8} returned a bit offset in 24-31; normalize to 0-7
1972 subi.l &24,%d0 # fix offset to be 0-7
1973 cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1974 blt.b fu_in_exc_exit_p # no
1976 # the enabled exception was inexact
1977 btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1978 bne.w fu_in_exc_unfl_p # yes
1979 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1980 bne.w fu_in_exc_ovfl_p # yes
1982 # here, we insert the correct fsave status value into the fsave frame for the
1983 # corresponding exception. the operand in the fsave frame should be the original
1984 # src operand.
1985 # as a reminder for future predicted pain and agony, we are passing in fsave the
1986 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1987 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1988 fu_in_exc_exit_p:
1989 btst &0x5,EXC_SR(%a6) # user or supervisor?
1990 bne.w fu_in_exc_exit_s_p # supervisor
1992 mov.l EXC_A7(%a6),%a0 # update user a7
1993 mov.l %a0,%usp
1995 fu_in_exc_exit_cont_p:
# d0 = priority index (0-7); look up the matching fsave status word
1996 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1998 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1999 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2000 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2002 frestore FP_SRC(%a6) # restore src op
2004 unlk %a6
2006 btst &0x7,(%sp) # is trace enabled?
2007 bne.w fu_trace_p # yes
2009 bra.l _fpsp_done
# fsave status words indexed by exception priority
# (BSUN,SNAN,OPERR,OVFL,UNFL,DZ,INEX2,INEX1)
2011 tbl_except_p:
2012 short 0xe000,0xe006,0xe004,0xe005
2013 short 0xe003,0xe002,0xe001,0xe001
# force an OVFL (index 3) or UNFL (index 4) frame instead of INEX
2015 fu_in_exc_ovfl_p:
2016 mov.w &0x3,%d0
2017 bra.w fu_in_exc_exit_p
2019 fu_in_exc_unfl_p:
2020 mov.w &0x4,%d0
2021 bra.w fu_in_exc_exit_p
# Supervisor-mode variant of the enabled-exception exit: if the source
# addressing mode was (a7)+, compact the frame by 0xc bytes as in
# fu_in_exit_s_p before leaving.
2023 fu_in_exc_exit_s_p:
2024 btst &mia7_bit,SPCOND_FLG(%a6)
2025 beq.b fu_in_exc_exit_cont_p
2027 mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2030 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2033 frestore FP_SRC(%a6) # restore src op
2035 unlk %a6 # unravel stack frame
2037 # shift stack frame "up". who cares about <ea> field.
2038 mov.l 0x4(%sp),0x10(%sp)
2039 mov.l 0x0(%sp),0xc(%sp)
2040 add.l &0xc,%sp
2042 btst &0x7,(%sp) # is trace on?
2043 bne.b fu_trace_p # yes
2045 bra.l _fpsp_done # exit to os
2048 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2049 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2050 # trace stack frame then jump to _real_trace().
2052 # UNSUPP FRAME TRACE FRAME
2053 # ***************** *****************
2054 # * EA * * Current *
2055 # * * * PC *
2056 # ***************** *****************
2057 # * 0x2 * 0x0dc * * 0x2 * 0x024 *
2058 # ***************** *****************
2059 # * Next * * Next *
2060 # * PC * * PC *
2061 # ***************** *****************
2062 # * SR * * SR *
2063 # ***************** *****************
2064 fu_trace_p:
# convert the frame in place: format 0x2, vector offset 0x024 (Trace)
2065 mov.w &0x2024,0x6(%sp)
# the "Current PC" slot of a Trace frame gets the FPIAR value
2066 fmov.l %fpiar,0x8(%sp)
2068 bra.l _real_trace
2070 #########################################################
2071 #########################################################
# Unimplemented data type, opclass 3 (fmove out), PACKED destination:
# load the source fp register, tag it, then call fout to perform the
# packed store and check what exceptions the emulation raised.
2072 fu_out_pack:
2075 # I'm not sure at this point what FPSR bits are valid for this instruction.
2076 # so, since the emulation routines re-create them anyways, zero exception field.
2077 # fmove out doesn't affect ccodes.
2078 and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2080 fmov.l &0x0,%fpcr # zero current control regs
2081 fmov.l &0x0,%fpsr
2083 bfextu EXC_CMDREG(%a6){&6:&3},%d0
2084 bsr.l load_fpn1
2086 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2087 # able to detect all operand types.
2088 lea FP_SRC(%a6),%a0
2089 bsr.l set_tag_x # tag the operand type
2090 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2091 bne.b fu_op2_p # no
2092 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2094 fu_op2_p:
2095 mov.b %d0,STAG(%a6) # save src optype tag
2097 clr.l %d0
2098 mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2100 lea FP_SRC(%a6),%a0 # pass ptr to src operand
# fout may store through -(a7)/(a7)+ and can clobber the frame pointer
2102 mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2103 bsr.l fout # call fmove out routine
2105 # Exceptions in order of precedence:
2106 # BSUN : no
2107 # SNAN : yes
2108 # OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2109 # OVFL : no
2110 # UNFL : no
2111 # DZ : no
2112 # INEX2 : yes
2113 # INEX1 : no
2115 # determine the highest priority exception(if any) set by the
2116 # emulation routine that has also been enabled by the user.
2117 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2118 bne.w fu_out_ena_p # some are enabled
# Normal fmove-out exit. Supervisor mode with -(a7) destination must
# shift the exception frame "down" 0xc bytes so the stored result,
# already written over the old frame area, survives the rte.
2120 fu_out_exit_p:
2121 mov.l EXC_A6(%a6),(%a6) # restore a6
2123 btst &0x5,EXC_SR(%a6) # user or supervisor?
2124 bne.b fu_out_exit_s_p # supervisor
2126 mov.l EXC_A7(%a6),%a0 # update user a7
2127 mov.l %a0,%usp
2129 fu_out_exit_cont_p:
2130 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2131 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2132 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2134 unlk %a6 # unravel stack frame
2136 btst &0x7,(%sp) # is trace on?
2137 bne.w fu_trace_p # yes
2139 bra.l _fpsp_done # exit to os
2141 # the exception occurred in supervisor mode. check to see if the
2142 # addressing mode was -(a7). if so, we'll need to shift the
2143 # stack frame "down".
2144 fu_out_exit_s_p:
2145 btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2146 beq.b fu_out_exit_cont_p # no
2148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2152 mov.l (%a6),%a6 # restore frame pointer
# move SR/PC down 0xc bytes, then copy the 12-byte result over the
# old frame location
2154 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2155 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2157 # now, copy the result to the proper place on the stack
2158 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2159 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2160 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2162 add.l &LOCAL_SIZE-0x8,%sp
2164 btst &0x7,(%sp)
2165 bne.w fu_trace_p
2167 bra.l _fpsp_done
# Some exceptions are enabled: intersect with those the fout emulation
# set. Only INEX, OPERR, and SNAN are possible on packed move out.
2169 fu_out_ena_p:
2170 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2171 bfffo %d0{&24:&8},%d0 # find highest priority exception
2172 beq.w fu_out_exit_p
2174 mov.l EXC_A6(%a6),(%a6) # restore a6
2176 # an exception occurred and that exception was enabled.
2177 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2178 fu_out_exc_p:
# d0 is the raw bfffo offset: 0x1a selects OPERR; greater means a
# lower-priority exception (INEX); less means SNAN
2179 cmpi.b %d0,&0x1a
2180 bgt.w fu_inex_p2
2181 beq.w fu_operr_p
# Enabled SNAN on packed fmove out. The supervisor -(a7) case must
# relocate the exception frame down 12 bytes so the default result can
# be stored where the frame used to be; otherwise defer to fu_snan.
2183 fu_snan_p:
2184 btst &0x5,EXC_SR(%a6)
2185 bne.b fu_snan_s_p
2187 mov.l EXC_A7(%a6),%a0
2188 mov.l %a0,%usp
2189 bra.w fu_snan
2191 fu_snan_s_p:
2192 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2193 bne.w fu_snan
2195 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2196 # the strategy is to move the exception frame "down" 12 bytes. then, we
2197 # can store the default result where the exception frame was.
2198 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2199 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2200 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2202 mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8 (FP SNAN)
2203 mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2205 frestore FP_SRC(%a6) # restore src operand
2207 mov.l (%a6),%a6 # restore frame pointer
2209 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2210 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2211 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2213 # now, we copy the default result to it's proper location
2214 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2215 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2216 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2218 add.l &LOCAL_SIZE-0x8,%sp
2221 bra.l _real_snan
# Enabled OPERR on packed fmove out; same frame-relocation strategy as
# fu_snan_p but with the OPERR vector (0xd0) and fsave status 0xe004.
2223 fu_operr_p:
2224 btst &0x5,EXC_SR(%a6)
2225 bne.w fu_operr_p_s
2227 mov.l EXC_A7(%a6),%a0
2228 mov.l %a0,%usp
2229 bra.w fu_operr
2231 fu_operr_p_s:
2232 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2233 bne.w fu_operr
2235 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2236 # the strategy is to move the exception frame "down" 12 bytes. then, we
2237 # can store the default result where the exception frame was.
2238 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2239 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2240 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2242 mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2243 mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2245 frestore FP_SRC(%a6) # restore src operand
2247 mov.l (%a6),%a6 # restore frame pointer
2249 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2250 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2251 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2253 # now, we copy the default result to it's proper location
2254 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2255 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2256 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2258 add.l &LOCAL_SIZE-0x8,%sp
2261 bra.l _real_operr
# Enabled INEX on packed fmove out; same frame-relocation strategy as
# fu_snan_p but with the inexact vector (0xc4) and fsave status 0xe001.
2263 fu_inex_p2:
2264 btst &0x5,EXC_SR(%a6)
2265 bne.w fu_inex_s_p2
2267 mov.l EXC_A7(%a6),%a0
2268 mov.l %a0,%usp
2269 bra.w fu_inex
2271 fu_inex_s_p2:
2272 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2273 bne.w fu_inex
2275 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2276 # the strategy is to move the exception frame "down" 12 bytes. then, we
2277 # can store the default result where the exception frame was.
2278 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2279 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2280 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2282 mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2283 mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2285 frestore FP_SRC(%a6) # restore src operand
2287 mov.l (%a6),%a6 # restore frame pointer
2289 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2290 mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2291 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2293 # now, we copy the default result to it's proper location
2294 mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2295 mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2296 mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2298 add.l &LOCAL_SIZE-0x8,%sp
2301 bra.l _real_inex
2303 #########################################################################
2306 # if we're stuffing a source operand back into an fsave frame then we
2307 # have to make sure that for single or double source operands that the
2308 # format stuffed is as weird as the hardware usually makes it.
#
# funimp_skew(): rewrite a DENORM source operand in FP_SRC(%a6) into the
# "skewed" form the 060 hardware itself would have produced for a single
# or double precision source. NORMs and non-sgl/dbl sources are left
# untouched. Clobbers d0/d1/a0 (dbl path also calls dnrm_lp).
2310 global funimp_skew
2311 funimp_skew:
2312 bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2313 cmpi.b %d0,&0x1 # was src sgl?
2314 beq.b funimp_skew_sgl # yes
2315 cmpi.b %d0,&0x5 # was src dbl?
2316 beq.b funimp_skew_dbl # yes
# NOTE(review): the original 060SP source has an `rts` here; the line
# appears to have been dropped in this extract -- verify against upstream.
2319 funimp_skew_sgl:
2320 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2321 andi.w &0x7fff,%d0 # strip sign
2322 beq.b funimp_skew_sgl_not
2323 cmpi.w %d0,&0x3f80
2324 bgt.b funimp_skew_sgl_not
# exponent is in the sgl-denorm range: shift the mantissa right and
# force the biased exponent to 0x3f80 with the original sign
2325 neg.w %d0 # make exponent negative
2326 addi.w &0x3f81,%d0 # find amt to shift
2327 mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2328 lsr.l %d0,%d1 # shift it
2329 bset &31,%d1 # set j-bit
2330 mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2331 andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2332 ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2333 funimp_skew_sgl_not:
# NOTE(review): upstream has an `rts` here (dropped in this extract).
2336 funimp_skew_dbl:
2337 mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2338 andi.w &0x7fff,%d0 # strip sign
2339 beq.b funimp_skew_dbl_not
2340 cmpi.w %d0,&0x3c00
2341 bgt.b funimp_skew_dbl_not
# convert to the package's "internal format" (sign byte separate),
# denormalize down to the dbl threshold, then rebuild exponent/sign
2343 tst.b FP_SRC_EX(%a6) # make "internal format"
2344 smi.b 0x2+FP_SRC(%a6)
2345 mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2346 clr.l %d0 # clear g,r,s
2347 lea FP_SRC(%a6),%a0 # pass ptr to src op
2348 mov.w &0x3c01,%d1 # pass denorm threshold
2349 bsr.l dnrm_lp # denorm it
2350 mov.w &0x3c00,%d0 # new exponent
2351 tst.b 0x2+FP_SRC(%a6) # is sign set?
2352 beq.b fss_dbl_denorm_done # no
2353 bset &15,%d0 # set sign
2354 fss_dbl_denorm_done:
2355 bset &0x7,FP_SRC_HI(%a6) # set j-bit
2356 mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2357 funimp_skew_dbl_not:
# NOTE(review): upstream has an `rts` here (dropped in this extract).
2360 #########################################################################
# _mem_write2(): write helper for opclass-3 stores. In user mode, defer
# to _dmem_write. In supervisor mode, the data is instead captured into
# FP_DST in the exception frame (the caller relocates it onto the stack
# later) and d1 is cleared to signal success.
2361 global _mem_write2
2362 _mem_write2:
2363 btst &0x5,EXC_SR(%a6) # supervisor mode?
2364 beq.l _dmem_write # no; normal data write
2365 mov.l 0x0(%a0),FP_DST_EX(%a6)
2366 mov.l 0x4(%a0),FP_DST_HI(%a6)
2367 mov.l 0x8(%a0),FP_DST_LO(%a6)
2368 clr.l %d1
# NOTE(review): upstream ends this routine with `rts` (line dropped in
# this extract) -- verify against the original 060SP source.
2371 #########################################################################
2372 # XDEF **************************************************************** #
2373 # _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2374 # effective address" exception. #
2376 # This handler should be the first code executed upon taking the #
2377 # FP Unimplemented Effective Address exception in an operating #
2378 # system. #
2380 # XREF **************************************************************** #
2381 # _imem_read_long() - read instruction longword #
2382 # fix_skewed_ops() - adjust src operand in fsave frame #
2383 # set_tag_x() - determine optype of src/dst operands #
2384 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
2385 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
2386 # load_fpn2() - load dst operand from FP regfile #
2387 # tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2388 # decbin() - convert packed data to FP binary data #
2389 # _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2390 # _real_access() - "callout" for access error exception #
2391 # _mem_read() - read extended immediate operand from memory #
2392 # _fpsp_done() - "callout" for exit; work all done #
2393 # _real_trace() - "callout" for Trace enabled exception #
2394 # fmovm_dynamic() - emulate dynamic fmovm instruction #
2395 # fmovm_ctrl() - emulate fmovm control instruction #
2397 # INPUT *************************************************************** #
2398 # - The system stack contains the "Unimplemented <ea>" stk frame #
2400 # OUTPUT ************************************************************** #
2401 # If access error: #
2402 # - The system stack is changed to an access error stack frame #
2403 # If FPU disabled: #
2404 # - The system stack is changed to an FPU disabled stack frame #
2405 # If Trace exception enabled: #
2406 # - The system stack is changed to a Trace exception stack frame #
2407 # Else: (normal case) #
2408 # - None (correct result has been stored as appropriate) #
2410 # ALGORITHM *********************************************************** #
2411 # This exception handles 3 types of operations: #
2412 # (1) FP Instructions using extended precision or packed immediate #
2413 # addressing mode. #
2414 # (2) The "fmovm.x" instruction w/ dynamic register specification. #
2415 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2417 # For immediate data operations, the data is read in w/ a #
2418 # _mem_read() "callout", converted to FP binary (if packed), and used #
2419 # as the source operand to the instruction specified by the instruction #
2420 # word. If no FP exception should be reported ads a result of the #
2421 # emulation, then the result is stored to the destination register and #
2422 # the handler exits through _fpsp_done(). If an enabled exc has been #
2423 # signalled as a result of emulation, then an fsave state frame #
2424 # corresponding to the FP exception type must be entered into the 060 #
2425 # FPU before exiting. In either the enabled or disabled cases, we #
2426 # must also check if a Trace exception is pending, in which case, we #
2427 # must create a Trace exception stack frame from the current exception #
2428 # stack frame. If no Trace is pending, we simply exit through #
2429 # _fpsp_done(). #
2430 # For "fmovm.x", call the routine fmovm_dynamic() which will #
2431 # decode and emulate the instruction. No FP exceptions can be pending #
2432 # as a result of this operation emulation. A Trace exception can be #
2433 # pending, though, which means the current stack frame must be changed #
2434 # to a Trace stack frame and an exit made through _real_trace(). #
2435 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2436 # was executed from supervisor mode, this handler must store the FP #
2437 # register file values to the system stack by itself since #
2438 # fmovm_dynamic() can't handle this. A normal exit is made through #
2439 # _fpsp_done(). #
2440 # For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2441 # Again, a Trace exception may be pending and an exit made through #
2442 # _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2444 # Before any of the above is attempted, it must be checked to #
2445 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2446 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2447 # has higher priority, we check the disabled bit in the PCR. If set, #
2448 # then we must create an 8 word "FPU disabled" exception stack frame #
2449 # from the current 4 word exception stack frame. This includes #
2450 # reproducing the effective address of the instruction to put on the #
2451 # new stack frame. #
2453 # In the process of all emulation work, if a _mem_read() #
2454 # "callout" returns a failing result indicating an access error, then #
2455 # we must create an access error stack frame from the current stack #
2456 # frame. This information includes a faulting address and a fault- #
2457 # status-longword. These are created within this handler. #
2459 #########################################################################
2461 global _fpsp_effadd
2462 _fpsp_effadd:
2464 # This exception type takes priority over the "Line F Emulator"
2465 # exception. Therefore, the FPU could be disabled when entering here.
2466 # So, we must check to see if it's disabled and handle that case separately.
2467 mov.l %d0,-(%sp) # save d0
2468 movc %pcr,%d0 # load proc cr
2469 btst &0x1,%d0 # is FPU disabled?
2470 bne.w iea_disabled # yes
2471 mov.l (%sp)+,%d0 # restore d0
# FPU is enabled: build the standard FPSP local frame and save the
# caller's integer/fp register state into it.
2473 link %a6,&-LOCAL_SIZE # init stack frame
2475 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2476 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2477 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2479 # PC of instruction that took the exception is the PC in the frame
2480 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2482 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2483 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2484 bsr.l _imem_read_long # fetch the instruction words
2485 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2487 #########################################################################
# opword bit 15 set distinguishes fmovem from the arithmetic ops
2489 tst.w %d0 # is operation fmovem?
2490 bmi.w iea_fmovm # yes
2493 # here, we will have:
2494 # fabs fdabs fsabs facos fmod
2495 # fadd fdadd fsadd fasin frem
2496 # fcmp fatan fscale
2497 # fdiv fddiv fsdiv fatanh fsin
2498 # fint fcos fsincos
2499 # fintrz fcosh fsinh
2500 # fmove fdmove fsmove fetox ftan
2501 # fmul fdmul fsmul fetoxm1 ftanh
2502 # fneg fdneg fsneg fgetexp ftentox
2503 # fsgldiv fgetman ftwotox
2504 # fsglmul flog10
2505 # fsqrt flog2
2506 # fsub fdsub fssub flogn
2507 # ftst flognp1
2508 # which can all use f<op>.{x,p}
2509 # so, now it's immediate data extended precision AND PACKED FORMAT!
#
# Arithmetic op with an extended or packed immediate source: read the
# 12-byte immediate into FP_SRC, converting a packed operand first.
2511 iea_op:
2512 andi.l &0x00ff00ff,USER_FPSR(%a6)
2514 btst &0xa,%d0 # is src fmt x or p?
2515 bne.b iea_op_pack # packed
2518 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2519 lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2520 mov.l &0xc,%d0 # pass: 12 bytes
2521 bsr.l _imem_read # read extended immediate
# nonzero d1 from _imem_read means the instruction fetch faulted
2523 tst.l %d1 # did ifetch fail?
2524 bne.w iea_iacc # yes
2526 bra.b iea_op_setsrc
# Packed immediate source: read the 12 bytes, then special-case INF/NAN
# (exponent field all ones) and ZERO (all-zero mantissa) which need no
# conversion; otherwise convert decimal->extended via decbin.
2528 iea_op_pack:
2530 mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2531 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2532 mov.l &0xc,%d0 # pass: 12 bytes
2533 bsr.l _imem_read # read packed operand
2535 tst.l %d1 # did ifetch fail?
2536 bne.w iea_iacc # yes
2538 # The packed operand is an INF or a NAN if the exponent field is all ones.
2539 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2540 cmpi.w %d0,&0x7fff # INF or NAN?
2541 beq.b iea_op_setsrc # operand is an INF or NAN
2543 # The packed operand is a zero if the mantissa is all zero, else it's
2544 # a normal packed op.
2545 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2546 andi.b &0x0f,%d0 # clear all but last nybble
2547 bne.b iea_op_gp_not_spec # not a zero
2548 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2549 bne.b iea_op_gp_not_spec # not a zero
2550 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2551 beq.b iea_op_setsrc # operand is a ZERO
2552 iea_op_gp_not_spec:
2553 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2554 bsr.l decbin # convert to extended
2555 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
# Source operand is ready in FP_SRC: advance the extension-word pointer
# past the 12-byte immediate, tag the source, then fetch and tag the
# destination register for dyadic ops.
2557 iea_op_setsrc:
2558 addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2560 # FP_SRC now holds the src operand.
2561 lea FP_SRC(%a6),%a0 # pass: ptr to src op
2562 bsr.l set_tag_x # tag the operand type
2563 mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2564 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2565 bne.b iea_op_getdst # no
2566 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2567 mov.b %d0,STAG(%a6) # set new optype tag
2568 iea_op_getdst:
2569 clr.b STORE_FLG(%a6) # clear "store result" boolean
2571 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2572 beq.b iea_op_extract # monadic
2573 btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2574 bne.b iea_op_spec # yes
2576 iea_op_loaddst:
2577 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2578 bsr.l load_fpn2 # load dst operand
2580 lea FP_DST(%a6),%a0 # pass: ptr to dst op
2581 bsr.l set_tag_x # tag the operand type
2582 mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2583 cmpi.b %d0,&UNNORM # is operand an UNNORM?
2584 bne.b iea_op_extract # no
2585 bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2586 mov.b %d0,DTAG(%a6) # set new optype tag
2587 bra.b iea_op_extract
2589 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2590 iea_op_spec:
2591 btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2592 beq.b iea_op_extract # yes
2593 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2594 # store a result. then, only fcmp will branch back and pick up a dst operand.
# STORE_FLG set => skip the result writeback later in iea_op_save
2595 st STORE_FLG(%a6) # don't store a final result
2596 btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2597 beq.b iea_op_loaddst # yes
# Dispatch the operation: d0 = rounding mode/precision, d1 = extension
# bits indexing the tbl_unsupp routine table, a0/a1 = src/dst operands.
2599 iea_op_extract:
2600 clr.l %d0
2601 mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2603 mov.b 1+EXC_CMDREG(%a6),%d1
2604 andi.w &0x007f,%d1 # extract extension
2606 fmov.l &0x0,%fpcr
2607 fmov.l &0x0,%fpsr
2609 lea FP_SRC(%a6),%a0
2610 lea FP_DST(%a6),%a1
2612 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2613 jsr (tbl_unsupp.l,%pc,%d1.l*1)
2616 # Exceptions in order of precedence:
2617 # BSUN : none
2618 # SNAN : all operations
2619 # OPERR : all reg-reg or mem-reg operations that can normally operr
2620 # OVFL : same as OPERR
2621 # UNFL : same as OPERR
2622 # DZ : same as OPERR
2623 # INEX2 : same as OPERR
2624 # INEX1 : all packed immediate operations
2627 # we determine the highest priority exception(if any) set by the
2628 # emulation routine that has also been enabled by the user.
2629 mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2630 bne.b iea_op_ena # some are enabled
2632 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2633 # these don't save results.
2634 iea_op_save:
2635 tst.b STORE_FLG(%a6) # does this op store a result?
2636 bne.b iea_op_exit1 # exit with no frestore
2638 iea_op_store:
2639 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2640 bsr.l store_fpreg # store the result
# Normal exit: advance the frame PC past the immediate operand,
# restore state, and return (via _real_trace if trace is pending).
2642 iea_op_exit1:
2643 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2644 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2646 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2647 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2648 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2650 unlk %a6 # unravel the frame
2652 btst &0x7,(%sp) # is trace on?
2653 bne.w iea_op_trace # yes
2655 bra.l _fpsp_done # exit to os
# Some exceptions are enabled: pick the highest-priority enabled-and-set
# exception; special-case disabled OVFL/UNFL occurring with enabled
# INEX, which must force an OVFL/UNFL fsave frame instead.
2657 iea_op_ena:
2658 and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2659 bfffo %d0{&24:&8},%d0 # find highest priority exception
2660 bne.b iea_op_exc # at least one was set
2662 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2663 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2664 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2665 beq.b iea_op_save
2667 iea_op_ovfl:
2668 btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2669 beq.b iea_op_store # no
2670 bra.b iea_op_exc_ovfl # yes
2672 # an enabled exception occurred. we have to insert the exception type back into
2673 # the machine.
2674 iea_op_exc:
# bfffo on {24:8} returned a bit offset in 24-31; normalize to 0-7
2675 subi.l &24,%d0 # fix offset to be 0-7
2676 cmpi.b %d0,&0x6 # is exception INEX?
2677 bne.b iea_op_exc_force # no
2679 # the enabled exception was inexact. so, if it occurs with an overflow
2680 # or underflow that was disabled, then we have to force an overflow or
2681 # underflow frame.
2682 btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2683 bne.b iea_op_exc_ovfl # yes
2684 btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2685 bne.b iea_op_exc_unfl # yes
2687 iea_op_exc_force:
# look up the fsave status word for the exception priority index
2688 mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2689 bra.b iea_op_exit2 # exit with frestore
2691 tbl_iea_except:
2692 short 0xe002, 0xe006, 0xe004, 0xe005
2693 short 0xe003, 0xe002, 0xe001, 0xe001
2695 iea_op_exc_ovfl:
2696 mov.w &0xe005,2+FP_SRC(%a6)
2697 bra.b iea_op_exit2
2699 iea_op_exc_unfl:
2700 mov.w &0xe003,2+FP_SRC(%a6)
# Exit with an exceptional fsave frame loaded (status word was already
# stuffed into 2+FP_SRC by the caller paths above).
2702 iea_op_exit2:
2703 mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2704 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2706 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2707 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2708 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2710 frestore FP_SRC(%a6) # restore exceptional state
2712 unlk %a6 # unravel the frame
2714 btst &0x7,(%sp) # is trace on?
2715 bne.b iea_op_trace # yes
2717 bra.l _fpsp_done # exit to os
2720 # The opclass two instruction that took an "Unimplemented Effective Address"
2721 # exception was being traced. Make the "current" PC the FPIAR and put it in
2722 # the trace stack frame then jump to _real_trace().
2724 # UNIMP EA FRAME TRACE FRAME
2725 # ***************** *****************
2726 # * 0x0 * 0x0f0 * * Current *
2727 # ***************** * PC *
2728 # * Current * *****************
2729 # * PC * * 0x2 * 0x024 *
2730 # ***************** *****************
2731 # * SR * * Next *
2732 # ***************** * PC *
2733 # *****************
2734 # * SR *
2735 # *****************
2736 iea_op_trace:
# the Trace frame is one longword bigger than the Unimp EA frame, so
# grow the frame downward and rebuild its fields
2737 mov.l (%sp),-(%sp) # shift stack frame "down"
2738 mov.w 0x8(%sp),0x4(%sp)
2739 mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2740 fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2742 bra.l _real_trace
2744 #########################################################################
# fmovem with unimplemented <ea>: bit 14 of the opword separates the
# data-register (fmovm.x dynamic) and control-register (fmovm.l) forms.
2745 iea_fmovm:
2746 btst &14,%d0 # ctrl or data reg
2747 beq.w iea_fmovm_ctrl
2749 iea_fmovm_data:
2751 btst &0x5,EXC_SR(%a6) # user or supervisor mode
2752 bne.b iea_fmovm_data_s
# User mode: let fmovm_dynamic work against the user a7 image and
# write the possibly-updated value back to the USP afterwards.
2754 iea_fmovm_data_u:
2755 mov.l %usp,%a0
2756 mov.l %a0,EXC_A7(%a6) # store current a7
2757 bsr.l fmovm_dynamic # do dynamic fmovm
2758 mov.l EXC_A7(%a6),%a0 # load possibly new a7
2759 mov.l %a0,%usp # update usp
2760 bra.w iea_fmovm_exit
# Supervisor mode: point the "a7" image just above the exception frame
# so fmovm_dynamic transfers against the supervisor stack; then handle
# the -(a7) and (a7)+ special conditions it may have flagged.
2762 iea_fmovm_data_s:
2763 clr.b SPCOND_FLG(%a6)
2764 lea 0x2+EXC_VOFF(%a6),%a0
2765 mov.l %a0,EXC_A7(%a6)
2766 bsr.l fmovm_dynamic # do dynamic fmovm
2768 cmpi.b SPCOND_FLG(%a6),&mda7_flg
2769 beq.w iea_fmovm_data_predec
2770 cmpi.b SPCOND_FLG(%a6),&mia7_flg
2771 bne.w iea_fmovm_exit
2773 # right now, d0 = the size.
2774 # the data has been fetched from the supervisor stack, but we have not
2775 # incremented the stack pointer by the appropriate number of bytes.
2776 # do it here.
2777 iea_fmovm_data_postinc:
2778 btst &0x7,EXC_SR(%a6)
2779 bne.b iea_fmovm_data_pi_trace
# rebuild a 4-word frame (fmt 0x0, voff 0x0f0) d0 bytes higher on the
# stack, then unwind through it
2781 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2782 mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2783 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2785 lea (EXC_SR,%a6,%d0),%a0
2786 mov.l %a0,EXC_SR(%a6)
2788 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2789 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2790 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2792 unlk %a6
# pop the pointer to the relocated frame into sp
2793 mov.l (%sp)+,%sp
2794 bra.l _fpsp_done
2796 iea_fmovm_data_pi_trace:
2797 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2798 mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2799 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2800 mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2802 lea (EXC_SR-0x4,%a6,%d0),%a0
2803 mov.l %a0,EXC_SR(%a6)
2805 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2806 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2807 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2809 unlk %a6
2810 mov.l (%sp)+,%sp
2811 bra.l _real_trace
2813 # right now, d1 = size and d0 = the strg.
# -(a7) in supervisor mode: the destination overlaps the exception frame,
# so stash strg/size in the frame, restore machine state, then store the
# selected fp registers below the (possibly shifted) frame by hand.
2814 iea_fmovm_data_predec:
2815 mov.b %d1,EXC_VOFF(%a6) # store strg
2816 mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2818 fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2819 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2820 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2822 mov.l (%a6),-(%sp) # make a copy of a6
2823 mov.l %d0,-(%sp) # save d0
2824 mov.l %d1,-(%sp) # save d1
2825 mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2827 clr.l %d0
2828 mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2829 neg.l %d0 # get negative of size
2831 btst &0x7,EXC_SR(%a6) # is trace enabled?
2832 beq.b iea_fmovm_data_p2
# trace: build a 6-word trace frame (fmt 0x2, voff 0x024) "size" bytes down
2834 mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2835 mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2836 mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2837 mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2839 pea (%a6,%d0) # create final sp
2840 bra.b iea_fmovm_data_p3
# no trace: build a 4-word frame (fmt 0x0, voff 0x0f0) "size" bytes down
2842 iea_fmovm_data_p2:
2843 mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2844 mov.l (%sp)+,(EXC_PC,%a6,%d0)
2845 mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2847 pea (0x4,%a6,%d0) # create final sp
# walk the strg bitmask msb->lsb (fp7..fp0 order for -(an)); store each
# selected register in extended precision (12 bytes) and advance d0.
2849 iea_fmovm_data_p3:
2850 clr.l %d1
2851 mov.b EXC_VOFF(%a6),%d1 # fetch strg
2853 tst.b %d1
2854 bpl.b fm_1
2855 fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2856 addi.l &0xc,%d0
2857 fm_1:
2858 lsl.b &0x1,%d1
2859 bpl.b fm_2
2860 fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2861 addi.l &0xc,%d0
2862 fm_2:
2863 lsl.b &0x1,%d1
2864 bpl.b fm_3
2865 fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2866 addi.l &0xc,%d0
2867 fm_3:
2868 lsl.b &0x1,%d1
2869 bpl.b fm_4
2870 fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2871 addi.l &0xc,%d0
2872 fm_4:
2873 lsl.b &0x1,%d1
2874 bpl.b fm_5
2875 fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2876 addi.l &0xc,%d0
2877 fm_5:
2878 lsl.b &0x1,%d1
2879 bpl.b fm_6
2880 fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2881 addi.l &0xc,%d0
2882 fm_6:
2883 lsl.b &0x1,%d1
2884 bpl.b fm_7
2885 fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2886 addi.l &0xc,%d0
2887 fm_7:
2888 lsl.b &0x1,%d1
2889 bpl.b fm_end
2890 fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2891 fm_end:
# restore the scratch regs saved above and switch to the final frame
2892 mov.l 0x4(%sp),%d1
2893 mov.l 0x8(%sp),%d0
2894 mov.l 0xc(%sp),%a6
2895 mov.l (%sp)+,%sp
2897 btst &0x7,(%sp) # is trace enabled?
2898 beq.l _fpsp_done
2899 bra.l _real_trace
2901 #########################################################################
# control-register fmovm: emulate the load, then restore state and exit
# (shared exit path for the user-mode data case as well).
2902 iea_fmovm_ctrl:
2904 bsr.l fmovm_ctrl # load ctrl regs
2906 iea_fmovm_exit:
2907 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2908 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2909 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2911 btst &0x7,EXC_SR(%a6) # is trace on?
2912 bne.b iea_fmovm_trace # yes
2914 mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2916 unlk %a6 # unravel the frame
2918 bra.l _fpsp_done # exit to os
2921 # The control reg instruction that took an "Unimplemented Effective Address"
2922 # exception was being traced. The "Current PC" for the trace frame is the
2923 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2924 # After fixing the stack frame, jump to _real_trace().
2926 # UNIMP EA FRAME TRACE FRAME
2927 # ***************** *****************
2928 # * 0x0 * 0x0f0 * * Current *
2929 # ***************** * PC *
2930 # * Current * *****************
2931 # * PC * * 0x2 * 0x024 *
2932 # ***************** *****************
2933 # * SR * * Next *
2934 # ***************** * PC *
2935 # *****************
2936 # * SR *
2937 # *****************
2938 # this ain't a pretty solution, but it works:
2939 # -restore a6 (not with unlk)
2940 # -shift stack frame down over where old a6 used to be
2941 # -add LOCAL_SIZE to stack pointer
2942 iea_fmovm_trace:
2943 mov.l (%a6),%a6 # restore frame pointer
2944 mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR
2945 mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # stacked PC -> "Current PC"
2946 mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # EXTWPTR -> "Next PC"
2947 mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2948 add.l &LOCAL_SIZE,%sp # clear stack frame
2950 bra.l _real_trace
2952 #########################################################################
2953 # The FPU is disabled and so we should really have taken the "Line
2954 # F Emulator" exception. So, here we create an 8-word stack frame
2955 # from our 4-word stack frame. This means we must calculate the length
2956 # of the faulting instruction to get the "next PC". This is trivial for
2957 # immediate operands but requires some extra work for fmovm dynamic
2958 # which can use most addressing modes.
2959 iea_disabled:
2960 mov.l (%sp)+,%d0 # restore d0
2962 link %a6,&-LOCAL_SIZE # init stack frame
2964 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2966 # PC of instruction that took the exception is the PC in the frame
2967 mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2968 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2969 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2970 bsr.l _imem_read_long # fetch the instruction words
2971 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2973 tst.w %d0 # is instr fmovm?
2974 bmi.b iea_dis_fmovm # yes
2975 # instruction is using an extended precision immediate operand. therefore,
2976 # the total instruction length is 16 bytes.
2977 iea_dis_immed:
2978 mov.l &0x10,%d0 # 16 bytes of instruction
2979 bra.b iea_dis_cont
2980 iea_dis_fmovm:
2981 btst &0xe,%d0 # is instr fmovm ctrl
2982 bne.b iea_dis_fmovm_data # no
2983 # the instruction is a fmovm.l with 2 or 3 registers.
2984 bfextu %d0{&19:&3},%d1 # extract reg select field
2985 mov.l &0xc,%d0 # assume 2 regs: 12 bytes
2986 cmpi.b %d1,&0x7 # move all regs?
2987 bne.b iea_dis_cont
2988 addq.l &0x4,%d0 # 3 regs: 16 bytes
2989 bra.b iea_dis_cont
2990 # the instruction is an fmovm.x dynamic which can use many addressing
2991 # modes and thus can have several different total instruction lengths.
2992 # call fmovm_calc_ea which will go through the ea calc process and,
2993 # as a by-product, will tell us how long the instruction is.
2994 iea_dis_fmovm_data:
2995 clr.l %d0
2996 bsr.l fmovm_calc_ea
2997 mov.l EXC_EXTWPTR(%a6),%d0
2998 sub.l EXC_PC(%a6),%d0 # length = end of instr - start of instr
2999 iea_dis_cont:
3000 mov.w %d0,EXC_VOFF(%a6) # store stack shift value
3002 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3004 unlk %a6
3006 # here, we actually create the 8-word frame from the 4-word frame,
3007 # with the "next PC" as additional info.
3008 # the <ea> field is left as undefined.
3009 subq.l &0x8,%sp # make room for new stack
3010 mov.l %d0,-(%sp) # save d0
3011 mov.w 0xc(%sp),0x4(%sp) # move SR
3012 mov.l 0xe(%sp),0x6(%sp) # move Current PC
3013 clr.l %d0
3014 mov.w 0x12(%sp),%d0 # fetch stored instr length (stack shift value)
3015 mov.l 0x6(%sp),0x10(%sp) # move Current PC
3016 add.l %d0,0x6(%sp) # make Next PC
3017 mov.w &0x402c,0xa(%sp) # insert offset,frame format
3018 mov.l (%sp)+,%d0 # restore d0
3020 bra.l _real_fpu_disabled
3022 ##########
# instruction-fetch access error: rebuild the frame as an access-error
# frame (fmt 0x4, voff 0x008) and exit through _real_access().
3024 iea_iacc:
3025 movc %pcr,%d0
3026 btst &0x1,%d0 # NOTE(review): PCR bit 1 presumably = FPU disable — confirm
3027 bne.b iea_iacc_cont # set: skip touching the FP unit
3028 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3029 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3030 iea_iacc_cont:
3031 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3033 unlk %a6
3035 subq.w &0x8,%sp # make stack frame bigger
3036 mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3037 mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3038 mov.w &0x4008,0x6(%sp) # store voff
3039 mov.l 0x2(%sp),0x8(%sp) # store ea
3040 mov.l &0x09428001,0xc(%sp) # store fslw
3042 iea_acc_done:
3043 btst &0x5,(%sp) # user or supervisor mode?
3044 beq.b iea_acc_done2 # user
3045 bset &0x2,0xd(%sp) # set supervisor TM bit
3047 iea_acc_done2:
3048 bra.l _real_access
# data access error: rebuild the frame as an access-error frame
# (fmt 0x4, voff 0x008); a0 holds the faulting <ea> and d0 the FSLW
# bits — NOTE(review): inputs inferred from the stores below, confirm
# against the caller.
3050 iea_dacc:
3051 lea -LOCAL_SIZE(%a6),%sp
3053 movc %pcr,%d1
3054 btst &0x1,%d1 # NOTE(review): PCR bit 1 presumably = FPU disable — confirm
3055 bne.b iea_dacc_cont # set: skip touching the FP unit
3056 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3057 fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3058 iea_dacc_cont:
3059 mov.l (%a6),%a6 # restore frame pointer (not with unlk)
3061 mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3062 mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3063 mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # fmt 0x4; voff 0x008
3064 mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store <ea>
3065 mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3066 mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3068 movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3069 add.w &LOCAL_SIZE-0x4,%sp
3071 bra.b iea_acc_done
3073 #########################################################################
3074 # XDEF **************************************************************** #
3075 # _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3077 # This handler should be the first code executed upon taking the #
3078 # FP Operand Error exception in an operating system. #
3080 # XREF **************************************************************** #
3081 # _imem_read_long() - read instruction longword #
3082 # fix_skewed_ops() - adjust src operand in fsave frame #
3083 # _real_operr() - "callout" to operating system operr handler #
3084 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3085 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3086 # facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3088 # INPUT *************************************************************** #
3089 # - The system stack contains the FP Operr exception frame #
3090 # - The fsave frame contains the source operand #
3092 # OUTPUT ************************************************************** #
3093 # No access error: #
3094 # - The system stack is unchanged #
3095 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3097 # ALGORITHM *********************************************************** #
3098 # In a system where the FP Operr exception is enabled, the goal #
3099 # is to get to the handler specified at _real_operr(). But, on the 060, #
3100 # for opclass zero and two instruction taking this exception, the #
3101 # input operand in the fsave frame may be incorrect for some cases #
3102 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3103 # do just this and then exits through _real_operr(). #
3104 # For opclass 3 instructions, the 060 doesn't store the default #
3105 # operr result out to memory or data register file as it should. #
3106 # This code must emulate the move out before finally exiting through #
3107 # _real_operr(). The move out, if to memory, is performed using #
3108 # _mem_write() "callout" routines that may return a failing result. #
3109 # In this special case, the handler must exit through facc_out() #
3110 # which creates an access error stack frame from the current operr #
3111 # stack frame. #
3113 #########################################################################
3115 global _fpsp_operr
# FP Operand Error entry point: save machine state, fetch the faulting
# instruction words, and dispatch: fmove-out -> foperr_out; otherwise
# unskew the fsave source operand and exit through _real_operr().
3116 _fpsp_operr:
3118 link.w %a6,&-LOCAL_SIZE # init stack frame
3120 fsave FP_SRC(%a6) # grab the "busy" frame
3122 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3123 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3124 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3126 # the FPIAR holds the "current PC" of the faulting instruction
3127 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3129 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3130 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3131 bsr.l _imem_read_long # fetch the instruction words
3132 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3134 ##############################################################################
3136 btst &13,%d0 # is instr an fmove out?
3137 bne.b foperr_out # fmove out
3140 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3141 # this would be the case for opclass two operations with a source infinity or
3142 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3143 # cause an operr so we don't need to check for them here.
3144 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3145 bsr.l fix_skewed_ops # fix src op
3147 foperr_exit:
3148 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3149 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3150 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3152 frestore FP_SRC(%a6)
3154 unlk %a6
3155 bra.l _real_operr
3157 ########################################################################
3160 # the hardware does not save the default result to memory on enabled
3161 # operand error exceptions. we do this here before passing control to
3162 # the user operand error handler.
3164 # byte, word, and long destination format operations can pass
3165 # through here. we simply need to test the sign of the src
3166 # operand and save the appropriate minimum or maximum integer value
3167 # to the effective address as pointed to by the stacked effective address.
3169 # although packed opclass three operations can take operand error
3170 # exceptions, they won't pass through here since they are caught
3171 # first by the unsupported data format exception handler. that handler
3172 # sends them directly to _real_operr() if necessary.
3174 foperr_out:
3176 mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3177 andi.w &0x7fff,%d1 # strip sign
3178 cmpi.w %d1,&0x7fff # max exponent -> inf or nan
3179 bne.b foperr_out_not_qnan
3180 # the operand is either an infinity or a QNAN.
3181 tst.l FP_SRC_LO(%a6) # nonzero lo mantissa -> nan
3182 bne.b foperr_out_qnan
3183 mov.l FP_SRC_HI(%a6),%d1
3184 andi.l &0x7fffffff,%d1 # zero mantissa -> infinity
3185 beq.b foperr_out_not_qnan
# QNAN: the default result is the nan's upper mantissa longword
3186 foperr_out_qnan:
3187 mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3188 bra.b foperr_out_jmp
# otherwise: default result is max positive int (0x7fffffff) or,
# for a negative source, min negative int (0x80000000).
3190 foperr_out_not_qnan:
3191 mov.l &0x7fffffff,%d1
3192 tst.b FP_SRC_EX(%a6) # source negative?
3193 bpl.b foperr_out_not_qnan2
3194 addq.l &0x1,%d1 # 0x7fffffff + 1 = 0x80000000
3195 foperr_out_not_qnan2:
3196 mov.l %d1,L_SCR1(%a6)
3198 foperr_out_jmp:
3199 bfextu %d0{&19:&3},%d0 # extract dst format field
3200 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3201 mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3202 jmp (tbl_operr.b,%pc,%a0)
3204 tbl_operr:
3205 short foperr_out_l - tbl_operr # long word integer
3206 short tbl_operr - tbl_operr # sgl prec shouldn't happen
3207 short tbl_operr - tbl_operr # ext prec shouldn't happen
3208 short foperr_exit - tbl_operr # packed won't enter here
3209 short foperr_out_w - tbl_operr # word integer
3210 short tbl_operr - tbl_operr # dbl prec shouldn't happen
3211 short foperr_out_b - tbl_operr # byte integer
3212 short tbl_operr - tbl_operr # packed won't enter here
# write the default operr result in byte/word/long format, either to
# memory (via the _dmem_write callouts, with access-error fallback to
# facc_out_*) or to the data register file.
3214 foperr_out_b:
3215 mov.b L_SCR1(%a6),%d0 # load positive default result
3216 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3217 ble.b foperr_out_b_save_dn # yes
3218 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3219 bsr.l _dmem_write_byte # write the default result
3221 tst.l %d1 # did dstore fail?
3222 bne.l facc_out_b # yes
3224 bra.w foperr_exit
3225 foperr_out_b_save_dn:
3226 andi.w &0x0007,%d1 # pass: Dn number
3227 bsr.l store_dreg_b # store result to regfile
3228 bra.w foperr_exit
3230 foperr_out_w:
3231 mov.w L_SCR1(%a6),%d0 # load positive default result
3232 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3233 ble.b foperr_out_w_save_dn # yes
3234 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3235 bsr.l _dmem_write_word # write the default result
3237 tst.l %d1 # did dstore fail?
3238 bne.l facc_out_w # yes
3240 bra.w foperr_exit
3241 foperr_out_w_save_dn:
3242 andi.w &0x0007,%d1 # pass: Dn number
3243 bsr.l store_dreg_w # store result to regfile
3244 bra.w foperr_exit
3246 foperr_out_l:
3247 mov.l L_SCR1(%a6),%d0 # load positive default result
3248 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3249 ble.b foperr_out_l_save_dn # yes
3250 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3251 bsr.l _dmem_write_long # write the default result
3253 tst.l %d1 # did dstore fail?
3254 bne.l facc_out_l # yes
3256 bra.w foperr_exit
3257 foperr_out_l_save_dn:
3258 andi.w &0x0007,%d1 # pass: Dn number
3259 bsr.l store_dreg_l # store result to regfile
3260 bra.w foperr_exit
3262 #########################################################################
3263 # XDEF **************************************************************** #
3264 # _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3266 # This handler should be the first code executed upon taking the #
3267 # FP Signalling NAN exception in an operating system. #
3269 # XREF **************************************************************** #
3270 # _imem_read_long() - read instruction longword #
3271 # fix_skewed_ops() - adjust src operand in fsave frame #
3272 # _real_snan() - "callout" to operating system SNAN handler #
3273 # _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3274 # store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3275 # facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3276 # _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3278 # INPUT *************************************************************** #
3279 # - The system stack contains the FP SNAN exception frame #
3280 # - The fsave frame contains the source operand #
3282 # OUTPUT ************************************************************** #
3283 # No access error: #
3284 # - The system stack is unchanged #
3285 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3287 # ALGORITHM *********************************************************** #
3288 # In a system where the FP SNAN exception is enabled, the goal #
3289 # is to get to the handler specified at _real_snan(). But, on the 060, #
3290 # for opclass zero and two instructions taking this exception, the #
3291 # input operand in the fsave frame may be incorrect for some cases #
3292 # and needs to be corrected. This handler calls fix_skewed_ops() to #
3293 # do just this and then exits through _real_snan(). #
3294 # For opclass 3 instructions, the 060 doesn't store the default #
3295 # SNAN result out to memory or data register file as it should. #
3296 # This code must emulate the move out before finally exiting through #
3297 # _real_snan(). The move out, if to memory, is performed using #
3298 # _mem_write() "callout" routines that may return a failing result. #
3299 # In this special case, the handler must exit through facc_out() #
3300 # which creates an access error stack frame from the current SNAN #
3301 # stack frame. #
3302 # For the case of an extended precision opclass 3 instruction, #
3303 # if the effective addressing mode was -() or ()+, then the address #
3304 # register must get updated by calling _calc_ea_fout(). If the <ea> #
3305 # was -(a7) from supervisor mode, then the exception frame currently #
3306 # on the system stack must be carefully moved "down" to make room #
3307 # for the operand being moved. #
3309 #########################################################################
3311 global _fpsp_snan
# FP Signalling NAN entry point: save machine state, fetch the faulting
# instruction words, and dispatch: fmove-out -> fsnan_out; otherwise
# unskew the fsave source operand and exit through _real_snan().
3312 _fpsp_snan:
3314 link.w %a6,&-LOCAL_SIZE # init stack frame
3316 fsave FP_SRC(%a6) # grab the "busy" frame
3318 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3319 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3320 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3322 # the FPIAR holds the "current PC" of the faulting instruction
3323 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3325 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3326 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3327 bsr.l _imem_read_long # fetch the instruction words
3328 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3330 ##############################################################################
3332 btst &13,%d0 # is instr an fmove out?
3333 bne.w fsnan_out # fmove out
3336 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3337 # this would be the case for opclass two operations with a source infinity or
3338 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3339 # fixed here.
3340 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3341 bsr.l fix_skewed_ops # fix src op
3343 fsnan_exit:
3344 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3345 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3346 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3348 frestore FP_SRC(%a6)
3350 unlk %a6
3351 bra.l _real_snan
3353 ########################################################################
3356 # the hardware does not save the default result to memory on enabled
3357 # snan exceptions. we do this here before passing control to
3358 # the user snan handler.
3360 # byte, word, long, and packed destination format operations can pass
3361 # through here. since packed format operations already were handled by
3362 # fpsp_unsupp(), then we need to do nothing else for them here.
3363 # for byte, word, and long, we simply need to test the sign of the src
3364 # operand and save the appropriate minimum or maximum integer value
3365 # to the effective address as pointed to by the stacked effective address.
3367 fsnan_out:
3369 bfextu %d0{&19:&3},%d0 # extract dst format field
3370 mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3371 mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3372 jmp (tbl_snan.b,%pc,%a0)
3374 tbl_snan:
3375 short fsnan_out_l - tbl_snan # long word integer
3376 short fsnan_out_s - tbl_snan # sgl prec
3377 short fsnan_out_x - tbl_snan # ext prec
3378 short tbl_snan - tbl_snan # packed needs no help
3379 short fsnan_out_w - tbl_snan # word integer
3380 short fsnan_out_d - tbl_snan # dbl prec
3381 short fsnan_out_b - tbl_snan # byte integer
3382 short tbl_snan - tbl_snan # packed needs no help
# write the default SNAN result in byte/word/long format: take the upper
# bits of the SNAN mantissa with the quiet bit set, store to memory
# (access-error fallback facc_out_*) or to the data register file.
3384 fsnan_out_b:
3385 mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3386 bset &6,%d0 # set SNAN bit
3387 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3388 ble.b fsnan_out_b_dn # yes
3389 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3390 bsr.l _dmem_write_byte # write the default result
3392 tst.l %d1 # did dstore fail?
3393 bne.l facc_out_b # yes
3395 bra.w fsnan_exit
3396 fsnan_out_b_dn:
3397 andi.w &0x0007,%d1 # pass: Dn number
3398 bsr.l store_dreg_b # store result to regfile
3399 bra.w fsnan_exit
3401 fsnan_out_w:
3402 mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3403 bset &14,%d0 # set SNAN bit
3404 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3405 ble.b fsnan_out_w_dn # yes
3406 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3407 bsr.l _dmem_write_word # write the default result
3409 tst.l %d1 # did dstore fail?
3410 bne.l facc_out_w # yes
3412 bra.w fsnan_exit
3413 fsnan_out_w_dn:
3414 andi.w &0x0007,%d1 # pass: Dn number
3415 bsr.l store_dreg_w # store result to regfile
3416 bra.w fsnan_exit
3418 fsnan_out_l:
3419 mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3420 bset &30,%d0 # set SNAN bit
3421 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3422 ble.b fsnan_out_l_dn # yes
3423 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3424 bsr.l _dmem_write_long # write the default result
3426 tst.l %d1 # did dstore fail?
3427 bne.l facc_out_l # yes
3429 bra.w fsnan_exit
3430 fsnan_out_l_dn:
3431 andi.w &0x0007,%d1 # pass: Dn number
3432 bsr.l store_dreg_l # store result to regfile
3433 bra.w fsnan_exit
# build a single-precision quieted NAN from the extended SNAN: keep the
# sign, force max sgl exponent + quiet bit (0x7fc00000), and shift the
# upper mantissa into the 23-bit sgl field.
3435 fsnan_out_s:
3436 cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3437 ble.b fsnan_out_d_dn # yes (label name notwithstanding, this is the sgl Dn case)
3438 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3439 andi.l &0x80000000,%d0 # keep sign
3440 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3441 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3442 lsr.l &0x8,%d1 # shift mantissa for sgl
3443 or.l %d1,%d0 # create sgl SNAN
3444 mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3445 bsr.l _dmem_write_long # write the default result
3447 tst.l %d1 # did dstore fail?
3448 bne.l facc_out_l # yes
3450 bra.w fsnan_exit
3451 fsnan_out_d_dn:
3452 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3453 andi.l &0x80000000,%d0 # keep sign
3454 ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3455 mov.l %d1,-(%sp) # preserve <ea> mode,reg across mantissa work
3456 mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3457 lsr.l &0x8,%d1 # shift mantissa for sgl
3458 or.l %d1,%d0 # create sgl SNAN
3459 mov.l (%sp)+,%d1
3460 andi.w &0x0007,%d1 # pass: Dn number
3461 bsr.l store_dreg_l # store result to regfile
3462 bra.w fsnan_exit
# build a double-precision quieted NAN in FP_SCR0 from the extended SNAN
# (sign kept, max dbl exponent + quiet bit, 52-bit mantissa assembled by
# shifting hi/lo across the 11-bit boundary) and write all 8 bytes.
3464 fsnan_out_d:
3465 mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3466 andi.l &0x80000000,%d0 # keep sign
3467 ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3468 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3469 mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3470 mov.l &11,%d0 # load shift amt
3471 lsr.l %d0,%d1
3472 or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3473 mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3474 andi.l &0x000007ff,%d1 # keep the 11 bits shifted out above
3475 ror.l %d0,%d1 # rotate them to the top of the lo word
3476 mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3477 mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3478 lsr.l %d0,%d1
3479 or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3480 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3481 mov.l EXC_EA(%a6),%a1 # pass: dst addr
3482 movq.l &0x8,%d0 # pass: size of 8 bytes
3483 bsr.l _dmem_write # write the default result
3485 tst.l %d1 # did dstore fail?
3486 bne.l facc_out_d # yes
3488 bra.w fsnan_exit
3490 # for extended precision, if the addressing mode is pre-decrement or
3491 # post-increment, then the address register did not get updated.
3492 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3493 fsnan_out_x:
3494 clr.b SPCOND_FLG(%a6) # clear special case flag
# build the quieted extended NAN in FP_SCR0 (quiet bit = bit 30 of hi)
3496 mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3497 clr.w 2+FP_SCR0(%a6)
3498 mov.l FP_SRC_HI(%a6),%d0
3499 bset &30,%d0 # set SNAN (quiet) bit
3500 mov.l %d0,FP_SCR0_HI(%a6)
3501 mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3503 btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3504 bne.b fsnan_out_x_s # yes
# user mode: expose usp to _calc_ea_fout via EXC_A7, then write back
3506 mov.l %usp,%a0 # fetch user stack pointer
3507 mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3508 mov.l (%a6),EXC_A6(%a6)
3510 bsr.l _calc_ea_fout # find the correct ea,update An
3511 mov.l %a0,%a1
3512 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3514 mov.l EXC_A7(%a6),%a0
3515 mov.l %a0,%usp # restore user stack pointer
3516 mov.l EXC_A6(%a6),(%a6)
3518 fsnan_out_x_save:
3519 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3520 movq.l &0xc,%d0 # pass: size of extended
3521 bsr.l _dmem_write # write the default result
3523 tst.l %d1 # did dstore fail?
3524 bne.l facc_out_x # yes
3526 bra.w fsnan_exit
3528 fsnan_out_x_s:
3529 mov.l (%a6),EXC_A6(%a6)
3531 bsr.l _calc_ea_fout # find the correct ea,update An
3532 mov.l %a0,%a1
3533 mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3535 mov.l EXC_A6(%a6),(%a6)
3537 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3538 bne.b fsnan_out_x_save # no
3540 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
# the destination overlaps the exception frame: restore state, shift the
# frame down 12 bytes, and deposit the operand where the frame used to be.
3541 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3542 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3543 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3545 frestore FP_SRC(%a6)
3547 mov.l EXC_A6(%a6),%a6 # restore frame pointer
3549 mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3550 mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3551 mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3553 mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3554 mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3555 mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3557 add.l &LOCAL_SIZE-0x8,%sp
3559 bra.l _real_snan
3561 #########################################################################
3562 # XDEF **************************************************************** #
3563 # _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3565 # This handler should be the first code executed upon taking the #
3566 # FP Inexact exception in an operating system. #
3568 # XREF **************************************************************** #
3569 # _imem_read_long() - read instruction longword #
3570 # fix_skewed_ops() - adjust src operand in fsave frame #
3571 # set_tag_x() - determine optype of src/dst operands #
3572 # store_fpreg() - store opclass 0 or 2 result to FP regfile #
3573 # unnorm_fix() - change UNNORM operands to NORM or ZERO #
3574 # load_fpn2() - load dst operand from FP regfile #
3575 # smovcr() - emulate an "fmovcr" instruction #
3576 # fout() - emulate an opclass 3 instruction #
3577 # tbl_unsupp - add of table of emulation routines for opclass 0,2 #
3578 # _real_inex() - "callout" to operating system inexact handler #
3580 # INPUT *************************************************************** #
3581 # - The system stack contains the FP Inexact exception frame #
3582 # - The fsave frame contains the source operand #
3584 # OUTPUT ************************************************************** #
3585 # - The system stack is unchanged #
3586 # - The fsave frame contains the adjusted src op for opclass 0,2 #
3588 # ALGORITHM *********************************************************** #
3589 # In a system where the FP Inexact exception is enabled, the goal #
3590 # is to get to the handler specified at _real_inex(). But, on the 060, #
3591 # for opclass zero and two instruction taking this exception, the #
3592 # hardware doesn't store the correct result to the destination FP #
3593 # register as did the '040 and '881/2. This handler must emulate the #
3594 # instruction in order to get this value and then store it to the #
3595 # correct register before calling _real_inex(). #
3596 # For opclass 3 instructions, the 060 doesn't store the default #
3597 # inexact result out to memory or data register file as it should. #
3598 # This code must emulate the move out by calling fout() before finally #
3599 # exiting through _real_inex(). #
3601 #########################################################################
3603 global _fpsp_inex
# FP Inexact entry point: save machine state, fetch the faulting
# instruction words; fmove-out goes to finex_out (past this chunk),
# otherwise the instruction is re-emulated so the correct result can
# be stored to the destination FP register before exiting through
# _real_inex().
3604 _fpsp_inex:
3606 link.w %a6,&-LOCAL_SIZE # init stack frame
3608 fsave FP_SRC(%a6) # grab the "busy" frame
3610 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3611 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3612 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3614 # the FPIAR holds the "current PC" of the faulting instruction
3615 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3617 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3618 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3619 bsr.l _imem_read_long # fetch the instruction words
3620 mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
3622 ##############################################################################
3624 btst &13,%d0 # is instr an fmove out?
3625 bne.w finex_out # fmove out
3628 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3629 # longword integer directly into the upper longword of the mantissa along
3630 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3631 bfextu %d0{&19:&3},%d0 # fetch instr size
3632 bne.b finex_cont # instr size is not long
3633 cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3634 bne.b finex_cont # no
3635 fmov.l &0x0,%fpcr # clear rounding controls for the conversion
3636 fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3637 fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3638 mov.w &0xe001,0x2+FP_SRC(%a6)
3640 finex_cont:
3641 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3642 bsr.l fix_skewed_ops # fix src op
3644 # Here, we zero the ccode and exception byte field since we're going to
3645 # emulate the whole instruction. Notice, though, that we don't kill the
3646 # INEX1 bit. This is because a packed op has long since been converted
3647 # to extended before arriving here. Therefore, we need to retain the
3648 # INEX1 bit from when the operand was first converted.
3649 andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accured field
3651 fmov.l &0x0,%fpcr # zero current control regs
3652 fmov.l &0x0,%fpsr
3654 bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3655 cmpi.b %d1,&0x17 # is op an fmovecr?
3656 beq.w finex_fmovcr # yes
3658 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3659 bsr.l set_tag_x # tag the operand type
3660 mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3662 # bits four and five of the fp extension word separate the monadic and dyadic
3663 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3664 # will never take this exception, but fsincos will.
3665 btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3666 beq.b finex_extract # monadic
3668 btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3669 bne.b finex_extract # yes
3671 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3672 bsr.l load_fpn2 # load dst into FP_DST
3674 lea FP_DST(%a6),%a0 # pass: ptr to dst op
3675 bsr.l set_tag_x # tag the operand type
3676 cmpi.b %d0,&UNNORM # is operand an UNNORM?
3677 bne.b finex_op2_done # no
3678 bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3679 finex_op2_done:
3680 mov.b %d0,DTAG(%a6) # save dst optype tag
3682 finex_extract:
3683 clr.l %d0
3684 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3686 mov.b 1+EXC_CMDREG(%a6),%d1
3687 andi.w &0x007f,%d1 # extract extension
3689 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3690 lea FP_DST(%a6),%a1 # pass: ptr to dst op
3692 mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3693 jsr (tbl_unsupp.l,%pc,%d1.l*1) # emulate the op
3695 # the operation has been emulated. the result is in fp0.
3696 finex_save:
3697 bfextu EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst reg number
3698 bsr.l store_fpreg # store emulated result to regfile
3700 finex_exit:
3701 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3702 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3703 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3705 frestore FP_SRC(%a6)
3707 unlk %a6
3708 bra.l _real_inex
3710 finex_fmovcr:
3711 clr.l %d0
3712 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3713 mov.b 1+EXC_CMDREG(%a6),%d1
3714 andi.l &0x0000007f,%d1 # pass rom offset
3715 bsr.l smovcr # emulate the fmovecr
3716 bra.b finex_save
3718 ########################################################################
3721 # the hardware does not save the default result to memory on enabled
3722 # inexact exceptions. we do this here before passing control to
3723 # the user inexact handler.
3725 # byte, word, and long destination format operations can pass
3726 # through here. so can double and single precision.
3727 # although packed opclass three operations can take inexact
3728 # exceptions, they won't pass through here since they are caught
3729 # first by the unsupported data format exception handler. that handler
3730 # sends them directly to _real_inex() if necessary.
3732 finex_out:
3734 mov.b &NORM,STAG(%a6) # src is a NORM
3736 clr.l %d0
3737 mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3739 andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3741 lea FP_SRC(%a6),%a0 # pass ptr to src operand
3743 bsr.l fout # store the default result
3745 bra.b finex_exit
3747 #########################################################################
3748 # XDEF **************************************************************** #
3749 # _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3751 # This handler should be the first code executed upon taking #
3752 # the FP DZ exception in an operating system. #
3754 # XREF **************************************************************** #
3755 # _imem_read_long() - read instruction longword from memory #
3756 # fix_skewed_ops() - adjust fsave operand #
3757 # _real_dz() - "callout" exit point from FP DZ handler #
3759 # INPUT *************************************************************** #
3760 # - The system stack contains the FP DZ exception stack. #
3761 # - The fsave frame contains the source operand. #
3763 # OUTPUT ************************************************************** #
3764 # - The system stack contains the FP DZ exception stack. #
3765 # - The fsave frame contains the adjusted source operand. #
3767 # ALGORITHM *********************************************************** #
3768 # In a system where the DZ exception is enabled, the goal is to #
3769 # get to the handler specified at _real_dz(). But, on the 060, when the #
3770 # exception is taken, the input operand in the fsave state frame may #
3771 # be incorrect for some cases and need to be adjusted. So, this package #
3772 # adjusts the operand using fix_skewed_ops() and then branches to #
3773 # _real_dz(). #
3775 #########################################################################
# _fpsp_dz: 060FPSP entry point for the FP divide-by-zero exception.
# Saves the user context, fetches the faulting opcode (FPIAR holds its PC),
# un-skews the source operand in the fsave frame via fix_skewed_ops(),
# restores context, and exits through the OS "callout" _real_dz().
3777 global _fpsp_dz
3778 _fpsp_dz:
3780 link.w %a6,&-LOCAL_SIZE # init stack frame
3782 fsave FP_SRC(%a6) # grab the "busy" frame
3784 movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3785 fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3786 fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3788 # the FPIAR holds the "current PC" of the faulting instruction
3789 mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3791 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3792 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3793 bsr.l _imem_read_long # fetch the instruction words
3794 mov.l %d0,EXC_OPWORD(%a6) # stash opword for fix_skewed_ops
3796 ##############################################################################
3799 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3800 # this would be the case for opclass two operations with a source zero
3801 # in the sgl or dbl format.
3802 lea FP_SRC(%a6),%a0 # pass: ptr to src op
3803 bsr.l fix_skewed_ops # fix src op
3805 fdz_exit:
3806 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3807 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3808 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3810 frestore FP_SRC(%a6) # reload the (now adjusted) fsave frame
3812 unlk %a6
3813 bra.l _real_dz # "callout" to the OS DZ handler
3815 #########################################################################
3816 # XDEF **************************************************************** #
3817 # _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3818 # exception when the "reduced" version of the #
3819 # FPSP is implemented that does not emulate #
3820 # FP unimplemented instructions. #
3822 # This handler should be the first code executed upon taking a #
3823 # "Line F Emulator" exception in an operating system integrating #
3824 # the reduced version of 060FPSP. #
3826 # XREF **************************************************************** #
3827 # _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3828 # _real_fline() - Handle all other cases (treated equally) #
3830 # INPUT *************************************************************** #
3831 # - The system stack contains a "Line F Emulator" exception #
3832 # stack frame. #
3834 # OUTPUT ************************************************************** #
3835 # - The system stack is unchanged. #
3837 # ALGORITHM *********************************************************** #
3838 # When a "Line F Emulator" exception occurs in a system where #
3839 # "FPU Unimplemented" instructions will not be emulated, the exception #
3840 # can occur because the FPU is disabled or the instruction is to be #
3841 # classified as "Line F". This module determines which case exists and #
3842 # calls the appropriate "callout". #
3844 #########################################################################
# _fpsp_fline: "Line F Emulator" entry point for the reduced 060FPSP
# (no FP-unimplemented emulation). Dispatches to one of two OS callouts.
3846 global _fpsp_fline
3847 _fpsp_fline:
3849 # check to see if the FPU is disabled. if so, jump to the OS entry
3850 # point for that condition.
3851 cmpi.w 0x6(%sp),&0x402c # frame format/vector word 0x402c => FPU disabled
3852 beq.l _real_fpu_disabled # yes; let the OS enable/emulate
3854 bra.l _real_fline # no; treat as a true Line F instruction
3856 #########################################################################
3857 # XDEF **************************************************************** #
3858 # _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3860 # XREF **************************************************************** #
3861 # inc_areg() - increment an address register #
3862 # dec_areg() - decrement an address register #
3864 # INPUT *************************************************************** #
3865 # d0 = number of bytes to adjust <ea> by #
3867 # OUTPUT ************************************************************** #
3868 # None #
3870 # ALGORITHM *********************************************************** #
3871 # "Dummy" CALCulate Effective Address: #
3872 # The stacked <ea> for FP unimplemented instructions and opclass #
3873 # two packed instructions is correct with the exception of... #
3875 # 1) -(An) : The register is not updated regardless of size. #
3876 # Also, for extended precision and packed, the #
3877 # stacked <ea> value is 8 bytes too big #
3878 # 2) (An)+ : The register is not updated. #
3879 # 3) #<data> : The upper longword of the immediate operand is #
3880 # stacked. b,w,l, and s sizes are completely stacked. #
3881 # d,x, and p are not. #
3883 #########################################################################
# _dcalc_ea: "dummy" calculate effective address.
# In:  d0 = number of bytes to adjust <ea> by; EXC_OPWORD(%a6) = opcode word
# Out: a0 = corrected <ea>
# The stacked <ea> is already correct except for (An)+, -(An) and #<data>
# (see the header block above).
3885 global _dcalc_ea
3886 _dcalc_ea:
3887 mov.l %d0, %a0 # move # bytes to %a0
3889 mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
3890 mov.l %d0, %d1 # make a copy
3892 andi.w &0x38, %d0 # extract mode field
3893 andi.l &0x7, %d1 # extract reg field
3895 cmpi.b %d0,&0x18 # is mode (An)+ ?
3896 beq.b dcea_pi # yes
3898 cmpi.b %d0,&0x20 # is mode -(An) ?
3899 beq.b dcea_pd # yes
3901 or.w %d1,%d0 # concat mode,reg
3902 cmpi.b %d0,&0x3c # is mode #<data>?
3904 beq.b dcea_imm # yes
3906 mov.l EXC_EA(%a6),%a0 # all other modes: stacked <ea> is correct
3909 # need to set immediate data flag here since we'll need to do
3910 # an imem_read to fetch this later.
3911 dcea_imm:
3912 mov.b &immed_flg,SPCOND_FLG(%a6) # flag: operand is immediate data
3913 lea ([USER_FPIAR,%a6],0x4),%a0 # <ea> = faulting PC + 4 (imm follows opword)
3916 # here, the <ea> is stacked correctly. however, we must update the
3917 # address register...
3918 dcea_pi:
3919 mov.l %a0,%d0 # pass amt to inc by
3920 bsr.l inc_areg # inc addr register
3922 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3925 # the <ea> is stacked correctly for all but extended and packed which
3926 # the <ea>s are 8 bytes too large.
3927 # it would make no sense to have a pre-decrement to a7 in supervisor
3928 # mode so we don't even worry about this tricky case here : )
3929 dcea_pd:
3930 mov.l %a0,%d0 # pass amt to dec by
3931 bsr.l dec_areg # dec addr register
3933 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3935 cmpi.b %d0,&0xc # is opsize ext or packed (12 bytes)?
3936 beq.b dcea_pd2 # yes
3938 dcea_pd2:
3939 sub.l &0x8,%a0 # correct <ea>
3940 mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3943 #########################################################################
3944 # XDEF **************************************************************** #
3945 # _calc_ea_fout(): calculate correct stacked <ea> for extended #
3946 # and packed data opclass 3 operations. #
3948 # XREF **************************************************************** #
3949 # None #
3951 # INPUT *************************************************************** #
3952 # None #
3954 # OUTPUT ************************************************************** #
3955 # a0 = return correct effective address #
3957 # ALGORITHM *********************************************************** #
3958 # For opclass 3 extended and packed data operations, the <ea> #
3959 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3960 # modes. Also, while we're at it, the index register itself must get #
3961 # updated. #
3962 # So, for -(an), we must subtract 8 off of the stacked <ea> value #
3963 # and return that value as the correct <ea> and store that value in An. #
3964 # For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3966 #########################################################################
3968 # This calc_ea is currently used to retrieve the correct <ea>
3969 # for fmove outs of type extended and packed.
# _calc_ea_fout: correct the stacked <ea> for extended/packed opclass-3
# (fmove out) operations with (An)+ or -(An) addressing, and update the
# saved image of the address register (An itself is NOT touched directly;
# the exception-frame copies EXC_DREGS/EXC_A6/EXC_A7 or live a2-a5 are).
# Out: a0 = correct effective address.
3970 global _calc_ea_fout
3971 _calc_ea_fout:
3972 mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
3973 mov.l %d0,%d1 # make a copy
3975 andi.w &0x38,%d0 # extract mode field
3976 andi.l &0x7,%d1 # extract reg field
3978 cmpi.b %d0,&0x18 # is mode (An)+ ?
3979 beq.b ceaf_pi # yes
3981 cmpi.b %d0,&0x20 # is mode -(An) ?
3982 beq.w ceaf_pd # yes
3984 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3987 # (An)+ : extended and packed fmove out
3988 # : stacked <ea> is correct
3989 # : "An" not updated
3990 ceaf_pi:
3991 mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # fetch per-register jump offset
3992 mov.l EXC_EA(%a6),%a0 # return the (correct) stacked <ea>
3993 jmp (tbl_ceaf_pi.b,%pc,%d1.w*1) # dispatch on register number
3995 swbeg &0x8
3996 tbl_ceaf_pi:
3997 short ceaf_pi0 - tbl_ceaf_pi
3998 short ceaf_pi1 - tbl_ceaf_pi
3999 short ceaf_pi2 - tbl_ceaf_pi
4000 short ceaf_pi3 - tbl_ceaf_pi
4001 short ceaf_pi4 - tbl_ceaf_pi
4002 short ceaf_pi5 - tbl_ceaf_pi
4003 short ceaf_pi6 - tbl_ceaf_pi
4004 short ceaf_pi7 - tbl_ceaf_pi
4006 ceaf_pi0:
4007 addi.l &0xc,EXC_DREGS+0x8(%a6) # a0 image += 12
4009 ceaf_pi1:
4010 addi.l &0xc,EXC_DREGS+0xc(%a6) # a1 image += 12
4012 ceaf_pi2:
4013 add.l &0xc,%a2 # a2 is live; bump it directly
4015 ceaf_pi3:
4016 add.l &0xc,%a3
4018 ceaf_pi4:
4019 add.l &0xc,%a4
4021 ceaf_pi5:
4022 add.l &0xc,%a5
4024 ceaf_pi6:
4025 addi.l &0xc,EXC_A6(%a6) # saved a6 (frame ptr in use) += 12
4027 ceaf_pi7:
4028 mov.b &mia7_flg,SPCOND_FLG(%a6) # flag special case: (a7)+ move
4029 addi.l &0xc,EXC_A7(%a6)
4032 # -(An) : extended and packed fmove out
4033 # : stacked <ea> = actual <ea> + 8
4034 # : "An" not updated
4035 ceaf_pd:
4036 mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # fetch per-register jump offset
4037 mov.l EXC_EA(%a6),%a0
4038 sub.l &0x8,%a0 # actual <ea> = stacked <ea> - 8
4039 sub.l &0x8,EXC_EA(%a6) # fix the stacked copy too
4040 jmp (tbl_ceaf_pd.b,%pc,%d1.w*1) # dispatch on register number
4042 swbeg &0x8
4043 tbl_ceaf_pd:
4044 short ceaf_pd0 - tbl_ceaf_pd
4045 short ceaf_pd1 - tbl_ceaf_pd
4046 short ceaf_pd2 - tbl_ceaf_pd
4047 short ceaf_pd3 - tbl_ceaf_pd
4048 short ceaf_pd4 - tbl_ceaf_pd
4049 short ceaf_pd5 - tbl_ceaf_pd
4050 short ceaf_pd6 - tbl_ceaf_pd
4051 short ceaf_pd7 - tbl_ceaf_pd
4053 ceaf_pd0:
4054 mov.l %a0,EXC_DREGS+0x8(%a6) # a0 image = corrected <ea>
4056 ceaf_pd1:
4057 mov.l %a0,EXC_DREGS+0xc(%a6) # a1 image = corrected <ea>
4059 ceaf_pd2:
4060 mov.l %a0,%a2 # a2 is live; set it directly
4062 ceaf_pd3:
4063 mov.l %a0,%a3
4065 ceaf_pd4:
4066 mov.l %a0,%a4
4068 ceaf_pd5:
4069 mov.l %a0,%a5
4071 ceaf_pd6:
4072 mov.l %a0,EXC_A6(%a6) # saved a6 = corrected <ea>
4074 ceaf_pd7:
4075 mov.l %a0,EXC_A7(%a6)
4076 mov.b &mda7_flg,SPCOND_FLG(%a6) # flag special case: -(a7) move
4080 # This table holds the offsets of the emulation routines for each individual
4081 # math operation relative to the address of this table. Included are
4082 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4083 # this table is for the version of the 060FPSP without transcendentals.
4084 # The location within the table is determined by the extension bits of the
4085 # operation longword.
# tbl_unsupp: table of longword offsets (relative to tbl_unsupp) of the
# emulation routines, indexed by the instruction's extension field (*4).
# Entries reading "tbl_unsupp - tbl_unsupp" (offset 0) are operations NOT
# emulated by this non-transcendental build.
# NOTE: "fsin"/"fdin" below are the package's names for the single-/double-
# rounded fmove routines (see the "# 40: fsmove" / "# 44: fdmove" tags),
# not the sine function.
4088 swbeg &109
4089 tbl_unsupp:
4090 long fin - tbl_unsupp # 00: fmove
4091 long fint - tbl_unsupp # 01: fint
4092 long tbl_unsupp - tbl_unsupp # 02: fsinh
4093 long fintrz - tbl_unsupp # 03: fintrz
4094 long fsqrt - tbl_unsupp # 04: fsqrt
4095 long tbl_unsupp - tbl_unsupp
4096 long tbl_unsupp - tbl_unsupp # 06: flognp1
4097 long tbl_unsupp - tbl_unsupp
4098 long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4099 long tbl_unsupp - tbl_unsupp # 09: ftanh
4100 long tbl_unsupp - tbl_unsupp # 0a: fatan
4101 long tbl_unsupp - tbl_unsupp
4102 long tbl_unsupp - tbl_unsupp # 0c: fasin
4103 long tbl_unsupp - tbl_unsupp # 0d: fatanh
4104 long tbl_unsupp - tbl_unsupp # 0e: fsin
4105 long tbl_unsupp - tbl_unsupp # 0f: ftan
4106 long tbl_unsupp - tbl_unsupp # 10: fetox
4107 long tbl_unsupp - tbl_unsupp # 11: ftwotox
4108 long tbl_unsupp - tbl_unsupp # 12: ftentox
4109 long tbl_unsupp - tbl_unsupp
4110 long tbl_unsupp - tbl_unsupp # 14: flogn
4111 long tbl_unsupp - tbl_unsupp # 15: flog10
4112 long tbl_unsupp - tbl_unsupp # 16: flog2
4113 long tbl_unsupp - tbl_unsupp
4114 long fabs - tbl_unsupp # 18: fabs
4115 long tbl_unsupp - tbl_unsupp # 19: fcosh
4116 long fneg - tbl_unsupp # 1a: fneg
4117 long tbl_unsupp - tbl_unsupp
4118 long tbl_unsupp - tbl_unsupp # 1c: facos
4119 long tbl_unsupp - tbl_unsupp # 1d: fcos
4120 long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4121 long tbl_unsupp - tbl_unsupp # 1f: fgetman
4122 long fdiv - tbl_unsupp # 20: fdiv
4123 long tbl_unsupp - tbl_unsupp # 21: fmod
4124 long fadd - tbl_unsupp # 22: fadd
4125 long fmul - tbl_unsupp # 23: fmul
4126 long fsgldiv - tbl_unsupp # 24: fsgldiv
4127 long tbl_unsupp - tbl_unsupp # 25: frem
4128 long tbl_unsupp - tbl_unsupp # 26: fscale
4129 long fsglmul - tbl_unsupp # 27: fsglmul
4130 long fsub - tbl_unsupp # 28: fsub
4131 long tbl_unsupp - tbl_unsupp
4132 long tbl_unsupp - tbl_unsupp
4133 long tbl_unsupp - tbl_unsupp
4134 long tbl_unsupp - tbl_unsupp
4135 long tbl_unsupp - tbl_unsupp
4136 long tbl_unsupp - tbl_unsupp
4137 long tbl_unsupp - tbl_unsupp
4138 long tbl_unsupp - tbl_unsupp # 30: fsincos
4139 long tbl_unsupp - tbl_unsupp # 31: fsincos
4140 long tbl_unsupp - tbl_unsupp # 32: fsincos
4141 long tbl_unsupp - tbl_unsupp # 33: fsincos
4142 long tbl_unsupp - tbl_unsupp # 34: fsincos
4143 long tbl_unsupp - tbl_unsupp # 35: fsincos
4144 long tbl_unsupp - tbl_unsupp # 36: fsincos
4145 long tbl_unsupp - tbl_unsupp # 37: fsincos
4146 long fcmp - tbl_unsupp # 38: fcmp
4147 long tbl_unsupp - tbl_unsupp
4148 long ftst - tbl_unsupp # 3a: ftst
4149 long tbl_unsupp - tbl_unsupp
4150 long tbl_unsupp - tbl_unsupp
4151 long tbl_unsupp - tbl_unsupp
4152 long tbl_unsupp - tbl_unsupp
4153 long tbl_unsupp - tbl_unsupp
4154 long fsin - tbl_unsupp # 40: fsmove
4155 long fssqrt - tbl_unsupp # 41: fssqrt
4156 long tbl_unsupp - tbl_unsupp
4157 long tbl_unsupp - tbl_unsupp
4158 long fdin - tbl_unsupp # 44: fdmove
4159 long fdsqrt - tbl_unsupp # 45: fdsqrt
4160 long tbl_unsupp - tbl_unsupp
4161 long tbl_unsupp - tbl_unsupp
4162 long tbl_unsupp - tbl_unsupp
4163 long tbl_unsupp - tbl_unsupp
4164 long tbl_unsupp - tbl_unsupp
4165 long tbl_unsupp - tbl_unsupp
4166 long tbl_unsupp - tbl_unsupp
4167 long tbl_unsupp - tbl_unsupp
4168 long tbl_unsupp - tbl_unsupp
4169 long tbl_unsupp - tbl_unsupp
4170 long tbl_unsupp - tbl_unsupp
4171 long tbl_unsupp - tbl_unsupp
4172 long tbl_unsupp - tbl_unsupp
4173 long tbl_unsupp - tbl_unsupp
4174 long tbl_unsupp - tbl_unsupp
4175 long tbl_unsupp - tbl_unsupp
4176 long tbl_unsupp - tbl_unsupp
4177 long tbl_unsupp - tbl_unsupp
4178 long fsabs - tbl_unsupp # 58: fsabs
4179 long tbl_unsupp - tbl_unsupp
4180 long fsneg - tbl_unsupp # 5a: fsneg
4181 long tbl_unsupp - tbl_unsupp
4182 long fdabs - tbl_unsupp # 5c: fdabs
4183 long tbl_unsupp - tbl_unsupp
4184 long fdneg - tbl_unsupp # 5e: fdneg
4185 long tbl_unsupp - tbl_unsupp
4186 long fsdiv - tbl_unsupp # 60: fsdiv
4187 long tbl_unsupp - tbl_unsupp
4188 long fsadd - tbl_unsupp # 62: fsadd
4189 long fsmul - tbl_unsupp # 63: fsmul
4190 long fddiv - tbl_unsupp # 64: fddiv
4191 long tbl_unsupp - tbl_unsupp
4192 long fdadd - tbl_unsupp # 66: fdadd
4193 long fdmul - tbl_unsupp # 67: fdmul
4194 long fssub - tbl_unsupp # 68: fssub
4195 long tbl_unsupp - tbl_unsupp
4196 long tbl_unsupp - tbl_unsupp
4197 long tbl_unsupp - tbl_unsupp
4198 long fdsub - tbl_unsupp # 6c: fdsub
4200 #################################################
4201 # Add this here so non-fp modules can compile.
4202 # (smovcr is called from fpsp_inex.)
# smovcr: link-time stub only — branches to itself (endless loop).
# Present so modules referencing smovcr link in this build; the real
# fmovecr emulation lives in the full (transcendental) FPSP.
4203 global smovcr
4204 smovcr:
4205 bra.b smovcr # stub: must never actually be reached
4207 #########################################################################
4208 # XDEF **************************************************************** #
4209 # fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4211 # XREF **************************************************************** #
4212 # fetch_dreg() - fetch data register #
4213 # {i,d,}mem_read() - fetch data from memory #
4214 # _mem_write() - write data to memory #
4215 # iea_iacc() - instruction memory access error occurred #
4216 # iea_dacc() - data memory access error occurred #
4217 # restore() - restore An index regs if access error occurred #
4219 # INPUT *************************************************************** #
4220 # None #
4222 # OUTPUT ************************************************************** #
4223 # If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4224 # d0 = size of dump #
4225 # d1 = Dn #
4226 # Else if instruction access error, #
4227 # d0 = FSLW #
4228 # Else if data access error, #
4229 # d0 = FSLW #
4230 # a0 = address of fault #
4231 # Else #
4232 # none. #
4234 # ALGORITHM *********************************************************** #
4235 # The effective address must be calculated since this is entered #
4236 # from an "Unimplemented Effective Address" exception handler. So, we #
4237 # have our own fcalc_ea() routine here. If an access error is flagged #
4238 # by a _{i,d,}mem_read() call, we must exit through the special #
4239 # handler. #
4240 # The data register is determined and its value loaded to get the #
4241 # string of FP registers affected. This value is used as an index into #
4242 # a lookup table such that we can determine the number of bytes #
4243 # involved. #
4244 # If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4245 # to read in all FP values. Again, _mem_read() may fail and require a #
4246 # special exit. #
4247 # If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4248 # to write all FP values. _mem_write() may also fail. #
4249 # If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4250 # then we return the size of the dump and the string to the caller #
4251 # so that the move can occur outside of this routine. This special #
4252 # case is required so that moves to the system stack are handled #
4253 # correctly. #
4255 # DYNAMIC: #
4256 # fmovm.x dn, <ea> #
4257 # fmovm.x <ea>, dn #
4259 # <WORD 1> <WORD2> #
4260 # 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4262 # & = (0): predecrement addressing mode #
4263 # (1): postincrement or control addressing mode #
4264 # @ = (0): move listed regs from memory to the FPU #
4265 # (1): move listed regs from the FPU to memory #
4266 # $$$ : index of data register holding reg select mask #
4268 # NOTES: #
4269 # If the data register holds a zero, then the #
4270 # instruction is a nop. #
4272 #########################################################################
# fmovm_dynamic: emulate the dynamic-register-list "fmovm" instruction.
# The register mask lives in a data register (selected by the extension
# word); tbl_fmovm_size converts the 8-bit mask to a byte count, and
# fmovm_calc_ea computes the effective address. Data is staged through a
# temporary block carved below %sp and copied with _dmem_read/_dmem_write.
# See the XDEF header above for inputs/outputs and the error exits.
4274 global fmovm_dynamic
4275 fmovm_dynamic:
4277 # extract the data register in which the bit string resides...
4278 mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4279 andi.w &0x70,%d1 # extract reg bits
4280 lsr.b &0x4,%d1 # shift into lo bits
4282 # fetch the bit string into d0...
4283 bsr.l fetch_dreg # fetch reg string
4285 andi.l &0x000000ff,%d0 # keep only lo byte
4287 mov.l %d0,-(%sp) # save strg
4288 mov.b (tbl_fmovm_size.w,%pc,%d0),%d0 # mask -> # of bytes to move
4289 mov.l %d0,-(%sp) # save size
4290 bsr.l fmovm_calc_ea # calculate <ea>
4291 mov.l (%sp)+,%d0 # restore size
4292 mov.l (%sp)+,%d1 # restore strg
4294 # if the bit string is a zero, then the operation is a no-op
4295 # but, make sure that we've calculated ea and advanced the opword pointer
4296 beq.w fmovm_data_done
4298 # separate move ins from move outs...
4299 btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4300 beq.w fmovm_data_in # bit clear => it's a move in
4302 #############
4303 # MOVE OUT: #
4304 #############
4305 fmovm_data_out:
4306 btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4307 bne.w fmovm_out_ctrl # control
4309 ############################
4310 fmovm_out_predec:
4311 # for predecrement mode, the bit string is the opposite of both control
4312 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4313 # here, we convert it to be just like the others...
4314 mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4316 btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4317 beq.b fmovm_out_ctrl # user
4319 fmovm_out_predec_s:
4320 cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4321 bne.b fmovm_out_ctrl # no; handle normally
4323 # the operation was unfortunately an: fmovm.x dn,-(sp)
4324 # called from supervisor mode.
4325 # we're also passing "size" and "strg" back to the calling routine
4328 ############################
4329 fmovm_out_ctrl:
4330 mov.l %a0,%a1 # move <ea> to a1
4332 sub.l %d0,%sp # subtract size of dump
4333 lea (%sp),%a0 # a0 = staging buffer on our stack
4335 tst.b %d1 # should FP0 be moved?
4336 bpl.b fmovm_out_ctrl_fp1 # no
4338 mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4339 mov.l 0x4+EXC_FP0(%a6),(%a0)+
4340 mov.l 0x8+EXC_FP0(%a6),(%a0)+
4342 fmovm_out_ctrl_fp1:
4343 lsl.b &0x1,%d1 # should FP1 be moved?
4344 bpl.b fmovm_out_ctrl_fp2 # no
4346 mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4347 mov.l 0x4+EXC_FP1(%a6),(%a0)+
4348 mov.l 0x8+EXC_FP1(%a6),(%a0)+
4350 fmovm_out_ctrl_fp2:
4351 lsl.b &0x1,%d1 # should FP2 be moved?
4352 bpl.b fmovm_out_ctrl_fp3 # no
4354 fmovm.x &0x20,(%a0) # yes; fp2-fp7 are live, dump directly
4355 add.l &0xc,%a0
4357 fmovm_out_ctrl_fp3:
4358 lsl.b &0x1,%d1 # should FP3 be moved?
4359 bpl.b fmovm_out_ctrl_fp4 # no
4361 fmovm.x &0x10,(%a0) # yes
4362 add.l &0xc,%a0
4364 fmovm_out_ctrl_fp4:
4365 lsl.b &0x1,%d1 # should FP4 be moved?
4366 bpl.b fmovm_out_ctrl_fp5 # no
4368 fmovm.x &0x08,(%a0) # yes
4369 add.l &0xc,%a0
4371 fmovm_out_ctrl_fp5:
4372 lsl.b &0x1,%d1 # should FP5 be moved?
4373 bpl.b fmovm_out_ctrl_fp6 # no
4375 fmovm.x &0x04,(%a0) # yes
4376 add.l &0xc,%a0
4378 fmovm_out_ctrl_fp6:
4379 lsl.b &0x1,%d1 # should FP6 be moved?
4380 bpl.b fmovm_out_ctrl_fp7 # no
4382 fmovm.x &0x02,(%a0) # yes
4383 add.l &0xc,%a0
4385 fmovm_out_ctrl_fp7:
4386 lsl.b &0x1,%d1 # should FP7 be moved?
4387 bpl.b fmovm_out_ctrl_done # no
4389 fmovm.x &0x01,(%a0) # yes
4390 add.l &0xc,%a0
4392 fmovm_out_ctrl_done:
4393 mov.l %a1,L_SCR1(%a6) # save dst <ea> for error reporting
4395 lea (%sp),%a0 # pass: supervisor src
4396 mov.l %d0,-(%sp) # save size
4397 bsr.l _dmem_write # copy data to user mem
4399 mov.l (%sp)+,%d0 # restore size
4400 add.l %d0,%sp # clear fpreg data from stack
4402 tst.l %d1 # did dstore err?
4403 bne.w fmovm_out_err # yes
4407 ############
4408 # MOVE IN: #
4409 ############
4410 fmovm_data_in:
4411 mov.l %a0,L_SCR1(%a6) # save src <ea> for error reporting
4413 sub.l %d0,%sp # make room for fpregs
4414 lea (%sp),%a1 # pass: supervisor dst
4416 mov.l %d1,-(%sp) # save bit string for later
4417 mov.l %d0,-(%sp) # save # of bytes
4419 bsr.l _dmem_read # copy data from user mem
4421 mov.l (%sp)+,%d0 # retrieve # of bytes
4423 tst.l %d1 # did dfetch fail?
4424 bne.w fmovm_in_err # yes
4426 mov.l (%sp)+,%d1 # load bit string
4428 lea (%sp),%a0 # addr of stack
4430 tst.b %d1 # should FP0 be moved?
4431 bpl.b fmovm_data_in_fp1 # no
4433 mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4434 mov.l (%a0)+,0x4+EXC_FP0(%a6)
4435 mov.l (%a0)+,0x8+EXC_FP0(%a6)
4437 fmovm_data_in_fp1:
4438 lsl.b &0x1,%d1 # should FP1 be moved?
4439 bpl.b fmovm_data_in_fp2 # no
4441 mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4442 mov.l (%a0)+,0x4+EXC_FP1(%a6)
4443 mov.l (%a0)+,0x8+EXC_FP1(%a6)
4445 fmovm_data_in_fp2:
4446 lsl.b &0x1,%d1 # should FP2 be moved?
4447 bpl.b fmovm_data_in_fp3 # no
4449 fmovm.x (%a0)+,&0x20 # yes; fp2-fp7 are live, load directly
4451 fmovm_data_in_fp3:
4452 lsl.b &0x1,%d1 # should FP3 be moved?
4453 bpl.b fmovm_data_in_fp4 # no
4455 fmovm.x (%a0)+,&0x10 # yes
4457 fmovm_data_in_fp4:
4458 lsl.b &0x1,%d1 # should FP4 be moved?
4459 bpl.b fmovm_data_in_fp5 # no
4461 fmovm.x (%a0)+,&0x08 # yes
4463 fmovm_data_in_fp5:
4464 lsl.b &0x1,%d1 # should FP5 be moved?
4465 bpl.b fmovm_data_in_fp6 # no
4467 fmovm.x (%a0)+,&0x04 # yes
4469 fmovm_data_in_fp6:
4470 lsl.b &0x1,%d1 # should FP6 be moved?
4471 bpl.b fmovm_data_in_fp7 # no
4473 fmovm.x (%a0)+,&0x02 # yes
4475 fmovm_data_in_fp7:
4476 lsl.b &0x1,%d1 # should FP7 be moved?
4477 bpl.b fmovm_data_in_done # no
4479 fmovm.x (%a0)+,&0x01 # yes
4481 fmovm_data_in_done:
4482 add.l %d0,%sp # remove fpregs from stack
4485 #####################################
4487 fmovm_data_done:
4490 ##############################################################################
4493 # table indexed by the operation's bit string that gives the number
4494 # of bytes that will be moved.
4496 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
# tbl_fmovm_size: 256-entry byte table indexed by the 8-bit register mask;
# each entry is (# of 1 bits in the mask) * 12 bytes per extended fpreg.
4498 tbl_fmovm_size:
4499 byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4500 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4501 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4502 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4503 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4504 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4505 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4506 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4507 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4508 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4509 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4512 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4513 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4514 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4515 byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4516 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4517 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4518 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4519 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4520 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4521 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4522 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4523 byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4524 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4525 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4526 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4527 byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4528 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4529 byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4530 byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4533 # table to convert a pre-decrement bit string into a post-increment
4534 # or control bit string.
4535 # ex: 0x00 ==> 0x00
4536 # 0x01 ==> 0x80
4537 # 0x02 ==> 0x40
4540 # 0xfd ==> 0xbf
4541 # 0xfe ==> 0x7f
4542 # 0xff ==> 0xff
# tbl_fmovm_convert: 256-entry byte table; entry[i] is i with its bit
# order reversed, converting a predecrement-mode register mask into the
# postincrement/control ordering (see examples above).
4544 tbl_fmovm_convert:
4545 byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4546 byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4547 byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4548 byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4549 byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4550 byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4551 byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4552 byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4553 byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4554 byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4555 byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4556 byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4557 byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4558 byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4559 byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4560 byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4561 byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4562 byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4563 byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4564 byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4565 byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4566 byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4567 byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4568 byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4569 byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4570 byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4571 byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4572 byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4573 byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4574 byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4575 byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4576 byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4578 global fmovm_calc_ea
4579 ###############################################
4580 # _fmovm_calc_ea: calculate effective address #
4581 ###############################################
# Inputs:  d0 = number of bytes being transferred (becomes the
#               increment/decrement amount for the (An)+/-(An) modes)
#          a6 = exception stack frame pointer (EXC_OPWORD, EXC_EXTWPTR...)
# Output:  a0 = calculated effective address
# Dispatches through the 64-entry word-offset table below, indexed by
# the low 6 bits {mode,reg} of the opcode word.
4582 fmovm_calc_ea:
4583 mov.l %d0,%a0 # move # bytes to a0
4585 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4586 # easily changed if they were inputs passed in registers.
4587 mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4588 mov.w %d0,%d1 # make a copy
4590 andi.w &0x3f,%d0 # extract mode and reg fields (6 bits)
4591 andi.l &0x7,%d1 # extract reg field
4593 # jump to the corresponding function for each {MODE,REG} pair.
4594 mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4595 jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4597 swbeg &64
4598 tbl_fea_mode:
# modes 0 (Dn) and 1 (An) are invalid for fmovm: the zero offset jumps
# back to the table start (not reached for legal opcodes).
4599 short tbl_fea_mode - tbl_fea_mode
4600 short tbl_fea_mode - tbl_fea_mode
4601 short tbl_fea_mode - tbl_fea_mode
4602 short tbl_fea_mode - tbl_fea_mode
4603 short tbl_fea_mode - tbl_fea_mode
4604 short tbl_fea_mode - tbl_fea_mode
4605 short tbl_fea_mode - tbl_fea_mode
4606 short tbl_fea_mode - tbl_fea_mode
4608 short tbl_fea_mode - tbl_fea_mode
4609 short tbl_fea_mode - tbl_fea_mode
4610 short tbl_fea_mode - tbl_fea_mode
4611 short tbl_fea_mode - tbl_fea_mode
4612 short tbl_fea_mode - tbl_fea_mode
4613 short tbl_fea_mode - tbl_fea_mode
4614 short tbl_fea_mode - tbl_fea_mode
4615 short tbl_fea_mode - tbl_fea_mode
# mode 2: address register indirect (An)
4617 short faddr_ind_a0 - tbl_fea_mode
4618 short faddr_ind_a1 - tbl_fea_mode
4619 short faddr_ind_a2 - tbl_fea_mode
4620 short faddr_ind_a3 - tbl_fea_mode
4621 short faddr_ind_a4 - tbl_fea_mode
4622 short faddr_ind_a5 - tbl_fea_mode
4623 short faddr_ind_a6 - tbl_fea_mode
4624 short faddr_ind_a7 - tbl_fea_mode
# mode 3: postincrement (An)+
4626 short faddr_ind_p_a0 - tbl_fea_mode
4627 short faddr_ind_p_a1 - tbl_fea_mode
4628 short faddr_ind_p_a2 - tbl_fea_mode
4629 short faddr_ind_p_a3 - tbl_fea_mode
4630 short faddr_ind_p_a4 - tbl_fea_mode
4631 short faddr_ind_p_a5 - tbl_fea_mode
4632 short faddr_ind_p_a6 - tbl_fea_mode
4633 short faddr_ind_p_a7 - tbl_fea_mode
# mode 4: predecrement -(An)
4635 short faddr_ind_m_a0 - tbl_fea_mode
4636 short faddr_ind_m_a1 - tbl_fea_mode
4637 short faddr_ind_m_a2 - tbl_fea_mode
4638 short faddr_ind_m_a3 - tbl_fea_mode
4639 short faddr_ind_m_a4 - tbl_fea_mode
4640 short faddr_ind_m_a5 - tbl_fea_mode
4641 short faddr_ind_m_a6 - tbl_fea_mode
4642 short faddr_ind_m_a7 - tbl_fea_mode
# mode 5: 16-bit displacement (d16,An)
4644 short faddr_ind_disp_a0 - tbl_fea_mode
4645 short faddr_ind_disp_a1 - tbl_fea_mode
4646 short faddr_ind_disp_a2 - tbl_fea_mode
4647 short faddr_ind_disp_a3 - tbl_fea_mode
4648 short faddr_ind_disp_a4 - tbl_fea_mode
4649 short faddr_ind_disp_a5 - tbl_fea_mode
4650 short faddr_ind_disp_a6 - tbl_fea_mode
4651 short faddr_ind_disp_a7 - tbl_fea_mode
# mode 6: indexed / memory indirect (one handler decodes the ext word)
4653 short faddr_ind_ext - tbl_fea_mode
4654 short faddr_ind_ext - tbl_fea_mode
4655 short faddr_ind_ext - tbl_fea_mode
4656 short faddr_ind_ext - tbl_fea_mode
4657 short faddr_ind_ext - tbl_fea_mode
4658 short faddr_ind_ext - tbl_fea_mode
4659 short faddr_ind_ext - tbl_fea_mode
4660 short faddr_ind_ext - tbl_fea_mode
# mode 7: abs.w, abs.l, (d16,PC), PC-indexed; regs 4-7 invalid
4662 short fabs_short - tbl_fea_mode
4663 short fabs_long - tbl_fea_mode
4664 short fpc_ind - tbl_fea_mode
4665 short fpc_ind_ext - tbl_fea_mode
4666 short tbl_fea_mode - tbl_fea_mode
4667 short tbl_fea_mode - tbl_fea_mode
4668 short tbl_fea_mode - tbl_fea_mode
4669 short tbl_fea_mode - tbl_fea_mode
4671 ###################################
4672 # Address register indirect: (An) #
4673 ###################################
# Each routine returns <ea> = An in a0. a0/a1 were saved in the
# exception frame at EXC_DREGS+0x8/+0xc, a7 at EXC_A7, and a6 at
# (a6); a2-a5 are still live in their registers.
4674 faddr_ind_a0:
4675 mov.l EXC_DREGS+0x8(%a6),%a0 # Get current a0
4678 faddr_ind_a1:
4679 mov.l EXC_DREGS+0xc(%a6),%a0 # Get current a1
4682 faddr_ind_a2:
4683 mov.l %a2,%a0 # Get current a2
4686 faddr_ind_a3:
4687 mov.l %a3,%a0 # Get current a3
4690 faddr_ind_a4:
4691 mov.l %a4,%a0 # Get current a4
4694 faddr_ind_a5:
4695 mov.l %a5,%a0 # Get current a5
4698 faddr_ind_a6:
4699 mov.l (%a6),%a0 # Get current a6 (saved frame pointer)
4702 faddr_ind_a7:
4703 mov.l EXC_A7(%a6),%a0 # Get current a7
4706 #####################################################
4707 # Address register indirect w/ postincrement: (An)+ #
4708 #####################################################
# On entry a0 = byte count. Each routine writes back An + count as
# the new register value and returns the ORIGINAL (pre-increment)
# An in a0 as the <ea>.
4709 faddr_ind_p_a0:
4710 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4711 mov.l %d0,%d1
4712 add.l %a0,%d1 # Increment
4713 mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
4714 mov.l %d0,%a0
4717 faddr_ind_p_a1:
4718 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4719 mov.l %d0,%d1
4720 add.l %a0,%d1 # Increment
4721 mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
4722 mov.l %d0,%a0
4725 faddr_ind_p_a2:
4726 mov.l %a2,%d0 # Get current a2
4727 mov.l %d0,%d1
4728 add.l %a0,%d1 # Increment
4729 mov.l %d1,%a2 # Save incr value
4730 mov.l %d0,%a0
4733 faddr_ind_p_a3:
4734 mov.l %a3,%d0 # Get current a3
4735 mov.l %d0,%d1
4736 add.l %a0,%d1 # Increment
4737 mov.l %d1,%a3 # Save incr value
4738 mov.l %d0,%a0
4741 faddr_ind_p_a4:
4742 mov.l %a4,%d0 # Get current a4
4743 mov.l %d0,%d1
4744 add.l %a0,%d1 # Increment
4745 mov.l %d1,%a4 # Save incr value
4746 mov.l %d0,%a0
4749 faddr_ind_p_a5:
4750 mov.l %a5,%d0 # Get current a5
4751 mov.l %d0,%d1
4752 add.l %a0,%d1 # Increment
4753 mov.l %d1,%a5 # Save incr value
4754 mov.l %d0,%a0
4757 faddr_ind_p_a6:
4758 mov.l (%a6),%d0 # Get current a6
4759 mov.l %d0,%d1
4760 add.l %a0,%d1 # Increment
4761 mov.l %d1,(%a6) # Save incr value
4762 mov.l %d0,%a0
4765 faddr_ind_p_a7:
# (sp)+ needs special exit handling, so flag it for the caller.
4766 mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4768 mov.l EXC_A7(%a6),%d0 # Get current a7
4769 mov.l %d0,%d1
4770 add.l %a0,%d1 # Increment
4771 mov.l %d1,EXC_A7(%a6) # Save incr value
4772 mov.l %d0,%a0
4775 ####################################################
4776 # Address register indirect w/ predecrement: -(An) #
4777 ####################################################
# On entry a0 = byte count. Each routine writes back An - count as
# the new register value and returns that decremented value in a0
# as the <ea>.
4778 faddr_ind_m_a0:
4779 mov.l EXC_DREGS+0x8(%a6),%d0 # Get current a0
4780 sub.l %a0,%d0 # Decrement
4781 mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
4782 mov.l %d0,%a0
4785 faddr_ind_m_a1:
4786 mov.l EXC_DREGS+0xc(%a6),%d0 # Get current a1
4787 sub.l %a0,%d0 # Decrement
4788 mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
4789 mov.l %d0,%a0
4792 faddr_ind_m_a2:
4793 mov.l %a2,%d0 # Get current a2
4794 sub.l %a0,%d0 # Decrement
4795 mov.l %d0,%a2 # Save decr value
4796 mov.l %d0,%a0
4799 faddr_ind_m_a3:
4800 mov.l %a3,%d0 # Get current a3
4801 sub.l %a0,%d0 # Decrement
4802 mov.l %d0,%a3 # Save decr value
4803 mov.l %d0,%a0
4806 faddr_ind_m_a4:
4807 mov.l %a4,%d0 # Get current a4
4808 sub.l %a0,%d0 # Decrement
4809 mov.l %d0,%a4 # Save decr value
4810 mov.l %d0,%a0
4813 faddr_ind_m_a5:
4814 mov.l %a5,%d0 # Get current a5
4815 sub.l %a0,%d0 # Decrement
4816 mov.l %d0,%a5 # Save decr value
4817 mov.l %d0,%a0
4820 faddr_ind_m_a6:
4821 mov.l (%a6),%d0 # Get current a6
4822 sub.l %a0,%d0 # Decrement
4823 mov.l %d0,(%a6) # Save decr value
4824 mov.l %d0,%a0
4827 faddr_ind_m_a7:
# -(sp) needs special exit handling, so flag it for the caller.
4828 mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4830 mov.l EXC_A7(%a6),%d0 # Get current a7
4831 sub.l %a0,%d0 # Decrement
4832 mov.l %d0,EXC_A7(%a6) # Save decr value
4833 mov.l %d0,%a0
4836 ########################################################
4837 # Address register indirect w/ displacement: (d16, An) #
4838 ########################################################
# Each routine fetches the 16-bit extension word via
# _imem_read_word (failure reported in d1 -> iea_iacc), sign-extends
# it (word move into an address register sign-extends), and returns
# An + d16 in a0.
4839 faddr_ind_disp_a0:
4840 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4841 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4842 bsr.l _imem_read_word
4844 tst.l %d1 # did ifetch fail?
4845 bne.l iea_iacc # yes
4847 mov.w %d0,%a0 # sign extend displacement
4849 add.l EXC_DREGS+0x8(%a6),%a0 # a0 + d16
4852 faddr_ind_disp_a1:
4853 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4854 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4855 bsr.l _imem_read_word
4857 tst.l %d1 # did ifetch fail?
4858 bne.l iea_iacc # yes
4860 mov.w %d0,%a0 # sign extend displacement
4862 add.l EXC_DREGS+0xc(%a6),%a0 # a1 + d16
4865 faddr_ind_disp_a2:
4866 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4867 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4868 bsr.l _imem_read_word
4870 tst.l %d1 # did ifetch fail?
4871 bne.l iea_iacc # yes
4873 mov.w %d0,%a0 # sign extend displacement
4875 add.l %a2,%a0 # a2 + d16
4878 faddr_ind_disp_a3:
4879 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4880 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4881 bsr.l _imem_read_word
4883 tst.l %d1 # did ifetch fail?
4884 bne.l iea_iacc # yes
4886 mov.w %d0,%a0 # sign extend displacement
4888 add.l %a3,%a0 # a3 + d16
4891 faddr_ind_disp_a4:
4892 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4893 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4894 bsr.l _imem_read_word
4896 tst.l %d1 # did ifetch fail?
4897 bne.l iea_iacc # yes
4899 mov.w %d0,%a0 # sign extend displacement
4901 add.l %a4,%a0 # a4 + d16
4904 faddr_ind_disp_a5:
4905 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4906 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4907 bsr.l _imem_read_word
4909 tst.l %d1 # did ifetch fail?
4910 bne.l iea_iacc # yes
4912 mov.w %d0,%a0 # sign extend displacement
4914 add.l %a5,%a0 # a5 + d16
4917 faddr_ind_disp_a6:
4918 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4919 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4920 bsr.l _imem_read_word
4922 tst.l %d1 # did ifetch fail?
4923 bne.l iea_iacc # yes
4925 mov.w %d0,%a0 # sign extend displacement
4927 add.l (%a6),%a0 # a6 + d16 (saved a6 from frame)
4930 faddr_ind_disp_a7:
4931 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4932 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4933 bsr.l _imem_read_word
4935 tst.l %d1 # did ifetch fail?
4936 bne.l iea_iacc # yes
4938 mov.w %d0,%a0 # sign extend displacement
4940 add.l EXC_A7(%a6),%a0 # a7 + d16
4943 ########################################################################
4944 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4945 # " " " w/ " (base displacement): (bd, An, Xn) #
4946 # Memory indirect postindexed: ([bd, An], Xn, od) #
4947 # Memory indirect preindexed: ([bd, An, Xn], od) #
4948 ########################################################################
# In: d1 = base register number (0-7), a6 = frame ptr. Out: a0 = <ea>.
# Brief-format extension words are handled inline; full-format words
# (bit 8 set) are handed off to fcalc_mem_ind.
4949 faddr_ind_ext:
4950 addq.l &0x8,%d1 # regnos 8-15 select address regs in fetch_dreg
4951 bsr.l fetch_dreg # fetch base areg
4952 mov.l %d0,-(%sp) # save base value across the ifetch
4954 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
4955 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
4956 bsr.l _imem_read_word # fetch extword in d0
4958 tst.l %d1 # did ifetch fail?
4959 bne.l iea_iacc # yes
4961 mov.l (%sp)+,%a0 # a0 = base register value
4963 btst &0x8,%d0 # full-format extension word?
4964 bne.w fcalc_mem_ind # yes: memory indirect / base disp
4966 mov.l %d0,L_SCR1(%a6) # hold opword
4968 mov.l %d0,%d1
4969 rol.w &0x4,%d1 # rotate index regno into low nibble
4970 andi.w &0xf,%d1 # extract index regno
4972 # count on fetch_dreg() not to alter a0...
4973 bsr.l fetch_dreg # fetch index
4975 mov.l %d2,-(%sp) # save d2
4976 mov.l L_SCR1(%a6),%d2 # fetch opword
4978 btst &0xb,%d2 # is it word or long?
4979 bne.b faii8_long
4980 ext.l %d0 # sign extend word index
4981 faii8_long:
4982 mov.l %d2,%d1
4983 rol.w &0x7,%d1 # rotate scale bits into place
4984 andi.l &0x3,%d1 # extract scale value
4986 lsl.l %d1,%d0 # shift index by scale
4988 extb.l %d2 # sign extend 8-bit displacement
4989 add.l %d2,%d0 # index + disp
4990 add.l %d0,%a0 # An + (index + disp)
4992 mov.l (%sp)+,%d2 # restore old d2
4995 ###########################
4996 # Absolute short: (XXX).W #
4997 ###########################
# Fetch the 16-bit absolute address; word move into an address
# register sign-extends it to 32 bits. Returns <ea> in a0.
4998 fabs_short:
4999 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5000 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5001 bsr.l _imem_read_word # fetch short address
5003 tst.l %d1 # did ifetch fail?
5004 bne.l iea_iacc # yes
5006 mov.w %d0,%a0 # return <ea> in a0 (sign-extended)
5009 ##########################
5010 # Absolute long: (XXX).L #
5011 ##########################
# Fetch the 32-bit absolute address. Returns <ea> in a0.
5012 fabs_long:
5013 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5014 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5015 bsr.l _imem_read_long # fetch long address
5017 tst.l %d1 # did ifetch fail?
5018 bne.l iea_iacc # yes
5020 mov.l %d0,%a0 # return <ea> in a0
5023 #######################################################
5024 # Program counter indirect w/ displacement: (d16, PC) #
5025 #######################################################
# <ea> = (address of the extension word) + d16; built from
# EXC_EXTWPTR, which _imem_read_word has advanced past the word.
5026 fpc_ind:
5027 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5028 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5029 bsr.l _imem_read_word # fetch word displacement
5031 tst.l %d1 # did ifetch fail?
5032 bne.l iea_iacc # yes
5034 mov.w %d0,%a0 # sign extend displacement
5036 add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
5038 # _imem_read_word() increased the extwptr by 2. need to adjust here.
5039 subq.l &0x2,%a0 # adjust <ea>
5042 ##########################################################
5043 # PC indirect w/ index(8-bit displacement): (d8, PC, An) #
5044 # " " w/ " (base displacement): (bd, PC, An) #
5045 # PC memory indirect postindexed: ([bd, PC], Xn, od) #
5046 # PC memory indirect preindexed: ([bd, PC, Xn], od) #
5047 ##########################################################
# Same structure as faddr_ind_ext but the base is the PC (address
# of the extension word). Full-format words go to fcalc_mem_ind.
5048 fpc_ind_ext:
5049 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5050 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5051 bsr.l _imem_read_word # fetch ext word
5053 tst.l %d1 # did ifetch fail?
5054 bne.l iea_iacc # yes
5056 mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
5057 subq.l &0x2,%a0 # adjust base back to ext word addr
5059 btst &0x8,%d0 # is disp only 8 bits?
5060 bne.w fcalc_mem_ind # calc memory indirect
5062 mov.l %d0,L_SCR1(%a6) # store opword
5064 mov.l %d0,%d1 # make extword copy
5065 rol.w &0x4,%d1 # rotate reg num into place
5066 andi.w &0xf,%d1 # extract register number
5068 # count on fetch_dreg() not to alter a0...
5069 bsr.l fetch_dreg # fetch index
5071 mov.l %d2,-(%sp) # save d2
5072 mov.l L_SCR1(%a6),%d2 # fetch opword
5074 btst &0xb,%d2 # is index word or long?
5075 bne.b fpii8_long # long
5076 ext.l %d0 # sign extend word index
5077 fpii8_long:
5078 mov.l %d2,%d1
5079 rol.w &0x7,%d1 # rotate scale value into place
5080 andi.l &0x3,%d1 # extract scale value
5082 lsl.l %d1,%d0 # shift index by scale
5084 extb.l %d2 # sign extend 8-bit displacement
5085 add.l %d2,%d0 # disp + index
5086 add.l %d0,%a0 # PC + (index + disp)
5088 mov.l (%sp)+,%d2 # restore temp register
# Full-format extension word decode (68020+ addressing):
# handles base displacement, index/base suppression, and memory
# indirect pre/post-indexed modes. In: d0 = extension word,
# a0 = base (An or PC). Out: a0 = final <ea>.
5091 # d2 = index
5092 # d3 = base
5093 # d4 = od
5094 # d5 = extword
5095 fcalc_mem_ind:
5096 btst &0x6,%d0 # is the index suppressed? (IS bit)
5097 beq.b fcalc_index
5099 movm.l &0x3c00,-(%sp) # save d2-d5
5101 mov.l %d0,%d5 # put extword in d5
5102 mov.l %a0,%d3 # put base in d3
5104 clr.l %d2 # yes, so index = 0
5105 bra.b fbase_supp_ck
5107 # index:
5108 fcalc_index:
5109 mov.l %d0,L_SCR1(%a6) # save d0 (opword)
5110 bfextu %d0{&16:&4},%d1 # fetch dreg index
5111 bsr.l fetch_dreg
5113 movm.l &0x3c00,-(%sp) # save d2-d5
5114 mov.l %d0,%d2 # put index in d2
5115 mov.l L_SCR1(%a6),%d5
5116 mov.l %a0,%d3
5118 btst &0xb,%d5 # is index word or long?
5119 bne.b fno_ext
5120 ext.l %d2 # sign extend word index
5122 fno_ext:
5123 bfextu %d5{&21:&2},%d0 # extract scale field
5124 lsl.l %d0,%d2 # scale the index
5126 # base address (passed as parameter in d3):
5127 # we clear the value here if it should actually be suppressed.
5128 fbase_supp_ck:
5129 btst &0x7,%d5 # is the bd suppressed? (BS bit)
5130 beq.b fno_base_sup
5131 clr.l %d3 # suppressed: base = 0
5133 # base displacement:
5134 fno_base_sup:
5135 bfextu %d5{&26:&2},%d0 # get bd size
5136 # beq.l fmovm_error # if (size == 0) it's reserved
5138 cmpi.b %d0,&0x2 # bd size: 1=null, 2=word, 3=long
5139 blt.b fno_bd
5140 beq.b fget_word_bd
# long bd: fetch 32-bit base displacement
5142 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5143 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5144 bsr.l _imem_read_long
5146 tst.l %d1 # did ifetch fail?
5147 bne.l fcea_iacc # yes
5149 bra.b fchk_ind
5151 fget_word_bd:
5152 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5153 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5154 bsr.l _imem_read_word
5156 tst.l %d1 # did ifetch fail?
5157 bne.l fcea_iacc # yes
5159 ext.l %d0 # sign extend bd
5161 fchk_ind:
5162 add.l %d0,%d3 # base += bd
5164 # outer displacement:
5165 fno_bd:
5166 bfextu %d5{&30:&2},%d0 # is od suppressed?
5167 beq.w faii_bd # no memory indirection at all
5169 cmpi.b %d0,&0x2 # od size: 1=null, 2=word, 3=long
5170 blt.b fnull_od
5171 beq.b fword_od
# long od: fetch 32-bit outer displacement
5173 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5174 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5175 bsr.l _imem_read_long
5177 tst.l %d1 # did ifetch fail?
5178 bne.l fcea_iacc # yes
5180 bra.b fadd_them
5182 fword_od:
5183 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5184 addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
5185 bsr.l _imem_read_word
5187 tst.l %d1 # did ifetch fail?
5188 bne.l fcea_iacc # yes
5190 ext.l %d0 # sign extend od
5191 bra.b fadd_them
5193 fnull_od:
5194 clr.l %d0 # null outer displacement
5196 fadd_them:
5197 mov.l %d0,%d4 # d4 = od
5199 btst &0x2,%d5 # pre or post indexing?
5200 beq.b fpre_indexed
# post-indexed: <ea> = mem[base + bd] + index + od
5202 mov.l %d3,%a0
5203 bsr.l _dmem_read_long # fetch intermediate pointer
5205 tst.l %d1 # did dfetch fail?
5206 bne.w fcea_err # yes
5208 add.l %d2,%d0 # <ea> += index
5209 add.l %d4,%d0 # <ea> += od
5210 bra.b fdone_ea
# pre-indexed: <ea> = mem[base + bd + index] + od
5212 fpre_indexed:
5213 add.l %d2,%d3 # preindexing
5214 mov.l %d3,%a0
5215 bsr.l _dmem_read_long # fetch intermediate pointer
5217 tst.l %d1 # did dfetch fail?
5218 bne.w fcea_err # yes
5220 add.l %d4,%d0 # ea += od
5221 bra.b fdone_ea
# no indirection: <ea> = base + bd + index
5223 faii_bd:
5224 add.l %d2,%d3 # ea = (base + bd) + index
5225 mov.l %d3,%d0
5226 fdone_ea:
5227 mov.l %d0,%a0 # return <ea> in a0
5229 movm.l (%sp)+,&0x003c # restore d2-d5
5232 #########################################################
# Access-error exits for the fmovm <ea> calculation above.
# fcea_err: data fetch during memory-indirect decode failed.
5233 fcea_err:
5234 mov.l %d3,%a0 # pass failing address in a0
5236 movm.l (%sp)+,&0x003c # restore d2-d5
5237 mov.w &0x0101,%d0 # error status for iea_dacc — NOTE(review): confirm encoding
5238 bra.l iea_dacc
# fcea_iacc: instruction fetch during decode failed.
5240 fcea_iacc:
5241 movm.l (%sp)+,&0x003c # restore d2-d5
5242 bra.l iea_iacc
# fmovm transfer faulted: restore state, pass direction-specific
# status code to the data access error handler.
5244 fmovm_out_err:
5245 bsr.l restore
5246 mov.w &0x00e1,%d0 # status for failed fmovm write — NOTE(review): confirm encoding
5247 bra.b fmovm_err
5249 fmovm_in_err:
5250 bsr.l restore
5251 mov.w &0x0161,%d0 # status for failed fmovm read — NOTE(review): confirm encoding
5253 fmovm_err:
5254 mov.l L_SCR1(%a6),%a0 # a0 = faulting address saved earlier
5255 bra.l iea_dacc
5257 #########################################################################
5258 # XDEF **************************************************************** #
5259 # fmovm_ctrl(): emulate fmovm.l of control registers instr #
5261 # XREF **************************************************************** #
5262 # _imem_read_long() - read longword from memory #
5263 # iea_iacc() - _imem_read_long() failed; error recovery #
5265 # INPUT *************************************************************** #
5266 # None #
5268 # OUTPUT ************************************************************** #
5269 # If _imem_read_long() doesn't fail: #
5270 # USER_FPCR(a6) = new FPCR value #
5271 # USER_FPSR(a6) = new FPSR value #
5272 # USER_FPIAR(a6) = new FPIAR value #
5274 # ALGORITHM *********************************************************** #
5275 # Decode the instruction type by looking at the extension word #
5276 # in order to see how many control registers to fetch from memory. #
5277 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5278 # the special access error exit handler iea_iacc(). #
5280 # Instruction word decoding: #
5282 # fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
5284 # WORD1 WORD2 #
5285 # 1111 0010 00 111100 100$ $$00 0000 0000 #
5287 # $$$ (100): FPCR #
5288 # (010): FPSR #
5289 # (001): FPIAR #
5290 # (000): FPIAR #
5292 #########################################################################
5294 global fmovm_ctrl
5295 fmovm_ctrl:
5296 mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
5297 cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
5298 beq.w fctrl_in_7 # yes
5299 cmpi.b %d0,&0x98 # fpcr & fpsr ?
5300 beq.w fctrl_in_6 # yes
5301 cmpi.b %d0,&0x94 # fpcr & fpiar ?
5302 beq.b fctrl_in_5 # yes
# no match: fall through to the fpsr/fpiar case
5304 # fmovem.l #<data>, fpsr/fpiar
5305 fctrl_in_3:
5306 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5307 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5308 bsr.l _imem_read_long # fetch FPSR from mem
5310 tst.l %d1 # did ifetch fail?
5311 bne.l iea_iacc # yes
5313 mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
5314 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5315 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5316 bsr.l _imem_read_long # fetch FPIAR from mem
5318 tst.l %d1 # did ifetch fail?
5319 bne.l iea_iacc # yes
5321 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5324 # fmovem.l #<data>, fpcr/fpiar
5325 fctrl_in_5:
5326 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5327 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5328 bsr.l _imem_read_long # fetch FPCR from mem
5330 tst.l %d1 # did ifetch fail?
5331 bne.l iea_iacc # yes
5333 mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
5334 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5335 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5336 bsr.l _imem_read_long # fetch FPIAR from mem
5338 tst.l %d1 # did ifetch fail?
5339 bne.l iea_iacc # yes
5341 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
5344 # fmovem.l #<data>, fpcr/fpsr
5345 fctrl_in_6:
5346 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5347 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5348 bsr.l _imem_read_long # fetch FPCR from mem
5350 tst.l %d1 # did ifetch fail?
5351 bne.l iea_iacc # yes
5353 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5354 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5355 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5356 bsr.l _imem_read_long # fetch FPSR from mem
5358 tst.l %d1 # did ifetch fail?
5359 bne.l iea_iacc # yes
5361 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5364 # fmovem.l #<data>, fpcr/fpsr/fpiar
5365 fctrl_in_7:
5366 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5367 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5368 bsr.l _imem_read_long # fetch FPCR from mem
5370 tst.l %d1 # did ifetch fail?
5371 bne.l iea_iacc # yes
5373 mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
5374 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5375 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5376 bsr.l _imem_read_long # fetch FPSR from mem
5378 tst.l %d1 # did ifetch fail?
5379 bne.l iea_iacc # yes
5381 mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
5382 mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
5383 addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
5384 bsr.l _imem_read_long # fetch FPIAR from mem
5386 tst.l %d1 # did ifetch fail?
5387 bne.l iea_iacc # yes
5389 mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
5392 ##########################################################################
5394 #########################################################################
5395 # XDEF **************************************************************** #
5396 # addsub_scaler2(): scale inputs to fadd/fsub such that no #
5397 # OVFL/UNFL exceptions will result #
5399 # XREF **************************************************************** #
5400 # norm() - normalize mantissa after adjusting exponent #
5402 # INPUT *************************************************************** #
5403 # FP_SRC(a6) = fp op1(src) #
5404 # FP_DST(a6) = fp op2(dst) #
5406 # OUTPUT ************************************************************** #
5407 # FP_SRC(a6) = fp op1 scaled(src) #
5408 # FP_DST(a6) = fp op2 scaled(dst) #
5409 # d0 = scale amount #
5411 # ALGORITHM *********************************************************** #
5412 # If the DST exponent is > the SRC exponent, set the DST exponent #
5413 # equal to 0x3fff and scale the SRC exponent by the value that the #
5414 # DST exponent was scaled by. If the SRC exponent is greater or equal, #
5415 # do the opposite. Return this scale factor in d0. #
5416 # If the two exponents differ by > the number of mantissa bits #
5417 # plus two, then set the smallest exponent to a very small value as a #
5418 # quick shortcut. #
5420 #########################################################################
5422 global addsub_scaler2
5423 addsub_scaler2:
# Working copies: FP_SCR0 <- src operand, FP_SCR1 <- dst operand.
5424 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
5425 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
5426 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
5427 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
5428 mov.w SRC_EX(%a0),%d0
5429 mov.w DST_EX(%a1),%d1
5430 mov.w %d0,FP_SCR0_EX(%a6)
5431 mov.w %d1,FP_SCR1_EX(%a6)
5433 andi.w &0x7fff,%d0 # strip sign: biased exponents only
5434 andi.w &0x7fff,%d1
5435 mov.w %d0,L_SCR1(%a6) # store src exponent
5436 mov.w %d1,2+L_SCR1(%a6) # store dst exponent
5438 cmp.w %d0, %d1 # is src exp >= dst exp?
5439 bge.l src_exp_ge2
5441 # dst exp is > src exp; scale dst to exp = 0x3fff
5442 dst_exp_gt2:
5443 bsr.l scale_to_zero_dst
5444 mov.l %d0,-(%sp) # save scale factor
5446 cmpi.b STAG(%a6),&DENORM # is src denormalized?
5447 bne.b cmpexp12
# src is a denorm: normalize FP_SCR0 (the src copy) to find its
# true exponent and record it in the src exponent slot.
5449 lea FP_SCR0(%a6),%a0
5450 bsr.l norm # normalize the denorm; result is new exp
5451 neg.w %d0 # new exp = -(shft val)
5452 mov.w %d0,L_SCR1(%a6) # insert new src exp
5454 cmpexp12:
5455 mov.w 2+L_SCR1(%a6),%d0
5456 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5458 cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
5459 bge.b quick_scale12
5461 mov.w L_SCR1(%a6),%d0
5462 add.w 0x2(%sp),%d0 # scale src exponent by scale factor
5463 mov.w FP_SCR0_EX(%a6),%d1
5464 and.w &0x8000,%d1 # keep src sign bit
5465 or.w %d1,%d0 # concat {sgn,new exp}
5466 mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
5468 mov.l (%sp)+,%d0 # return SCALE factor
5471 quick_scale12:
# src is negligibly small next to dst: just force its exp to 1.
5472 andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5473 bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
5475 mov.l (%sp)+,%d0 # return SCALE factor
5478 # src exp is >= dst exp; scale src to exp = 0x3fff
5479 src_exp_ge2:
5480 bsr.l scale_to_zero_src
5481 mov.l %d0,-(%sp) # save scale factor
5483 cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
5484 bne.b cmpexp22
# dst is a denorm: normalize FP_SCR1 (the dst copy) to find its
# true exponent and record it in the dst exponent slot.
5485 lea FP_SCR1(%a6),%a0
5486 bsr.l norm # normalize the denorm; result is new exp
5487 neg.w %d0 # new exp = -(shft val)
5488 mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
5490 cmpexp22:
5491 mov.w L_SCR1(%a6),%d0
5492 subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
5494 cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
5495 bge.b quick_scale22
5497 mov.w 2+L_SCR1(%a6),%d0
5498 add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
5499 mov.w FP_SCR1_EX(%a6),%d1
5500 andi.w &0x8000,%d1 # keep dst sign bit
5501 or.w %d1,%d0 # concat {sgn,new exp}
5502 mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
5504 mov.l (%sp)+,%d0 # return SCALE factor
5507 quick_scale22:
# dst is negligibly small next to src: just force its exp to 1.
5508 andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5509 bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
5511 mov.l (%sp)+,%d0 # return SCALE factor
5514 ##########################################################################
5516 #########################################################################
5517 # XDEF **************************************************************** #
5518 # scale_to_zero_src(): scale the exponent of extended precision #
5519 # value at FP_SCR0(a6). #
5521 # XREF **************************************************************** #
5522 # norm() - normalize the mantissa if the operand was a DENORM #
5524 # INPUT *************************************************************** #
5525 # FP_SCR0(a6) = extended precision operand to be scaled #
5527 # OUTPUT ************************************************************** #
5528 # FP_SCR0(a6) = scaled extended precision operand #
5529 # d0 = scale value #
5531 # ALGORITHM *********************************************************** #
5532 # Set the exponent of the input operand to 0x3fff. Save the value #
5533 # of the difference between the original and new exponent. Then, #
5534 # normalize the operand if it was a DENORM. Add this normalization #
5535 # value to the previous value. Return the result. #
5537 #########################################################################
5539 global scale_to_zero_src
5540 scale_to_zero_src:
5541 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5542 mov.w %d1,%d0 # make a copy
5544 andi.l &0x7fff,%d1 # extract operand's exponent
5546 andi.w &0x8000,%d0 # extract operand's sgn
5547 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
5549 mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
5551 cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
5552 beq.b stzs_denorm # yes: normalize it first
5554 stzs_norm:
5555 mov.l &0x3fff,%d0
5556 sub.l %d1,%d0 # scale = BIAS + (-exp)
5560 stzs_denorm:
5561 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5562 bsr.l norm # normalize denorm
5563 neg.l %d0 # new exponent = -(shft val)
5564 mov.l %d0,%d1 # prepare for op_norm call
5565 bra.b stzs_norm # finish scaling
5569 #########################################################################
5570 # XDEF **************************************************************** #
5571 # scale_sqrt(): scale the input operand exponent so a subsequent #
5572 # fsqrt operation won't take an exception. #
5574 # XREF **************************************************************** #
5575 # norm() - normalize the mantissa if the operand was a DENORM #
5577 # INPUT *************************************************************** #
5578 # FP_SCR0(a6) = extended precision operand to be scaled #
5580 # OUTPUT ************************************************************** #
5581 # FP_SCR0(a6) = scaled extended precision operand #
5582 # d0 = scale value #
5584 # ALGORITHM *********************************************************** #
5585 # If the input operand is a DENORM, normalize it. #
5586 # If the exponent of the input operand is even, set the exponent #
5587 # to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
5588 # exponent of the input operand is odd, set the exponent to 0x3fff and #
5589 # return a scale factor of "(exp-0x3fff)/2". #
5591 #########################################################################
5593 global scale_sqrt
5594 scale_sqrt:
5595 cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
5596 beq.b ss_denorm # yes: normalize it first
# Keeping the exponent's parity lets the scale factor be divided
# by two exactly (sqrt halves the exponent).
5598 mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
5599 andi.l &0x7fff,%d1 # extract operand's exponent
5601 andi.w &0x8000,FP_SCR0_EX(%a6) # extract operand's sgn
5603 btst &0x0,%d1 # is exp even or odd?
5604 beq.b ss_norm_even
5606 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5608 mov.l &0x3fff,%d0
5609 sub.l %d1,%d0 # scale = BIAS + (-exp)
5610 asr.l &0x1,%d0 # divide scale factor by 2
5613 ss_norm_even:
5614 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1)
5616 mov.l &0x3ffe,%d0
5617 sub.l %d1,%d0 # scale = BIAS + (-exp)
5618 asr.l &0x1,%d0 # divide scale factor by 2
5621 ss_denorm:
5622 lea FP_SCR0(%a6),%a0 # pass ptr to src op
5623 bsr.l norm # normalize denorm; d0 = shift count
5625 btst &0x0,%d0 # is exp even or odd?
5626 beq.b ss_denorm_even
5628 ori.w &0x3fff,FP_SCR0_EX(%a6) # insert new operand's exponent(=0)
5630 add.l &0x3fff,%d0
5631 asr.l &0x1,%d0 # divide scale factor by 2
5634 ss_denorm_even:
5635 ori.w &0x3ffe,FP_SCR0_EX(%a6) # insert new operand's exponent(=-1)
5637 add.l &0x3ffe,%d0
5638 asr.l &0x1,%d0 # divide scale factor by 2
5643 #########################################################################
5644 # XDEF **************************************************************** #
5645 # scale_to_zero_dst(): scale the exponent of extended precision #
5646 # value at FP_SCR1(a6). #
5648 # XREF **************************************************************** #
5649 # norm() - normalize the mantissa if the operand was a DENORM #
5651 # INPUT *************************************************************** #
5652 # FP_SCR1(a6) = extended precision operand to be scaled #
5654 # OUTPUT ************************************************************** #
5655 # FP_SCR1(a6) = scaled extended precision operand #
5656 # d0 = scale value #
5658 # ALGORITHM *********************************************************** #
5659 # Set the exponent of the input operand to 0x3fff. Save the value #
5660 # of the difference between the original and new exponent. Then, #
5661 # normalize the operand if it was a DENORM. Add this normalization #
5662 # value to the previous value. Return the result. #
5664 #########################################################################
5666 global scale_to_zero_dst
5667 scale_to_zero_dst:
5668 mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
5669 mov.w %d1,%d0 # make a copy
5671 andi.l &0x7fff,%d1 # extract operand's exponent
5673 andi.w &0x8000,%d0 # extract operand's sgn
5674 or.w &0x3fff,%d0 # insert new operand's exponent(=0)
5676 mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
5678 cmpi.b DTAG(%a6),&DENORM # is operand a DENORM?
5679 beq.b stzd_denorm # yes: normalize it first
5681 stzd_norm:
5682 mov.l &0x3fff,%d0
5683 sub.l %d1,%d0 # scale = BIAS + (-exp)
5686 stzd_denorm:
5687 lea FP_SCR1(%a6),%a0 # pass ptr to dst op
5688 bsr.l norm # normalize denorm
5689 neg.l %d0 # new exponent = -(shft val)
5690 mov.l %d0,%d1 # prepare for op_norm call
5691 bra.b stzd_norm # finish scaling
5693 ##########################################################################
5695 #########################################################################
5696 # XDEF **************************************************************** #
5697 # res_qnan(): return default result w/ QNAN operand for dyadic #
5698 # res_snan(): return default result w/ SNAN operand for dyadic #
5699 # res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5700 # res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5702 # XREF **************************************************************** #
5703 # None #
5705 # INPUT *************************************************************** #
5706 # FP_SRC(a6) = pointer to extended precision src operand #
5707 # FP_DST(a6) = pointer to extended precision dst operand #
5709 # OUTPUT ************************************************************** #
5710 # fp0 = default result #
5712 # ALGORITHM *********************************************************** #
5713 # If either operand (but not both operands) of an operation is a #
5714 # nonsignalling NAN, then that NAN is returned as the result. If both #
5715 # operands are nonsignalling NANs, then the destination operand #
5716 # nonsignalling NAN is returned as the result. #
5717 # If either operand to an operation is a signalling NAN (SNAN), #
5718 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5719 # enable bit is set in the FPCR, then the trap is taken and the #
5720 # destination is not modified. If the SNAN trap enable bit is not set, #
5721 # then the SNAN is converted to a nonsignalling NAN (by setting the #
5722 # SNAN bit in the operand to one), and the operation continues as #
5723 # described in the preceding paragraph, for nonsignalling NANs. #
5724 # Make sure the appropriate FPSR bits are set before exiting. #
5726 #########################################################################
5728 global res_qnan
5729 global res_snan
5730 res_qnan:
5731 res_snan:
5732 cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
5733 beq.b dst_snan2
5734 cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
5735 beq.b dst_qnan2
5736 src_nan:
5737 cmp.b STAG(%a6), &QNAN
5738 beq.b src_qnan2
5739 global res_snan_1op
5740 res_snan_1op:
5741 src_snan2:
5742 bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
5743 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744 lea FP_SRC(%a6), %a0
5745 bra.b nan_comp
5746 global res_qnan_1op
5747 res_qnan_1op:
5748 src_qnan2:
5749 or.l &nan_mask, USER_FPSR(%a6)
5750 lea FP_SRC(%a6), %a0
5751 bra.b nan_comp
5752 dst_snan2:
5753 or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5754 bset &0x6, FP_DST_HI(%a6) # set SNAN bit
5755 lea FP_DST(%a6), %a0
5756 bra.b nan_comp
5757 dst_qnan2:
5758 lea FP_DST(%a6), %a0
5759 cmp.b STAG(%a6), &SNAN
5760 bne nan_done
5761 or.l &aiop_mask+snan_mask, USER_FPSR(%a6)
5762 nan_done:
5763 or.l &nan_mask, USER_FPSR(%a6)
5764 nan_comp:
5765 btst &0x7, FTEMP_EX(%a0) # is NAN neg?
5766 beq.b nan_not_neg
5767 or.l &neg_mask, USER_FPSR(%a6)
5768 nan_not_neg:
5769 fmovm.x (%a0), &0x80
5772 #########################################################################
5773 # XDEF **************************************************************** #
5774 # res_operr(): return default result during operand error #
5776 # XREF **************************************************************** #
5777 # None #
5779 # INPUT *************************************************************** #
5780 # None #
5782 # OUTPUT ************************************************************** #
5783 # fp0 = default operand error result #
5785 # ALGORITHM *********************************************************** #
5786 # An nonsignalling NAN is returned as the default result when #
5787 # an operand error occurs for the following cases: #
5789 # Multiply: (Infinity x Zero) #
5790 # Divide : (Zero / Zero) || (Infinity / Infinity) #
5792 #########################################################################
5794 global res_operr
5795 res_operr:
5796 or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5797 fmovm.x nan_return(%pc), &0x80
5800 nan_return:
5801 long 0x7fff0000, 0xffffffff, 0xffffffff
5803 #########################################################################
5804 # XDEF **************************************************************** #
5805 # _denorm(): denormalize an intermediate result #
5807 # XREF **************************************************************** #
5808 # None #
5810 # INPUT *************************************************************** #
5811 # a0 = points to the operand to be denormalized #
5812 # (in the internal extended format) #
5814 # d0 = rounding precision #
5816 # OUTPUT ************************************************************** #
5817 # a0 = pointer to the denormalized result #
5818 # (in the internal extended format) #
5820 # d0 = guard,round,sticky #
5822 # ALGORITHM *********************************************************** #
5823 # According to the exponent underflow threshold for the given #
5824 # precision, shift the mantissa bits to the right in order raise the #
5825 # exponent of the operand to the threshold value. While shifting the #
5826 # mantissa bits right, maintain the value of the guard, round, and #
5827 # sticky bits. #
5828 # other notes: #
5829 # (1) _denorm() is called by the underflow routines #
5830 # (2) _denorm() does NOT affect the status register #
5832 #########################################################################
5835 # table of exponent threshold values for each precision
5837 tbl_thresh:
5838 short 0x0
5839 short sgl_thresh
5840 short dbl_thresh
5842 global _denorm
5843 _denorm:
5845 # Load the exponent threshold for the precision selected and check
5846 # to see if (threshold - exponent) is > 65 in which case we can
5847 # simply calculate the sticky bit and zero the mantissa. otherwise
5848 # we have to call the denormalization routine.
5850 lsr.b &0x2, %d0 # shift prec to lo bits
5851 mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5852 mov.w %d1, %d0 # copy d1 into d0
5853 sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
5854 cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
5855 bpl.b denorm_set_stky # yes; just calc sticky
5857 clr.l %d0 # clear g,r,s
5858 btst &inex2_bit, FPSR_EXCEPT(%a6) # yes; was INEX2 set?
5859 beq.b denorm_call # no; don't change anything
5860 bset &29, %d0 # yes; set sticky bit
5862 denorm_call:
5863 bsr.l dnrm_lp # denormalize the number
5867 # all bit would have been shifted off during the denorm so simply
5868 # calculate if the sticky should be set and clear the entire mantissa.
5870 denorm_set_stky:
5871 mov.l &0x20000000, %d0 # set sticky bit in return value
5872 mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
5873 clr.l FTEMP_HI(%a0) # set d1 = 0 (ms mantissa)
5874 clr.l FTEMP_LO(%a0) # set d2 = 0 (ms mantissa)
5878 # dnrm_lp(): normalize exponent/mantissa to specified threshhold #
5880 # INPUT: #
5881 # %a0 : points to the operand to be denormalized #
5882 # %d0{31:29} : initial guard,round,sticky #
5883 # %d1{15:0} : denormalization threshold #
5884 # OUTPUT: #
5885 # %a0 : points to the denormalized operand #
5886 # %d0{31:29} : final guard,round,sticky #
5889 # *** Local Equates *** #
5890 set GRS, L_SCR2 # g,r,s temp storage
5891 set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
5893 global dnrm_lp
5894 dnrm_lp:
5897 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5898 # in memory so as to make the bitfield extraction for denormalization easier.
5900 mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5901 mov.l %d0, GRS(%a6) # place g,r,s after it
5904 # check to see how much less than the underflow threshold the operand
5905 # exponent is.
5907 mov.l %d1, %d0 # copy the denorm threshold
5908 sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
5909 ble.b dnrm_no_lp # d1 <= 0
5910 cmpi.w %d1, &0x20 # is ( 0 <= d1 < 32) ?
5911 blt.b case_1 # yes
5912 cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
5913 blt.b case_2 # yes
5914 bra.w case_3 # (d1 >= 64)
5917 # No normalization necessary
5919 dnrm_no_lp:
5920 mov.l GRS(%a6), %d0 # restore original g,r,s
5924 # case (0<d1<32)
5926 # %d0 = denorm threshold
5927 # %d1 = "n" = amt to shift
5929 # ---------------------------------------------------------
5930 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5931 # ---------------------------------------------------------
5932 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5933 # \ \ \ \
5934 # \ \ \ \
5935 # \ \ \ \
5936 # \ \ \ \
5937 # \ \ \ \
5938 # \ \ \ \
5939 # \ \ \ \
5940 # \ \ \ \
5941 # <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5942 # ---------------------------------------------------------
5943 # |0.....0| NEW_HI | NEW_FTEMP_LO |grs |
5944 # ---------------------------------------------------------
5946 case_1:
5947 mov.l %d2, -(%sp) # create temp storage
5949 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
5950 mov.l &32, %d0
5951 sub.w %d1, %d0 # %d0 = 32 - %d1
5953 cmpi.w %d1, &29 # is shft amt >= 29
5954 blt.b case1_extract # no; no fix needed
5955 mov.b GRS(%a6), %d2
5956 or.b %d2, 3+FTEMP_LO2(%a6)
5958 case1_extract:
5959 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5960 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5961 bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5963 mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
5964 mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
5966 bftst %d0{&2:&30} # were bits shifted off?
5967 beq.b case1_sticky_clear # no; go finish
5968 bset &rnd_stky_bit, %d0 # yes; set sticky bit
5970 case1_sticky_clear:
5971 and.l &0xe0000000, %d0 # clear all but G,R,S
5972 mov.l (%sp)+, %d2 # restore temp register
5976 # case (32<=d1<64)
5978 # %d0 = denorm threshold
5979 # %d1 = "n" = amt to shift
5981 # ---------------------------------------------------------
5982 # | FTEMP_HI | FTEMP_LO |grs000.........000|
5983 # ---------------------------------------------------------
5984 # <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5985 # \ \ \
5986 # \ \ \
5987 # \ \ -------------------
5988 # \ -------------------- \
5989 # ------------------- \ \
5990 # \ \ \
5991 # \ \ \
5992 # \ \ \
5993 # <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5994 # ---------------------------------------------------------
5995 # |0...............0|0....0| NEW_LO |grs |
5996 # ---------------------------------------------------------
5998 case_2:
5999 mov.l %d2, -(%sp) # create temp storage
6001 mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
6002 subi.w &0x20, %d1 # %d1 now between 0 and 32
6003 mov.l &0x20, %d0
6004 sub.w %d1, %d0 # %d0 = 32 - %d1
6006 # subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
6007 # the number of bits to check for the sticky detect.
6008 # it only plays a role in shift amounts of 61-63.
6009 mov.b GRS(%a6), %d2
6010 or.b %d2, 3+FTEMP_LO2(%a6)
6012 bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6013 bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6015 bftst %d1{&2:&30} # were any bits shifted off?
6016 bne.b case2_set_sticky # yes; set sticky bit
6017 bftst FTEMP_LO2(%a6){%d0:&31} # were any bits shifted off?
6018 bne.b case2_set_sticky # yes; set sticky bit
6020 mov.l %d1, %d0 # move new G,R,S to %d0
6021 bra.b case2_end
6023 case2_set_sticky:
6024 mov.l %d1, %d0 # move new G,R,S to %d0
6025 bset &rnd_stky_bit, %d0 # set sticky bit
6027 case2_end:
6028 clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
6029 mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
6030 and.l &0xe0000000, %d0 # clear all but G,R,S
6032 mov.l (%sp)+,%d2 # restore temp register
6036 # case (d1>=64)
6038 # %d0 = denorm threshold
6039 # %d1 = amt to shift
6041 case_3:
6042 mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
6044 cmpi.w %d1, &65 # is shift amt > 65?
6045 blt.b case3_64 # no; it's == 64
6046 beq.b case3_65 # no; it's == 65
6049 # case (d1>65)
6051 # Shift value is > 65 and out of range. All bits are shifted off.
6052 # Return a zero mantissa with the sticky bit set
6054 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6055 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6056 mov.l &0x20000000, %d0 # set sticky bit
6060 # case (d1 == 64)
6062 # ---------------------------------------------------------
6063 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6064 # ---------------------------------------------------------
6065 # <-------(32)------>
6066 # \ \
6067 # \ \
6068 # \ \
6069 # \ ------------------------------
6070 # ------------------------------- \
6071 # \ \
6072 # \ \
6073 # \ \
6074 # <-------(32)------>
6075 # ---------------------------------------------------------
6076 # |0...............0|0................0|grs |
6077 # ---------------------------------------------------------
6079 case3_64:
6080 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6081 mov.l %d0, %d1 # make a copy
6082 and.l &0xc0000000, %d0 # extract G,R
6083 and.l &0x3fffffff, %d1 # extract other bits
6085 bra.b case3_complete
6088 # case (d1 == 65)
6090 # ---------------------------------------------------------
6091 # | FTEMP_HI | FTEMP_LO |grs000.........000|
6092 # ---------------------------------------------------------
6093 # <-------(32)------>
6094 # \ \
6095 # \ \
6096 # \ \
6097 # \ ------------------------------
6098 # -------------------------------- \
6099 # \ \
6100 # \ \
6101 # \ \
6102 # <-------(31)----->
6103 # ---------------------------------------------------------
6104 # |0...............0|0................0|0rs |
6105 # ---------------------------------------------------------
6107 case3_65:
6108 mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
6109 and.l &0x80000000, %d0 # extract R bit
6110 lsr.l &0x1, %d0 # shift high bit into R bit
6111 and.l &0x7fffffff, %d1 # extract other bits
6113 case3_complete:
6114 # last operation done was an "and" of the bits shifted off so the condition
6115 # codes are already set so branch accordingly.
6116 bne.b case3_set_sticky # yes; go set new sticky
6117 tst.l FTEMP_LO(%a0) # were any bits shifted off?
6118 bne.b case3_set_sticky # yes; go set new sticky
6119 tst.b GRS(%a6) # were any bits shifted off?
6120 bne.b case3_set_sticky # yes; go set new sticky
6123 # no bits were shifted off so don't set the sticky bit.
6124 # the guard and
6125 # the entire mantissa is zero.
6127 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6128 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6132 # some bits were shifted off so set the sticky bit.
6133 # the entire mantissa is zero.
6135 case3_set_sticky:
6136 bset &rnd_stky_bit,%d0 # set new sticky bit
6137 clr.l FTEMP_HI(%a0) # clear hi(mantissa)
6138 clr.l FTEMP_LO(%a0) # clear lo(mantissa)
6141 #########################################################################
6142 # XDEF **************************************************************** #
6143 # _round(): round result according to precision/mode #
6145 # XREF **************************************************************** #
6146 # None #
6148 # INPUT *************************************************************** #
6149 # a0 = ptr to input operand in internal extended format #
6150 # d1(hi) = contains rounding precision: #
6151 # ext = $0000xxxx #
6152 # sgl = $0004xxxx #
6153 # dbl = $0008xxxx #
6154 # d1(lo) = contains rounding mode: #
6155 # RN = $xxxx0000 #
6156 # RZ = $xxxx0001 #
6157 # RM = $xxxx0002 #
6158 # RP = $xxxx0003 #
6159 # d0{31:29} = contains the g,r,s bits (extended) #
6161 # OUTPUT ************************************************************** #
6162 # a0 = pointer to rounded result #
6164 # ALGORITHM *********************************************************** #
6165 # On return the value pointed to by a0 is correctly rounded, #
6166 # a0 is preserved and the g-r-s bits in d0 are cleared. #
6167 # The result is not typed - the tag field is invalid. The #
6168 # result is still in the internal extended format. #
6170 # The INEX bit of USER_FPSR will be set if the rounded result was #
6171 # inexact (i.e. if any of the g-r-s bits were set). #
6173 #########################################################################
6175 global _round
6176 _round:
6178 # ext_grs() looks at the rounding precision and sets the appropriate
6179 # G,R,S bits.
6180 # If (G,R,S == 0) then result is exact and round is done, else set
6181 # the inex flag in status reg and continue.
6183 bsr.l ext_grs # extract G,R,S
6185 tst.l %d0 # are G,R,S zero?
6186 beq.w truncate # yes; round is complete
6188 or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6191 # Use rounding mode as an index into a jump table for these modes.
6192 # All of the following assumes grs != 0.
6194 mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6195 jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
6197 tbl_mode:
6198 short rnd_near - tbl_mode
6199 short truncate - tbl_mode # RZ always truncates
6200 short rnd_mnus - tbl_mode
6201 short rnd_plus - tbl_mode
6203 #################################################################
6204 # ROUND PLUS INFINITY #
6206 # If sign of fp number = 0 (positive), then add 1 to l. #
6207 #################################################################
6208 rnd_plus:
6209 tst.b FTEMP_SGN(%a0) # check for sign
6210 bmi.w truncate # if positive then truncate
6212 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6213 swap %d1 # set up d1 for round prec.
6215 cmpi.b %d1, &s_mode # is prec = sgl?
6216 beq.w add_sgl # yes
6217 bgt.w add_dbl # no; it's dbl
6218 bra.w add_ext # no; it's ext
6220 #################################################################
6221 # ROUND MINUS INFINITY #
6223 # If sign of fp number = 1 (negative), then add 1 to l. #
6224 #################################################################
6225 rnd_mnus:
6226 tst.b FTEMP_SGN(%a0) # check for sign
6227 bpl.w truncate # if negative then truncate
6229 mov.l &0xffffffff, %d0 # force g,r,s to be all f's
6230 swap %d1 # set up d1 for round prec.
6232 cmpi.b %d1, &s_mode # is prec = sgl?
6233 beq.w add_sgl # yes
6234 bgt.w add_dbl # no; it's dbl
6235 bra.w add_ext # no; it's ext
6237 #################################################################
6238 # ROUND NEAREST #
6240 # If (g=1), then add 1 to l and if (r=s=0), then clear l #
6241 # Note that this will round to even in case of a tie. #
6242 #################################################################
6243 rnd_near:
6244 asl.l &0x1, %d0 # shift g-bit to c-bit
6245 bcc.w truncate # if (g=1) then
6247 swap %d1 # set up d1 for round prec.
6249 cmpi.b %d1, &s_mode # is prec = sgl?
6250 beq.w add_sgl # yes
6251 bgt.w add_dbl # no; it's dbl
6252 bra.w add_ext # no; it's ext
6254 # *** LOCAL EQUATES ***
6255 set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
6256 set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
6258 #########################
6259 # ADD SINGLE #
6260 #########################
6261 add_sgl:
6262 add.l &ad_1_sgl, FTEMP_HI(%a0)
6263 bcc.b scc_clr # no mantissa overflow
6264 roxr.w FTEMP_HI(%a0) # shift v-bit back in
6265 roxr.w FTEMP_HI+2(%a0) # shift v-bit back in
6266 add.w &0x1, FTEMP_EX(%a0) # and incr exponent
6267 scc_clr:
6268 tst.l %d0 # test for rs = 0
6269 bne.b sgl_done
6270 and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
6271 sgl_done:
6272 and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6273 clr.l FTEMP_LO(%a0) # clear d2
6276 #########################
6277 # ADD EXTENDED #
6278 #########################
6279 add_ext:
6280 addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
6281 bcc.b xcc_clr # test for carry out
6282 addq.l &1,FTEMP_HI(%a0) # propogate carry
6283 bcc.b xcc_clr
6284 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6285 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6286 roxr.w FTEMP_LO(%a0)
6287 roxr.w FTEMP_LO+2(%a0)
6288 add.w &0x1,FTEMP_EX(%a0) # and inc exp
6289 xcc_clr:
6290 tst.l %d0 # test rs = 0
6291 bne.b add_ext_done
6292 and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
6293 add_ext_done:
6296 #########################
6297 # ADD DOUBLE #
6298 #########################
6299 add_dbl:
6300 add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
6301 bcc.b dcc_clr # no carry
6302 addq.l &0x1, FTEMP_HI(%a0) # propogate carry
6303 bcc.b dcc_clr # no carry
6305 roxr.w FTEMP_HI(%a0) # mant is 0 so restore v-bit
6306 roxr.w FTEMP_HI+2(%a0) # mant is 0 so restore v-bit
6307 roxr.w FTEMP_LO(%a0)
6308 roxr.w FTEMP_LO+2(%a0)
6309 addq.w &0x1, FTEMP_EX(%a0) # incr exponent
6310 dcc_clr:
6311 tst.l %d0 # test for rs = 0
6312 bne.b dbl_done
6313 and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
6315 dbl_done:
6316 and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6319 ###########################
6320 # Truncate all other bits #
6321 ###########################
6322 truncate:
6323 swap %d1 # select rnd prec
6325 cmpi.b %d1, &s_mode # is prec sgl?
6326 beq.w sgl_done # yes
6327 bgt.b dbl_done # no; it's dbl
6328 rts # no; it's ext
6332 # ext_grs(): extract guard, round and sticky bits according to
6333 # rounding precision.
6335 # INPUT
6336 # d0 = extended precision g,r,s (in d0{31:29})
6337 # d1 = {PREC,ROUND}
6338 # OUTPUT
6339 # d0{31:29} = guard, round, sticky
6341 # The ext_grs extract the guard/round/sticky bits according to the
6342 # selected rounding precision. It is called by the round subroutine
6343 # only. All registers except d0 are kept intact. d0 becomes an
6344 # updated guard,round,sticky in d0{31:29}
6346 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6347 # prior to usage, and needs to restore d1 to original. this
6348 # routine is tightly tied to the round routine and not meant to
6349 # uphold standard subroutine calling practices.
6352 ext_grs:
6353 swap %d1 # have d1.w point to round precision
6354 tst.b %d1 # is rnd prec = extended?
6355 bne.b ext_grs_not_ext # no; go handle sgl or dbl
6358 # %d0 actually already hold g,r,s since _round() had it before calling
6359 # this function. so, as long as we don't disturb it, we are "returning" it.
6361 ext_grs_ext:
6362 swap %d1 # yes; return to correct positions
6365 ext_grs_not_ext:
6366 movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
6368 cmpi.b %d1, &s_mode # is rnd prec = sgl?
6369 bne.b ext_grs_dbl # no; go handle dbl
6372 # sgl:
6373 # 96 64 40 32 0
6374 # -----------------------------------------------------
6375 # | EXP |XXXXXXX| |xx | |grs|
6376 # -----------------------------------------------------
6377 # <--(24)--->nn\ /
6378 # ee ---------------------
6379 # ww |
6381 # gr new sticky
6383 ext_grs_sgl:
6384 bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
6385 mov.l &30, %d2 # of the sgl prec. limits
6386 lsl.l %d2, %d3 # shift g-r bits to MSB of d3
6387 mov.l FTEMP_HI(%a0), %d2 # get word 2 for s-bit test
6388 and.l &0x0000003f, %d2 # s bit is the or of all other
6389 bne.b ext_grs_st_stky # bits to the right of g-r
6390 tst.l FTEMP_LO(%a0) # test lower mantissa
6391 bne.b ext_grs_st_stky # if any are set, set sticky
6392 tst.l %d0 # test original g,r,s
6393 bne.b ext_grs_st_stky # if any are set, set sticky
6394 bra.b ext_grs_end_sd # if words 3 and 4 are clr, exit
6397 # dbl:
6398 # 96 64 32 11 0
6399 # -----------------------------------------------------
6400 # | EXP |XXXXXXX| | |xx |grs|
6401 # -----------------------------------------------------
6402 # nn\ /
6403 # ee -------
6404 # ww |
6406 # gr new sticky
6408 ext_grs_dbl:
6409 bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
6410 mov.l &30, %d2 # of the dbl prec. limits
6411 lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
6412 mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
6413 and.l &0x000001ff, %d2 # s bit is the or-ing of all
6414 bne.b ext_grs_st_stky # other bits to the right of g-r
6415 tst.l %d0 # test word original g,r,s
6416 bne.b ext_grs_st_stky # if any are set, set sticky
6417 bra.b ext_grs_end_sd # if clear, exit
6419 ext_grs_st_stky:
6420 bset &rnd_stky_bit, %d3 # set sticky bit
6421 ext_grs_end_sd:
6422 mov.l %d3, %d0 # return grs to d0
6424 movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
6426 swap %d1 # restore d1 to original
6429 #########################################################################
6430 # norm(): normalize the mantissa of an extended precision input. the #
6431 # input operand should not be normalized already. #
6433 # XDEF **************************************************************** #
6434 # norm() #
6436 # XREF **************************************************************** #
6437 # none #
6439 # INPUT *************************************************************** #
6440 # a0 = pointer fp extended precision operand to normalize #
6442 # OUTPUT ************************************************************** #
6443 # d0 = number of bit positions the mantissa was shifted #
6444 # a0 = the input operand's mantissa is normalized; the exponent #
6445 # is unchanged. #
6447 #########################################################################
6448 global norm
6449 norm:
6450 mov.l %d2, -(%sp) # create some temp regs
6451 mov.l %d3, -(%sp)
6453 mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
6454 mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
6456 bfffo %d0{&0:&32}, %d2 # how many places to shift?
6457 beq.b norm_lo # hi(man) is all zeroes!
6459 norm_hi:
6460 lsl.l %d2, %d0 # left shift hi(man)
6461 bfextu %d1{&0:%d2}, %d3 # extract lo bits
6463 or.l %d3, %d0 # create hi(man)
6464 lsl.l %d2, %d1 # create lo(man)
6466 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6467 mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
6469 mov.l %d2, %d0 # return shift amount
6471 mov.l (%sp)+, %d3 # restore temp regs
6472 mov.l (%sp)+, %d2
6476 norm_lo:
6477 bfffo %d1{&0:&32}, %d2 # how many places to shift?
6478 lsl.l %d2, %d1 # shift lo(man)
6479 add.l &32, %d2 # add 32 to shft amount
6481 mov.l %d1, FTEMP_HI(%a0) # store hi(man)
6482 clr.l FTEMP_LO(%a0) # lo(man) is now zero
6484 mov.l %d2, %d0 # return shift amount
6486 mov.l (%sp)+, %d3 # restore temp regs
6487 mov.l (%sp)+, %d2
6491 #########################################################################
6492 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6493 # - returns corresponding optype tag #
6495 # XDEF **************************************************************** #
6496 # unnorm_fix() #
6498 # XREF **************************************************************** #
6499 # norm() - normalize the mantissa #
6501 # INPUT *************************************************************** #
6502 # a0 = pointer to unnormalized extended precision number #
6504 # OUTPUT ************************************************************** #
6505 # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6506 # a0 = input operand has been converted to a norm, denorm, or #
6507 # zero; both the exponent and mantissa are changed. #
6509 #########################################################################
6511 global unnorm_fix
6512 unnorm_fix:
6513 bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
6514 bne.b unnorm_shift # hi(man) is not all zeroes
6517 # hi(man) is all zeroes so see if any bits in lo(man) are set
6519 unnorm_chk_lo:
6520 bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6521 beq.w unnorm_zero # yes
6523 add.w &32, %d0 # no; fix shift distance
6526 # d0 = # shifts needed for complete normalization
6528 unnorm_shift:
6529 clr.l %d1 # clear top word
6530 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6531 and.w &0x7fff, %d1 # strip off sgn
6533 cmp.w %d0, %d1 # will denorm push exp < 0?
6534 bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
6537 # exponent would not go < 0. therefore, number stays normalized
6539 sub.w %d0, %d1 # shift exponent value
6540 mov.w FTEMP_EX(%a0), %d0 # load old exponent
6541 and.w &0x8000, %d0 # save old sign
6542 or.w %d0, %d1 # {sgn,new exp}
6543 mov.w %d1, FTEMP_EX(%a0) # insert new exponent
6545 bsr.l norm # normalize UNNORM
6547 mov.b &NORM, %d0 # return new optype tag
6551 # exponent would go < 0, so only denormalize until exp = 0
6553 unnorm_nrm_zero:
6554 cmp.b %d1, &32 # is exp <= 32?
6555 bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
6557 bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
6558 mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
6560 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6561 lsl.l %d1, %d0 # extract new lo(man)
6562 mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
6564 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6566 mov.b &DENORM, %d0 # return new optype tag
6570 # only mantissa bits set are in lo(man)
6572 unnorm_nrm_zero_lrg:
6573 sub.w &32, %d1 # adjust shft amt by 32
6575 mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
6576 lsl.l %d1, %d0 # left shift lo(man)
6578 mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
6579 clr.l FTEMP_LO(%a0) # lo(man) = 0
6581 and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
6583 mov.b &DENORM, %d0 # return new optype tag
6587 # whole mantissa is zero so this UNNORM is actually a zero
6589 unnorm_zero:
6590 and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
6592 mov.b &ZERO, %d0 # fix optype tag
6595 #########################################################################
6596 # XDEF **************************************************************** #
6597 # set_tag_x(): return the optype of the input ext fp number #
6599 # XREF **************************************************************** #
6600 # None #
6602 # INPUT *************************************************************** #
6603 # a0 = pointer to extended precision operand #
6605 # OUTPUT ************************************************************** #
6606 # d0 = value of type tag #
6607 # one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6609 # ALGORITHM *********************************************************** #
6610 # Simply test the exponent, j-bit, and mantissa values to #
6611 # determine the type of operand. #
6612 # If it's an unnormalized zero, alter the operand and force it #
6613 # to be a normal zero. #
6615 #########################################################################
6617 global set_tag_x
6618 set_tag_x:
6619 mov.w FTEMP_EX(%a0), %d0 # extract exponent
6620 andi.w &0x7fff, %d0 # strip off sign
6621 cmpi.w %d0, &0x7fff # is (EXP == MAX)?
6622 beq.b inf_or_nan_x
6623 not_inf_or_nan_x:
6624 btst &0x7,FTEMP_HI(%a0)
6625 beq.b not_norm_x
6626 is_norm_x:
6627 mov.b &NORM, %d0
6629 not_norm_x:
6630 tst.w %d0 # is exponent = 0?
6631 bne.b is_unnorm_x
6632 not_unnorm_x:
6633 tst.l FTEMP_HI(%a0)
6634 bne.b is_denorm_x
6635 tst.l FTEMP_LO(%a0)
6636 bne.b is_denorm_x
6637 is_zero_x:
6638 mov.b &ZERO, %d0
6640 is_denorm_x:
6641 mov.b &DENORM, %d0
6643 # must distinguish now "Unnormalized zeroes" which we
6644 # must convert to zero.
6645 is_unnorm_x:
6646 tst.l FTEMP_HI(%a0)
6647 bne.b is_unnorm_reg_x
6648 tst.l FTEMP_LO(%a0)
6649 bne.b is_unnorm_reg_x
6650 # it's an "unnormalized zero". let's convert it to an actual zero...
6651 andi.w &0x8000,FTEMP_EX(%a0) # clear exponent
6652 mov.b &ZERO, %d0
6654 is_unnorm_reg_x:
6655 mov.b &UNNORM, %d0
6657 inf_or_nan_x:
6658 tst.l FTEMP_LO(%a0)
6659 bne.b is_nan_x
6660 mov.l FTEMP_HI(%a0), %d0
6661 and.l &0x7fffffff, %d0 # msb is a don't care!
6662 bne.b is_nan_x
6663 is_inf_x:
6664 mov.b &INF, %d0
6666 is_nan_x:
6667 btst &0x6, FTEMP_HI(%a0)
6668 beq.b is_snan_x
6669 mov.b &QNAN, %d0
6671 is_snan_x:
6672 mov.b &SNAN, %d0
6675 #########################################################################
6676 # XDEF **************************************************************** #
6677 # set_tag_d(): return the optype of the input dbl fp number #
6679 # XREF **************************************************************** #
6680 # None #
6682 # INPUT *************************************************************** #
6683 # a0 = points to double precision operand #
6685 # OUTPUT ************************************************************** #
6686 # d0 = value of type tag #
6687 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6689 # ALGORITHM *********************************************************** #
6690 # Simply test the exponent, j-bit, and mantissa values to #
6691 # determine the type of operand. #
6693 #########################################################################
6695 global set_tag_d
6696 set_tag_d:
# NOTE(review): gaps in the embedded numbering (e.g. 6699, 6708) show that
# "rts" lines were dropped from this listing; each tag case below should
# return rather than fall through -- confirm against the full source.
6697 mov.l FTEMP(%a0), %d0 # fetch hi long: sign|11-bit exp|hi(man)
6698 mov.l %d0, %d1 # keep a copy for the mantissa tests
6700 andi.l &0x7ff00000, %d0 # isolate biased exponent field
6701 beq.b zero_or_denorm_d # exp == 0 => zero or denorm
6703 cmpi.l %d0, &0x7ff00000 # exp == max => inf or nan
6704 beq.b inf_or_nan_d
6706 is_norm_d:
6707 mov.b &NORM, %d0 # tag = NORM
6709 zero_or_denorm_d:
6710 and.l &0x000fffff, %d1 # any bits set in hi(mantissa)?
6711 bne is_denorm_d # yes; it's a denorm
6712 tst.l 4+FTEMP(%a0) # any bits set in lo(mantissa)?
6713 bne is_denorm_d # yes; it's a denorm
6714 is_zero_d:
6715 mov.b &ZERO, %d0 # tag = ZERO
6717 is_denorm_d:
6718 mov.b &DENORM, %d0 # tag = DENORM
6720 inf_or_nan_d:
6721 and.l &0x000fffff, %d1 # mantissa all clear => INF, else NaN
6722 bne is_nan_d
6723 tst.l 4+FTEMP(%a0)
6724 bne is_nan_d
6725 is_inf_d:
6726 mov.b &INF, %d0 # tag = INF
6728 is_nan_d:
6729 btst &19, %d1 # test msb of mantissa (quiet bit)
6730 bne is_qnan_d # set => quiet NaN
6731 is_snan_d:
6732 mov.b &SNAN, %d0 # tag = SNAN
6734 is_qnan_d:
6735 mov.b &QNAN, %d0 # tag = QNAN
6738 #########################################################################
6739 # XDEF **************************************************************** #
6740 # set_tag_s(): return the optype of the input sgl fp number #
6742 # XREF **************************************************************** #
6743 # None #
6745 # INPUT *************************************************************** #
6746 # a0 = pointer to single precision operand #
6748 # OUTPUT ************************************************************** #
6749 # d0 = value of type tag #
6750 # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6752 # ALGORITHM *********************************************************** #
6753 # Simply test the exponent, j-bit, and mantissa values to #
6754 # determine the type of operand. #
6756 #########################################################################
6758 global set_tag_s
6759 set_tag_s:
# NOTE(review): numbering gaps below correspond to "rts" lines dropped from
# this listing; each tag case returns rather than falling through.
6760 mov.l FTEMP(%a0), %d0 # fetch operand: sign|8-bit exp|mantissa
6761 mov.l %d0, %d1 # keep a copy for the mantissa tests
6763 andi.l &0x7f800000, %d0 # isolate biased exponent field
6764 beq.b zero_or_denorm_s # exp == 0 => zero or denorm
6766 cmpi.l %d0, &0x7f800000 # exp == max => inf or nan
6767 beq.b inf_or_nan_s
6769 is_norm_s:
6770 mov.b &NORM, %d0 # tag = NORM
6772 zero_or_denorm_s:
6773 and.l &0x007fffff, %d1 # any mantissa bits set?
6774 bne is_denorm_s # yes; it's a denorm
6775 is_zero_s:
6776 mov.b &ZERO, %d0 # tag = ZERO
6778 is_denorm_s:
6779 mov.b &DENORM, %d0 # tag = DENORM
6781 inf_or_nan_s:
6782 and.l &0x007fffff, %d1 # mantissa all clear => INF, else NaN
6783 bne is_nan_s
6784 is_inf_s:
6785 mov.b &INF, %d0 # tag = INF
6787 is_nan_s:
6788 btst &22, %d1 # test msb of mantissa (quiet bit)
6789 bne is_qnan_s # set => quiet NaN
6790 is_snan_s:
6791 mov.b &SNAN, %d0 # tag = SNAN
6793 is_qnan_s:
6794 mov.b &QNAN, %d0 # tag = QNAN
6797 #########################################################################
6798 # XDEF **************************************************************** #
6799 # unf_res(): routine to produce default underflow result of a #
6800 # scaled extended precision number; this is used by #
6801 # fadd/fdiv/fmul/etc. emulation routines. #
6802 # unf_res4(): same as above but for fsglmul/fsgldiv which use #
6803 # single round prec and extended prec mode. #
6805 # XREF **************************************************************** #
6806 # _denorm() - denormalize according to scale factor #
6807 # _round() - round denormalized number according to rnd prec #
6809 # INPUT *************************************************************** #
6810 # a0 = pointer to extended precision operand #
6811 # d0 = scale factor #
6812 # d1 = rounding precision/mode #
6814 # OUTPUT ************************************************************** #
6815 # a0 = pointer to default underflow result in extended precision #
6816 # d0.b = result FPSR_cc which caller may or may not want to save #
6818 # ALGORITHM *********************************************************** #
6819 # Convert the input operand to "internal format" which means the #
6820 # exponent is extended to 16 bits and the sign is stored in the unused #
6821 # portion of the extended precision operand. Denormalize the number #
6822 # according to the scale factor passed in d0. Then, round the #
6823 # denormalized result. #
6824 # Set the FPSR_exc bits as appropriate but return the cc bits in #
6825 # d0 in case the caller doesn't want to save them (as is the case for #
6826 # fmove out). #
6827 # unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6828 # precision and the rounding mode to single. #
6830 #########################################################################
6831 global unf_res
6832 unf_res:
6833 mov.l %d1, -(%sp) # save rnd prec,mode on stack
6835 btst &0x7, FTEMP_EX(%a0) # make "internal" format
6836 sne FTEMP_SGN(%a0) # FTEMP_SGN = 0xff if negative
6838 mov.w FTEMP_EX(%a0), %d1 # extract exponent
6839 and.w &0x7fff, %d1 # strip sign bit
6840 sub.w %d0, %d1 # bias exp down by scale factor
6841 mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
6843 mov.l %a0, -(%sp) # save operand ptr during calls
6845 mov.l 0x4(%sp),%d0 # pass rnd prec.
6846 andi.w &0x00c0,%d0 # isolate prec bits
6847 lsr.w &0x4,%d0 # shift into place for _denorm
6848 bsr.l _denorm # denorm result
6850 mov.l (%sp),%a0 # reload operand ptr
# build %d1 for _round: rnd prec in the hi word, rnd mode in the lo word
6851 mov.w 0x6(%sp),%d1 # load prec:mode into %d1
6852 andi.w &0xc0,%d1 # extract rnd prec
6853 lsr.w &0x4,%d1
6854 swap %d1 # prec to hi word
6855 mov.w 0x6(%sp),%d1
6856 andi.w &0x30,%d1 # extract rnd mode
6857 lsr.w &0x4,%d1
6858 bsr.l _round # round the denorm
6860 mov.l (%sp)+, %a0 # restore operand ptr
6862 # result is now rounded properly. convert back to normal format
6863 bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
6864 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6865 beq.b unf_res_chkifzero # no; result is positive
6866 bset &0x7, FTEMP_EX(%a0) # set result sgn
6867 clr.b FTEMP_SGN(%a6) # clear temp sign
6869 # the number may have become zero after rounding. set ccodes accordingly.
6870 unf_res_chkifzero:
6871 clr.l %d0 # default: no ccodes set
6872 tst.l FTEMP_HI(%a0) # is value now a zero?
6873 bne.b unf_res_cont # no
6874 tst.l FTEMP_LO(%a0)
6875 bne.b unf_res_cont # no
6876 # bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
6877 bset &z_bit, %d0 # yes; set zero ccode bit
6879 unf_res_cont:
6882 # can inex1 also be set along with unfl and inex2???
6884 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6886 btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6887 beq.b unf_res_end # no
6888 bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6890 unf_res_end:
6891 add.l &0x4, %sp # clear stack
6894 # unf_res() for fsglmul() and fsgldiv().
# Identical to unf_res() above except that the denormalization precision is
# forced to extended and the rounding precision passed to _round() is forced
# to single, as required by fsglmul/fsgldiv semantics.
6895 global unf_res4
6896 unf_res4:
6897 mov.l %d1,-(%sp) # save rnd prec,mode on stack
6899 btst &0x7,FTEMP_EX(%a0) # make "internal" format
6900 sne FTEMP_SGN(%a0) # FTEMP_SGN = 0xff if negative
6902 mov.w FTEMP_EX(%a0),%d1 # extract exponent
6903 and.w &0x7fff,%d1 # strip sign bit
6904 sub.w %d0,%d1 # bias exp down by scale factor
6905 mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
6907 mov.l %a0,-(%sp) # save operand ptr during calls
6909 clr.l %d0 # force rnd prec = ext
6910 bsr.l _denorm # denorm result
6912 mov.l (%sp),%a0 # reload operand ptr
6913 mov.w &s_mode,%d1 # force rnd prec = sgl
6914 swap %d1 # prec to hi word
6915 mov.w 0x6(%sp),%d1 # load rnd mode
6916 andi.w &0x30,%d1 # extract rnd mode
6917 lsr.w &0x4,%d1
6918 bsr.l _round # round the denorm
6920 mov.l (%sp)+,%a0 # restore operand ptr
6922 # result is now rounded properly. convert back to normal format
6923 bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
6924 tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
6925 beq.b unf_res4_chkifzero # no; result is positive
6926 bset &0x7,FTEMP_EX(%a0) # set result sgn
6927 clr.b FTEMP_SGN(%a0) # clear temp sign
6929 # the number may have become zero after rounding. set ccodes accordingly.
6930 unf_res4_chkifzero:
6931 clr.l %d0 # default: no ccodes set
6932 tst.l FTEMP_HI(%a0) # is value now a zero?
6933 bne.b unf_res4_cont # no
6934 tst.l FTEMP_LO(%a0)
6935 bne.b unf_res4_cont # no
6936 # bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
6937 bset &z_bit,%d0 # yes; set zero ccode bit
6939 unf_res4_cont:
6942 # can inex1 also be set along with unfl and inex2???
6944 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6946 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6947 beq.b unf_res4_end # no
6948 bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6950 unf_res4_end:
6951 add.l &0x4,%sp # clear stack
6954 #########################################################################
6955 # XDEF **************************************************************** #
6956 # ovf_res(): routine to produce the default overflow result of #
6957 # an overflowing number. #
6958 # ovf_res2(): same as above but the rnd mode/prec are passed #
6959 # differently. #
6961 # XREF **************************************************************** #
6962 # none #
6964 # INPUT *************************************************************** #
6965 # d1.b = '-1' => (-); '0' => (+) #
6966 # ovf_res(): #
6967 # d0 = rnd mode/prec #
6968 # ovf_res2(): #
6969 # hi(d0) = rnd prec #
6970 # lo(d0) = rnd mode #
6972 # OUTPUT ************************************************************** #
6973 # a0 = points to extended precision result #
6974 # d0.b = condition code bits #
6976 # ALGORITHM *********************************************************** #
6977 # The default overflow result can be determined by the sign of #
6978 # the result and the rounding mode/prec in effect. These bits are #
6979 # concatenated together to create an index into the default result #
6980 # table. A pointer to the correct result is returned in a0. The #
6981 # resulting condition codes are returned in d0 in case the caller #
6982 # doesn't want FPSR_cc altered (as is the case for fmove out). #
6984 #########################################################################
6986 global ovf_res
6987 ovf_res:
6988 andi.w &0x10,%d1 # keep result sign
6989 lsr.b &0x4,%d0 # shift prec/mode
6990 or.b %d0,%d1 # concat the two
6991 mov.w %d1,%d0 # make a copy
6992 lsl.b &0x1,%d1 # multiply d1 by 2
6993 bra.b ovf_res_load
6995 global ovf_res2
6996 ovf_res2:
6997 and.w &0x10, %d1 # keep result sign
6998 or.b %d0, %d1 # insert rnd mode
6999 swap %d0 # fetch rnd prec from hi word
7000 or.b %d0, %d1 # insert rnd prec
7001 mov.w %d1, %d0 # make a copy
7002 lsl.b &0x1, %d1 # shift left by 1
7005 # use the rounding mode, precision, and result sign as an index into the
7006 # two tables below to fetch the default result and the result ccodes.
# index layout: sign(0x10) | prec(0x0c) | mode(0x03); d0 indexes the byte
# table directly, d1 (= index*2, scaled *8 below) indexes the 16-byte
# extended precision entries of tbl_ovfl_result.
7008 ovf_res_load:
7009 mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7010 lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
# ccode entries: 0x2 flags an infinity result, 0x8 a negative result,
# matching the corresponding rows of tbl_ovfl_result below.
7014 tbl_ovfl_cc:
7015 byte 0x2, 0x0, 0x0, 0x2
7016 byte 0x2, 0x0, 0x0, 0x2
7017 byte 0x2, 0x0, 0x0, 0x2
7018 byte 0x0, 0x0, 0x0, 0x0
7019 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7020 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7021 byte 0x2+0x8, 0x8, 0x2+0x8, 0x8
7023 tbl_ovfl_result:
7024 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7026 long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7027 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7029 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7030 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7031 long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7032 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7034 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7035 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7036 long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7037 long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7039 long 0x00000000,0x00000000,0x00000000,0x00000000
7040 long 0x00000000,0x00000000,0x00000000,0x00000000
7041 long 0x00000000,0x00000000,0x00000000,0x00000000
7042 long 0x00000000,0x00000000,0x00000000,0x00000000
7044 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7046 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047 long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7049 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7050 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7051 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7052 long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7054 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7055 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7056 long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7057 long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7059 #########################################################################
7060 # XDEF **************************************************************** #
7061 # fout(): move from fp register to memory or data register #
7063 # XREF **************************************************************** #
7064 # _round() - needed to create EXOP for sgl/dbl precision #
7065 # norm() - needed to create EXOP for extended precision #
7066 # ovf_res() - create default overflow result for sgl/dbl precision#
7067 # unf_res() - create default underflow result for sgl/dbl prec. #
7068 # dst_dbl() - create rounded dbl precision result. #
7069 # dst_sgl() - create rounded sgl precision result. #
7070 # fetch_dreg() - fetch dynamic k-factor reg for packed. #
7071 # bindec() - convert FP binary number to packed number. #
7072 # _mem_write() - write data to memory. #
7073 # _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7074 # _dmem_write_{byte,word,long}() - write data to memory. #
7075 # store_dreg_{b,w,l}() - store data to data register file. #
7076 # facc_out_{b,w,l,d,x}() - data access error occurred. #
7078 # INPUT *************************************************************** #
7079 # a0 = pointer to extended precision source operand #
7080 # d0 = round prec,mode #
7082 # OUTPUT ************************************************************** #
7083 # fp0 : intermediate underflow or overflow result if #
7084 # OVFL/UNFL occurred for a sgl or dbl operand #
7086 # ALGORITHM *********************************************************** #
7087 # This routine is accessed by many handlers that need to do an #
7088 # opclass three move of an operand out to memory. #
7089 # Decode an fmove out (opclass 3) instruction to determine if #
7090 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7091 # register or memory. The algorithm uses a standard "fmove" to create #
7092 # the rounded result. Also, since exceptions are disabled, this also #
7093 # creates the correct OPERR default result if appropriate. #
7094 # For sgl or dbl precision, overflow or underflow can occur. If #
7095 # either occurs and is enabled, the EXOP must be created. #
7096 # For extended precision, the stacked <ea> must be fixed along #
7097 # w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7098 # the source is a denorm and if underflow is enabled, an EXOP must be #
7099 # created. #
7100 # For packed, the k-factor must be fetched from the instruction #
7101 # word or a data register. The <ea> must be fixed as w/ extended #
7102 # precision. Then, bindec() is called to create the appropriate #
7103 # packed result. #
7104 # If at any time an access error is flagged by one of the move- #
7105 # to-memory routines, then a special exit must be made so that the #
7106 # access error can be handled properly. #
7108 #########################################################################
7110 global fout
7111 fout:
7112 bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7113 mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7114 jmp (tbl_fout.b,%pc,%a1) # jump to routine
7116 swbeg &0x8
7117 tbl_fout:
7118 short fout_long - tbl_fout
7119 short fout_sgl - tbl_fout
7120 short fout_ext - tbl_fout
7121 short fout_pack - tbl_fout
7122 short fout_word - tbl_fout
7123 short fout_dbl - tbl_fout
7124 short fout_byte - tbl_fout
7125 short fout_pack - tbl_fout
7127 #################################################################
7128 # fmove.b out ###################################################
7129 #################################################################
7131 # Only "Unimplemented Data Type" exceptions enter here. The operand
7132 # is either a DENORM or a NORM.
# NOTE(review): numbering gaps (7157-7159, etc.) correspond to "rts" lines
# dropped from this listing.
7133 fout_byte:
7134 tst.b STAG(%a6) # is operand normalized?
7135 bne.b fout_byte_denorm # no
7137 fmovm.x SRC(%a0),&0x80 # load value
7139 fout_byte_norm:
7140 fmov.l %d0,%fpcr # insert rnd prec,mode
7142 fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
7144 fmov.l &0x0,%fpcr # clear FPCR
7145 fmov.l %fpsr,%d1 # fetch FPSR
7146 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7148 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7149 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7150 beq.b fout_byte_dn # must save to integer regfile
7152 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7153 bsr.l _dmem_write_byte # write byte
7155 tst.l %d1 # did dstore fail?
7156 bne.l facc_out_b # yes
7160 fout_byte_dn:
7161 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7162 andi.w &0x7,%d1 # register number 0-7
7163 bsr.l store_dreg_b # store byte to Dn
7166 fout_byte_denorm:
7167 mov.l SRC_EX(%a0),%d1 # fetch sign
7168 andi.l &0x80000000,%d1 # keep DENORM sign
7169 ori.l &0x00800000,%d1 # make smallest sgl
7170 fmov.s %d1,%fp0 # substitute it for the DENORM
7171 bra.b fout_byte_norm # then convert as a NORM
7173 #################################################################
7174 # fmove.w out ###################################################
7175 #################################################################
7177 # Only "Unimplemented Data Type" exceptions enter here. The operand
7178 # is either a DENORM or a NORM.
# Structure parallels fout_byte above, using word-sized stores.
7179 fout_word:
7180 tst.b STAG(%a6) # is operand normalized?
7181 bne.b fout_word_denorm # no
7183 fmovm.x SRC(%a0),&0x80 # load value
7185 fout_word_norm:
7186 fmov.l %d0,%fpcr # insert rnd prec:mode
7188 fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
7190 fmov.l &0x0,%fpcr # clear FPCR
7191 fmov.l %fpsr,%d1 # fetch FPSR
7192 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7194 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7195 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7196 beq.b fout_word_dn # must save to integer regfile
7198 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7199 bsr.l _dmem_write_word # write word
7201 tst.l %d1 # did dstore fail?
7202 bne.l facc_out_w # yes
7206 fout_word_dn:
7207 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7208 andi.w &0x7,%d1 # register number 0-7
7209 bsr.l store_dreg_w # store word to Dn
7212 fout_word_denorm:
7213 mov.l SRC_EX(%a0),%d1 # fetch sign
7214 andi.l &0x80000000,%d1 # keep DENORM sign
7215 ori.l &0x00800000,%d1 # make smallest sgl
7216 fmov.s %d1,%fp0 # substitute it for the DENORM
7217 bra.b fout_word_norm # then convert as a NORM
7219 #################################################################
7220 # fmove.l out ###################################################
7221 #################################################################
7223 # Only "Unimplemented Data Type" exceptions enter here. The operand
7224 # is either a DENORM or a NORM.
# Structure parallels fout_byte/fout_word above, using long-sized stores.
7225 fout_long:
7226 tst.b STAG(%a6) # is operand normalized?
7227 bne.b fout_long_denorm # no
7229 fmovm.x SRC(%a0),&0x80 # load value
7231 fout_long_norm:
7232 fmov.l %d0,%fpcr # insert rnd prec:mode
7234 fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
7236 fmov.l &0x0,%fpcr # clear FPCR
7237 fmov.l %fpsr,%d1 # fetch FPSR
7238 or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
7240 fout_long_write:
7241 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7242 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7243 beq.b fout_long_dn # must save to integer regfile
7245 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7246 bsr.l _dmem_write_long # write long
7248 tst.l %d1 # did dstore fail?
7249 bne.l facc_out_l # yes
7253 fout_long_dn:
7254 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7255 andi.w &0x7,%d1 # register number 0-7
7256 bsr.l store_dreg_l # store long to Dn
7259 fout_long_denorm:
7260 mov.l SRC_EX(%a0),%d1 # fetch sign
7261 andi.l &0x80000000,%d1 # keep DENORM sign
7262 ori.l &0x00800000,%d1 # make smallest sgl
7263 fmov.s %d1,%fp0 # substitute it for the DENORM
7264 bra.b fout_long_norm # then convert as a NORM
7266 #################################################################
7267 # fmove.x out ###################################################
7268 #################################################################
7270 # Only "Unimplemented Data Type" exceptions enter here. The operand
7271 # is either a DENORM or a NORM.
7272 # The DENORM causes an Underflow exception.
# NOTE(review): numbering gaps (e.g. 7304-7305, 7335-7336) correspond to
# "rts" lines dropped from this listing.
7273 fout_ext:
7275 # we copy the extended precision result to FP_SCR0 so that the reserved
7276 # 16-bit field gets zeroed. we do this since we promise not to disturb
7277 # what's at SRC(a0).
7278 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7279 clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
7280 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7281 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7283 fmovm.x SRC(%a0),&0x80 # return result
7285 bsr.l _calc_ea_fout # fix stacked <ea>
7287 mov.l %a0,%a1 # pass: dst addr
7288 lea FP_SCR0(%a6),%a0 # pass: src addr
7289 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7291 # we must not yet write the extended precision data to the stack
7292 # in the pre-decrement case from supervisor mode or else we'll corrupt
7293 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
7294 cmpi.b SPCOND_FLG(%a6),&mda7_flg # -(a7) from supervisor mode?
7295 beq.b fout_ext_a7 # yes; defer the write
7297 bsr.l _dmem_write # write ext prec number to memory
7299 tst.l %d1 # did dstore fail?
7300 bne.w fout_ext_err # yes
7302 tst.b STAG(%a6) # is operand normalized?
7303 bne.b fout_ext_denorm # no
7306 # the number is a DENORM. must set the underflow exception bit
7307 fout_ext_denorm:
7308 bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7310 mov.b FPCR_ENABLE(%a6),%d0
7311 andi.b &0x0a,%d0 # is UNFL or INEX enabled?
7312 bne.b fout_ext_exc # yes
7315 # we don't want to do the write if the exception occurred in supervisor mode
7316 # so _mem_write2() handles this for us.
7317 fout_ext_a7:
7318 bsr.l _mem_write2 # write ext prec number to memory
7320 tst.l %d1 # did dstore fail?
7321 bne.w fout_ext_err # yes
7323 tst.b STAG(%a6) # is operand normalized?
7324 bne.b fout_ext_denorm # no
# build the EXOP: normalize the DENORM and fold the shift count into the
# exponent, keeping the original sign.
7327 fout_ext_exc:
7328 lea FP_SCR0(%a6),%a0
7329 bsr.l norm # normalize the mantissa
7330 neg.w %d0 # new exp = -(shft amt)
7331 andi.w &0x7fff,%d0 # strip any sign residue
7332 andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7333 or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
7334 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7337 fout_ext_err:
7338 mov.l EXC_A6(%a6),(%a6) # fix stacked a6
7339 bra.l facc_out_x # handle the access error
7341 #########################################################################
7342 # fmove.s out ###########################################################
7343 #########################################################################
# NOTE(review): numbering gaps (e.g. 7391-7393, 7451-7453) correspond to
# "rts" lines dropped from this listing.
7344 fout_sgl:
7345 andi.b &0x30,%d0 # clear rnd prec
7346 ori.b &s_mode*0x10,%d0 # insert sgl prec
7347 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7350 # operand is a normalized number. first, we check to see if the move out
7351 # would cause either an underflow or overflow. these cases are handled
7352 # separately. otherwise, set the FPCR to the proper rounding mode and
7353 # execute the move.
7355 mov.w SRC_EX(%a0),%d0 # extract exponent
7356 andi.w &0x7fff,%d0 # strip sign
7358 cmpi.w %d0,&SGL_HI # will operand overflow?
7359 bgt.w fout_sgl_ovfl # yes; go handle OVFL
7360 beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
7361 cmpi.w %d0,&SGL_LO # will operand underflow?
7362 blt.w fout_sgl_unfl # yes; go handle underflow
7365 # NORMs(in range) can be stored out by a simple "fmov.s"
7366 # Unnormalized inputs can come through this point.
7368 fout_sgl_exg:
7369 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7371 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7372 fmov.l &0x0,%fpsr # clear FPSR
7374 fmov.s %fp0,%d0 # store does convert and round
7376 fmov.l &0x0,%fpcr # clear FPCR
7377 fmov.l %fpsr,%d1 # save FPSR
7379 or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
7381 fout_sgl_exg_write:
7382 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7383 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7384 beq.b fout_sgl_exg_write_dn # must save to integer regfile
7386 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7387 bsr.l _dmem_write_long # write long
7389 tst.l %d1 # did dstore fail?
7390 bne.l facc_out_l # yes
7394 fout_sgl_exg_write_dn:
7395 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7396 andi.w &0x7,%d1 # register number 0-7
7397 bsr.l store_dreg_l # store long to Dn
7401 # here, we know that the operand would UNFL if moved out to single prec,
7402 # so, denorm and round and then use generic store single routine to
7403 # write the value to memory.
7405 fout_sgl_unfl:
7406 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7408 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
7409 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7410 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7411 mov.l %a0,-(%sp) # save operand ptr for exc path
7413 clr.l %d0 # pass: S.F. = 0
7415 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7416 bne.b fout_sgl_unfl_cont # let DENORMs fall through
7418 lea FP_SCR0(%a6),%a0
7419 bsr.l norm # normalize the DENORM
7421 fout_sgl_unfl_cont:
7422 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7423 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7424 bsr.l unf_res # calc default underflow result
7426 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7427 bsr.l dst_sgl # convert to single prec
7429 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7430 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7431 beq.b fout_sgl_unfl_dn # must save to integer regfile
7433 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7434 bsr.l _dmem_write_long # write long
7436 tst.l %d1 # did dstore fail?
7437 bne.l facc_out_l # yes
7439 bra.b fout_sgl_unfl_chkexc
7441 fout_sgl_unfl_dn:
7442 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7443 andi.w &0x7,%d1 # register number 0-7
7444 bsr.l store_dreg_l # store long to Dn
7446 fout_sgl_unfl_chkexc:
7447 mov.b FPCR_ENABLE(%a6),%d1
7448 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7449 bne.w fout_sd_exc_unfl # yes; go build the EXOP
7450 addq.l &0x4,%sp # discard saved operand ptr
7454 # it's definitely an overflow so call ovf_res to get the correct answer
7456 fout_sgl_ovfl:
7457 tst.b 3+SRC_HI(%a0) # is result inexact?
7458 bne.b fout_sgl_ovfl_inex2
7459 tst.l SRC_LO(%a0) # is result inexact?
7460 bne.b fout_sgl_ovfl_inex2
7461 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7462 bra.b fout_sgl_ovfl_cont
7463 fout_sgl_ovfl_inex2:
7464 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7466 fout_sgl_ovfl_cont:
7467 mov.l %a0,-(%sp) # save operand ptr for exc path
7469 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7470 # overflow result. DON'T save the returned ccodes from ovf_res() since
7471 # fmove out doesn't alter them.
7472 tst.b SRC_EX(%a0) # is operand negative?
7473 smi %d1 # set if so
7474 mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
7475 bsr.l ovf_res # calc OVFL result
7476 fmovm.x (%a0),&0x80 # load default overflow result
7477 fmov.s %fp0,%d0 # store to single
7479 mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
7480 andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
7481 beq.b fout_sgl_ovfl_dn # must save to integer regfile
7483 mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
7484 bsr.l _dmem_write_long # write long
7486 tst.l %d1 # did dstore fail?
7487 bne.l facc_out_l # yes
7489 bra.b fout_sgl_ovfl_chkexc
7491 fout_sgl_ovfl_dn:
7492 mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
7493 andi.w &0x7,%d1 # register number 0-7
7494 bsr.l store_dreg_l # store long to Dn
7496 fout_sgl_ovfl_chkexc:
7497 mov.b FPCR_ENABLE(%a6),%d1
7498 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7499 bne.w fout_sd_exc_ovfl # yes; go build the EXOP
7500 addq.l &0x4,%sp # discard saved operand ptr
7504 # move out MAY overflow:
7505 # (1) force the exp to 0x3fff
7506 # (2) do a move w/ appropriate rnd mode
7507 # (3) if exp still equals zero, then insert original exponent
7508 # for the correct result.
7509 # if exp now equals one, then it overflowed so call ovf_res.
7511 fout_sgl_may_ovfl:
7512 mov.w SRC_EX(%a0),%d1 # fetch current sign
7513 andi.w &0x8000,%d1 # keep it,clear exp
7514 ori.w &0x3fff,%d1 # insert exp = 0
7515 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7516 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7517 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7519 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7521 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7522 fmov.l &0x0,%fpcr # clear FPCR
7524 fabs.x %fp0 # need absolute value
7525 fcmp.b %fp0,&0x2 # did exponent increase?
7526 fblt.w fout_sgl_exg # no; go finish NORM
7527 bra.w fout_sgl_ovfl # yes; go handle overflow
7529 ################
# Common EXOP creation for the sgl/dbl underflow and overflow paths above.
# On entry the operand ptr saved by the caller is on top of the stack.
# The EXOP is built in FP_SCR0, rounded via _round(), and returned in fp1.
7531 fout_sd_exc_unfl:
7532 mov.l (%sp)+,%a0 # restore operand ptr
7534 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
7535 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7536 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7538 cmpi.b STAG(%a6),&DENORM # was src a DENORM?
7539 bne.b fout_sd_exc_cont # no
7541 lea FP_SCR0(%a6),%a0
7542 bsr.l norm # normalize; d0 = shift amount
7543 neg.l %d0 # new exp = -(shft amt)
7544 andi.w &0x7fff,%d0
7545 bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert new exp, keep sign
7546 bra.b fout_sd_exc_cont
7548 fout_sd_exc:
7549 fout_sd_exc_ovfl:
7550 mov.l (%sp)+,%a0 # restore a0
7552 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
7553 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7554 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7556 fout_sd_exc_cont:
7557 bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
7558 sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
7559 lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
# build %d1 for _round() from the rnd prec,mode saved in L_SCR3:
# prec goes in the hi word, mode in the lo word (as in unf_res above).
7561 mov.b 3+L_SCR3(%a6),%d1
7562 lsr.b &0x4,%d1
7563 andi.w &0x0c,%d1 # isolate rnd prec
7564 swap %d1 # prec to hi word
7565 mov.b 3+L_SCR3(%a6),%d1
7566 lsr.b &0x4,%d1
7567 andi.w &0x03,%d1 # isolate rnd mode
7568 clr.l %d0 # pass: zero g,r,s
7569 bsr.l _round # round the DENORM
7571 tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
7572 beq.b fout_sd_exc_done # no
7573 bset &0x7,FP_SCR0_EX(%a6) # yes
7575 fout_sd_exc_done:
7576 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
7579 #################################################################
7580 # fmove.d out ###################################################
7581 #################################################################
7582 fout_dbl:
7583 andi.b &0x30,%d0 # clear rnd prec
7584 ori.b &d_mode*0x10,%d0 # insert dbl prec
7585 mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
7588 # operand is a normalized number. first, we check to see if the move out
7589 # would cause either an underflow or overflow. these cases are handled
7590 # separately. otherwise, set the FPCR to the proper rounding mode and
7591 # execute the move.
7593 mov.w SRC_EX(%a0),%d0 # extract exponent
7594 andi.w &0x7fff,%d0 # strip sign
7596 cmpi.w %d0,&DBL_HI # will operand overflow?
7597 bgt.w fout_dbl_ovfl # yes; go handle OVFL
7598 beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
7599 cmpi.w %d0,&DBL_LO # will operand underflow?
7600 blt.w fout_dbl_unfl # yes; go handle underflow
7603 # NORMs(in range) can be stored out by a simple "fmov.d"
7604 # Unnormalized inputs can come through this point.
7606 fout_dbl_exg:
7607 fmovm.x SRC(%a0),&0x80 # fetch fop from stack
7609 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7610 fmov.l &0x0,%fpsr # clear FPSR
7612 fmov.d %fp0,L_SCR1(%a6) # store does convert and round
7614 fmov.l &0x0,%fpcr # clear FPCR
7615 fmov.l %fpsr,%d0 # save FPSR
7617 or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
7619 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7620 lea L_SCR1(%a6),%a0 # pass: src addr
7621 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7622 bsr.l _dmem_write # store dbl fop to memory
7624 tst.l %d1 # did dstore fail?
7625 bne.l facc_out_d # yes
7627 rts # no; so we're finished
7630 # here, we know that the operand would UNFL if moved out to double prec,
7631 # so, denorm and round and then use generic store double routine to
7632 # write the value to memory.
7634 fout_dbl_unfl:
7635 bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7637 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
7638 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
7639 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
7640 mov.l %a0,-(%sp)
7642 clr.l %d0 # pass: S.F. = 0
7644 cmpi.b STAG(%a6),&DENORM # fetch src optype tag
7645 bne.b fout_dbl_unfl_cont # let DENORMs fall through
7647 lea FP_SCR0(%a6),%a0
7648 bsr.l norm # normalize the DENORM
7650 fout_dbl_unfl_cont:
7651 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
7652 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
7653 bsr.l unf_res # calc default underflow result
7655 lea FP_SCR0(%a6),%a0 # pass: ptr to fop
7656 bsr.l dst_dbl # convert to single prec
7657 mov.l %d0,L_SCR1(%a6)
7658 mov.l %d1,L_SCR2(%a6)
7660 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7661 lea L_SCR1(%a6),%a0 # pass: src addr
7662 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7663 bsr.l _dmem_write # store dbl fop to memory
7665 tst.l %d1 # did dstore fail?
7666 bne.l facc_out_d # yes
7668 mov.b FPCR_ENABLE(%a6),%d1
7669 andi.b &0x0a,%d1 # is UNFL or INEX enabled?
7670 bne.w fout_sd_exc_unfl # yes
7671 addq.l &0x4,%sp
7675 # it's definitely an overflow so call ovf_res to get the correct answer
7677 fout_dbl_ovfl:
7678 mov.w 2+SRC_LO(%a0),%d0 # fetch bits lost in dbl rounding
7679 andi.w &0x7ff,%d0 # are they all zero?
7680 bne.b fout_dbl_ovfl_inex2 # no; result is also inexact
7682 ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7683 bra.b fout_dbl_ovfl_cont
7684 fout_dbl_ovfl_inex2:
7685 ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7687 fout_dbl_ovfl_cont:
7688 mov.l %a0,-(%sp) # save src ptr for exc handler
7690 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7691 # overflow result. DON'T save the returned ccodes from ovf_res() since
7692 # fmove out doesn't alter them.
7693 tst.b SRC_EX(%a0) # is operand negative?
7694 smi %d1 # set if so
7695 mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
7696 bsr.l ovf_res # calc OVFL result
7697 fmovm.x (%a0),&0x80 # load default overflow result
7698 fmov.d %fp0,L_SCR1(%a6) # store to double
7700 mov.l EXC_EA(%a6),%a1 # pass: dst addr
7701 lea L_SCR1(%a6),%a0 # pass: src addr
7702 movq.l &0x8,%d0 # pass: opsize is 8 bytes
7703 bsr.l _dmem_write # store dbl fop to memory
7705 tst.l %d1 # did dstore fail?
7706 bne.l facc_out_d # yes
7708 mov.b FPCR_ENABLE(%a6),%d1
7709 andi.b &0x13,%d1 # is OVFL or INEX enabled?
7710 bne.w fout_sd_exc_ovfl # yes; go create EXOP
7711 addq.l &0x4,%sp # no; discard saved src ptr
7715 # move out MAY overflow:
7716 # (1) force the exp to 0x3fff
7717 # (2) do a move w/ appropriate rnd mode
7718 # (3) if exp still equals zero, then insert original exponent
7719 # for the correct result.
7720 # if exp now equals one, then it overflowed so call ovf_res.
7722 fout_dbl_may_ovfl:
7723 mov.w SRC_EX(%a0),%d1 # fetch current sign
7724 andi.w &0x8000,%d1 # keep it,clear exp
7725 ori.w &0x3fff,%d1 # insert exp = 0 (biased 0x3fff)
7726 mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
7727 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7728 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7730 fmov.l L_SCR3(%a6),%fpcr # set FPCR
7732 fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
7733 fmov.l &0x0,%fpcr # clear FPCR
7735 fabs.x %fp0 # need absolute value
7736 fcmp.b %fp0,&0x2 # did exponent increase past 1.xx?
7737 fblt.w fout_dbl_exg # no; go finish NORM
7738 bra.w fout_dbl_ovfl # yes; go handle overflow
7740 #########################################################################
7741 # XDEF **************************************************************** #
7742 # dst_dbl(): create double precision value from extended prec. #
7744 # XREF **************************************************************** #
7745 # None #
7747 # INPUT *************************************************************** #
7748 # a0 = pointer to source operand in extended precision #
7750 # OUTPUT ************************************************************** #
7751 # d0 = hi(double precision result) #
7752 # d1 = lo(double precision result) #
7754 # ALGORITHM *********************************************************** #
7756 # Changes extended precision to double precision. #
7757 # Note: no attempt is made to round the extended value to double. #
7758 # dbl_sign = ext_sign #
7759 # dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias) #
7760 # get rid of ext integer bit #
7761 # dbl_mant = ext_mant{62:11} #
7763 # --------------- --------------- --------------- #
7764 # extended -> |s| exp | |1| ms mant | | ls mant | #
7765 # --------------- --------------- --------------- #
7766 # 95 64 63 62 32 31 11 0 #
7767 # | | #
7768 # | | #
7769 # | | #
7770 # v v #
7771 # --------------- --------------- #
7772 # double -> |s|exp| mant | | mant | #
7773 # --------------- --------------- #
7774 # 63 51 32 31 0 #
7776 #########################################################################
7778 dst_dbl:
7779 clr.l %d0 # clear d0
7780 mov.w FTEMP_EX(%a0),%d0 # get exponent
7781 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7782 addi.w &DBL_BIAS,%d0 # add double precision bias
7783 tst.b FTEMP_HI(%a0) # is number a denorm? (j-bit clear?)
7784 bmi.b dst_get_dupper # no
7785 subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
7786 dst_get_dupper:
7787 swap %d0 # d0 now in upper word
7788 lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
7789 tst.b FTEMP_EX(%a0) # test sign
7790 bpl.b dst_get_dman # if positive, go process mantissa
7791 bset &0x1f,%d0 # if negative, set sign
7792 dst_get_dman:
7793 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7794 bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms (skip j-bit)
7795 or.l %d1,%d0 # put these bits in ms word of double
7796 mov.l %d0,L_SCR1(%a6) # stash hi(dbl) while lo is built
7797 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7798 mov.l &21,%d0 # load shift count
7799 lsl.l %d0,%d1 # put lower 11 bits in upper bits
7800 mov.l %d1,L_SCR2(%a6) # build lower lword in memory
7801 mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
7802 bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
7803 mov.l L_SCR2(%a6),%d1
7804 or.l %d0,%d1 # put them in double result
7805 mov.l L_SCR1(%a6),%d0 # reload hi(dbl) for return
7808 #########################################################################
7809 # XDEF **************************************************************** #
7810 # dst_sgl(): create single precision value from extended prec #
7812 # XREF **************************************************************** #
7814 # INPUT *************************************************************** #
7815 # a0 = pointer to source operand in extended precision #
7817 # OUTPUT ************************************************************** #
7818 # d0 = single precision result #
7820 # ALGORITHM *********************************************************** #
7822 # Changes extended precision to single precision. #
7823 # sgl_sign = ext_sign #
7824 # sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
7825 # get rid of ext integer bit #
7826 # sgl_mant = ext_mant{62:40} #
7828 # --------------- --------------- --------------- #
7829 # extended -> |s| exp | |1| ms mant | | ls mant | #
7830 # --------------- --------------- --------------- #
7831 # 95 64 63 62 40 32 31 12 0 #
7832 # | | #
7833 # | | #
7834 # | | #
7835 # v v #
7836 # --------------- #
7837 # single -> |s|exp| mant | #
7838 # --------------- #
7839 # 31 22 0 #
7841 #########################################################################
7843 dst_sgl:
7844 clr.l %d0
7845 mov.w FTEMP_EX(%a0),%d0 # get exponent
7846 subi.w &EXT_BIAS,%d0 # subtract extended precision bias
7847 addi.w &SGL_BIAS,%d0 # add single precision bias
7848 tst.b FTEMP_HI(%a0) # is number a denorm? (j-bit clear?)
7849 bmi.b dst_get_supper # no
7850 subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
7851 dst_get_supper:
7852 swap %d0 # put exp in upper word of d0
7853 lsl.l &0x7,%d0 # shift it into single exp bits
7854 tst.b FTEMP_EX(%a0) # test sign
7855 bpl.b dst_get_sman # if positive, continue
7856 bset &0x1f,%d0 # if negative, put in sign first
7857 dst_get_sman:
7858 mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
7859 andi.l &0x7fffff00,%d1 # get upper 23 bits of ms (drop j-bit)
7860 lsr.l &0x8,%d1 # and put them flush right
7861 or.l %d1,%d0 # put these bits in ms word of single
7864 ##############################################################################
7865 fout_pack:
7866 bsr.l _calc_ea_fout # fetch the <ea>
7867 mov.l %a0,-(%sp) # save dst addr for the write below
7869 mov.b STAG(%a6),%d0 # fetch input type
7870 bne.w fout_pack_not_norm # input is not NORM
7872 fout_pack_norm:
7873 btst &0x4,EXC_CMDREG(%a6) # static or dynamic k-factor?
7874 beq.b fout_pack_s # static
7876 fout_pack_d:
7877 mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
7878 lsr.b &0x4,%d1
7879 andi.w &0x7,%d1
7881 bsr.l fetch_dreg # fetch Dn w/ k-factor
7883 bra.b fout_pack_type
7884 fout_pack_s:
7885 mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
7887 fout_pack_type:
7888 bfexts %d0{&25:&7},%d0 # extract k-factor
7889 mov.l %d0,-(%sp) # save k-factor across bindec call
7891 lea FP_SRC(%a6),%a0 # pass: ptr to input
7893 # bindec is currently scrambling FP_SRC for denorm inputs.
7894 # we'll have to change this, but for now, tough luck!!!
7895 bsr.l bindec # convert xprec to packed
7897 # andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7898 andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7900 mov.l (%sp)+,%d0 # restore k-factor
7902 tst.b 3+FP_SCR0_EX(%a6) # mantissa integer digit non-zero?
7903 bne.b fout_pack_set
7904 tst.l FP_SCR0_HI(%a6) # any non-zero fraction digits?
7905 bne.b fout_pack_set
7906 tst.l FP_SCR0_LO(%a6)
7907 bne.b fout_pack_set
7909 # add the extra condition that only if the k-factor was zero, too, should
7910 # we zero the exponent
7911 tst.l %d0
7912 bne.b fout_pack_set
7913 # "mantissa" is all zero which means that the answer is zero. but, the '040
7914 # algorithm allows the exponent to be non-zero. the 881/2 do not. therefore,
7915 # if the mantissa is zero, I will zero the exponent, too.
7916 # the question now is whether the exponents sign bit is allowed to be non-zero
7917 # for a zero, also...
7918 andi.w &0xf000,FP_SCR0(%a6)
7920 fout_pack_set:
7922 lea FP_SCR0(%a6),%a0 # pass: src addr
7924 fout_pack_write:
7925 mov.l (%sp)+,%a1 # pass: dst addr
7926 mov.l &0xc,%d0 # pass: opsize is 12 bytes
7928 cmpi.b SPCOND_FLG(%a6),&mda7_flg # "-(a7)" in supervisor mode?
7929 beq.b fout_pack_a7
7931 bsr.l _dmem_write # write ext prec number to memory
7933 tst.l %d1 # did dstore fail?
7934 bne.w fout_ext_err # yes
7938 # we don't want to do the write if the exception occurred in supervisor mode
7939 # so _mem_write2() handles this for us.
7940 fout_pack_a7:
7941 bsr.l _mem_write2 # write ext prec number to memory
7943 tst.l %d1 # did dstore fail?
7944 bne.w fout_ext_err # yes
7948 fout_pack_not_norm:
7949 cmpi.b %d0,&DENORM # is it a DENORM?
7950 beq.w fout_pack_norm # yes; treat like a NORM
7951 lea FP_SRC(%a6),%a0
7952 clr.w 2+FP_SRC_EX(%a6)
7953 cmpi.b %d0,&SNAN # is it an SNAN?
7954 beq.b fout_pack_snan # yes
7955 bra.b fout_pack_write # no
7957 fout_pack_snan:
7958 ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7959 bset &0x6,FP_SRC_HI(%a6) # set snan bit
7960 bra.b fout_pack_write
7962 #########################################################################
7963 # XDEF **************************************************************** #
7964 # fmul(): emulates the fmul instruction #
7965 # fsmul(): emulates the fsmul instruction #
7966 # fdmul(): emulates the fdmul instruction #
7968 # XREF **************************************************************** #
7969 # scale_to_zero_src() - scale src exponent to zero #
7970 # scale_to_zero_dst() - scale dst exponent to zero #
7971 # unf_res() - return default underflow result #
7972 # ovf_res() - return default overflow result #
7973 # res_qnan() - return QNAN result #
7974 # res_snan() - return SNAN result #
7976 # INPUT *************************************************************** #
7977 # a0 = pointer to extended precision source operand #
7978 # a1 = pointer to extended precision destination operand #
7979 # d0 = rnd prec,mode #
7981 # OUTPUT ************************************************************** #
7982 # fp0 = result #
7983 # fp1 = EXOP (if exception occurred) #
7985 # ALGORITHM *********************************************************** #
7986 # Handle NANs, infinities, and zeroes as special cases. Divide #
7987 # norms/denorms into ext/sgl/dbl precision. #
7988 # For norms/denorms, scale the exponents such that a multiply #
7989 # instruction won't cause an exception. Use the regular fmul to #
7990 # compute a result. Check if the regular operands would have taken #
7991 # an exception. If so, return the default overflow/underflow result #
7992 # and return the EXOP if exceptions are enabled. Else, scale the #
7993 # result operand to the proper exponent. #
7995 #########################################################################
# scale-factor thresholds, indexed by rounding precision (ext/sgl/dbl):
7997 align 0x10
7998 tbl_fmul_ovfl:
7999 long 0x3fff - 0x7ffe # ext_max
8000 long 0x3fff - 0x407e # sgl_max
8001 long 0x3fff - 0x43fe # dbl_max
8002 tbl_fmul_unfl:
8003 long 0x3fff + 0x0001 # ext_unfl
8004 long 0x3fff - 0x3f80 # sgl_unfl
8005 long 0x3fff - 0x3c00 # dbl_unfl
8007 global fsmul
8008 fsmul:
8009 andi.b &0x30,%d0 # clear rnd prec
8010 ori.b &s_mode*0x10,%d0 # insert sgl prec
8011 bra.b fmul
8013 global fdmul
8014 fdmul:
8015 andi.b &0x30,%d0 # clear rnd prec
8016 ori.b &d_mode*0x10,%d0 # insert dbl prec; fall through to fmul
8018 global fmul
8019 fmul:
8020 mov.l %d0,L_SCR3(%a6) # store rnd info
8022 clr.w %d1
8023 mov.b DTAG(%a6),%d1
8024 lsl.b &0x3,%d1
8025 or.b STAG(%a6),%d1 # combine src tags: d1 = DTAG<<3 | STAG
8026 bne.w fmul_not_norm # optimize on non-norm input
8028 fmul_norm:
8029 mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst to scratch 1
8030 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8031 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8033 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src to scratch 0
8034 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8035 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8037 bsr.l scale_to_zero_src # scale src exponent
8038 mov.l %d0,-(%sp) # save scale factor 1
8040 bsr.l scale_to_zero_dst # scale dst exponent
8042 add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
8044 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8045 lsr.b &0x6,%d1 # shift to lo bits
8046 mov.l (%sp)+,%d0 # load S.F.
8047 cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8048 beq.w fmul_may_ovfl # result may rnd to overflow
8049 blt.w fmul_ovfl # result will overflow
8051 cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8052 beq.w fmul_may_unfl # result may rnd to no unfl
8053 bgt.w fmul_unfl # result will underflow
8056 # NORMAL:
8057 # - the result of the multiply operation will neither overflow nor underflow.
8058 # - do the multiply to the proper precision and rounding mode.
8059 # - scale the result exponent using the scale factor. if both operands were
8060 # normalized then we really don't need to go through this scaling. but for now,
8061 # this will do.
8063 fmul_normal:
8064 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8066 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8067 fmov.l &0x0,%fpsr # clear FPSR
8069 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8071 fmov.l %fpsr,%d1 # save status
8072 fmov.l &0x0,%fpcr # clear FPCR
8074 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8076 fmul_normal_exit:
8077 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8078 mov.l %d2,-(%sp) # save d2
8079 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8080 mov.l %d1,%d2 # make a copy
8081 andi.l &0x7fff,%d1 # strip sign
8082 andi.w &0x8000,%d2 # keep old sign
8083 sub.l %d0,%d1 # subtract S.F. to restore true exp
8084 or.w %d2,%d1 # concat old sign,new exp
8085 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8086 mov.l (%sp)+,%d2 # restore d2
8087 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8091 # OVERFLOW:
8092 # - the result of the multiply operation is an overflow.
8093 # - do the multiply to the proper precision and rounding mode in order to
8094 # set the inexact bits.
8095 # - calculate the default result and return it in fp0.
8096 # - if overflow or inexact is enabled, we need a multiply result rounded to
8097 # extended precision. if the original operation was extended, then we have this
8098 # result. if the original operation was single or double, we have to do another
8099 # multiply using extended precision and the correct rounding mode. the result
8100 # of this operation then has its exponent scaled by -0x6000 to create the
8101 # exceptional operand.
8103 fmul_ovfl:
8104 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8106 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8107 fmov.l &0x0,%fpsr # clear FPSR
8109 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8111 fmov.l %fpsr,%d1 # save status
8112 fmov.l &0x0,%fpcr # clear FPCR
8114 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8116 # save setting this until now because this is where fmul_may_ovfl may jump in
8117 fmul_ovfl_tst:
8118 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8120 mov.b FPCR_ENABLE(%a6),%d1
8121 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8122 bne.b fmul_ovfl_ena # yes
8124 # calculate the default result
8125 fmul_ovfl_dis:
8126 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8127 sne %d1 # set sign param accordingly
8128 mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
8129 bsr.l ovf_res # calculate default result
8130 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8131 fmovm.x (%a0),&0x80 # return default result in fp0
8135 # OVFL is enabled; Create EXOP:
8136 # - if precision is extended, then we have the EXOP. simply bias the exponent
8137 # with an extra -0x6000. if the precision is single or double, we need to
8138 # calculate a result rounded to extended precision.
8140 fmul_ovfl_ena:
8141 mov.l L_SCR3(%a6),%d1
8142 andi.b &0xc0,%d1 # test the rnd prec
8143 bne.b fmul_ovfl_ena_sd # it's sgl or dbl
8145 fmul_ovfl_ena_cont:
8146 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8148 mov.l %d2,-(%sp) # save d2
8149 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8150 mov.w %d1,%d2 # make a copy
8151 andi.l &0x7fff,%d1 # strip sign
8152 sub.l %d0,%d1 # subtract scale factor
8153 subi.l &0x6000,%d1 # subtract bias
8154 andi.w &0x7fff,%d1 # clear sign bit
8155 andi.w &0x8000,%d2 # keep old sign
8156 or.w %d2,%d1 # concat old sign,new exp
8157 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8158 mov.l (%sp)+,%d2 # restore d2
8159 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8160 bra.b fmul_ovfl_dis
8162 fmul_ovfl_ena_sd:
8163 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8165 mov.l L_SCR3(%a6),%d1
8166 andi.b &0x30,%d1 # keep rnd mode only; force ext prec
8167 fmov.l %d1,%fpcr # set FPCR
8169 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8171 fmov.l &0x0,%fpcr # clear FPCR
8172 bra.b fmul_ovfl_ena_cont
8175 # may OVERFLOW:
8176 # - the result of the multiply operation MAY overflow.
8177 # - do the multiply to the proper precision and rounding mode in order to
8178 # set the inexact bits.
8179 # - calculate the default result and return it in fp0.
8181 fmul_may_ovfl:
8182 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8184 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8185 fmov.l &0x0,%fpsr # clear FPSR
8187 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8189 fmov.l %fpsr,%d1 # save status
8190 fmov.l &0x0,%fpcr # clear FPCR
8192 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8194 fabs.x %fp0,%fp1 # make a copy of result
8195 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8196 fbge.w fmul_ovfl_tst # yes; overflow has occurred
8198 # no, it didn't overflow; we have correct result
8199 bra.w fmul_normal_exit
8202 # UNDERFLOW:
8203 # - the result of the multiply operation is an underflow.
8204 # - do the multiply to the proper precision and rounding mode in order to
8205 # set the inexact bits.
8206 # - calculate the default result and return it in fp0.
8207 # - if overflow or inexact is enabled, we need a multiply result rounded to
8208 # extended precision. if the original operation was extended, then we have this
8209 # result. if the original operation was single or double, we have to do another
8210 # multiply using extended precision and the correct rounding mode. the result
8211 # of this operation then has its exponent scaled by -0x6000 to create the
8212 # exceptional operand.
8214 fmul_unfl:
8215 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8217 # for fun, let's use only extended precision, round to zero. then, let
8218 # the unf_res() routine figure out all the rest.
8219 # will we get the correct answer.
8220 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8222 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8223 fmov.l &0x0,%fpsr # clear FPSR
8225 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8227 fmov.l %fpsr,%d1 # save status
8228 fmov.l &0x0,%fpcr # clear FPCR
8230 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8232 mov.b FPCR_ENABLE(%a6),%d1
8233 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8234 bne.b fmul_unfl_ena # yes
8236 fmul_unfl_dis:
8237 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8239 lea FP_SCR0(%a6),%a0 # pass: result addr
8240 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8241 bsr.l unf_res # calculate default result
8242 or.b %d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
8243 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8247 # UNFL is enabled.
8249 fmul_unfl_ena:
8250 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
8252 mov.l L_SCR3(%a6),%d1
8253 andi.b &0xc0,%d1 # is precision extended?
8254 bne.b fmul_unfl_ena_sd # no, sgl or dbl
8256 # if the rnd mode is anything but RZ, then we have to re-do the above
8257 # multiplication because we used RZ for all.
8258 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8260 fmul_unfl_ena_cont:
8261 fmov.l &0x0,%fpsr # clear FPSR
8263 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8265 fmov.l &0x0,%fpcr # clear FPCR
8267 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
8268 mov.l %d2,-(%sp) # save d2
8269 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8270 mov.l %d1,%d2 # make a copy
8271 andi.l &0x7fff,%d1 # strip sign
8272 andi.w &0x8000,%d2 # keep old sign
8273 sub.l %d0,%d1 # subtract scale factor
8274 addi.l &0x6000,%d1 # add bias
8275 andi.w &0x7fff,%d1
8276 or.w %d2,%d1 # concat old sign,new exp
8277 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8278 mov.l (%sp)+,%d2 # restore d2
8279 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8280 bra.w fmul_unfl_dis
8282 fmul_unfl_ena_sd:
8283 mov.l L_SCR3(%a6),%d1
8284 andi.b &0x30,%d1 # use only rnd mode; force ext prec
8285 fmov.l %d1,%fpcr # set FPCR
8287 bra.b fmul_unfl_ena_cont
8289 # MAY UNDERFLOW:
8290 # -use the correct rounding mode and precision. this code favors operations
8291 # that do not underflow.
8292 fmul_may_unfl:
8293 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8295 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8296 fmov.l &0x0,%fpsr # clear FPSR
8298 fmul.x FP_SCR0(%a6),%fp0 # execute multiply
8300 fmov.l %fpsr,%d1 # save status
8301 fmov.l &0x0,%fpcr # clear FPCR
8303 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8305 fabs.x %fp0,%fp1 # make a copy of result
8306 fcmp.b %fp1,&0x2 # is |result| > 2.b?
8307 fbgt.w fmul_normal_exit # no; no underflow occurred
8308 fblt.w fmul_unfl # yes; underflow occurred
8311 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8312 # we don't know if the result was an underflow that rounded up to a 2 or
8313 # a normalized number that rounded down to a 2. so, redo the entire operation
8314 # using RZ as the rounding mode to see what the pre-rounded result is.
8315 # this case should be relatively rare.
8317 fmovm.x FP_SCR1(%a6),&0x40 # load dst operand into fp1
8319 mov.l L_SCR3(%a6),%d1
8320 andi.b &0xc0,%d1 # keep rnd prec
8321 ori.b &rz_mode*0x10,%d1 # insert RZ
8323 fmov.l %d1,%fpcr # set FPCR
8324 fmov.l &0x0,%fpsr # clear FPSR
8326 fmul.x FP_SCR0(%a6),%fp1 # execute multiply
8328 fmov.l &0x0,%fpcr # clear FPCR
8329 fabs.x %fp1 # make absolute value
8330 fcmp.b %fp1,&0x2 # is |result| < 2.b?
8331 fbge.w fmul_normal_exit # no; no underflow occurred
8332 bra.w fmul_unfl # yes, underflow occurred
8334 ################################################################################
8337 # Multiply: inputs are not both normalized; what are they?
# dispatch on d1 = DTAG<<3 | STAG; table rows are DTAG, columns STAG.
8339 fmul_not_norm:
8340 mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1 # fetch handler offset
8341 jmp (tbl_fmul_op.b,%pc,%d1.w) # jump to handler
8343 swbeg &48
8344 tbl_fmul_op:
8345 short fmul_norm - tbl_fmul_op # NORM x NORM
8346 short fmul_zero - tbl_fmul_op # NORM x ZERO
8347 short fmul_inf_src - tbl_fmul_op # NORM x INF
8348 short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
8349 short fmul_norm - tbl_fmul_op # NORM x DENORM
8350 short fmul_res_snan - tbl_fmul_op # NORM x SNAN
8351 short tbl_fmul_op - tbl_fmul_op #
8352 short tbl_fmul_op - tbl_fmul_op #
8354 short fmul_zero - tbl_fmul_op # ZERO x NORM
8355 short fmul_zero - tbl_fmul_op # ZERO x ZERO
8356 short fmul_res_operr - tbl_fmul_op # ZERO x INF
8357 short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
8358 short fmul_zero - tbl_fmul_op # ZERO x DENORM
8359 short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
8360 short tbl_fmul_op - tbl_fmul_op #
8361 short tbl_fmul_op - tbl_fmul_op #
8363 short fmul_inf_dst - tbl_fmul_op # INF x NORM
8364 short fmul_res_operr - tbl_fmul_op # INF x ZERO
8365 short fmul_inf_dst - tbl_fmul_op # INF x INF
8366 short fmul_res_qnan - tbl_fmul_op # INF x QNAN
8367 short fmul_inf_dst - tbl_fmul_op # INF x DENORM
8368 short fmul_res_snan - tbl_fmul_op # INF x SNAN
8369 short tbl_fmul_op - tbl_fmul_op #
8370 short tbl_fmul_op - tbl_fmul_op #
8372 short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
8373 short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
8374 short fmul_res_qnan - tbl_fmul_op # QNAN x INF
8375 short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
8376 short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
8377 short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
8378 short tbl_fmul_op - tbl_fmul_op #
8379 short tbl_fmul_op - tbl_fmul_op #
8381 short fmul_norm - tbl_fmul_op # DENORM x NORM
8382 short fmul_zero - tbl_fmul_op # DENORM x ZERO
8383 short fmul_inf_src - tbl_fmul_op # DENORM x INF
8384 short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
8385 short fmul_norm - tbl_fmul_op # DENORM x DENORM
8386 short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
8387 short tbl_fmul_op - tbl_fmul_op #
8388 short tbl_fmul_op - tbl_fmul_op #
8390 short fmul_res_snan - tbl_fmul_op # SNAN x NORM
8391 short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
8392 short fmul_res_snan - tbl_fmul_op # SNAN x INF
8393 short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
8394 short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
8395 short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
8396 short tbl_fmul_op - tbl_fmul_op #
8397 short tbl_fmul_op - tbl_fmul_op #
8399 fmul_res_operr:
8400 bra.l res_operr
8401 fmul_res_snan:
8402 bra.l res_snan
8403 fmul_res_qnan:
8404 bra.l res_qnan
8407 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8409 global fmul_zero # global for fsglmul
8410 fmul_zero:
8411 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8412 mov.b DST_EX(%a1),%d1
8413 eor.b %d0,%d1
8414 bpl.b fmul_zero_p # result ZERO is pos.
8415 fmul_zero_n:
8416 fmov.s &0x80000000,%fp0 # load -ZERO
8417 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8419 fmul_zero_p:
8420 fmov.s &0x00000000,%fp0 # load +ZERO
8421 mov.b &z_bmask,FPSR_CC(%a6) # set Z
8425 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8427 # Note: The j-bit for an infinity is a don't-care. However, to be
8428 # strictly compatible w/ the 68881/882, we make sure to return an
8429 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8430 # INFs take priority.
8432 global fmul_inf_dst # global for fsglmul
8433 fmul_inf_dst:
8434 fmovm.x DST(%a1),&0x80 # return INF result in fp0
8435 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8436 mov.b DST_EX(%a1),%d1
8437 eor.b %d0,%d1
8438 bpl.b fmul_inf_dst_p # result INF is pos.
8439 fmul_inf_dst_n:
8440 fabs.x %fp0 # clear result sign
8441 fneg.x %fp0 # set result sign
8442 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8444 fmul_inf_dst_p:
8445 fabs.x %fp0 # clear result sign
8446 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
8449 global fmul_inf_src # global for fsglmul
8450 fmul_inf_src:
8451 fmovm.x SRC(%a0),&0x80 # return INF result in fp0
8452 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
8453 mov.b DST_EX(%a1),%d1
8454 eor.b %d0,%d1
8455 bpl.b fmul_inf_dst_p # result INF is pos.
8456 bra.b fmul_inf_dst_n
8458 #########################################################################
8459 # XDEF **************************************************************** #
8460 # fin(): emulates the fmove instruction #
8461 # fsin(): emulates the fsmove instruction #
8462 # fdin(): emulates the fdmove instruction #
8464 # XREF **************************************************************** #
8465 # norm() - normalize mantissa for EXOP on denorm #
8466 # scale_to_zero_src() - scale src exponent to zero #
8467 # ovf_res() - return default overflow result #
8468 # unf_res() - return default underflow result #
8469 # res_qnan_1op() - return QNAN result #
8470 # res_snan_1op() - return SNAN result #
8472 # INPUT *************************************************************** #
8473 # a0 = pointer to extended precision source operand #
8474 # d0 = round prec/mode #
8476 # OUTPUT ************************************************************** #
8477 # fp0 = result #
8478 # fp1 = EXOP (if exception occurred) #
8480 # ALGORITHM *********************************************************** #
8481 # Handle NANs, infinities, and zeroes as special cases. Divide #
8482 # norms into extended, single, and double precision. #
8483 # Norms can be emulated w/ a regular fmove instruction. For #
8484 # sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8485 # if the result would have overflowed/underflowed. If so, use unf_res() #
8486 # or ovf_res() to return the default result. Also return EXOP if #
8487 # exception is enabled. If no exception, return the default result. #
8488 # Unnorms don't pass through here. #
8490 #########################################################################
8492 global fsin
8493 fsin:
8494 andi.b &0x30,%d0 # clear rnd prec
8495 ori.b &s_mode*0x10,%d0 # insert sgl precision
8496 bra.b fin
8498 global fdin
8499 fdin:
8500 andi.b &0x30,%d0 # clear rnd prec
8501 ori.b &d_mode*0x10,%d0 # insert dbl precision; fall into fin
8503 global fin
8504 fin:
8505 mov.l %d0,L_SCR3(%a6) # store rnd info
8507 mov.b STAG(%a6),%d1 # fetch src optype tag
8508 bne.w fin_not_norm # optimize on non-norm input
8511 # FP MOVE IN: NORMs and DENORMs ONLY!
8513 fin_norm:
8514 andi.b &0xc0,%d0 # is precision extended?
8515 bne.w fin_not_ext # no, so go handle dbl or sgl
8518 # precision selected is extended. so...we cannot get an underflow
8519 # or overflow because of rounding to the correct precision. so...
8520 # skip the scaling and unscaling...
8522 tst.b SRC_EX(%a0) # is the operand negative?
8523 bpl.b fin_norm_done # no
8524 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8525 fin_norm_done:
8526 fmovm.x SRC(%a0),&0x80 # return result in fp0
8530 # for an extended precision DENORM, the UNFL exception bit is set
8531 # the accrued bit is NOT set in this instance(no inexactness!)
8533 fin_denorm:
8534 andi.b &0xc0,%d0 # is precision extended?
8535 bne.w fin_not_ext # no, so go handle dbl or sgl
8537 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8538 tst.b SRC_EX(%a0) # is the operand negative?
8539 bpl.b fin_denorm_done # no
8540 bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
8541 fin_denorm_done:
8542 fmovm.x SRC(%a0),&0x80 # return result in fp0
8543 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8544 bne.b fin_denorm_unfl_ena # yes
8548 # the input is an extended DENORM and underflow is enabled in the FPCR.
8549 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8550 # exponent and insert back into the operand.
8552 fin_denorm_unfl_ena:
8553 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src to scratch so norm()
8554 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # can modify it in place
8555 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8556 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
8557 bsr.l norm # normalize result
8558 neg.w %d0 # new exponent = -(shft val)
8559 addi.w &0x6000,%d0 # add new bias to exponent
8560 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
8561 andi.w &0x8000,%d1 # keep old sign
8562 andi.w &0x7fff,%d0 # clear sign position
8563 or.w %d1,%d0 # concat new exp,old sign
8564 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
8565 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8569 # operand is to be rounded to single or double precision
8571 fin_not_ext:
8572 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
8573 bne.b fin_dbl
8576 # operand is to be rounded to single precision
8578 fin_sgl:
8579 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src to scratch area
8580 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8581 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8582 bsr.l scale_to_zero_src # calculate scale factor
8584 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
8585 bge.w fin_sd_unfl # yes; go handle underflow
8586 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
8587 beq.w fin_sd_may_ovfl # maybe; go check
8588 blt.w fin_sd_ovfl # yes; go handle overflow
8591 # operand will NOT overflow or underflow when moved into the fp reg file
8593 fin_sd_normal:
8594 fmov.l &0x0,%fpsr # clear FPSR
8595 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8597 fmov.x FP_SCR0(%a6),%fp0 # perform move
8599 fmov.l %fpsr,%d1 # save FPSR
8600 fmov.l &0x0,%fpcr # clear FPCR
8602 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8604 fin_sd_normal_exit:
8605 mov.l %d2,-(%sp) # save d2
8606 fmovm.x &0x80,FP_SCR0(%a6) # store out result
8607 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8608 mov.w %d1,%d2 # make a copy
8609 andi.l &0x7fff,%d1 # strip sign
8610 sub.l %d0,%d1 # subtract S.F. to restore true exp
8611 andi.w &0x8000,%d2 # keep old sign
8612 or.w %d1,%d2 # concat old sign,new exponent
8613 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
8614 mov.l (%sp)+,%d2 # restore d2
8615 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
8619 # operand is to be rounded to double precision
8621 fin_dbl:
8622 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src to scratch area
8623 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8624 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8625 bsr.l scale_to_zero_src # calculate scale factor
8627 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
8628 bge.w fin_sd_unfl # yes; go handle underflow
8629 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
8630 beq.w fin_sd_may_ovfl # maybe; go check
8631 blt.w fin_sd_ovfl # yes; go handle overflow
8632 bra.w fin_sd_normal # no; go handle normalized op
8635 # operand WILL underflow when moved in to the fp register file
8637 fin_sd_unfl:
8638 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8640 tst.b FP_SCR0_EX(%a6) # is operand negative?
8641 bpl.b fin_sd_unfl_tst
8642 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
8644 # if underflow or inexact is enabled, then go calculate the EXOP first.
8645 fin_sd_unfl_tst:
8646 mov.b FPCR_ENABLE(%a6),%d1
8647 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
8648 bne.b fin_sd_unfl_ena # yes
# exceptions disabled: just return the default underflow result
8650 fin_sd_unfl_dis:
8651 lea FP_SCR0(%a6),%a0 # pass: result addr
8652 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
8653 bsr.l unf_res # calculate default result
8654 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
8655 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
8656 rts
8659 # operand will underflow AND underflow or inexact is enabled.
8660 # therefore, we must return the result rounded to extended precision.
# build the EXOP in FP_SCR1: exponent = (exp - scale factor) + 0x6000 bias
8662 fin_sd_unfl_ena:
8663 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
8664 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8665 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
8667 mov.l %d2,-(%sp) # save d2
8668 mov.w %d1,%d2 # make a copy
8669 andi.l &0x7fff,%d1 # strip sign
8670 sub.l %d0,%d1 # subtract scale factor
8671 andi.w &0x8000,%d2 # extract old sign
8672 addi.l &0x6000,%d1 # add new bias
8673 andi.w &0x7fff,%d1
8674 or.w %d1,%d2 # concat old sign,new exp
8675 mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
8676 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
8677 mov.l (%sp)+,%d2 # restore d2
8678 bra.b fin_sd_unfl_dis
8681 # operand WILL overflow.
8683 fin_sd_ovfl:
8684 fmov.l &0x0,%fpsr # clear FPSR
8685 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8687 fmov.x FP_SCR0(%a6),%fp0 # perform move
8689 fmov.l &0x0,%fpcr # clear FPCR
8690 fmov.l %fpsr,%d1 # save FPSR
8692 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8694 fin_sd_ovfl_tst:
8695 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8697 mov.b FPCR_ENABLE(%a6),%d1
8698 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8699 bne.b fin_sd_ovfl_ena # yes
8702 # OVFL is not enabled; therefore, we must create the default result by
8703 # calling ovf_res().
8705 fin_sd_ovfl_dis:
8706 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8707 sne %d1 # set sign param accordingly
8708 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
8709 bsr.l ovf_res # calculate default result
8710 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
8711 fmovm.x (%a0),&0x80 # return default result in fp0
8715 # OVFL is enabled.
8716 # the INEX2 bit has already been updated by the round to the correct precision.
8717 # now, round to extended(and don't alter the FPSR).
# build the EXOP: exponent = (exp - scale factor) - 0x6000 bias
8719 fin_sd_ovfl_ena:
8720 mov.l %d2,-(%sp) # save d2
8721 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8722 mov.l %d1,%d2 # make a copy
8723 andi.l &0x7fff,%d1 # strip sign
8724 andi.w &0x8000,%d2 # keep old sign
8725 sub.l %d0,%d1 # undo scaling: subtract scale factor
8726 sub.l &0x6000,%d1 # subtract bias
8727 andi.w &0x7fff,%d1
8728 or.w %d2,%d1
8729 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8730 mov.l (%sp)+,%d2 # restore d2
8731 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8732 bra.b fin_sd_ovfl_dis
8735 # the move in MAY overflow. so...
# perform the move, then compare |result| against 2.0: a rounded result
# >= 2.0 means the (scaled-to-zero) operand rounded up and out of range.
8737 fin_sd_may_ovfl:
8738 fmov.l &0x0,%fpsr # clear FPSR
8739 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8741 fmov.x FP_SCR0(%a6),%fp0 # perform the move
8743 fmov.l %fpsr,%d1 # save status
8744 fmov.l &0x0,%fpcr # clear FPCR
8746 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8748 fabs.x %fp0,%fp1 # make a copy of result
8749 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
8750 fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
8752 # no, it didn't overflow; we have correct result
8753 bra.w fin_sd_normal_exit
8755 ##########################################################################
8758 # operand is not a NORM: check its optype and branch accordingly
8760 fin_not_norm:
8761 cmpi.b %d1,&DENORM # weed out DENORM
8762 beq.w fin_denorm
8763 cmpi.b %d1,&SNAN # weed out SNANs
8764 beq.l res_snan_1op
8765 cmpi.b %d1,&QNAN # weed out QNANs
8766 beq.l res_qnan_1op
8769 # do the fmove in; at this point, only possible ops are ZERO and INF.
8770 # use fmov to determine ccodes.
8771 # prec:mode should be zero at this point but it won't affect answer anyways.
8773 fmov.x SRC(%a0),%fp0 # do fmove in
8774 fmov.l %fpsr,%d0 # no exceptions possible
8775 rol.l &0x8,%d0 # put ccodes in lo byte
8776 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
8779 #########################################################################
8780 # XDEF **************************************************************** #
8781 # fdiv(): emulates the fdiv instruction #
8782 # fsdiv(): emulates the fsdiv instruction #
8783 # fddiv(): emulates the fddiv instruction #
8785 # XREF **************************************************************** #
8786 # scale_to_zero_src() - scale src exponent to zero #
8787 # scale_to_zero_dst() - scale dst exponent to zero #
8788 # unf_res() - return default underflow result #
8789 # ovf_res() - return default overflow result #
8790 # res_qnan() - return QNAN result #
8791 # res_snan() - return SNAN result #
8793 # INPUT *************************************************************** #
8794 # a0 = pointer to extended precision source operand #
8795 # a1 = pointer to extended precision destination operand #
8796 # d0 = rnd prec,mode #
8798 # OUTPUT ************************************************************** #
8799 # fp0 = result #
8800 # fp1 = EXOP (if exception occurred) #
8802 # ALGORITHM *********************************************************** #
8803 # Handle NANs, infinities, and zeroes as special cases. Divide #
8804 # norms/denorms into ext/sgl/dbl precision. #
8805 # For norms/denorms, scale the exponents such that a divide #
8806 # instruction won't cause an exception. Use the regular fdiv to #
8807 # compute a result. Check if the regular operands would have taken #
8808 # an exception. If so, return the default overflow/underflow result #
8809 # and return the EXOP if exceptions are enabled. Else, scale the #
8810 # result operand to the proper exponent. #
8812 #########################################################################
# scale-factor limit tables, indexed by rounding precision
# (0 = ext, 1 = sgl, 2 = dbl); compared against the combined
# src/dst scale factor to predict underflow/overflow of the divide.
8814 align 0x10
8815 tbl_fdiv_unfl:
8816 long 0x3fff - 0x0000 # ext_unfl
8817 long 0x3fff - 0x3f81 # sgl_unfl
8818 long 0x3fff - 0x3c01 # dbl_unfl
8820 tbl_fdiv_ovfl:
8821 long 0x3fff - 0x7ffe # ext overflow exponent
8822 long 0x3fff - 0x407e # sgl overflow exponent
8823 long 0x3fff - 0x43fe # dbl overflow exponent
# fsdiv/fddiv force the rounding precision to sgl/dbl, then share fdiv
8825 global fsdiv
8826 fsdiv:
8827 andi.b &0x30,%d0 # clear rnd prec
8828 ori.b &s_mode*0x10,%d0 # insert sgl prec
8829 bra.b fdiv
8831 global fddiv
8832 fddiv:
8833 andi.b &0x30,%d0 # clear rnd prec
8834 ori.b &d_mode*0x10,%d0 # insert dbl prec
# fddiv falls through into fdiv
8836 global fdiv
8837 fdiv:
8838 mov.l %d0,L_SCR3(%a6) # store rnd info
# d1 = (DTAG << 3) | STAG; zero only when both operands are NORMs
8840 clr.w %d1
8841 mov.b DTAG(%a6),%d1
8842 lsl.b &0x3,%d1
8843 or.b STAG(%a6),%d1 # combine src tags
8845 bne.w fdiv_not_norm # optimize on non-norm input
8848 # DIVIDE: NORMs and DENORMs ONLY!
# copy dst to FP_SCR1 and src to FP_SCR0, scale both exponents to zero,
# and form the combined scale factor on the stack; then predict the
# result exponent against the precision-indexed limit tables.
8850 fdiv_norm:
8851 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
8852 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
8853 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
8855 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
8856 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
8857 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
8859 bsr.l scale_to_zero_src # scale src exponent
8860 mov.l %d0,-(%sp) # save scale factor 1
8862 bsr.l scale_to_zero_dst # scale dst exponent
8864 neg.l (%sp) # SCALE FACTOR = scale1 - scale2
8865 add.l %d0,(%sp)
8867 mov.w 2+L_SCR3(%a6),%d1 # fetch precision
8868 lsr.b &0x6,%d1 # shift to lo bits
8869 mov.l (%sp)+,%d0 # load S.F.
8870 cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8871 ble.w fdiv_may_ovfl # result will overflow
8873 cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8874 beq.w fdiv_may_unfl # maybe
8875 bgt.w fdiv_unfl # yes; go handle underflow
# falls through to fdiv_normal when no exception is possible
# no overflow/underflow possible: do the divide at the user's rounding
# precision/mode, then unscale the result exponent.
8877 fdiv_normal:
8878 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8880 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8881 fmov.l &0x0,%fpsr # clear FPSR
8883 fdiv.x FP_SCR0(%a6),%fp0 # perform divide
8885 fmov.l %fpsr,%d1 # save FPSR
8886 fmov.l &0x0,%fpcr # clear FPCR
8888 or.l %d1,USER_FPSR(%a6) # save INEX2,N
8890 fdiv_normal_exit:
8891 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
8892 mov.l %d2,-(%sp) # store d2
8893 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
8894 mov.l %d1,%d2 # make a copy
8895 andi.l &0x7fff,%d1 # strip sign
8896 andi.w &0x8000,%d2 # keep old sign
8897 sub.l %d0,%d1 # undo scaling: subtract scale factor
8898 or.w %d2,%d1 # concat old sign,new exp
8899 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8900 mov.l (%sp)+,%d2 # restore d2
8901 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# exponents (biased) just above the max for ext/sgl/dbl precision;
# used by fdiv_may_ovfl to decide whether overflow really occurred
8904 tbl_fdiv_ovfl2:
8905 long 0x7fff
8906 long 0x407f
8907 long 0x43ff
8909 fdiv_no_ovfl:
8910 mov.l (%sp)+,%d0 # restore scale factor
8911 bra.b fdiv_normal_exit
# the divide MAY overflow: perform it, then compare the unscaled result
# exponent against tbl_fdiv_ovfl2 for the current precision (d1).
8913 fdiv_may_ovfl:
8914 mov.l %d0,-(%sp) # save scale factor
8916 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8918 fmov.l L_SCR3(%a6),%fpcr # set FPCR
8919 fmov.l &0x0,%fpsr # clear FPSR
8921 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8923 fmov.l %fpsr,%d0
8924 fmov.l &0x0,%fpcr
8926 or.l %d0,USER_FPSR(%a6) # save INEX,N
8928 fmovm.x &0x01,-(%sp) # save result to stack
8929 mov.w (%sp),%d0 # fetch new exponent
8930 add.l &0xc,%sp # clear result from stack
8931 andi.l &0x7fff,%d0 # strip sign
8932 sub.l (%sp),%d0 # undo scaling: subtract scale factor
8933 cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
8934 blt.b fdiv_no_ovfl
8935 mov.l (%sp)+,%d0
8937 fdiv_ovfl_tst:
8938 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8940 mov.b FPCR_ENABLE(%a6),%d1
8941 andi.b &0x13,%d1 # is OVFL or INEX enabled?
8942 bne.b fdiv_ovfl_ena # yes
8944 fdiv_ovfl_dis:
8945 btst &neg_bit,FPSR_CC(%a6) # is result negative?
8946 sne %d1 # set sign param accordingly
8947 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
8948 bsr.l ovf_res # calculate default result
8949 or.b %d0,FPSR_CC(%a6) # set INF if applicable
8950 fmovm.x (%a0),&0x80 # return default result in fp0
# OVFL (or INEX) is enabled: build the EXOP rounded to extended precision.
# for sgl/dbl precision, redo the divide in extended first (rnd mode only).
8953 fdiv_ovfl_ena:
8954 mov.l L_SCR3(%a6),%d1
8955 andi.b &0xc0,%d1 # is precision extended?
8956 bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
8958 fdiv_ovfl_ena_cont:
8959 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
8961 mov.l %d2,-(%sp) # save d2
8962 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
8963 mov.w %d1,%d2 # make a copy
8964 andi.l &0x7fff,%d1 # strip sign
8965 sub.l %d0,%d1 # undo scaling: subtract scale factor
8966 subi.l &0x6000,%d1 # subtract bias
8967 andi.w &0x7fff,%d1 # clear sign bit
8968 andi.w &0x8000,%d2 # keep old sign
8969 or.w %d2,%d1 # concat old sign,new exp
8970 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
8971 mov.l (%sp)+,%d2 # restore d2
8972 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
8973 bra.b fdiv_ovfl_dis
8975 fdiv_ovfl_ena_sd:
8976 fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
8978 mov.l L_SCR3(%a6),%d1
8979 andi.b &0x30,%d1 # keep rnd mode
8980 fmov.l %d1,%fpcr # set FPCR
8982 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8984 fmov.l &0x0,%fpcr # clear FPCR
8985 bra.b fdiv_ovfl_ena_cont
# the divide WILL underflow: execute it with round-to-zero so the
# intermediate cannot round up out of range, then build default result
# (and the EXOP in fp1 if UNFL/INEX is enabled).
8987 fdiv_unfl:
8988 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8990 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
8992 fmov.l &rz_mode*0x10,%fpcr # set FPCR
8993 fmov.l &0x0,%fpsr # clear FPSR
8995 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
8997 fmov.l %fpsr,%d1 # save status
8998 fmov.l &0x0,%fpcr # clear FPCR
9000 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9002 mov.b FPCR_ENABLE(%a6),%d1
9003 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9004 bne.b fdiv_unfl_ena # yes
9006 fdiv_unfl_dis:
9007 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9009 lea FP_SCR0(%a6),%a0 # pass: result addr
9010 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9011 bsr.l unf_res # calculate default result
9012 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
9013 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9017 # UNFL is enabled.
9019 fdiv_unfl_ena:
9020 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
9022 mov.l L_SCR3(%a6),%d1
9023 andi.b &0xc0,%d1 # is precision extended?
9024 bne.b fdiv_unfl_ena_sd # no, sgl or dbl
9026 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9028 fdiv_unfl_ena_cont:
9029 fmov.l &0x0,%fpsr # clear FPSR
9031 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9033 fmov.l &0x0,%fpcr # clear FPCR
# EXOP exponent = (exp - scale factor) + 0x6000 bias
9035 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
9036 mov.l %d2,-(%sp) # save d2
9037 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9038 mov.l %d1,%d2 # make a copy
9039 andi.l &0x7fff,%d1 # strip sign
9040 andi.w &0x8000,%d2 # keep old sign
9041 sub.l %d0,%d1 # undo scaling: subtract scale factor
9042 addi.l &0x6000,%d1 # add bias
9043 andi.w &0x7fff,%d1
9044 or.w %d2,%d1 # concat old sign,new exp
9045 mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
9046 mov.l (%sp)+,%d2 # restore d2
9047 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9048 bra.w fdiv_unfl_dis
9050 fdiv_unfl_ena_sd:
9051 mov.l L_SCR3(%a6),%d1
9052 andi.b &0x30,%d1 # use only rnd mode
9053 fmov.l %d1,%fpcr # set FPCR
9055 bra.b fdiv_unfl_ena_cont
9058 # the divide operation MAY underflow:
# perform the divide and compare |result| against 1.0; strictly above
# or below 1.0 decides immediately, equality requires a redo in RZ mode.
9060 fdiv_may_unfl:
9061 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
9063 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9064 fmov.l &0x0,%fpsr # clear FPSR
9066 fdiv.x FP_SCR0(%a6),%fp0 # execute divide
9068 fmov.l %fpsr,%d1 # save status
9069 fmov.l &0x0,%fpcr # clear FPCR
9071 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9073 fabs.x %fp0,%fp1 # make a copy of result
9074 fcmp.b %fp1,&0x1 # is |result| > 1.b?
9075 fbgt.w fdiv_normal_exit # no; no underflow occurred
9076 fblt.w fdiv_unfl # yes; underflow occurred
9079 # we still don't know if underflow occurred. result is ~ equal to 1. but,
9080 # we don't know if the result was an underflow that rounded up to a 1
9081 # or a normalized number that rounded down to a 1. so, redo the entire
9082 # operation using RZ as the rounding mode to see what the pre-rounded
9083 # result is. this case should be relatively rare.
9085 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
9087 mov.l L_SCR3(%a6),%d1
9088 andi.b &0xc0,%d1 # keep rnd prec
9089 ori.b &rz_mode*0x10,%d1 # insert RZ
9091 fmov.l %d1,%fpcr # set FPCR
9092 fmov.l &0x0,%fpsr # clear FPSR
9094 fdiv.x FP_SCR0(%a6),%fp1 # execute divide
9096 fmov.l &0x0,%fpcr # clear FPCR
9097 fabs.x %fp1 # make absolute value
9098 fcmp.b %fp1,&0x1 # is |result| < 1.b?
9099 fbge.w fdiv_normal_exit # no; no underflow occurred
9100 bra.w fdiv_unfl # yes; underflow occurred
9102 ############################################################################
9105 # Divide: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG; the word table holds offsets relative to
# tbl_fdiv_op, so the two-level fetch+jmp dispatches on dst/src type.
9107 fdiv_not_norm:
9108 mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9109 jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
9111 swbeg &48
# rows are dst type (NORM/ZERO/INF/QNAN/DENORM/SNAN, 8 entries each
# incl. two unused pad slots); columns are src type
9112 tbl_fdiv_op:
9113 short fdiv_norm - tbl_fdiv_op # NORM / NORM
9114 short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
9115 short fdiv_zero_load - tbl_fdiv_op # NORM / INF
9116 short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
9117 short fdiv_norm - tbl_fdiv_op # NORM / DENORM
9118 short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
9119 short tbl_fdiv_op - tbl_fdiv_op #
9120 short tbl_fdiv_op - tbl_fdiv_op #
9122 short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
9123 short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
9124 short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
9125 short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
9126 short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
9127 short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
9128 short tbl_fdiv_op - tbl_fdiv_op #
9129 short tbl_fdiv_op - tbl_fdiv_op #
9131 short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
9132 short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
9133 short fdiv_res_operr - tbl_fdiv_op # INF / INF
9134 short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
9135 short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
9136 short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
9137 short tbl_fdiv_op - tbl_fdiv_op #
9138 short tbl_fdiv_op - tbl_fdiv_op #
9140 short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
9141 short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
9142 short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
9143 short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
9144 short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
9145 short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
9146 short tbl_fdiv_op - tbl_fdiv_op #
9147 short tbl_fdiv_op - tbl_fdiv_op #
9149 short fdiv_norm - tbl_fdiv_op # DENORM / NORM
9150 short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
9151 short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
9152 short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
9153 short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
9154 short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
9155 short tbl_fdiv_op - tbl_fdiv_op #
9156 short tbl_fdiv_op - tbl_fdiv_op #
9158 short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
9159 short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
9160 short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
9161 short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
9162 short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
9163 short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
9164 short tbl_fdiv_op - tbl_fdiv_op #
9165 short tbl_fdiv_op - tbl_fdiv_op #
# trampolines to the shared NAN/operand-error result builders
9167 fdiv_res_qnan:
9168 bra.l res_qnan
9169 fdiv_res_snan:
9170 bra.l res_snan
9171 fdiv_res_operr:
9172 bra.l res_operr
9174 global fdiv_zero_load # global for fsgldiv
# return a signed ZERO; result sign = src sign XOR dst sign
9175 fdiv_zero_load:
9176 mov.b SRC_EX(%a0),%d0 # result sign is exclusive
9177 mov.b DST_EX(%a1),%d1 # or of input signs.
9178 eor.b %d0,%d1
9179 bpl.b fdiv_zero_load_p # result is positive
9180 fmov.s &0x80000000,%fp0 # load a -ZERO
9181 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
# NOTE(review): line-numbering gap here (9182) — the full source returns
# (rts) before the positive-case label; verify against the complete file.
9183 fdiv_zero_load_p:
9184 fmov.s &0x00000000,%fp0 # load a +ZERO
9185 mov.b &z_bmask,FPSR_CC(%a6) # set Z
9189 # The destination was In Range and the source was a ZERO. The result,
9190 # therefore, is an INF w/ the proper sign.
9191 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9193 global fdiv_inf_load # global for fsgldiv
9194 fdiv_inf_load:
9195 ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
9196 mov.b SRC_EX(%a0),%d0 # load both signs
9197 mov.b DST_EX(%a1),%d1
9198 eor.b %d0,%d1
9199 bpl.b fdiv_inf_load_p # result is positive
9200 fmov.s &0xff800000,%fp0 # make result -INF
9201 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
# NOTE(review): line-numbering gap here (9202) — the full source returns
# (rts) before the positive-case label; verify against the complete file.
9203 fdiv_inf_load_p:
9204 fmov.s &0x7f800000,%fp0 # make result +INF
9205 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9209 # The destination was an INF w/ an In Range or ZERO source, the result is
9210 # an INF w/ the proper sign.
9211 # The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9212 # dst INF is set, then the j-bit of the result INF is also set).
9214 global fdiv_inf_dst # global for fsgldiv
9215 fdiv_inf_dst:
9216 mov.b DST_EX(%a1),%d0 # load both signs
9217 mov.b SRC_EX(%a0),%d1
9218 eor.b %d0,%d1
9219 bpl.b fdiv_inf_dst_p # result is positive
9221 fmovm.x DST(%a1),&0x80 # return result in fp0
9222 fabs.x %fp0 # clear sign bit
9223 fneg.x %fp0 # set sign bit
9224 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
# NOTE(review): line-numbering gap here (9225-9226) — the full source
# returns (rts) before the positive-case label; verify against the file.
9227 fdiv_inf_dst_p:
9228 fmovm.x DST(%a1),&0x80 # return result in fp0
9229 fabs.x %fp0 # return positive INF
9230 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
9233 #########################################################################
9234 # XDEF **************************************************************** #
9235 # fneg(): emulates the fneg instruction #
9236 # fsneg(): emulates the fsneg instruction #
9237 # fdneg(): emulates the fdneg instruction #
9239 # XREF **************************************************************** #
9240 # norm() - normalize a denorm to provide EXOP #
9241 # scale_to_zero_src() - scale sgl/dbl source exponent #
9242 # ovf_res() - return default overflow result #
9243 # unf_res() - return default underflow result #
9244 # res_qnan_1op() - return QNAN result #
9245 # res_snan_1op() - return SNAN result #
9247 # INPUT *************************************************************** #
9248 # a0 = pointer to extended precision source operand #
9249 # d0 = rnd prec,mode #
9251 # OUTPUT ************************************************************** #
9252 # fp0 = result #
9253 # fp1 = EXOP (if exception occurred) #
9255 # ALGORITHM *********************************************************** #
9256 # Handle NANs, zeroes, and infinities as special cases. Separate #
9257 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
9258 # emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
9259 # and an actual fneg performed to see if overflow/underflow would have #
9260 # occurred. If so, return default underflow/overflow result. Else, #
9261 # scale the result exponent and return result. FPSR gets set based on #
9262 # the result value. #
9264 #########################################################################
# fsneg/fdneg force the rounding precision to sgl/dbl, then share fneg
9266 global fsneg
9267 fsneg:
9268 andi.b &0x30,%d0 # clear rnd prec
9269 ori.b &s_mode*0x10,%d0 # insert sgl precision
9270 bra.b fneg
9272 global fdneg
9273 fdneg:
9274 andi.b &0x30,%d0 # clear rnd prec
9275 ori.b &d_mode*0x10,%d0 # insert dbl prec
# fdneg falls through into fneg
9277 global fneg
9278 fneg:
9279 mov.l %d0,L_SCR3(%a6) # store rnd info
9280 mov.b STAG(%a6),%d1
9281 bne.w fneg_not_norm # optimize on non-norm input
9284 # NEGATE SIGN : norms and denorms ONLY!
9286 fneg_norm:
9287 andi.b &0xc0,%d0 # is precision extended?
9288 bne.w fneg_not_ext # no; go handle sgl or dbl
9291 # precision selected is extended. so...we can not get an underflow
9292 # or overflow because of rounding to the correct precision. so...
9293 # skip the scaling and unscaling...
9295 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9296 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9297 mov.w SRC_EX(%a0),%d0
9298 eori.w &0x8000,%d0 # negate sign
9299 bpl.b fneg_norm_load # sign is positive
9300 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9301 fneg_norm_load:
9302 mov.w %d0,FP_SCR0_EX(%a6)
9303 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# NOTE(review): line-numbering gap follows (9304-9306) — the full source
# returns (rts) here; verify against the complete file.
9307 # for an extended precision DENORM, the UNFL exception bit is set
9308 # the accrued bit is NOT set in this instance(no inexactness!)
9310 fneg_denorm:
9311 andi.b &0xc0,%d0 # is precision extended?
9312 bne.b fneg_not_ext # no; go handle sgl or dbl
9314 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9316 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9317 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9318 mov.w SRC_EX(%a0),%d0
9319 eori.w &0x8000,%d0 # negate sign
9320 bpl.b fneg_denorm_done # no
9321 mov.b &neg_bmask,FPSR_CC(%a6) # yes, set 'N' ccode bit
9322 fneg_denorm_done:
9323 mov.w %d0,FP_SCR0_EX(%a6)
9324 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9326 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9327 bne.b fneg_ext_unfl_ena # yes
# NOTE(review): line-numbering gap follows (9328-9330) — the full source
# returns (rts) here when UNFL is disabled; verify against the file.
9331 # the input is an extended DENORM and underflow is enabled in the FPCR.
9332 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9333 # exponent and insert back into the operand.
9335 fneg_ext_unfl_ena:
9336 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9337 bsr.l norm # normalize result
9338 neg.w %d0 # new exponent = -(shft val)
9339 addi.w &0x6000,%d0 # add new bias to exponent
9340 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9341 andi.w &0x8000,%d1 # keep old sign
9342 andi.w &0x7fff,%d0 # clear sign position
9343 or.w %d1,%d0 # concat old sign, new exponent
9344 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9345 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9349 # operand is either single or double
9351 fneg_not_ext:
9352 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9353 bne.b fneg_dbl
9356 # operand is to be rounded to single precision
# scale the exponent to zero, then test the scale factor against the
# single-precision limits (0x3f80 underflow, 0x407e max exponent)
9358 fneg_sgl:
9359 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9360 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9361 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9362 bsr.l scale_to_zero_src # calculate scale factor
9364 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9365 bge.w fneg_sd_unfl # yes; go handle underflow
9366 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9367 beq.w fneg_sd_may_ovfl # maybe; go check
9368 blt.w fneg_sd_ovfl # yes; go handle overflow
# falls through to fneg_sd_normal when no exception is possible
9371 # operand will NOT overflow or underflow when moved in to the fp reg file
9373 fneg_sd_normal:
9374 fmov.l &0x0,%fpsr # clear FPSR
9375 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9377 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9379 fmov.l %fpsr,%d1 # save FPSR
9380 fmov.l &0x0,%fpcr # clear FPCR
9382 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# common exit: unscale the result exponent (new exp = exp - scale factor)
9384 fneg_sd_normal_exit:
9385 mov.l %d2,-(%sp) # save d2
9386 fmovm.x &0x80,FP_SCR0(%a6) # store out result
9387 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
9388 mov.w %d1,%d2 # make a copy
9389 andi.l &0x7fff,%d1 # strip sign
9390 sub.l %d0,%d1 # undo scaling: subtract scale factor
9391 andi.w &0x8000,%d2 # keep old sign
9392 or.w %d1,%d2 # concat old sign,new exp
9393 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
9394 mov.l (%sp)+,%d2 # restore d2
9395 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9399 # operand is to be rounded to double precision
# same as fneg_sgl but tested against the double-precision exponent limits
# (0x3c00 = dbl underflow threshold, 0x43fe = dbl max exponent)
9401 fneg_dbl:
9402 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9403 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9404 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9405 bsr.l scale_to_zero_src # calculate scale factor
9407 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
9408 bge.b fneg_sd_unfl # yes; go handle underflow
9409 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
9410 beq.w fneg_sd_may_ovfl # maybe; go check
9411 blt.w fneg_sd_ovfl # yes; go handle overflow
9412 bra.w fneg_sd_normal # no; go handle normalized op
9415 # operand WILL underflow when moved in to the fp register file
9417 fneg_sd_unfl:
9418 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9420 eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
9421 bpl.b fneg_sd_unfl_tst
9422 bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
9424 # if underflow or inexact is enabled, go calculate EXOP first.
9425 fneg_sd_unfl_tst:
9426 mov.b FPCR_ENABLE(%a6),%d1
9427 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
9428 bne.b fneg_sd_unfl_ena # yes
9430 fneg_sd_unfl_dis:
9431 lea FP_SCR0(%a6),%a0 # pass: result addr
9432 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
9433 bsr.l unf_res # calculate default result
9434 or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
9435 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9436 rts
9439 # operand will underflow AND underflow is enabled.
9440 # therefore, we must return the result rounded to extended precision.
# build the EXOP in FP_SCR1: exponent = (exp - scale factor) + 0x6000 bias
9442 fneg_sd_unfl_ena:
9443 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9444 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9445 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
9447 mov.l %d2,-(%sp) # save d2
9448 mov.l %d1,%d2 # make a copy
9449 andi.l &0x7fff,%d1 # strip sign
9450 andi.w &0x8000,%d2 # keep old sign
9451 sub.l %d0,%d1 # subtract scale factor
9452 addi.l &0x6000,%d1 # add new bias
9453 andi.w &0x7fff,%d1
9454 or.w %d2,%d1 # concat new sign,new exp
9455 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
9456 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
9457 mov.l (%sp)+,%d2 # restore d2
9458 bra.b fneg_sd_unfl_dis
9461 # operand WILL overflow.
9463 fneg_sd_ovfl:
9464 fmov.l &0x0,%fpsr # clear FPSR
9465 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9467 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9469 fmov.l &0x0,%fpcr # clear FPCR
9470 fmov.l %fpsr,%d1 # save FPSR
9472 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9474 fneg_sd_ovfl_tst:
9475 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9477 mov.b FPCR_ENABLE(%a6),%d1
9478 andi.b &0x13,%d1 # is OVFL or INEX enabled?
9479 bne.b fneg_sd_ovfl_ena # yes
9482 # OVFL is not enabled; therefore, we must create the default result by
9483 # calling ovf_res().
9485 fneg_sd_ovfl_dis:
9486 btst &neg_bit,FPSR_CC(%a6) # is result negative?
9487 sne %d1 # set sign param accordingly
9488 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
9489 bsr.l ovf_res # calculate default result
9490 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
9491 fmovm.x (%a0),&0x80 # return default result in fp0
9495 # OVFL is enabled.
9496 # the INEX2 bit has already been updated by the round to the correct precision.
9497 # now, round to extended(and don't alter the FPSR).
# build the EXOP: exponent = (exp - scale factor) - 0x6000 bias
9499 fneg_sd_ovfl_ena:
9500 mov.l %d2,-(%sp) # save d2
9501 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
9502 mov.l %d1,%d2 # make a copy
9503 andi.l &0x7fff,%d1 # strip sign
9504 andi.w &0x8000,%d2 # keep old sign
9505 sub.l %d0,%d1 # undo scaling: subtract scale factor
9506 subi.l &0x6000,%d1 # subtract bias
9507 andi.w &0x7fff,%d1
9508 or.w %d2,%d1 # concat sign,exp
9509 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
9510 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9511 mov.l (%sp)+,%d2 # restore d2
9512 bra.b fneg_sd_ovfl_dis
9515 # the move in MAY overflow. so...
# perform the negation, then compare |result| against 2.0: a rounded
# result >= 2.0 means the (scaled-to-zero) operand overflowed.
9517 fneg_sd_may_ovfl:
9518 fmov.l &0x0,%fpsr # clear FPSR
9519 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9521 fneg.x FP_SCR0(%a6),%fp0 # perform negation
9523 fmov.l %fpsr,%d1 # save status
9524 fmov.l &0x0,%fpcr # clear FPCR
9526 or.l %d1,USER_FPSR(%a6) # save INEX2,N
9528 fabs.x %fp0,%fp1 # make a copy of result
9529 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
9530 fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
9532 # no, it didn't overflow; we have correct result
9533 bra.w fneg_sd_normal_exit
9535 ##########################################################################
9538 # input is not normalized; what is it?
9540 fneg_not_norm:
9541 cmpi.b %d1,&DENORM # weed out DENORM
9542 beq.w fneg_denorm
9543 cmpi.b %d1,&SNAN # weed out SNAN
9544 beq.l res_snan_1op
9545 cmpi.b %d1,&QNAN # weed out QNAN
9546 beq.l res_qnan_1op
9549 # do the fneg; at this point, only possible ops are ZERO and INF.
9550 # use fneg to determine ccodes.
9551 # prec:mode should be zero at this point but it won't affect answer anyways.
9553 fneg.x SRC_EX(%a0),%fp0 # do fneg
9554 fmov.l %fpsr,%d0
9555 rol.l &0x8,%d0 # put ccodes in lo byte
9556 mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
9559 #########################################################################
9560 # XDEF **************************************************************** #
9561 # ftst(): emulates the ftst instruction #
9563 # XREF **************************************************************** #
9564 # res{s,q}nan_1op() - set NAN result for monadic instruction #
9566 # INPUT *************************************************************** #
9567 # a0 = pointer to extended precision source operand #
9569 # OUTPUT ************************************************************** #
9570 # none #
9572 # ALGORITHM *********************************************************** #
9573 # Check the source operand tag (STAG) and set the FPCR according #
9574 # to the operand type and sign. #
9576 #########################################################################
# ftst: set FPSR condition codes from the operand type/sign; no result.
# NOTE(review): this extraction drops some lines (see the original line
# numbering gaps, e.g. 9589, 9613, 9626, 9639) — the full source returns
# (rts) between each positive/negative case below; verify against the file.
9578 global ftst
9579 ftst:
9580 mov.b STAG(%a6),%d1
9581 bne.b ftst_not_norm # optimize on non-norm input
9584 # Norm:
9586 ftst_norm:
9587 tst.b SRC_EX(%a0) # is operand negative?
9588 bmi.b ftst_norm_m # yes
9590 ftst_norm_m:
9591 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9595 # input is not normalized; what is it?
9597 ftst_not_norm:
9598 cmpi.b %d1,&ZERO # weed out ZERO
9599 beq.b ftst_zero
9600 cmpi.b %d1,&INF # weed out INF
9601 beq.b ftst_inf
9602 cmpi.b %d1,&SNAN # weed out SNAN
9603 beq.l res_snan_1op
9604 cmpi.b %d1,&QNAN # weed out QNAN
9605 beq.l res_qnan_1op
9608 # Denorm:
9610 ftst_denorm:
9611 tst.b SRC_EX(%a0) # is operand negative?
9612 bmi.b ftst_denorm_m # yes
9614 ftst_denorm_m:
9615 mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9619 # Infinity:
9621 ftst_inf:
9622 tst.b SRC_EX(%a0) # is operand negative?
9623 bmi.b ftst_inf_m # yes
9624 ftst_inf_p:
9625 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9627 ftst_inf_m:
9628 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9632 # Zero:
9634 ftst_zero:
9635 tst.b SRC_EX(%a0) # is operand negative?
9636 bmi.b ftst_zero_m # yes
9637 ftst_zero_p:
9638 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9640 ftst_zero_m:
9641 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9644 #########################################################################
9645 # XDEF **************************************************************** #
9646 # fint(): emulates the fint instruction #
9648 # XREF **************************************************************** #
9649 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9651 # INPUT *************************************************************** #
9652 # a0 = pointer to extended precision source operand #
9653 # d0 = round precision/mode #
9655 # OUTPUT ************************************************************** #
9656 # fp0 = result #
9658 # ALGORITHM *********************************************************** #
9659 # Separate according to operand type. Unnorms don't pass through #
9660 # here. For norms, load the rounding mode/prec, execute a "fint", then #
9661 # store the resulting FPSR bits. #
9662 # For denorms, force the j-bit to a one and do the same as for #
9663 # norms. Denorms are so low that the answer will either be a zero or a #
9664 # one. #
9665 # For zeroes/infs/NANs, return the same while setting the FPSR #
9666 # as appropriate. #
9668 #########################################################################
9670 global fint
9671 fint:
9672 mov.b STAG(%a6),%d1
9673 bne.b fint_not_norm # optimize on non-norm input
9676 # Norm:
9678 fint_norm:
9679 andi.b &0x30,%d0 # set prec = ext
9681 fmov.l %d0,%fpcr # set FPCR
9682 fmov.l &0x0,%fpsr # clear FPSR
9684 fint.x SRC(%a0),%fp0 # execute fint
9686 fmov.l &0x0,%fpcr # clear FPCR
9687 fmov.l %fpsr,%d0 # save FPSR
9688 or.l %d0,USER_FPSR(%a6) # set exception bits
9693 # input is not normalized; what is it?
9695 fint_not_norm:
9696 cmpi.b %d1,&ZERO # weed out ZERO
9697 beq.b fint_zero
9698 cmpi.b %d1,&INF # weed out INF
9699 beq.b fint_inf
9700 cmpi.b %d1,&DENORM # weed out DENORM
9701 beq.b fint_denorm
9702 cmpi.b %d1,&SNAN # weed out SNAN
9703 beq.l res_snan_1op
9704 bra.l res_qnan_1op # weed out QNAN
9707 # Denorm:
9709 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9710 # also, the INEX2 and AINEX exception bits will be set.
9711 # so, we could either set these manually or force the DENORM
9712 # to a very small NORM and ship it to the NORM routine.
9713 # I do the latter.
9715 fint_denorm:
9716 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9717 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9718 lea FP_SCR0(%a6),%a0
9719 bra.b fint_norm
9722 # Zero:
9724 fint_zero:
9725 tst.b SRC_EX(%a0) # is ZERO negative?
9726 bmi.b fint_zero_m # yes
9727 fint_zero_p:
9728 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9729 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9731 fint_zero_m:
9732 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9733 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9737 # Infinity:
9739 fint_inf:
9740 fmovm.x SRC(%a0),&0x80 # return result in fp0
9741 tst.b SRC_EX(%a0) # is INF negative?
9742 bmi.b fint_inf_m # yes
9743 fint_inf_p:
9744 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9746 fint_inf_m:
9747 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9750 #########################################################################
9751 # XDEF **************************************************************** #
9752 # fintrz(): emulates the fintrz instruction #
9754 # XREF **************************************************************** #
9755 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9757 # INPUT *************************************************************** #
9758 # a0 = pointer to extended precision source operand #
9759 # d0 = round precision/mode #
9761 # OUTPUT ************************************************************** #
9762 # fp0 = result #
9764 # ALGORITHM *********************************************************** #
9765 # Separate according to operand type. Unnorms don't pass through #
9766 # here. For norms, load the rounding mode/prec, execute a "fintrz", #
9767 # then store the resulting FPSR bits. #
9768 # For denorms, force the j-bit to a one and do the same as for #
9769 # norms. Denorms are so low that the answer will either be a zero or a #
9770 # one. #
9771 # For zeroes/infs/NANs, return the same while setting the FPSR #
9772 # as appropriate. #
9774 #########################################################################
9776 global fintrz
9777 fintrz:
9778 mov.b STAG(%a6),%d1
9779 bne.b fintrz_not_norm # optimize on non-norm input
9782 # Norm:
9784 fintrz_norm:
9785 fmov.l &0x0,%fpsr # clear FPSR
9787 fintrz.x SRC(%a0),%fp0 # execute fintrz
9789 fmov.l %fpsr,%d0 # save FPSR
9790 or.l %d0,USER_FPSR(%a6) # set exception bits
9795 # input is not normalized; what is it?
9797 fintrz_not_norm:
9798 cmpi.b %d1,&ZERO # weed out ZERO
9799 beq.b fintrz_zero
9800 cmpi.b %d1,&INF # weed out INF
9801 beq.b fintrz_inf
9802 cmpi.b %d1,&DENORM # weed out DENORM
9803 beq.b fintrz_denorm
9804 cmpi.b %d1,&SNAN # weed out SNAN
9805 beq.l res_snan_1op
9806 bra.l res_qnan_1op # weed out QNAN
9809 # Denorm:
9811 # for DENORMs, the result will be (+/-)ZERO.
9812 # also, the INEX2 and AINEX exception bits will be set.
9813 # so, we could either set these manually or force the DENORM
9814 # to a very small NORM and ship it to the NORM routine.
9815 # I do the latter.
9817 fintrz_denorm:
9818 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9819 mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM
9820 lea FP_SCR0(%a6),%a0
9821 bra.b fintrz_norm
9824 # Zero:
9826 fintrz_zero:
9827 tst.b SRC_EX(%a0) # is ZERO negative?
9828 bmi.b fintrz_zero_m # yes
9829 fintrz_zero_p:
9830 fmov.s &0x00000000,%fp0 # return +ZERO in fp0
9831 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
9833 fintrz_zero_m:
9834 fmov.s &0x80000000,%fp0 # return -ZERO in fp0
9835 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9839 # Infinity:
9841 fintrz_inf:
9842 fmovm.x SRC(%a0),&0x80 # return result in fp0
9843 tst.b SRC_EX(%a0) # is INF negative?
9844 bmi.b fintrz_inf_m # yes
9845 fintrz_inf_p:
9846 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9848 fintrz_inf_m:
9849 mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9852 #########################################################################
9853 # XDEF **************************************************************** #
9854 # fabs(): emulates the fabs instruction #
9855 # fsabs(): emulates the fsabs instruction #
9856 # fdabs(): emulates the fdabs instruction #
9858 # XREF **************************************************************** #
9859 # norm() - normalize denorm mantissa to provide EXOP #
9860 #	scale_to_zero_src() - make exponent = 0; get scale factor	#
9861 # unf_res() - calculate underflow result #
9862 # ovf_res() - calculate overflow result #
9863 # res_{s,q}nan_1op() - set NAN result for monadic operation #
9865 # INPUT *************************************************************** #
9866 # a0 = pointer to extended precision source operand #
9867 # d0 = rnd precision/mode #
9869 # OUTPUT ************************************************************** #
9870 # fp0 = result #
9871 # fp1 = EXOP (if exception occurred) #
9873 # ALGORITHM *********************************************************** #
9874 # Handle NANs, infinities, and zeroes as special cases. Divide #
9875 # norms into extended, single, and double precision. #
9876 # Simply clear sign for extended precision norm. Ext prec denorm #
9877 # gets an EXOP created for it since it's an underflow. #
9878 # Double and single precision can overflow and underflow. First, #
9879 # scale the operand such that the exponent is zero. Perform an "fabs" #
9880 # using the correct rnd mode/prec. Check to see if the original #
9881 # exponent would take an exception. If so, use unf_res() or ovf_res() #
9882 # to calculate the default result. Also, create the EXOP for the #
9883 # exceptional case. If no exception should occur, insert the correct #
9884 # result exponent and return. #
9885 # Unnorms don't pass through here. #
9887 #########################################################################
9889 global fsabs
9890 fsabs:
9891 andi.b &0x30,%d0 # clear rnd prec
9892 ori.b &s_mode*0x10,%d0 # insert sgl precision
9893 bra.b fabs
9895 global fdabs
9896 fdabs:
9897 andi.b &0x30,%d0 # clear rnd prec
9898 ori.b &d_mode*0x10,%d0 # insert dbl precision
9900 global fabs
9901 fabs:
9902 mov.l %d0,L_SCR3(%a6) # store rnd info
9903 mov.b STAG(%a6),%d1
9904 bne.w fabs_not_norm # optimize on non-norm input
9907 # ABSOLUTE VALUE: norms and denorms ONLY!
9909 fabs_norm:
9910 andi.b &0xc0,%d0 # is precision extended?
9911 bne.b fabs_not_ext # no; go handle sgl or dbl
9914 # precision selected is extended. so...we can not get an underflow
9915 # or overflow because of rounding to the correct precision. so...
9916 # skip the scaling and unscaling...
9918 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9919 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9920 mov.w SRC_EX(%a0),%d1
9921 bclr &15,%d1 # force absolute value
9922 mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
9923 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
9927 # for an extended precision DENORM, the UNFL exception bit is set
9928 # the accrued bit is NOT set in this instance(no inexactness!)
9930 fabs_denorm:
9931 andi.b &0xc0,%d0 # is precision extended?
9932 bne.b fabs_not_ext # no
9934 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9936 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9937 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9938 mov.w SRC_EX(%a0),%d0
9939 bclr &15,%d0 # clear sign
9940 mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
9942 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
9944 btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9945 bne.b fabs_ext_unfl_ena
9949 # the input is an extended DENORM and underflow is enabled in the FPCR.
9950 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9951 # exponent and insert back into the operand.
9953 fabs_ext_unfl_ena:
9954 lea FP_SCR0(%a6),%a0 # pass: ptr to operand
9955 bsr.l norm # normalize result
9956 neg.w %d0 # new exponent = -(shft val)
9957 addi.w &0x6000,%d0 # add new bias to exponent
9958 mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
9959 andi.w &0x8000,%d1 # keep old sign
9960 andi.w &0x7fff,%d0 # clear sign position
9961 or.w %d1,%d0 # concat old sign, new exponent
9962 mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
9963 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
9967 # operand is either single or double
9969 fabs_not_ext:
9970 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
9971 bne.b fabs_dbl
9974 # operand is to be rounded to single precision
9976 fabs_sgl:
9977 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
9978 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
9979 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
9980 bsr.l scale_to_zero_src # calculate scale factor
9982 cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
9983 bge.w fabs_sd_unfl # yes; go handle underflow
9984 cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
9985 beq.w fabs_sd_may_ovfl # maybe; go check
9986 blt.w fabs_sd_ovfl # yes; go handle overflow
9989 # operand will NOT overflow or underflow when moved in to the fp reg file
9991 fabs_sd_normal:
9992 fmov.l &0x0,%fpsr # clear FPSR
9993 fmov.l L_SCR3(%a6),%fpcr # set FPCR
9995 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
9997 fmov.l %fpsr,%d1 # save FPSR
9998 fmov.l &0x0,%fpcr # clear FPCR
10000 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10002 fabs_sd_normal_exit:
10003 mov.l %d2,-(%sp) # save d2
10004 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10005 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
10006 mov.l %d1,%d2 # make a copy
10007 andi.l &0x7fff,%d1 # strip sign
10008 sub.l %d0,%d1 # add scale factor
10009 andi.w &0x8000,%d2 # keep old sign
10010 or.w %d1,%d2 # concat old sign,new exp
10011 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
10012 mov.l (%sp)+,%d2 # restore d2
10013 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10017 # operand is to be rounded to double precision
10019 fabs_dbl:
10020 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10021 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10022 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10023 bsr.l scale_to_zero_src # calculate scale factor
10025 cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
10026 bge.b fabs_sd_unfl # yes; go handle underflow
10027 cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
10028 beq.w fabs_sd_may_ovfl # maybe; go check
10029 blt.w fabs_sd_ovfl # yes; go handle overflow
10030 bra.w fabs_sd_normal # no; ho handle normalized op
10033 # operand WILL underflow when moved in to the fp register file
10035 fabs_sd_unfl:
10036 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10038 bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
10040 # if underflow or inexact is enabled, go calculate EXOP first.
10041 mov.b FPCR_ENABLE(%a6),%d1
10042 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10043 bne.b fabs_sd_unfl_ena # yes
10045 fabs_sd_unfl_dis:
10046 lea FP_SCR0(%a6),%a0 # pass: result addr
10047 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10048 bsr.l unf_res # calculate default result
10049 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
10050 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10051 rts
10054 # operand will underflow AND underflow is enabled.
10055 # therefore, we must return the result rounded to extended precision.
10057 fabs_sd_unfl_ena:
10058 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10059 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10060 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
10062 mov.l %d2,-(%sp) # save d2
10063 mov.l %d1,%d2 # make a copy
10064 andi.l &0x7fff,%d1 # strip sign
10065 andi.w &0x8000,%d2 # keep old sign
10066 sub.l %d0,%d1 # subtract scale factor
10067 addi.l &0x6000,%d1 # add new bias
10068 andi.w &0x7fff,%d1
10069 or.w %d2,%d1 # concat new sign,new exp
10070 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
10071 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
10072 mov.l (%sp)+,%d2 # restore d2
10073 bra.b fabs_sd_unfl_dis
10076 # operand WILL overflow.
10078 fabs_sd_ovfl:
10079 fmov.l &0x0,%fpsr # clear FPSR
10080 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10082 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10084 fmov.l &0x0,%fpcr # clear FPCR
10085 fmov.l %fpsr,%d1 # save FPSR
10087 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10089 fabs_sd_ovfl_tst:
10090 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10092 mov.b FPCR_ENABLE(%a6),%d1
10093 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10094 bne.b fabs_sd_ovfl_ena # yes
10097 # OVFL is not enabled; therefore, we must create the default result by
10098 # calling ovf_res().
10100 fabs_sd_ovfl_dis:
10101 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10102 sne %d1 # set sign param accordingly
10103 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
10104 bsr.l ovf_res # calculate default result
10105 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10106 fmovm.x (%a0),&0x80 # return default result in fp0
10110 # OVFL is enabled.
10111 # the INEX2 bit has already been updated by the round to the correct precision.
10112 # now, round to extended(and don't alter the FPSR).
10114 fabs_sd_ovfl_ena:
10115 mov.l %d2,-(%sp) # save d2
10116 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10117 mov.l %d1,%d2 # make a copy
10118 andi.l &0x7fff,%d1 # strip sign
10119 andi.w &0x8000,%d2 # keep old sign
10120 sub.l %d0,%d1 # add scale factor
10121 subi.l &0x6000,%d1 # subtract bias
10122 andi.w &0x7fff,%d1
10123 or.w %d2,%d1 # concat sign,exp
10124 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10125 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10126 mov.l (%sp)+,%d2 # restore d2
10127 bra.b fabs_sd_ovfl_dis
10130 # the move in MAY underflow. so...
10132 fabs_sd_may_ovfl:
10133 fmov.l &0x0,%fpsr # clear FPSR
10134 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10136 fabs.x FP_SCR0(%a6),%fp0 # perform absolute
10138 fmov.l %fpsr,%d1 # save status
10139 fmov.l &0x0,%fpcr # clear FPCR
10141 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10143 fabs.x %fp0,%fp1 # make a copy of result
10144 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10145 fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
10147 # no, it didn't overflow; we have correct result
10148 bra.w fabs_sd_normal_exit
10150 ##########################################################################
10153 # input is not normalized; what is it?
10155 fabs_not_norm:
10156 cmpi.b %d1,&DENORM # weed out DENORM
10157 beq.w fabs_denorm
10158 cmpi.b %d1,&SNAN # weed out SNAN
10159 beq.l res_snan_1op
10160 cmpi.b %d1,&QNAN # weed out QNAN
10161 beq.l res_qnan_1op
10163 fabs.x SRC(%a0),%fp0 # force absolute value
10165 cmpi.b %d1,&INF # weed out INF
10166 beq.b fabs_inf
10167 fabs_zero:
10168 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10170 fabs_inf:
10171 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10174 #########################################################################
10175 # XDEF **************************************************************** #
10176 # fcmp(): fp compare op routine #
10178 # XREF **************************************************************** #
10179 # res_qnan() - return QNAN result #
10180 # res_snan() - return SNAN result #
10182 # INPUT *************************************************************** #
10183 # a0 = pointer to extended precision source operand #
10184 # a1 = pointer to extended precision destination operand #
10185 # d0 = round prec/mode #
10187 # OUTPUT ************************************************************** #
10188 # None #
10190 # ALGORITHM *********************************************************** #
10191 # Handle NANs and denorms as special cases. For everything else, #
10192 # just use the actual fcmp instruction to produce the correct condition #
10193 # codes. #
10195 #########################################################################
10197 global fcmp
10198 fcmp:
10199 clr.w %d1
10200 mov.b DTAG(%a6),%d1
10201 lsl.b &0x3,%d1
10202 or.b STAG(%a6),%d1
10203 bne.b fcmp_not_norm # optimize on non-norm input
10206 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10208 fcmp_norm:
10209 fmovm.x DST(%a1),&0x80 # load dst op
10211 fcmp.x %fp0,SRC(%a0) # do compare
10213 fmov.l %fpsr,%d0 # save FPSR
10214 rol.l &0x8,%d0 # extract ccode bits
10215 mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
10220 # fcmp: inputs are not both normalized; what are they?
10222 fcmp_not_norm:
10223 mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10224 jmp (tbl_fcmp_op.b,%pc,%d1.w*1)
10226 swbeg &48
10227 tbl_fcmp_op:
10228 short fcmp_norm - tbl_fcmp_op # NORM - NORM
10229 short fcmp_norm - tbl_fcmp_op # NORM - ZERO
10230 short fcmp_norm - tbl_fcmp_op # NORM - INF
10231 short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
10232 short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
10233 short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
10234 short tbl_fcmp_op - tbl_fcmp_op #
10235 short tbl_fcmp_op - tbl_fcmp_op #
10237 short fcmp_norm - tbl_fcmp_op # ZERO - NORM
10238 short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
10239 short fcmp_norm - tbl_fcmp_op # ZERO - INF
10240 short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
10241 short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
10242 short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
10243 short tbl_fcmp_op - tbl_fcmp_op #
10244 short tbl_fcmp_op - tbl_fcmp_op #
10246 short fcmp_norm - tbl_fcmp_op # INF - NORM
10247 short fcmp_norm - tbl_fcmp_op # INF - ZERO
10248 short fcmp_norm - tbl_fcmp_op # INF - INF
10249 short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
10250 short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
10251 short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
10252 short tbl_fcmp_op - tbl_fcmp_op #
10253 short tbl_fcmp_op - tbl_fcmp_op #
10255 short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
10256 short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
10257 short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
10258 short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
10259 short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
10260 short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
10261 short tbl_fcmp_op - tbl_fcmp_op #
10262 short tbl_fcmp_op - tbl_fcmp_op #
10264 short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
10265 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
10266 short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
10267 short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
10268 short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
10269 short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
10270 short tbl_fcmp_op - tbl_fcmp_op #
10271 short tbl_fcmp_op - tbl_fcmp_op #
10273 short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
10274 short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
10275 short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
10276 short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
10277 short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
10278 short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
10279 short tbl_fcmp_op - tbl_fcmp_op #
10280 short tbl_fcmp_op - tbl_fcmp_op #
10282 # unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10283 # 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10284 fcmp_res_qnan:
10285 bsr.l res_qnan
10286 andi.b &0xf7,FPSR_CC(%a6)
10288 fcmp_res_snan:
10289 bsr.l res_snan
10290 andi.b &0xf7,FPSR_CC(%a6)
10294 # DENORMs are a little more difficult.
10295 # If you have a 2 DENORMs, then you can just force the j-bit to a one
10296 # and use the fcmp_norm routine.
10297 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10298 # and use the fcmp_norm routine.
10299 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10300 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10301 # (1) signs are (+) and the DENORM is the dst or
10302 # (2) signs are (-) and the DENORM is the src
10305 fcmp_dnrm_s:
10306 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10307 mov.l SRC_HI(%a0),%d0
10308 bset &31,%d0 # DENORM src; make into small norm
10309 mov.l %d0,FP_SCR0_HI(%a6)
10310 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10311 lea FP_SCR0(%a6),%a0
10312 bra.w fcmp_norm
10314 fcmp_dnrm_d:
10315 mov.l DST_EX(%a1),FP_SCR0_EX(%a6)
10316 mov.l DST_HI(%a1),%d0
10317 bset &31,%d0 # DENORM src; make into small norm
10318 mov.l %d0,FP_SCR0_HI(%a6)
10319 mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
10320 lea FP_SCR0(%a6),%a1
10321 bra.w fcmp_norm
10323 fcmp_dnrm_sd:
10324 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10325 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10326 mov.l DST_HI(%a1),%d0
10327 bset &31,%d0 # DENORM dst; make into small norm
10328 mov.l %d0,FP_SCR1_HI(%a6)
10329 mov.l SRC_HI(%a0),%d0
10330 bset &31,%d0 # DENORM dst; make into small norm
10331 mov.l %d0,FP_SCR0_HI(%a6)
10332 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10333 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10334 lea FP_SCR1(%a6),%a1
10335 lea FP_SCR0(%a6),%a0
10336 bra.w fcmp_norm
10338 fcmp_nrm_dnrm:
10339 mov.b SRC_EX(%a0),%d0 # determine if like signs
10340 mov.b DST_EX(%a1),%d1
10341 eor.b %d0,%d1
10342 bmi.w fcmp_dnrm_s
10344 # signs are the same, so must determine the answer ourselves.
10345 tst.b %d0 # is src op negative?
10346 bmi.b fcmp_nrm_dnrm_m # yes
10348 fcmp_nrm_dnrm_m:
10349 mov.b &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10352 fcmp_dnrm_nrm:
10353 mov.b SRC_EX(%a0),%d0 # determine if like signs
10354 mov.b DST_EX(%a1),%d1
10355 eor.b %d0,%d1
10356 bmi.w fcmp_dnrm_d
10358 # signs are the same, so must determine the answer ourselves.
10359 tst.b %d0 # is src op negative?
10360 bpl.b fcmp_dnrm_nrm_m # no
10362 fcmp_dnrm_nrm_m:
10363 mov.b &neg_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10366 #########################################################################
10367 # XDEF **************************************************************** #
10368 # fsglmul(): emulates the fsglmul instruction #
10370 # XREF **************************************************************** #
10371 # scale_to_zero_src() - scale src exponent to zero #
10372 # scale_to_zero_dst() - scale dst exponent to zero #
10373 # unf_res4() - return default underflow result for sglop #
10374 # ovf_res() - return default overflow result #
10375 # res_qnan() - return QNAN result #
10376 # res_snan() - return SNAN result #
10378 # INPUT *************************************************************** #
10379 # a0 = pointer to extended precision source operand #
10380 # a1 = pointer to extended precision destination operand #
10381 # d0 rnd prec,mode #
10383 # OUTPUT ************************************************************** #
10384 # fp0 = result #
10385 # fp1 = EXOP (if exception occurred) #
10387 # ALGORITHM *********************************************************** #
10388 # Handle NANs, infinities, and zeroes as special cases. Divide #
10389 # norms/denorms into ext/sgl/dbl precision. #
10390 # For norms/denorms, scale the exponents such that a multiply #
10391 # instruction won't cause an exception. Use the regular fsglmul to #
10392 # compute a result. Check if the regular operands would have taken #
10393 # an exception. If so, return the default overflow/underflow result #
10394 # and return the EXOP if exceptions are enabled. Else, scale the #
10395 # result operand to the proper exponent. #
10397 #########################################################################
10399 global fsglmul
10400 fsglmul:
10401 mov.l %d0,L_SCR3(%a6) # store rnd info
10403 clr.w %d1
10404 mov.b DTAG(%a6),%d1
10405 lsl.b &0x3,%d1
10406 or.b STAG(%a6),%d1
10408 bne.w fsglmul_not_norm # optimize on non-norm input
10410 fsglmul_norm:
10411 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10412 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10413 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10415 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10416 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10417 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10419 bsr.l scale_to_zero_src # scale exponent
10420 mov.l %d0,-(%sp) # save scale factor 1
10422 bsr.l scale_to_zero_dst # scale dst exponent
10424 add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2
10426 cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
10427 beq.w fsglmul_may_ovfl # result may rnd to overflow
10428 blt.w fsglmul_ovfl # result will overflow
10430 cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
10431 beq.w fsglmul_may_unfl # result may rnd to no unfl
10432 bgt.w fsglmul_unfl # result will underflow
10434 fsglmul_normal:
10435 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10437 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10438 fmov.l &0x0,%fpsr # clear FPSR
10440 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10442 fmov.l %fpsr,%d1 # save status
10443 fmov.l &0x0,%fpcr # clear FPCR
10445 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10447 fsglmul_normal_exit:
10448 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10449 mov.l %d2,-(%sp) # save d2
10450 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10451 mov.l %d1,%d2 # make a copy
10452 andi.l &0x7fff,%d1 # strip sign
10453 andi.w &0x8000,%d2 # keep old sign
10454 sub.l %d0,%d1 # add scale factor
10455 or.w %d2,%d1 # concat old sign,new exp
10456 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10457 mov.l (%sp)+,%d2 # restore d2
10458 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
10461 fsglmul_ovfl:
10462 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10464 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10465 fmov.l &0x0,%fpsr # clear FPSR
10467 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10469 fmov.l %fpsr,%d1 # save status
10470 fmov.l &0x0,%fpcr # clear FPCR
10472 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10474 fsglmul_ovfl_tst:
10476 # save setting this until now because this is where fsglmul_may_ovfl may jump in
10477 or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10479 mov.b FPCR_ENABLE(%a6),%d1
10480 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10481 bne.b fsglmul_ovfl_ena # yes
10483 fsglmul_ovfl_dis:
10484 btst &neg_bit,FPSR_CC(%a6) # is result negative?
10485 sne %d1 # set sign param accordingly
10486 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10487 andi.b &0x30,%d0 # force prec = ext
10488 bsr.l ovf_res # calculate default result
10489 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
10490 fmovm.x (%a0),&0x80 # return default result in fp0
10493 fsglmul_ovfl_ena:
10494 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10496 mov.l %d2,-(%sp) # save d2
10497 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10498 mov.l %d1,%d2 # make a copy
10499 andi.l &0x7fff,%d1 # strip sign
10500 sub.l %d0,%d1 # add scale factor
10501 subi.l &0x6000,%d1 # subtract bias
10502 andi.w &0x7fff,%d1
10503 andi.w &0x8000,%d2 # keep old sign
10504 or.w %d2,%d1 # concat old sign,new exp
10505 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10506 mov.l (%sp)+,%d2 # restore d2
10507 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10508 bra.b fsglmul_ovfl_dis
10510 fsglmul_may_ovfl:
10511 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10513 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10514 fmov.l &0x0,%fpsr # clear FPSR
10516 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10518 fmov.l %fpsr,%d1 # save status
10519 fmov.l &0x0,%fpcr # clear FPCR
10521 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10523 fabs.x %fp0,%fp1 # make a copy of result
10524 fcmp.b %fp1,&0x2 # is |result| >= 2.b?
10525 fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
10527 # no, it didn't overflow; we have correct result
10528 bra.w fsglmul_normal_exit
10530 fsglmul_unfl:
10531 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10533 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10535 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10536 fmov.l &0x0,%fpsr # clear FPSR
10538 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10540 fmov.l %fpsr,%d1 # save status
10541 fmov.l &0x0,%fpcr # clear FPCR
10543 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10545 mov.b FPCR_ENABLE(%a6),%d1
10546 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10547 bne.b fsglmul_unfl_ena # yes
10549 fsglmul_unfl_dis:
10550 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10552 lea FP_SCR0(%a6),%a0 # pass: result addr
10553 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10554 bsr.l unf_res4 # calculate default result
10555 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10556 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10560 # UNFL is enabled.
10562 fsglmul_unfl_ena:
10563 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10565 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10566 fmov.l &0x0,%fpsr # clear FPSR
10568 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10570 fmov.l &0x0,%fpcr # clear FPCR
10572 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10573 mov.l %d2,-(%sp) # save d2
10574 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10575 mov.l %d1,%d2 # make a copy
10576 andi.l &0x7fff,%d1 # strip sign
10577 andi.w &0x8000,%d2 # keep old sign
10578 sub.l %d0,%d1 # add scale factor
10579 addi.l &0x6000,%d1 # add bias
10580 andi.w &0x7fff,%d1
10581 or.w %d2,%d1 # concat old sign,new exp
10582 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10583 mov.l (%sp)+,%d2 # restore d2
10584 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10585 bra.w fsglmul_unfl_dis
10587 fsglmul_may_unfl:
10588 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10590 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10591 fmov.l &0x0,%fpsr # clear FPSR
10593 fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
10595 fmov.l %fpsr,%d1 # save status
10596 fmov.l &0x0,%fpcr # clear FPCR
10598 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10600 fabs.x %fp0,%fp1 # make a copy of result
10601 fcmp.b %fp1,&0x2 # is |result| > 2.b?
10602 fbgt.w fsglmul_normal_exit # no; no underflow occurred
10603 fblt.w fsglmul_unfl # yes; underflow occurred
10606 # we still don't know if underflow occurred. result is ~ equal to 2. but,
10607 # we don't know if the result was an underflow that rounded up to a 2 or
10608 # a normalized number that rounded down to a 2. so, redo the entire operation
10609 # using RZ as the rounding mode to see what the pre-rounded result is.
10610 # this case should be relatively rare.
10612 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
10614 mov.l L_SCR3(%a6),%d1
10615 andi.b &0xc0,%d1 # keep rnd prec
10616 ori.b &rz_mode*0x10,%d1 # insert RZ
10618 fmov.l %d1,%fpcr # set FPCR
10619 fmov.l &0x0,%fpsr # clear FPSR
10621 fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
10623 fmov.l &0x0,%fpcr # clear FPCR
10624 fabs.x %fp1 # make absolute value
10625 fcmp.b %fp1,&0x2 # is |result| < 2.b?
10626 fbge.w fsglmul_normal_exit # no; no underflow occurred
10627 bra.w fsglmul_unfl # yes, underflow occurred
10629 ##############################################################################
10632 # Single Precision Multiply: inputs are not both normalized; what are they?
# dispatch on the combined operand tags: d1 = (DTAG<<3)|STAG (built at the
# fsglmul entry, same scheme as fsgldiv/fadd below); rows = dst tag,
# columns = src tag, in the order NORM,ZERO,INF,QNAN,DENORM,SNAN.
10634 fsglmul_not_norm:
10635 mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10636 jmp (tbl_fsglmul_op.b,%pc,%d1.w*1)
10638 swbeg &48
10639 tbl_fsglmul_op:
10640 short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
10641 short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
10642 short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
10643 short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
10644 short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
10645 short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
10646 short tbl_fsglmul_op - tbl_fsglmul_op #
10647 short tbl_fsglmul_op - tbl_fsglmul_op #
10649 short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
10650 short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
10651 short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
10652 short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
10653 short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
10654 short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
10655 short tbl_fsglmul_op - tbl_fsglmul_op #
10656 short tbl_fsglmul_op - tbl_fsglmul_op #
10658 short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
10659 short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
10660 short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
10661 short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
10662 short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
10663 short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
10664 short tbl_fsglmul_op - tbl_fsglmul_op #
10665 short tbl_fsglmul_op - tbl_fsglmul_op #
10667 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
10668 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
10669 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
10670 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
10671 short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
10672 short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
10673 short tbl_fsglmul_op - tbl_fsglmul_op #
10674 short tbl_fsglmul_op - tbl_fsglmul_op #
10676 short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
10677 short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
10678 short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
10679 short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
10680 short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
10681 short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
10682 short tbl_fsglmul_op - tbl_fsglmul_op #
10683 short tbl_fsglmul_op - tbl_fsglmul_op #
10685 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
10686 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
10687 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
10688 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
10689 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
10690 short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
10691 short tbl_fsglmul_op - tbl_fsglmul_op #
10692 short tbl_fsglmul_op - tbl_fsglmul_op #
# trampolines into the shared result handlers; single-precision multiply
# shares all special-case handling with the regular fmul.
10694 fsglmul_res_operr:
10695 bra.l res_operr
10696 fsglmul_res_snan:
10697 bra.l res_snan
10698 fsglmul_res_qnan:
10699 bra.l res_qnan
10700 fsglmul_zero:
10701 bra.l fmul_zero
10702 fsglmul_inf_src:
10703 bra.l fmul_inf_src
10704 fsglmul_inf_dst:
10705 bra.l fmul_inf_dst
10707 #########################################################################
10708 # XDEF **************************************************************** #
10709 # fsgldiv(): emulates the fsgldiv instruction #
10711 # XREF **************************************************************** #
10712 # scale_to_zero_src() - scale src exponent to zero #
10713 # scale_to_zero_dst() - scale dst exponent to zero #
10714 # unf_res4() - return default underflow result for sglop #
10715 # ovf_res() - return default overflow result #
10716 # res_qnan() - return QNAN result #
10717 # res_snan() - return SNAN result #
10719 # INPUT *************************************************************** #
10720 # a0 = pointer to extended precision source operand #
10721 # a1 = pointer to extended precision destination operand #
10722 # d0 rnd prec,mode #
10724 # OUTPUT ************************************************************** #
10725 # fp0 = result #
10726 # fp1 = EXOP (if exception occurred) #
10728 # ALGORITHM *********************************************************** #
10729 # Handle NANs, infinities, and zeroes as special cases. Divide #
10730 # norms/denorms into ext/sgl/dbl precision. #
10731 # For norms/denorms, scale the exponents such that a divide #
10732 # instruction won't cause an exception. Use the regular fsgldiv to #
10733 # compute a result. Check if the regular operands would have taken #
10734 # an exception. If so, return the default overflow/underflow result #
10735 # and return the EXOP if exceptions are enabled. Else, scale the #
10736 # result operand to the proper exponent. #
10738 #########################################################################
# fsgldiv entry: d0 = rnd prec,mode; a0 = src, a1 = dst (extended precision).
# Combine the operand tags and dispatch away if either operand is special.
10740 global fsgldiv
10741 fsgldiv:
10742 mov.l %d0,L_SCR3(%a6) # store rnd info
10744 clr.w %d1
10745 mov.b DTAG(%a6),%d1
10746 lsl.b &0x3,%d1 # dst tag in bits 3-5
10747 or.b STAG(%a6),%d1 # combine src tags
10749 bne.w fsgldiv_not_norm # optimize on non-norm input
10752 # DIVIDE: NORMs and DENORMs ONLY!
10754 fsgldiv_norm:
10755 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
10756 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
10757 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
10759 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
10760 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
10761 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
10763 bsr.l scale_to_zero_src # calculate scale factor 1
10764 mov.l %d0,-(%sp) # save scale factor 1
10766 bsr.l scale_to_zero_dst # calculate scale factor 2
10768 neg.l (%sp) # (%sp) = -scale1
10769 add.l %d0,(%sp) # S.F. = scale2 - scale1
10771 mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
10772 lsr.b &0x6,%d1
10773 mov.l (%sp)+,%d0 # d0 = net scale factor
10774 cmpi.l %d0,&0x3fff-0x7ffe # will result overflow?
10775 ble.w fsgldiv_may_ovfl # maybe; go check
10777 cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
10778 beq.w fsgldiv_may_unfl # maybe
10779 bgt.w fsgldiv_unfl # yes; go handle underflow
10781 fsgldiv_normal:
10782 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10784 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10785 fmov.l &0x0,%fpsr # clear FPSR
10787 fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide
10789 fmov.l %fpsr,%d1 # save FPSR
10790 fmov.l &0x0,%fpcr # clear FPCR
10792 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10794 fsgldiv_normal_exit:
10795 fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
10796 mov.l %d2,-(%sp) # save d2
10797 mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
10798 mov.l %d1,%d2 # make a copy
10799 andi.l &0x7fff,%d1 # strip sign
10800 andi.w &0x8000,%d2 # keep old sign
10801 sub.l %d0,%d1 # unscale: subtract scale factor
10802 or.w %d2,%d1 # concat old sign,new exp
10803 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10804 mov.l (%sp)+,%d2 # restore d2
10805 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
# NOTE(review): line numbers jump 10805->10808 here; the routine's rts
# presumably sits in the gap but is not visible in this extract — confirm.
# the divide may overflow: perform it, then check the unscaled result
# exponent against 0x7fff to see whether overflow really happened.
10808 fsgldiv_may_ovfl:
10809 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10811 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10812 fmov.l &0x0,%fpsr # clear FPSR
10814 fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
10816 fmov.l %fpsr,%d1 # save status
10817 fmov.l &0x0,%fpcr # clear FPCR
10819 or.l %d1,USER_FPSR(%a6) # save INEX,N
10821 fmovm.x &0x01,-(%sp) # save result to stack
10822 mov.w (%sp),%d1 # fetch new exponent
10823 add.l &0xc,%sp # discard result copy from stack
10824 andi.l &0x7fff,%d1 # strip sign
10825 sub.l %d0,%d1 # unscale: subtract scale factor
10826 cmp.l %d1,&0x7fff # did divide overflow?
10827 blt.b fsgldiv_normal_exit # no; finish normally
10829 fsgldiv_ovfl_tst:
10830 or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10832 mov.b FPCR_ENABLE(%a6),%d1
10833 andi.b &0x13,%d1 # is OVFL or INEX enabled?
10834 bne.b fsgldiv_ovfl_ena # yes
10836 fsgldiv_ovfl_dis:
10837 btst &neg_bit,FPSR_CC(%a6) # is result negative
10838 sne %d1 # set sign param accordingly
10839 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
10840 andi.b &0x30,%d0 # kill precision
10841 bsr.l ovf_res # calculate default result
10842 or.b %d0,FPSR_CC(%a6) # set INF if applicable
10843 fmovm.x (%a0),&0x80 # return default result in fp0
# OVFL/INEX enabled: build the EXOP (exceptional operand) by biasing the
# true exponent down by 0x6000 and return it in fp1 alongside the default.
10846 fsgldiv_ovfl_ena:
10847 fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
10849 mov.l %d2,-(%sp) # save d2
10850 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10851 mov.l %d1,%d2 # make a copy
10852 andi.l &0x7fff,%d1 # strip sign
10853 andi.w &0x8000,%d2 # keep old sign
10854 sub.l %d0,%d1 # unscale: subtract scale factor
10855 subi.l &0x6000,%d1 # subtract new bias
10856 andi.w &0x7fff,%d1 # clear ms bit
10857 or.w %d2,%d1 # concat old sign,new exp
10858 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10859 mov.l (%sp)+,%d2 # restore d2
10860 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10861 bra.b fsgldiv_ovfl_dis # go return default result too
# the divide underflows for certain: compute under RZ so unf_res4 can
# round the intermediate correctly, then build the default result
# (and the EXOP in fp1 if UNFL/INEX traps are enabled).
10863 fsgldiv_unfl:
10864 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10866 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10868 fmov.l &rz_mode*0x10,%fpcr # set FPCR
10869 fmov.l &0x0,%fpsr # clear FPSR
10871 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10873 fmov.l %fpsr,%d1 # save status
10874 fmov.l &0x0,%fpcr # clear FPCR
10876 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10878 mov.b FPCR_ENABLE(%a6),%d1
10879 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
10880 bne.b fsgldiv_unfl_ena # yes
10882 fsgldiv_unfl_dis:
10883 fmovm.x &0x80,FP_SCR0(%a6) # store out result
10885 lea FP_SCR0(%a6),%a0 # pass: result addr
10886 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
10887 bsr.l unf_res4 # calculate default result
10888 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
10889 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
10893 # UNFL is enabled.
10895 fsgldiv_unfl_ena:
10896 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
10898 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10899 fmov.l &0x0,%fpsr # clear FPSR
10901 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10903 fmov.l &0x0,%fpcr # clear FPCR
10905 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
10906 mov.l %d2,-(%sp) # save d2
10907 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
10908 mov.l %d1,%d2 # make a copy
10909 andi.l &0x7fff,%d1 # strip sign
10910 andi.w &0x8000,%d2 # keep old sign
10911 sub.l %d0,%d1 # unscale: subtract scale factor
10912 addi.l &0x6000,%d1 # add bias
10913 andi.w &0x7fff,%d1 # clear top bit
10914 or.w %d2,%d1 # concat old sign, new exp
10915 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
10916 mov.l (%sp)+,%d2 # restore d2
10917 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
10918 bra.b fsgldiv_unfl_dis # go return default result too
10921 # the divide operation MAY underflow:
# the divide result sits at the underflow threshold (~1.0 with scaled
# exponents); compare |result| against 1.0, re-doing under RZ for the
# exactly-1.0 borderline case.
10923 fsgldiv_may_unfl:
10924 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
10926 fmov.l L_SCR3(%a6),%fpcr # set FPCR
10927 fmov.l &0x0,%fpsr # clear FPSR
10929 fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
10931 fmov.l %fpsr,%d1 # save status
10932 fmov.l &0x0,%fpcr # clear FPCR
10934 or.l %d1,USER_FPSR(%a6) # save INEX2,N
10936 fabs.x %fp0,%fp1 # make a copy of result
10937 fcmp.b %fp1,&0x1 # is |result| > 1.b?
10938 fbgt.w fsgldiv_normal_exit # no; no underflow occurred
10939 fblt.w fsgldiv_unfl # yes; underflow occurred
10942 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10943 # we don't know if the result was an underflow that rounded up to a 1
10944 # or a normalized number that rounded down to a 1. so, redo the entire
10945 # operation using RZ as the rounding mode to see what the pre-rounded
10946 # result is. this case should be relatively rare.
10948 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
10950 clr.l %d1 # clear scratch register
10951 ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
10953 fmov.l %d1,%fpcr # set FPCR
10954 fmov.l &0x0,%fpsr # clear FPSR
10956 fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
10958 fmov.l &0x0,%fpcr # clear FPCR
10959 fabs.x %fp1 # make absolute value
10960 fcmp.b %fp1,&0x1 # is |result| < 1.b?
10961 fbge.w fsgldiv_normal_exit # no; no underflow occurred
10962 bra.w fsgldiv_unfl # yes; underflow occurred
10964 ############################################################################
10967 # Divide: inputs are not both normalized; what are they?
# dispatch on the combined operand tags: d1 = (DTAG<<3)|STAG; rows = dst
# (dividend) tag, columns = src (divisor) tag, order NORM,ZERO,INF,QNAN,
# DENORM,SNAN; entries are word offsets from the table base.
10969 fsgldiv_not_norm:
10970 mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10971 jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1)
10973 swbeg &48
10974 tbl_fsgldiv_op:
10975 short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
10976 short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
10977 short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
10978 short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
10979 short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
10980 short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
10981 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10982 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10984 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
10985 short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
10986 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
10987 short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
10988 short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
10989 short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
10990 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10991 short tbl_fsgldiv_op - tbl_fsgldiv_op #
10993 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
10994 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
10995 short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
10996 short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
10997 short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
10998 short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
10999 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11000 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11002 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
11003 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
11004 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
11005 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
11006 short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
11007 short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
11008 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11009 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11011 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
11012 short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
11013 short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
11014 short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
11015 short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
11016 short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
11017 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11018 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11020 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
11021 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
11022 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
11023 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
11024 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
11025 short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
11026 short tbl_fsgldiv_op - tbl_fsgldiv_op #
11027 short tbl_fsgldiv_op - tbl_fsgldiv_op #
# trampolines into the shared result handlers; single-precision divide
# shares all special-case handling with the regular fdiv.
11029 fsgldiv_res_qnan:
11030 bra.l res_qnan
11031 fsgldiv_res_snan:
11032 bra.l res_snan
11033 fsgldiv_res_operr:
11034 bra.l res_operr
11035 fsgldiv_inf_load:
11036 bra.l fdiv_inf_load
11037 fsgldiv_zero_load:
11038 bra.l fdiv_zero_load
11039 fsgldiv_inf_dst:
11040 bra.l fdiv_inf_dst
11042 #########################################################################
11043 # XDEF **************************************************************** #
11044 # fadd(): emulates the fadd instruction #
11045 # fsadd(): emulates the fsadd instruction #
11046 # fdadd(): emulates the fdadd instruction #
11048 # XREF **************************************************************** #
11049 # addsub_scaler2() - scale the operands so they won't take exc #
11050 # ovf_res() - return default overflow result #
11051 # unf_res() - return default underflow result #
11052 # res_qnan() - set QNAN result #
11053 # res_snan() - set SNAN result #
11054 # res_operr() - set OPERR result #
11055 # scale_to_zero_src() - set src operand exponent equal to zero #
11056 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11058 # INPUT *************************************************************** #
11059 # a0 = pointer to extended precision source operand #
11060 # a1 = pointer to extended precision destination operand #
11062 # OUTPUT ************************************************************** #
11063 # fp0 = result #
11064 # fp1 = EXOP (if exception occurred) #
11066 # ALGORITHM *********************************************************** #
11067 # Handle NANs, infinities, and zeroes as special cases. Divide #
11068 # norms into extended, single, and double precision. #
11069 # Do addition after scaling exponents such that exception won't #
11070 # occur. Then, check result exponent to see if exception would have #
11071 # occurred. If so, return default result and maybe EXOP. Else, insert #
11072 # the correct result exponent and return. Set FPSR bits as appropriate. #
11074 #########################################################################
# fadd/fsadd/fdadd entry: d0 = rnd prec,mode; a0 = src, a1 = dst (ext prec).
# fsadd/fdadd force single/double rounding precision, then share fadd.
11076 global fsadd
11077 fsadd:
11078 andi.b &0x30,%d0 # clear rnd prec
11079 ori.b &s_mode*0x10,%d0 # insert sgl prec
11080 bra.b fadd
11082 global fdadd
11083 fdadd:
11084 andi.b &0x30,%d0 # clear rnd prec
11085 ori.b &d_mode*0x10,%d0 # insert dbl prec
11087 global fadd
11088 fadd:
11089 mov.l %d0,L_SCR3(%a6) # store rnd info
11091 clr.w %d1
11092 mov.b DTAG(%a6),%d1
11093 lsl.b &0x3,%d1 # dst tag in bits 3-5
11094 or.b STAG(%a6),%d1 # combine src tags
11096 bne.w fadd_not_norm # optimize on non-norm input
11099 # ADD: norms and denorms
# ADD of NORMs/DENORMs: scale both operands, add, then classify the
# unscaled result exponent against the per-precision ovfl/unfl tables.
11101 fadd_norm:
11102 bsr.l addsub_scaler2 # scale exponents
11104 fadd_zero_entry:
11105 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11107 fmov.l &0x0,%fpsr # clear FPSR
11108 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11110 fadd.x FP_SCR0(%a6),%fp0 # execute add
11112 fmov.l &0x0,%fpcr # clear FPCR
11113 fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11115 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11117 fbeq.w fadd_zero_exit # if result is zero, end now
11119 mov.l %d2,-(%sp) # save d2
11121 fmovm.x &0x01,-(%sp) # save result to stack
11123 mov.w 2+L_SCR3(%a6),%d1 # fetch rnd prec,mode
11124 lsr.b &0x6,%d1 # d1 = rnd prec (table index)
11126 mov.w (%sp),%d2 # fetch new sign, exp
11127 andi.l &0x7fff,%d2 # strip sign
11128 sub.l %d0,%d2 # unscale: subtract scale factor
11130 cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11131 bge.b fadd_ovfl # yes
11133 cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11134 blt.w fadd_unfl # yes
11135 beq.w fadd_may_unfl # maybe; go find out
11137 fadd_normal:
11138 mov.w (%sp),%d1
11139 andi.w &0x8000,%d1 # keep sign
11140 or.w %d2,%d1 # concat sign,new exp
11141 mov.w %d1,(%sp) # insert new exponent
11143 fmovm.x (%sp)+,&0x80 # return result in fp0
11145 mov.l (%sp)+,%d2 # restore d2
11148 fadd_zero_exit:
11149 # fmov.s &0x00000000,%fp0 # return zero in fp0
# NOTE(review): line numbers jump 11149->11152; the rts presumably sits
# in the gap but is not visible in this extract — confirm.
# per-precision exponent thresholds, indexed by rnd prec (ext/sgl/dbl):
11152 tbl_fadd_ovfl:
11153 long 0x7fff # ext ovfl
11154 long 0x407f # sgl ovfl
11155 long 0x43ff # dbl ovfl
11157 tbl_fadd_unfl:
11158 long 0x0000 # ext unfl
11159 long 0x3f81 # sgl unfl
11160 long 0x3c01 # dbl unfl
# the add overflowed: return the default overflow result, and also the
# EXOP in fp1 if OVFL/INEX traps are enabled.
11162 fadd_ovfl:
11163 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11165 mov.b FPCR_ENABLE(%a6),%d1
11166 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11167 bne.b fadd_ovfl_ena # yes
11169 add.l &0xc,%sp # discard result copy from stack
11170 fadd_ovfl_dis:
11171 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11172 sne %d1 # set sign param accordingly
11173 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11174 bsr.l ovf_res # calculate default result
11175 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11176 fmovm.x (%a0),&0x80 # return default result in fp0
11177 mov.l (%sp)+,%d2 # restore d2
11180 fadd_ovfl_ena:
11181 mov.b L_SCR3(%a6),%d1
11182 andi.b &0xc0,%d1 # is precision extended?
11183 bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11185 fadd_ovfl_ena_cont:
11186 mov.w (%sp),%d1
11187 andi.w &0x8000,%d1 # keep sign
11188 subi.l &0x6000,%d2 # subtract extra bias
11189 andi.w &0x7fff,%d2 # clear ms bit
11190 or.w %d2,%d1 # concat sign,new exp
11191 mov.w %d1,(%sp) # insert new exponent
11193 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11194 bra.b fadd_ovfl_dis # go return default result too
# sgl/dbl precision: re-do the add at ext precision (rnd mode only) so
# the EXOP mantissa is not rounded to the shorter precision.
11196 fadd_ovfl_ena_sd:
11197 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11199 mov.l L_SCR3(%a6),%d1
11200 andi.b &0x30,%d1 # keep rnd mode
11201 fmov.l %d1,%fpcr # set FPCR
11203 fadd.x FP_SCR0(%a6),%fp0 # execute add
11205 fmov.l &0x0,%fpcr # clear FPCR
11207 add.l &0xc,%sp # discard old result copy
11208 fmovm.x &0x01,-(%sp) # save new result to stack
11209 bra.b fadd_ovfl_ena_cont
# the add underflowed: re-do under RZ so unf_res can round the
# intermediate, return the default result, plus the EXOP in fp1 if
# UNFL/INEX traps are enabled.
11211 fadd_unfl:
11212 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11214 add.l &0xc,%sp # discard result copy from stack
11216 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11218 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11219 fmov.l &0x0,%fpsr # clear FPSR
11221 fadd.x FP_SCR0(%a6),%fp0 # execute add
11223 fmov.l &0x0,%fpcr # clear FPCR
11224 fmov.l %fpsr,%d1 # save status
11226 or.l %d1,USER_FPSR(%a6) # save INEX,N
11228 mov.b FPCR_ENABLE(%a6),%d1
11229 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11230 bne.b fadd_unfl_ena # yes
11232 fadd_unfl_dis:
11233 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11235 lea FP_SCR0(%a6),%a0 # pass: result addr
11236 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11237 bsr.l unf_res # calculate default result
11238 or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11239 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11240 mov.l (%sp)+,%d2 # restore d2
11243 fadd_unfl_ena:
11244 fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11246 mov.l L_SCR3(%a6),%d1
11247 andi.b &0xc0,%d1 # is precision extended?
11248 bne.b fadd_unfl_ena_sd # no; sgl or dbl
11250 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11252 fadd_unfl_ena_cont:
11253 fmov.l &0x0,%fpsr # clear FPSR
11255 fadd.x FP_SCR0(%a6),%fp1 # execute add
11257 fmov.l &0x0,%fpcr # clear FPCR
11259 fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11260 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11261 mov.l %d1,%d2 # make a copy
11262 andi.l &0x7fff,%d1 # strip sign
11263 andi.w &0x8000,%d2 # keep old sign
11264 sub.l %d0,%d1 # unscale: subtract scale factor
11265 addi.l &0x6000,%d1 # add new bias
11266 andi.w &0x7fff,%d1 # clear top bit
11267 or.w %d2,%d1 # concat sign,new exp
11268 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11269 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11270 bra.w fadd_unfl_dis # go return default result too
# sgl/dbl precision EXOP: use rnd mode only (ext precision) for the redo.
11272 fadd_unfl_ena_sd:
11273 mov.l L_SCR3(%a6),%d1
11274 andi.b &0x30,%d1 # use only rnd mode
11275 fmov.l %d1,%fpcr # set FPCR
11277 bra.b fadd_unfl_ena_cont
11280 # result is equal to the smallest normalized number in the selected precision
11281 # if the precision is extended, this result could not have come from an
11282 # underflow that rounded up.
# result exponent equals the smallest normalized exponent for the selected
# precision; decide underflow by inspecting the mantissa and, in the
# borderline case, re-doing the add under RZ and comparing magnitudes.
11284 fadd_may_unfl:
11285 mov.l L_SCR3(%a6),%d1
11286 andi.b &0xc0,%d1 # extended precision?
11287 beq.w fadd_normal # yes; no underflow occurred
11289 mov.l 0x4(%sp),%d1 # extract hi(man)
11290 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11291 bne.w fadd_normal # no; no underflow occurred
11293 tst.l 0x8(%sp) # is lo(man) = 0x0?
11294 bne.w fadd_normal # no; no underflow occurred
11296 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11297 beq.w fadd_normal # no; no underflow occurred
11300 # ok, so now the result has a exponent equal to the smallest normalized
11301 # exponent for the selected precision. also, the mantissa is equal to
11302 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11303 # g,r,s.
11304 # now, we must determine whether the pre-rounded result was an underflow
11305 # rounded "up" or a normalized number rounded "down".
11306 # so, we do this be re-executing the add using RZ as the rounding mode and
11307 # seeing if the new result is smaller or equal to the current result.
11309 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11311 mov.l L_SCR3(%a6),%d1
11312 andi.b &0xc0,%d1 # keep rnd prec
11313 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11314 fmov.l %d1,%fpcr # set FPCR
11315 fmov.l &0x0,%fpsr # clear FPSR
11317 fadd.x FP_SCR0(%a6),%fp1 # execute add
11319 fmov.l &0x0,%fpcr # clear FPCR
11321 fabs.x %fp0 # compare absolute values
11322 fabs.x %fp1
11323 fcmp.x %fp0,%fp1 # is first result > second?
11325 fbgt.w fadd_unfl # yes; it's an underflow
11326 bra.w fadd_normal # no; it's not an underflow
11328 ##########################################################################
11331 # Add: inputs are not both normalized; what are they?
# dispatch on the combined operand tags: d1 = (DTAG<<3)|STAG; rows = dst
# tag, columns = src tag, order NORM,ZERO,INF,QNAN,DENORM,SNAN.
11333 fadd_not_norm:
11334 mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11335 jmp (tbl_fadd_op.b,%pc,%d1.w*1)
11337 swbeg &48
11338 tbl_fadd_op:
11339 short fadd_norm - tbl_fadd_op # NORM + NORM
11340 short fadd_zero_src - tbl_fadd_op # NORM + ZERO
11341 short fadd_inf_src - tbl_fadd_op # NORM + INF
11342 short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
11343 short fadd_norm - tbl_fadd_op # NORM + DENORM
11344 short fadd_res_snan - tbl_fadd_op # NORM + SNAN
11345 short tbl_fadd_op - tbl_fadd_op #
11346 short tbl_fadd_op - tbl_fadd_op #
11348 short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
11349 short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
11350 short fadd_inf_src - tbl_fadd_op # ZERO + INF
11351 short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
11352 short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
11353 short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
11354 short tbl_fadd_op - tbl_fadd_op #
11355 short tbl_fadd_op - tbl_fadd_op #
11357 short fadd_inf_dst - tbl_fadd_op # INF + NORM
11358 short fadd_inf_dst - tbl_fadd_op # INF + ZERO
11359 short fadd_inf_2 - tbl_fadd_op # INF + INF
11360 short fadd_res_qnan - tbl_fadd_op # INF + QNAN
11361 short fadd_inf_dst - tbl_fadd_op # INF + DENORM
11362 short fadd_res_snan - tbl_fadd_op # INF + SNAN
11363 short tbl_fadd_op - tbl_fadd_op #
11364 short tbl_fadd_op - tbl_fadd_op #
11366 short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
11367 short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
11368 short fadd_res_qnan - tbl_fadd_op # QNAN + INF
11369 short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
11370 short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
11371 short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
11372 short tbl_fadd_op - tbl_fadd_op #
11373 short tbl_fadd_op - tbl_fadd_op #
11375 short fadd_norm - tbl_fadd_op # DENORM + NORM
11376 short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
11377 short fadd_inf_src - tbl_fadd_op # DENORM + INF
11378 short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
11379 short fadd_norm - tbl_fadd_op # DENORM + DENORM
11380 short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
11381 short tbl_fadd_op - tbl_fadd_op #
11382 short tbl_fadd_op - tbl_fadd_op #
11384 short fadd_res_snan - tbl_fadd_op # SNAN + NORM
11385 short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
11386 short fadd_res_snan - tbl_fadd_op # SNAN + INF
11387 short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
11388 short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
11389 short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
11390 short tbl_fadd_op - tbl_fadd_op #
11391 short tbl_fadd_op - tbl_fadd_op #
# trampolines into the shared NaN result handlers.
11393 fadd_res_qnan:
11394 bra.l res_qnan
11395 fadd_res_snan:
11396 bra.l res_snan
11399 # both operands are ZEROes
# both operands are ZEROes: result sign follows IEEE rules — like signs
# keep that sign; opposite signs yield +0 except under RM, which yields -0.
11401 fadd_zero_2:
11402 mov.b SRC_EX(%a0),%d0 # are the signs opposite
11403 mov.b DST_EX(%a1),%d1
11404 eor.b %d0,%d1
11405 bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11407 # the signs are the same. so determine whether they are positive or negative
11408 # and return the appropriately signed zero.
11409 tst.b %d0 # are ZEROes positive or negative?
11410 bmi.b fadd_zero_rm # negative
11411 fmov.s &0x00000000,%fp0 # return +ZERO
11412 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11416 # the ZEROes have opposite signs:
11417 # - therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11418 # - -ZERO is returned in the case of RM.
11420 fadd_zero_2_chk_rm:
11421 mov.b 3+L_SCR3(%a6),%d1
11422 andi.b &0x30,%d1 # extract rnd mode
11423 cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11424 beq.b fadd_zero_rm # yes
11425 fmov.s &0x00000000,%fp0 # return +ZERO
11426 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11429 fadd_zero_rm:
11430 fmov.s &0x80000000,%fp0 # return -ZERO
11431 mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11435 # one operand is a ZERO and the other is a DENORM or NORM. scale
11436 # the DENORM or NORM and jump to the regular fadd routine.
# one operand is ZERO, the other DENORM/NORM: scale the non-zero operand,
# zero out the other scratch slot, and re-enter the regular add path.
11438 fadd_zero_dst:
11439 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11440 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11441 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11442 bsr.l scale_to_zero_src # scale the operand
11443 clr.w FP_SCR1_EX(%a6) # dst = +0.0
11444 clr.l FP_SCR1_HI(%a6)
11445 clr.l FP_SCR1_LO(%a6)
11446 bra.w fadd_zero_entry # go execute fadd
11448 fadd_zero_src:
11449 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11450 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11451 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11452 bsr.l scale_to_zero_dst # scale the operand
11453 clr.w FP_SCR0_EX(%a6) # src = +0.0
11454 clr.l FP_SCR0_HI(%a6)
11455 clr.l FP_SCR0_LO(%a6)
11456 bra.w fadd_zero_entry # go execute fadd
11459 # both operands are INFs. an OPERR will result if the INFs have
11460 # different signs. else, an INF of the same sign is returned
# both operands are INFs: opposite signs -> OPERR; like signs -> that INF.
11462 fadd_inf_2:
11463 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11464 mov.b DST_EX(%a1),%d1
11465 eor.b %d1,%d0
11466 bmi.l res_operr # weed out (-INF)+(+INF)
11468 # ok, so it's not an OPERR. but, we do have to remember to return the
11469 # src INF since that's where the 881/882 gets the j-bit from...
11472 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11474 fadd_inf_src:
11475 fmovm.x SRC(%a0),&0x80 # return src INF
11476 tst.b SRC_EX(%a0) # is INF positive?
11477 bpl.b fadd_inf_done # yes; we're done
11478 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11482 # operands are INF and one of {ZERO, INF, DENORM, NORM}
11484 fadd_inf_dst:
11485 fmovm.x DST(%a1),&0x80 # return dst INF
11486 tst.b DST_EX(%a1) # is INF positive?
11487 bpl.b fadd_inf_done # yes; we're done
11488 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11491 fadd_inf_done:
11492 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11495 #########################################################################
11496 # XDEF **************************************************************** #
11497 # fsub(): emulates the fsub instruction #
11498 # fssub(): emulates the fssub instruction #
11499 # fdsub(): emulates the fdsub instruction #
11501 # XREF **************************************************************** #
11502 # addsub_scaler2() - scale the operands so they won't take exc #
11503 # ovf_res() - return default overflow result #
11504 # unf_res() - return default underflow result #
11505 # res_qnan() - set QNAN result #
11506 # res_snan() - set SNAN result #
11507 # res_operr() - set OPERR result #
11508 # scale_to_zero_src() - set src operand exponent equal to zero #
11509 # scale_to_zero_dst() - set dst operand exponent equal to zero #
11511 # INPUT *************************************************************** #
11512 # a0 = pointer to extended precision source operand #
11513 # a1 = pointer to extended precision destination operand #
11515 # OUTPUT ************************************************************** #
11516 # fp0 = result #
11517 # fp1 = EXOP (if exception occurred) #
11519 # ALGORITHM *********************************************************** #
11520 # Handle NANs, infinities, and zeroes as special cases. Divide #
11521 # norms into extended, single, and double precision. #
11522 # Do subtraction after scaling exponents such that exception won't#
11523 # occur. Then, check result exponent to see if exception would have #
11524 # occurred. If so, return default result and maybe EXOP. Else, insert #
11525 # the correct result exponent and return. Set FPSR bits as appropriate. #
11527 #########################################################################
# fsub/fssub/fdsub entry: d0 = rnd prec,mode; a0 = src, a1 = dst (ext prec).
# fssub/fdsub force single/double rounding precision, then share fsub.
11529 global fssub
11530 fssub:
11531 andi.b &0x30,%d0 # clear rnd prec
11532 ori.b &s_mode*0x10,%d0 # insert sgl prec
11533 bra.b fsub
11535 global fdsub
11536 fdsub:
11537 andi.b &0x30,%d0 # clear rnd prec
11538 ori.b &d_mode*0x10,%d0 # insert dbl prec
11540 global fsub
11541 fsub:
11542 mov.l %d0,L_SCR3(%a6) # store rnd info
11544 clr.w %d1
11545 mov.b DTAG(%a6),%d1
11546 lsl.b &0x3,%d1 # dst tag in bits 3-5
11547 or.b STAG(%a6),%d1 # combine src tags
11549 bne.w fsub_not_norm # optimize on non-norm input
11552 # SUB: norms and denorms
# SUB of NORMs/DENORMs: scale both operands, subtract, then classify the
# unscaled result exponent against the per-precision ovfl/unfl tables.
11554 fsub_norm:
11555 bsr.l addsub_scaler2 # scale exponents
11557 fsub_zero_entry:
11558 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11560 fmov.l &0x0,%fpsr # clear FPSR
11561 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11563 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11565 fmov.l &0x0,%fpcr # clear FPCR
11566 fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11568 or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11570 fbeq.w fsub_zero_exit # if result zero, end now
11572 mov.l %d2,-(%sp) # save d2
11574 fmovm.x &0x01,-(%sp) # save result to stack
11576 mov.w 2+L_SCR3(%a6),%d1 # fetch rnd prec,mode
11577 lsr.b &0x6,%d1 # d1 = rnd prec (table index)
11579 mov.w (%sp),%d2 # fetch new exponent
11580 andi.l &0x7fff,%d2 # strip sign
11581 sub.l %d0,%d2 # unscale: subtract scale factor
11583 cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11584 bge.b fsub_ovfl # yes
11586 cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11587 blt.w fsub_unfl # yes
11588 beq.w fsub_may_unfl # maybe; go find out
11590 fsub_normal:
11591 mov.w (%sp),%d1
11592 andi.w &0x8000,%d1 # keep sign
11593 or.w %d2,%d1 # concat sign,new exp
11594 mov.w %d1,(%sp) # insert new exponent
11596 fmovm.x (%sp)+,&0x80 # return result in fp0
11598 mov.l (%sp)+,%d2 # restore d2
11601 fsub_zero_exit:
11602 # fmov.s &0x00000000,%fp0 # return zero in fp0
# per-precision exponent thresholds, indexed by rnd prec (ext/sgl/dbl):
11605 tbl_fsub_ovfl:
11606 long 0x7fff # ext ovfl
11607 long 0x407f # sgl ovfl
11608 long 0x43ff # dbl ovfl
11610 tbl_fsub_unfl:
11611 long 0x0000 # ext unfl
11612 long 0x3f81 # sgl unfl
11613 long 0x3c01 # dbl unfl
# result overflowed: set exception bits, then either return the default
# overflow result (disabled) or also build the EXOP in fp1 (enabled).
11615 fsub_ovfl:
11616 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11618 mov.b FPCR_ENABLE(%a6),%d1
11619 andi.b &0x13,%d1 # is OVFL or INEX enabled?
11620 bne.b fsub_ovfl_ena # yes
# discard the stacked 12-byte intermediate result
11622 add.l &0xc,%sp
11623 fsub_ovfl_dis:
11624 btst &neg_bit,FPSR_CC(%a6) # is result negative?
11625 sne %d1 # set sign param accordingly
11626 mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11627 bsr.l ovf_res # calculate default result
11628 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11629 fmovm.x (%a0),&0x80 # return default result in fp0
11630 mov.l (%sp)+,%d2 # restore d2
# OVFL/INEX enabled: build the EXOP (result with exponent rebiased
# by -0x6000) and return it in fp1 before taking the default path.
11633 fsub_ovfl_ena:
11634 mov.b L_SCR3(%a6),%d1
11635 andi.b &0xc0,%d1 # is precision extended?
11636 bne.b fsub_ovfl_ena_sd # no
11638 fsub_ovfl_ena_cont:
11639 mov.w (%sp),%d1 # fetch {sgn,exp}
11640 andi.w &0x8000,%d1 # keep sign
11641 subi.l &0x6000,%d2 # subtract new bias
11642 andi.w &0x7fff,%d2 # clear top bit
11643 or.w %d2,%d1 # concat sign,exp
11644 mov.w %d1,(%sp) # insert new exponent
11646 fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11647 bra.b fsub_ovfl_dis
# sgl/dbl destination precision: redo the subtract rounded to extended
# (keeping the rnd mode) so the EXOP mantissa has full precision.
11649 fsub_ovfl_ena_sd:
11650 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11652 mov.l L_SCR3(%a6),%d1
11653 andi.b &0x30,%d1 # clear rnd prec
11654 fmov.l %d1,%fpcr # set FPCR
11656 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11658 fmov.l &0x0,%fpcr # clear FPCR
# replace the stacked result with the extended-precision one
11660 add.l &0xc,%sp
11661 fmovm.x &0x01,-(%sp)
11662 bra.b fsub_ovfl_ena_cont
# result underflowed: redo the subtract in RZ mode, then either return
# the default underflow result (disabled) or also build the EXOP (enabled).
11664 fsub_unfl:
11665 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
# discard the stacked 12-byte intermediate result
11667 add.l &0xc,%sp
11669 fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11671 fmov.l &rz_mode*0x10,%fpcr # set FPCR
11672 fmov.l &0x0,%fpsr # clear FPSR
11674 fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11676 fmov.l &0x0,%fpcr # clear FPCR
11677 fmov.l %fpsr,%d1 # save status
11679 or.l %d1,USER_FPSR(%a6)
11681 mov.b FPCR_ENABLE(%a6),%d1
11682 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11683 bne.b fsub_unfl_ena # yes
11685 fsub_unfl_dis:
11686 fmovm.x &0x80,FP_SCR0(%a6) # store out result
11688 lea FP_SCR0(%a6),%a0 # pass: result addr
11689 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11690 bsr.l unf_res # calculate default result
11691 or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11692 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11693 mov.l (%sp)+,%d2 # restore d2
# UNFL/INEX enabled: compute the EXOP (result rebiased by +0x6000)
# in fp1, then fall back to the default-result path.
11696 fsub_unfl_ena:
11697 fmovm.x FP_SCR1(%a6),&0x40
11699 mov.l L_SCR3(%a6),%d1
11700 andi.b &0xc0,%d1 # is precision extended?
11701 bne.b fsub_unfl_ena_sd # no
11703 fmov.l L_SCR3(%a6),%fpcr # set FPCR
11705 fsub_unfl_ena_cont:
11706 fmov.l &0x0,%fpsr # clear FPSR
11708 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11710 fmov.l &0x0,%fpcr # clear FPCR
11712 fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
11713 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11714 mov.l %d1,%d2 # make a copy
11715 andi.l &0x7fff,%d1 # strip sign
11716 andi.w &0x8000,%d2 # keep old sign
11717 sub.l %d0,%d1 # add scale factor
11718 addi.l &0x6000,%d1 # add new bias
11719 andi.w &0x7fff,%d1 # clear top bit
11720 or.w %d2,%d1 # concat sgn,exp
11721 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11722 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11723 bra.w fsub_unfl_dis
# sgl/dbl precision: strip the precision field so the EXOP subtract
# is performed in extended precision with the user's rnd mode.
11725 fsub_unfl_ena_sd:
11726 mov.l L_SCR3(%a6),%d1
11727 andi.b &0x30,%d1 # clear rnd prec
11728 fmov.l %d1,%fpcr # set FPCR
11730 bra.b fsub_unfl_ena_cont
11733 # result is equal to the smallest normalized number in the selected precision
11734 # if the precision is extended, this result could not have come from an
11735 # underflow that rounded up.
11737 fsub_may_unfl:
11738 mov.l L_SCR3(%a6),%d1
11739 andi.b &0xc0,%d1 # fetch rnd prec
11740 beq.w fsub_normal # yes; no underflow occurred
# underflow-that-rounded-up is only possible when the rounded result is
# exactly 1.0 * 2^min_exp: hi(man)=0x80000000, lo(man)=0, and INEX2 set.
11742 mov.l 0x4(%sp),%d1
11743 cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11744 bne.w fsub_normal # no; no underflow occurred
11746 tst.l 0x8(%sp) # is lo(man) = 0x0?
11747 bne.w fsub_normal # no; no underflow occurred
11749 btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11750 beq.w fsub_normal # no; no underflow occurred
11753 # ok, so now the result has an exponent equal to the smallest normalized
11754 # exponent for the selected precision. also, the mantissa is equal to
11755 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11756 # g,r,s.
11757 # now, we must determine whether the pre-rounded result was an underflow
11758 # rounded "up" or a normalized number rounded "down".
11759 # so, we do this by re-executing the subtract using RZ as the rounding mode
11760 # and seeing if the new result is smaller or equal to the current result.
11762 fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11764 mov.l L_SCR3(%a6),%d1
11765 andi.b &0xc0,%d1 # keep rnd prec
11766 ori.b &rz_mode*0x10,%d1 # insert rnd mode
11767 fmov.l %d1,%fpcr # set FPCR
11768 fmov.l &0x0,%fpsr # clear FPSR
11770 fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11772 fmov.l &0x0,%fpcr # clear FPCR
11774 fabs.x %fp0 # compare absolute values
11775 fabs.x %fp1
11776 fcmp.x %fp0,%fp1 # is first result > second?
11778 fbgt.w fsub_unfl # yes; it's an underflow
11779 bra.w fsub_normal # no; it's not an underflow
11781 ##########################################################################
11784 # Sub: inputs are not both normalized; what are they?
# dispatch on the (DTAG<<3)|STAG index built at fsub entry; the table is
# laid out as 6 groups of 8 (dst tag major, src tag minor).
11786 fsub_not_norm:
11787 mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11788 jmp (tbl_fsub_op.b,%pc,%d1.w*1)
11790 swbeg &48
11791 tbl_fsub_op:
11792 short fsub_norm - tbl_fsub_op # NORM - NORM
11793 short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11794 short fsub_inf_src - tbl_fsub_op # NORM - INF
11795 short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11796 short fsub_norm - tbl_fsub_op # NORM - DENORM
11797 short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11798 short tbl_fsub_op - tbl_fsub_op #
11799 short tbl_fsub_op - tbl_fsub_op #
11801 short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11802 short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11803 short fsub_inf_src - tbl_fsub_op # ZERO - INF
11804 short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11805 short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11806 short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11807 short tbl_fsub_op - tbl_fsub_op #
11808 short tbl_fsub_op - tbl_fsub_op #
11810 short fsub_inf_dst - tbl_fsub_op # INF - NORM
11811 short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11812 short fsub_inf_2 - tbl_fsub_op # INF - INF
11813 short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11814 short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11815 short fsub_res_snan - tbl_fsub_op # INF - SNAN
11816 short tbl_fsub_op - tbl_fsub_op #
11817 short tbl_fsub_op - tbl_fsub_op #
11819 short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11820 short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11821 short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11822 short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11823 short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11824 short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11825 short tbl_fsub_op - tbl_fsub_op #
11826 short tbl_fsub_op - tbl_fsub_op #
11828 short fsub_norm - tbl_fsub_op # DENORM - NORM
11829 short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11830 short fsub_inf_src - tbl_fsub_op # DENORM - INF
11831 short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11832 short fsub_norm - tbl_fsub_op # DENORM - DENORM
11833 short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11834 short tbl_fsub_op - tbl_fsub_op #
11835 short tbl_fsub_op - tbl_fsub_op #
11837 short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11838 short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11839 short fsub_res_snan - tbl_fsub_op # SNAN - INF
11840 short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11841 short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11842 short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11843 short tbl_fsub_op - tbl_fsub_op #
11844 short tbl_fsub_op - tbl_fsub_op #
# NAN operands: hand off to the shared QNAN/SNAN result builders
11846 fsub_res_qnan:
11847 bra.l res_qnan
11848 fsub_res_snan:
11849 bra.l res_snan
11852 # both operands are ZEROes
# (ZERO dst) - (ZERO src):
#  - opposite signs: result takes the sign of the dst ZERO
#  - same signs: result is +ZERO unless the rnd mode is RM (then -ZERO)
11854 fsub_zero_2:
11855 mov.b SRC_EX(%a0),%d0
11856 mov.b DST_EX(%a1),%d1
11857 eor.b %d1,%d0 # d0 = sign(src) ^ sign(dst)
11858 bpl.b fsub_zero_2_chk_rm
11860 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
# BUGFIX: was "tst.b %d0", but %d0 now holds src^dst (always negative on
# this path, so the bmi was unconditional and (+0)-(-0) returned -0).
# DST_EX is still in %d1; test that instead so the result sign follows dst.
11861 tst.b %d1 # is dst negative?
11862 bmi.b fsub_zero_2_rm # yes
11863 fmov.s &0x00000000,%fp0 # no; return +ZERO
11864 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11868 # the ZEROes have the same signs:
11869 # - therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11870 # - -ZERO is returned in the case of RM.
11872 fsub_zero_2_chk_rm:
11873 mov.b 3+L_SCR3(%a6),%d1
11874 andi.b &0x30,%d1 # extract rnd mode
11875 cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11876 beq.b fsub_zero_2_rm # yes
11877 fmov.s &0x00000000,%fp0 # no; return +ZERO
11878 mov.b &z_bmask,FPSR_CC(%a6) # set Z
11881 fsub_zero_2_rm:
11882 fmov.s &0x80000000,%fp0 # return -ZERO
11883 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11887 # one operand is a ZERO and the other is a DENORM or a NORM.
11888 # scale the DENORM or NORM and jump to the regular fsub routine.
# dst is the ZERO: scale the src operand into FP_SCR0, stage +0 in FP_SCR1
11890 fsub_zero_dst:
11891 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11892 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11893 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11894 bsr.l scale_to_zero_src # scale the operand
11895 clr.w FP_SCR1_EX(%a6)
11896 clr.l FP_SCR1_HI(%a6)
11897 clr.l FP_SCR1_LO(%a6)
11898 bra.w fsub_zero_entry # go execute fsub
# src is the ZERO: scale the dst operand into FP_SCR1, stage +0 in FP_SCR0
11900 fsub_zero_src:
11901 mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11902 mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11903 mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11904 bsr.l scale_to_zero_dst # scale the operand
11905 clr.w FP_SCR0_EX(%a6)
11906 clr.l FP_SCR0_HI(%a6)
11907 clr.l FP_SCR0_LO(%a6)
11908 bra.w fsub_zero_entry # go execute fsub
11911 # both operands are INFs. an OPERR will result if the INFs have the
11912 # same signs. else,
11914 fsub_inf_2:
11915 mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11916 mov.b DST_EX(%a1),%d1
11917 eor.b %d1,%d0
11918 bpl.l res_operr # weed out like-signed INF - INF
11920 # ok, so it's not an OPERR. but we do have to remember to return
11921 # the src INF since that's where the 881/882 gets the j-bit.
# return the src INF with its sign inverted (dst - src)
11923 fsub_inf_src:
11924 fmovm.x SRC(%a0),&0x80 # return src INF
11925 fneg.x %fp0 # invert sign
11926 fbge.w fsub_inf_done # sign is now positive
11927 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
# dst is the INF: return it unchanged, setting N if it is negative
11930 fsub_inf_dst:
11931 fmovm.x DST(%a1),&0x80 # return dst INF
11932 tst.b DST_EX(%a1) # is INF negative?
11933 bpl.b fsub_inf_done # no
11934 mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11937 fsub_inf_done:
11938 mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11941 #########################################################################
11942 # XDEF **************************************************************** #
11943 # fsqrt(): emulates the fsqrt instruction #
11944 # fssqrt(): emulates the fssqrt instruction #
11945 # fdsqrt(): emulates the fdsqrt instruction #
11947 # XREF **************************************************************** #
11948 # scale_sqrt() - scale the source operand #
11949 # unf_res() - return default underflow result #
11950 # ovf_res() - return default overflow result #
11951 # res_qnan_1op() - return QNAN result #
11952 # res_snan_1op() - return SNAN result #
11954 # INPUT *************************************************************** #
11955 # a0 = pointer to extended precision source operand #
11956 # d0 rnd prec,mode #
11958 # OUTPUT ************************************************************** #
11959 # fp0 = result #
11960 # fp1 = EXOP (if exception occurred) #
11962 # ALGORITHM *********************************************************** #
11963 # Handle NANs, infinities, and zeroes as special cases. Divide #
11964 # norms/denorms into ext/sgl/dbl precision. #
11965 # For norms/denorms, scale the exponents such that a sqrt #
11966 # instruction won't cause an exception. Use the regular fsqrt to #
11967 # compute a result. Check if the regular operands would have taken #
11968 # an exception. If so, return the default overflow/underflow result #
11969 # and return the EXOP if exceptions are enabled. Else, scale the #
11970 # result operand to the proper exponent. #
11972 #########################################################################
# fssqrt: FSQRT with rounding precision forced to single.
11974 global fssqrt
11975 fssqrt:
11976 andi.b &0x30,%d0 # clear rnd prec
11977 ori.b &s_mode*0x10,%d0 # insert sgl precision
11978 bra.b fsqrt
# fdsqrt: FSQRT with rounding precision forced to double;
# falls through into fsqrt immediately below.
11980 global fdsqrt
11981 fdsqrt:
11982 andi.b &0x30,%d0 # clear rnd prec
11983 ori.b &d_mode*0x10,%d0 # insert dbl precision
# fsqrt: emulate FSQRT.
# In:  a0 = ptr to extended-precision src, d0 = rnd prec/mode.
# Out: fp0 = result, fp1 = EXOP if an enabled exception occurred.
11985 global fsqrt
11986 fsqrt:
11987 mov.l %d0,L_SCR3(%a6) # store rnd info
11988 clr.w %d1
11989 mov.b STAG(%a6),%d1
11990 bne.w fsqrt_not_norm # optimize on non-norm input
11993 # SQUARE ROOT: norms and denorms ONLY!
11995 fsqrt_norm:
11996 tst.b SRC_EX(%a0) # is operand negative?
11997 bmi.l res_operr # yes
11999 andi.b &0xc0,%d0 # is precision extended?
12000 bne.b fsqrt_not_ext # no; go handle sgl or dbl
# extended precision NORM: sqrt cannot overflow/underflow, do it directly
12002 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12003 fmov.l &0x0,%fpsr # clear FPSR
12005 fsqrt.x (%a0),%fp0 # execute square root
12007 fmov.l %fpsr,%d1
12008 or.l %d1,USER_FPSR(%a6) # set N,INEX
# DENORM source: scale it into range first, then use the sgl/dbl path
12012 fsqrt_denorm:
12013 tst.b SRC_EX(%a0) # is operand negative?
12014 bmi.l res_operr # yes
12016 andi.b &0xc0,%d0 # is precision extended?
12017 bne.b fsqrt_not_ext # no; go handle sgl or dbl
12019 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12020 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12021 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12023 bsr.l scale_sqrt # calculate scale factor
12025 bra.w fsqrt_sd_normal
12028 # operand is either single or double
12030 fsqrt_not_ext:
12031 cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12032 bne.w fsqrt_dbl
12035 # operand is to be rounded to single precision
12037 fsqrt_sgl:
12038 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12039 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12040 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12042 bsr.l scale_sqrt # calculate scale factor
# compare the scale factor in d0 against the sgl-precision exponent
# limits to pick the normal / underflow / overflow path
12044 cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
12045 beq.w fsqrt_sd_may_unfl
12046 bgt.w fsqrt_sd_unfl # yes; go handle underflow
12047 cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
12048 beq.w fsqrt_sd_may_ovfl # maybe; go check
12049 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12052 # operand will NOT overflow or underflow when moved in to the fp reg file
12054 fsqrt_sd_normal:
12055 fmov.l &0x0,%fpsr # clear FPSR
12056 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12058 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12060 fmov.l %fpsr,%d1 # save FPSR
12061 fmov.l &0x0,%fpcr # clear FPCR
12063 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# undo the scale factor (d0) on the result exponent and return in fp0
12065 fsqrt_sd_normal_exit:
12066 mov.l %d2,-(%sp) # save d2
12067 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12068 mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12069 mov.l %d1,%d2 # make a copy
12070 andi.l &0x7fff,%d1 # strip sign
12071 sub.l %d0,%d1 # add scale factor
12072 andi.w &0x8000,%d2 # keep old sign
12073 or.w %d1,%d2 # concat old sign,new exp
12074 mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12075 mov.l (%sp)+,%d2 # restore d2
12076 fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12080 # operand is to be rounded to double precision
12082 fsqrt_dbl:
12083 mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12084 mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12085 mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12087 bsr.l scale_sqrt # calculate scale factor
# compare the scale factor in d0 against the dbl-precision exponent limits
12089 cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
12090 beq.w fsqrt_sd_may_unfl
12091 bgt.b fsqrt_sd_unfl # yes; go handle underflow
12092 cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
12093 beq.w fsqrt_sd_may_ovfl # maybe; go check
12094 blt.w fsqrt_sd_ovfl # yes; go handle overflow
12095 bra.w fsqrt_sd_normal # no; go handle normalized op
12097 # we're on the line here and the distinguishing characteristic is whether
12098 # the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12099 # elsewise fall through to underflow.
12100 fsqrt_sd_may_unfl:
12101 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12102 bne.w fsqrt_sd_normal # yes, so no underflow
12105 # operand WILL underflow when moved in to the fp register file
12107 fsqrt_sd_unfl:
12108 bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
# compute the scaled sqrt in RZ so unf_res can round it correctly
12110 fmov.l &rz_mode*0x10,%fpcr # set FPCR
12111 fmov.l &0x0,%fpsr # clear FPSR
12113 fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
12115 fmov.l %fpsr,%d1 # save status
12116 fmov.l &0x0,%fpcr # clear FPCR
12118 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12120 # if underflow or inexact is enabled, go calculate EXOP first.
12121 mov.b FPCR_ENABLE(%a6),%d1
12122 andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12123 bne.b fsqrt_sd_unfl_ena # yes
12125 fsqrt_sd_unfl_dis:
12126 fmovm.x &0x80,FP_SCR0(%a6) # store out result
12128 lea FP_SCR0(%a6),%a0 # pass: result addr
12129 mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12130 bsr.l unf_res # calculate default result
12131 or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
12132 fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12133 rts
12136 # operand will underflow AND underflow is enabled.
12137 # therefore, we must return the result rounded to extended precision.
12139 fsqrt_sd_unfl_ena:
12140 mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12141 mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12142 mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
# EXOP exponent = (exp - scale factor) + 0x6000 bias adjustment
12144 mov.l %d2,-(%sp) # save d2
12145 mov.l %d1,%d2 # make a copy
12146 andi.l &0x7fff,%d1 # strip sign
12147 andi.w &0x8000,%d2 # keep old sign
12148 sub.l %d0,%d1 # subtract scale factor
12149 addi.l &0x6000,%d1 # add new bias
12150 andi.w &0x7fff,%d1
12151 or.w %d2,%d1 # concat new sign,new exp
12152 mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
12153 fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12154 mov.l (%sp)+,%d2 # restore d2
12155 bra.b fsqrt_sd_unfl_dis
12158 # operand WILL overflow.
12160 fsqrt_sd_ovfl:
12161 fmov.l &0x0,%fpsr # clear FPSR
12162 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12164 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12166 fmov.l &0x0,%fpcr # clear FPCR
12167 fmov.l %fpsr,%d1 # save FPSR
12169 or.l %d1,USER_FPSR(%a6) # save INEX2,N
12171 fsqrt_sd_ovfl_tst:
12172 or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12174 mov.b FPCR_ENABLE(%a6),%d1
12175 andi.b &0x13,%d1 # is OVFL or INEX enabled?
12176 bne.b fsqrt_sd_ovfl_ena # yes
12179 # OVFL is not enabled; therefore, we must create the default result by
12180 # calling ovf_res().
12182 fsqrt_sd_ovfl_dis:
12183 btst &neg_bit,FPSR_CC(%a6) # is result negative?
12184 sne %d1 # set sign param accordingly
12185 mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12186 bsr.l ovf_res # calculate default result
12187 or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12188 fmovm.x (%a0),&0x80 # return default result in fp0
12192 # OVFL is enabled.
12193 # the INEX2 bit has already been updated by the round to the correct precision.
12194 # now, round to extended(and don't alter the FPSR).
# EXOP exponent = (exp - scale factor) - 0x6000 bias adjustment
12196 fsqrt_sd_ovfl_ena:
12197 mov.l %d2,-(%sp) # save d2
12198 mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12199 mov.l %d1,%d2 # make a copy
12200 andi.l &0x7fff,%d1 # strip sign
12201 andi.w &0x8000,%d2 # keep old sign
12202 sub.l %d0,%d1 # add scale factor
12203 subi.l &0x6000,%d1 # subtract bias
12204 andi.w &0x7fff,%d1
12205 or.w %d2,%d1 # concat sign,exp
12206 mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12207 fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12208 mov.l (%sp)+,%d2 # restore d2
12209 bra.b fsqrt_sd_ovfl_dis
12212 # the move in MAY overflow. so...
12214 fsqrt_sd_may_ovfl:
12215 btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12216 bne.w fsqrt_sd_ovfl # yes, so overflow
12218 fmov.l &0x0,%fpsr # clear FPSR
12219 fmov.l L_SCR3(%a6),%fpcr # set FPCR
12221 fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12223 fmov.l %fpsr,%d1 # save status
12224 fmov.l &0x0,%fpcr # clear FPCR
12226 or.l %d1,USER_FPSR(%a6) # save INEX2,N
# sqrt of a scaled operand is in [1,2) unless rounding pushed it up;
# |result| >= 1 here means the unscaled result overflowed
12228 fmov.x %fp0,%fp1 # make a copy of result
12229 fcmp.b %fp1,&0x1 # is |result| >= 1.b?
12230 fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
12232 # no, it didn't overflow; we have correct result
12233 bra.w fsqrt_sd_normal_exit
12235 ##########################################################################
12238 # input is not normalized; what is it?
# dispatch on the src tag in d1
12240 fsqrt_not_norm:
12241 cmpi.b %d1,&DENORM # weed out DENORM
12242 beq.w fsqrt_denorm
12243 cmpi.b %d1,&ZERO # weed out ZERO
12244 beq.b fsqrt_zero
12245 cmpi.b %d1,&INF # weed out INF
12246 beq.b fsqrt_inf
12247 cmpi.b %d1,&SNAN # weed out SNAN
12248 beq.l res_snan_1op
12249 bra.l res_qnan_1op
12252 # fsqrt(+0) = +0
12253 # fsqrt(-0) = -0
12254 # fsqrt(+INF) = +INF
12255 # fsqrt(-INF) = OPERR
12257 fsqrt_zero:
12258 tst.b SRC_EX(%a0) # is ZERO positive or negative?
12259 bmi.b fsqrt_zero_m # negative
12260 fsqrt_zero_p:
12261 fmov.s &0x00000000,%fp0 # return +ZERO
12262 mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
12264 fsqrt_zero_m:
12265 fmov.s &0x80000000,%fp0 # return -ZERO
12266 mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
# INF source: +INF passes through; -INF is an operand error
12269 fsqrt_inf:
12270 tst.b SRC_EX(%a0) # is INF positive or negative?
12271 bmi.l res_operr # negative
12272 fsqrt_inf_p:
12273 fmovm.x SRC(%a0),&0x80 # return +INF in fp0
12274 mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12277 #########################################################################
12278 # XDEF **************************************************************** #
12279 # fetch_dreg(): fetch register according to index in d1 #
12281 # XREF **************************************************************** #
12282 # None #
12284 # INPUT *************************************************************** #
12285 # d1 = index of register to fetch from #
12287 # OUTPUT ************************************************************** #
12288 # d0 = value of register fetched #
12290 # ALGORITHM *********************************************************** #
12291 # According to the index value in d1 which can range from zero #
12292 # to fifteen, load the corresponding register file value (where #
12293 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12294 # stack. The rest should still be in their original places. #
12296 #########################################################################
12298 # this routine leaves d1 intact for subsequent store_dreg calls.
# fetch_dreg: d0 = value of integer register indexed by d1 (0-7 = d0-d7,
# 8-15 = a0-a7). D0/D1/A0/A1/A6/A7 come from the exception stack frame.
12299 global fetch_dreg
12300 fetch_dreg:
12301 mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0
12302 jmp (tbl_fdreg.b,%pc,%d0.w*1)
12304 tbl_fdreg:
12305 short fdreg0 - tbl_fdreg
12306 short fdreg1 - tbl_fdreg
12307 short fdreg2 - tbl_fdreg
12308 short fdreg3 - tbl_fdreg
12309 short fdreg4 - tbl_fdreg
12310 short fdreg5 - tbl_fdreg
12311 short fdreg6 - tbl_fdreg
12312 short fdreg7 - tbl_fdreg
12313 short fdreg8 - tbl_fdreg
12314 short fdreg9 - tbl_fdreg
12315 short fdrega - tbl_fdreg
12316 short fdregb - tbl_fdreg
12317 short fdregc - tbl_fdreg
12318 short fdregd - tbl_fdreg
12319 short fdrege - tbl_fdreg
12320 short fdregf - tbl_fdreg
12322 fdreg0:
12323 mov.l EXC_DREGS+0x0(%a6),%d0
12325 fdreg1:
12326 mov.l EXC_DREGS+0x4(%a6),%d0
12328 fdreg2:
12329 mov.l %d2,%d0
12331 fdreg3:
12332 mov.l %d3,%d0
12334 fdreg4:
12335 mov.l %d4,%d0
12337 fdreg5:
12338 mov.l %d5,%d0
12340 fdreg6:
12341 mov.l %d6,%d0
12343 fdreg7:
12344 mov.l %d7,%d0
12346 fdreg8:
12347 mov.l EXC_DREGS+0x8(%a6),%d0
12349 fdreg9:
12350 mov.l EXC_DREGS+0xc(%a6),%d0
12352 fdrega:
12353 mov.l %a2,%d0
12355 fdregb:
12356 mov.l %a3,%d0
12358 fdregc:
12359 mov.l %a4,%d0
12361 fdregd:
12362 mov.l %a5,%d0
12364 fdrege:
12365 mov.l (%a6),%d0 # saved a6 is at the frame base
12367 fdregf:
12368 mov.l EXC_A7(%a6),%d0
12371 #########################################################################
12372 # XDEF **************************************************************** #
12373 # store_dreg_l(): store longword to data register specified by d1 #
12375 # XREF **************************************************************** #
12376 # None #
12378 # INPUT *************************************************************** #
12379 # d0 = longword value to store #
12380 # d1 = index of register to fetch from #
12382 # OUTPUT ************************************************************** #
12383 # (data register is updated) #
12385 # ALGORITHM *********************************************************** #
12386 # According to the index value in d1, store the longword value #
12387 # in d0 to the corresponding data register. D0/D1 are on the stack #
12388 # while the rest are in their initial places. #
12390 #########################################################################
# store_dreg_l: store the longword in d0 to data register indexed by d1
# (0-7). d0/d1 live in the exception stack frame; d2-d7 are live registers.
12392 global store_dreg_l
12393 store_dreg_l:
12394 mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1
12395 jmp (tbl_sdregl.b,%pc,%d1.w*1)
12397 tbl_sdregl:
12398 short sdregl0 - tbl_sdregl
12399 short sdregl1 - tbl_sdregl
12400 short sdregl2 - tbl_sdregl
12401 short sdregl3 - tbl_sdregl
12402 short sdregl4 - tbl_sdregl
12403 short sdregl5 - tbl_sdregl
12404 short sdregl6 - tbl_sdregl
12405 short sdregl7 - tbl_sdregl
12407 sdregl0:
12408 mov.l %d0,EXC_DREGS+0x0(%a6)
12410 sdregl1:
12411 mov.l %d0,EXC_DREGS+0x4(%a6)
12413 sdregl2:
12414 mov.l %d0,%d2
12416 sdregl3:
12417 mov.l %d0,%d3
12419 sdregl4:
12420 mov.l %d0,%d4
12422 sdregl5:
12423 mov.l %d0,%d5
12425 sdregl6:
12426 mov.l %d0,%d6
12428 sdregl7:
12429 mov.l %d0,%d7
12432 #########################################################################
12433 # XDEF **************************************************************** #
12434 # store_dreg_w(): store word to data register specified by d1 #
12436 # XREF **************************************************************** #
12437 # None #
12439 # INPUT *************************************************************** #
12440 # d0 = word value to store #
12441 # d1 = index of register to fetch from #
12443 # OUTPUT ************************************************************** #
12444 # (data register is updated) #
12446 # ALGORITHM *********************************************************** #
12447 # According to the index value in d1, store the word value #
12448 # in d0 to the corresponding data register. D0/D1 are on the stack #
12449 # while the rest are in their initial places. #
12451 #########################################################################
# store_dreg_w: store the word in d0 to the low word of the data register
# indexed by d1 (0-7); upper word of the target register is preserved.
12453 global store_dreg_w
12454 store_dreg_w:
12455 mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1
12456 jmp (tbl_sdregw.b,%pc,%d1.w*1)
12458 tbl_sdregw:
12459 short sdregw0 - tbl_sdregw
12460 short sdregw1 - tbl_sdregw
12461 short sdregw2 - tbl_sdregw
12462 short sdregw3 - tbl_sdregw
12463 short sdregw4 - tbl_sdregw
12464 short sdregw5 - tbl_sdregw
12465 short sdregw6 - tbl_sdregw
12466 short sdregw7 - tbl_sdregw
12468 sdregw0:
12469 mov.w %d0,2+EXC_DREGS+0x0(%a6) # low word of stacked d0
12471 sdregw1:
12472 mov.w %d0,2+EXC_DREGS+0x4(%a6) # low word of stacked d1
12474 sdregw2:
12475 mov.w %d0,%d2
12477 sdregw3:
12478 mov.w %d0,%d3
12480 sdregw4:
12481 mov.w %d0,%d4
12483 sdregw5:
12484 mov.w %d0,%d5
12486 sdregw6:
12487 mov.w %d0,%d6
12489 sdregw7:
12490 mov.w %d0,%d7
12493 #########################################################################
12494 # XDEF **************************************************************** #
12495 # store_dreg_b(): store byte to data register specified by d1 #
12497 # XREF **************************************************************** #
12498 # None #
12500 # INPUT *************************************************************** #
12501 # d0 = byte value to store #
12502 # d1 = index of register to fetch from #
12504 # OUTPUT ************************************************************** #
12505 # (data register is updated) #
12507 # ALGORITHM *********************************************************** #
12508 # According to the index value in d1, store the byte value #
12509 # in d0 to the corresponding data register. D0/D1 are on the stack #
12510 # while the rest are in their initial places. #
12512 #########################################################################
# store_dreg_b: store the byte in d0 to the low byte of the data register
# indexed by d1 (0-7); upper bytes of the target register are preserved.
12514 global store_dreg_b
12515 store_dreg_b:
12516 mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1
12517 jmp (tbl_sdregb.b,%pc,%d1.w*1)
12519 tbl_sdregb:
12520 short sdregb0 - tbl_sdregb
12521 short sdregb1 - tbl_sdregb
12522 short sdregb2 - tbl_sdregb
12523 short sdregb3 - tbl_sdregb
12524 short sdregb4 - tbl_sdregb
12525 short sdregb5 - tbl_sdregb
12526 short sdregb6 - tbl_sdregb
12527 short sdregb7 - tbl_sdregb
12529 sdregb0:
12530 mov.b %d0,3+EXC_DREGS+0x0(%a6) # low byte of stacked d0
12532 sdregb1:
12533 mov.b %d0,3+EXC_DREGS+0x4(%a6) # low byte of stacked d1
12535 sdregb2:
12536 mov.b %d0,%d2
12538 sdregb3:
12539 mov.b %d0,%d3
12541 sdregb4:
12542 mov.b %d0,%d4
12544 sdregb5:
12545 mov.b %d0,%d5
12547 sdregb6:
12548 mov.b %d0,%d6
12550 sdregb7:
12551 mov.b %d0,%d7
12554 #########################################################################
12555 # XDEF **************************************************************** #
12556 # inc_areg(): increment an address register by the value in d0 #
12558 # XREF **************************************************************** #
12559 # None #
12561 # INPUT *************************************************************** #
12562 # d0 = amount to increment by #
12563 # d1 = index of address register to increment #
12565 # OUTPUT ************************************************************** #
12566 # (address register is updated) #
12568 # ALGORITHM *********************************************************** #
12569 # Typically used for an instruction w/ a post-increment <ea>, #
12570 # this routine adds the increment value in d0 to the address register #
12571 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12572 # in their original places. #
12573 # For a7, if the increment amount is one, then we have to #
12574 # increment by two. For any a7 update, set the mia7_flag so that if #
12575 # an access error exception occurs later in emulation, this address #
12576 # register update can be undone. #
12578 #########################################################################
# inc_areg: add d0 to the address register indexed by d1 (post-increment
# <ea> emulation). a7 byte-increments become 2, and the mia7 special-
# condition flag is set so the update can be undone on an access error.
12580 global inc_areg
12581 inc_areg:
12582 mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1
12583 jmp (tbl_iareg.b,%pc,%d1.w*1)
12585 tbl_iareg:
12586 short iareg0 - tbl_iareg
12587 short iareg1 - tbl_iareg
12588 short iareg2 - tbl_iareg
12589 short iareg3 - tbl_iareg
12590 short iareg4 - tbl_iareg
12591 short iareg5 - tbl_iareg
12592 short iareg6 - tbl_iareg
12593 short iareg7 - tbl_iareg
12595 iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # stacked a0
12597 iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # stacked a1
12599 iareg2: add.l %d0,%a2
12601 iareg3: add.l %d0,%a3
12603 iareg4: add.l %d0,%a4
12605 iareg5: add.l %d0,%a5
12607 iareg6: add.l %d0,(%a6) # stacked a6
# a7: keep the stack pointer word-aligned; an increment of 1 becomes 2
12609 iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
12610 cmpi.b %d0,&0x1
12611 beq.b iareg7b
12612 add.l %d0,EXC_A7(%a6)
12614 iareg7b:
12615 addq.l &0x2,EXC_A7(%a6)
12618 #########################################################################
12619 # XDEF **************************************************************** #
12620 # dec_areg(): decrement an address register by the value in d0 #
12622 # XREF **************************************************************** #
12623 # None #
12625 # INPUT *************************************************************** #
12626 # d0 = amount to decrement by #
12627 # d1 = index of address register to decrement #
12629 # OUTPUT ************************************************************** #
12630 # (address register is updated) #
12632 # ALGORITHM *********************************************************** #
12633 # Typically used for an instruction w/ a pre-decrement <ea>, #
12634 # this routine subtracts the decrement value in d0 from the address register #
12635 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12636 # in their original places. #
12637 # For a7, if the decrement amount is one, then we have to #
12638 # decrement by two. For any a7 update, set the mda7_flag so that if #
12639 # an access error exception occurs later in emulation, this address #
12640 # register update can be undone. #
12642 #########################################################################
# dec_areg(): pre-decrement helper. Subtracts the amount in d0 from the
# address register whose index (0-7) is in d1. a0/a1 live in the exception
# frame at EXC_DREGS+0x8/+0xc, a6 is the saved frame pointer at (%a6), a7 is
# at EXC_A7(%a6); a2-a5 are still in their real registers.
12644 global dec_areg
12645 dec_areg:
# fetch 16-bit table offset for register d1, then dispatch through the table
12646 mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1
12647 jmp (tbl_dareg.b,%pc,%d1.w*1)
12649 tbl_dareg:
12650 short dareg0 - tbl_dareg
12651 short dareg1 - tbl_dareg
12652 short dareg2 - tbl_dareg
12653 short dareg3 - tbl_dareg
12654 short dareg4 - tbl_dareg
12655 short dareg5 - tbl_dareg
12656 short dareg6 - tbl_dareg
12657 short dareg7 - tbl_dareg
# a0/a1 are updated in their exception-frame save slots
12659 dareg0: sub.l %d0,EXC_DREGS+0x8(%a6)
12661 dareg1: sub.l %d0,EXC_DREGS+0xc(%a6)
12663 dareg2: sub.l %d0,%a2
12665 dareg3: sub.l %d0,%a3
12667 dareg4: sub.l %d0,%a4
12669 dareg5: sub.l %d0,%a5
12671 dareg6: sub.l %d0,(%a6)
# a7: record the update in SPCOND_FLG so a later access error can undo it
12673 dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
# a byte-sized -(a7) must drop the stack pointer by 2 to keep it word-aligned
12674 cmpi.b %d0,&0x1
12675 beq.b dareg7b
12676 sub.l %d0,EXC_A7(%a6)
12678 dareg7b:
12679 subq.l &0x2,EXC_A7(%a6)
12682 ##############################################################################
12684 #########################################################################
12685 # XDEF **************************************************************** #
12686 # load_fpn1(): load FP register value into FP_SRC(a6). #
12688 # XREF **************************************************************** #
12689 # None #
12691 # INPUT *************************************************************** #
12692 # d0 = index of FP register to load #
12694 # OUTPUT ************************************************************** #
12695 # FP_SRC(a6) = value loaded from FP register file #
12697 # ALGORITHM *********************************************************** #
12698 # Using the index in d0, load FP_SRC(a6) with a number from the #
12699 # FP register file. #
12701 #########################################################################
# load_fpn1(): copy FP register <d0> into FP_SRC(a6) and return a0 -> FP_SRC.
# fp0/fp1 were already dumped to the exception frame (EXC_FP0/EXC_FP1), so
# they are copied with integer moves; fp2-fp7 are still live and are stored
# with fmovm.x (register-list mask selects the one register).
12703 global load_fpn1
12704 load_fpn1:
# fetch 16-bit table offset for register d0, then dispatch through the table
12705 mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0
12706 jmp (tbl_load_fpn1.b,%pc,%d0.w*1)
12708 tbl_load_fpn1:
12709 short load_fpn1_0 - tbl_load_fpn1
12710 short load_fpn1_1 - tbl_load_fpn1
12711 short load_fpn1_2 - tbl_load_fpn1
12712 short load_fpn1_3 - tbl_load_fpn1
12713 short load_fpn1_4 - tbl_load_fpn1
12714 short load_fpn1_5 - tbl_load_fpn1
12715 short load_fpn1_6 - tbl_load_fpn1
12716 short load_fpn1_7 - tbl_load_fpn1
# fp0/fp1: 12-byte extended value copied lword-by-lword from the frame
12718 load_fpn1_0:
12719 mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
12720 mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12721 mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12722 lea FP_SRC(%a6), %a0
12724 load_fpn1_1:
12725 mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
12726 mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12727 mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12728 lea FP_SRC(%a6), %a0
# fp2-fp7: stored directly via fmovm.x with a single-register mask
12730 load_fpn1_2:
12731 fmovm.x &0x20, FP_SRC(%a6)
12732 lea FP_SRC(%a6), %a0
12734 load_fpn1_3:
12735 fmovm.x &0x10, FP_SRC(%a6)
12736 lea FP_SRC(%a6), %a0
12738 load_fpn1_4:
12739 fmovm.x &0x08, FP_SRC(%a6)
12740 lea FP_SRC(%a6), %a0
12742 load_fpn1_5:
12743 fmovm.x &0x04, FP_SRC(%a6)
12744 lea FP_SRC(%a6), %a0
12746 load_fpn1_6:
12747 fmovm.x &0x02, FP_SRC(%a6)
12748 lea FP_SRC(%a6), %a0
12750 load_fpn1_7:
12751 fmovm.x &0x01, FP_SRC(%a6)
12752 lea FP_SRC(%a6), %a0
12755 #############################################################################
12757 #########################################################################
12758 # XDEF **************************************************************** #
12759 # load_fpn2(): load FP register value into FP_DST(a6). #
12761 # XREF **************************************************************** #
12762 # None #
12764 # INPUT *************************************************************** #
12765 # d0 = index of FP register to load #
12767 # OUTPUT ************************************************************** #
12768 # FP_DST(a6) = value loaded from FP register file #
12770 # ALGORITHM *********************************************************** #
12771 # Using the index in d0, load FP_DST(a6) with a number from the #
12772 # FP register file. #
12774 #########################################################################
# load_fpn2(): copy FP register <d0> into FP_DST(a6) and return a0 -> FP_DST.
# Identical in structure to load_fpn1 but targets the destination-operand
# scratch area: fp0/fp1 come from the exception frame via integer moves,
# fp2-fp7 via fmovm.x single-register masks.
12776 global load_fpn2
12777 load_fpn2:
# fetch 16-bit table offset for register d0, then dispatch through the table
12778 mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0
12779 jmp (tbl_load_fpn2.b,%pc,%d0.w*1)
12781 tbl_load_fpn2:
12782 short load_fpn2_0 - tbl_load_fpn2
12783 short load_fpn2_1 - tbl_load_fpn2
12784 short load_fpn2_2 - tbl_load_fpn2
12785 short load_fpn2_3 - tbl_load_fpn2
12786 short load_fpn2_4 - tbl_load_fpn2
12787 short load_fpn2_5 - tbl_load_fpn2
12788 short load_fpn2_6 - tbl_load_fpn2
12789 short load_fpn2_7 - tbl_load_fpn2
# fp0/fp1: 12-byte extended value copied lword-by-lword from the frame
12791 load_fpn2_0:
12792 mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
12793 mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
12794 mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
12795 lea FP_DST(%a6), %a0
12797 load_fpn2_1:
12798 mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
12799 mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
12800 mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
12801 lea FP_DST(%a6), %a0
# fp2-fp7: stored directly via fmovm.x with a single-register mask
12803 load_fpn2_2:
12804 fmovm.x &0x20, FP_DST(%a6)
12805 lea FP_DST(%a6), %a0
12807 load_fpn2_3:
12808 fmovm.x &0x10, FP_DST(%a6)
12809 lea FP_DST(%a6), %a0
12811 load_fpn2_4:
12812 fmovm.x &0x08, FP_DST(%a6)
12813 lea FP_DST(%a6), %a0
12815 load_fpn2_5:
12816 fmovm.x &0x04, FP_DST(%a6)
12817 lea FP_DST(%a6), %a0
12819 load_fpn2_6:
12820 fmovm.x &0x02, FP_DST(%a6)
12821 lea FP_DST(%a6), %a0
12823 load_fpn2_7:
12824 fmovm.x &0x01, FP_DST(%a6)
12825 lea FP_DST(%a6), %a0
12828 #############################################################################
12830 #########################################################################
12831 # XDEF **************************************************************** #
12832 # store_fpreg(): store an fp value to the fpreg designated d0. #
12834 # XREF **************************************************************** #
12835 # None #
12837 # INPUT *************************************************************** #
12838 # fp0 = extended precision value to store #
12839 # d0 = index of floating-point register #
12841 # OUTPUT ************************************************************** #
12842 # None #
12844 # ALGORITHM *********************************************************** #
12845 # Store the value in fp0 to the FP register designated by the #
12846 # value in d0. The FP number can be DENORM or SNAN so we have to be #
12847 # careful that we don't take an exception here. #
12849 #########################################################################
# store_fpreg(): store fp0 into the FP register whose index is in d0.
# fp0/fp1 have frame save slots, so those cases use fmovm.x to memory
# (fmovm does not trap on SNAN/DENORM operands, which is why plain fmove
# is avoided). For fp2-fp7 the value is bounced through the stack:
# push fp0 (mask 0x01), then pop into the target register.
12851 global store_fpreg
12852 store_fpreg:
# fetch 16-bit table offset for register d0, then dispatch through the table
12853 mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0
12854 jmp (tbl_store_fpreg.b,%pc,%d0.w*1)
12856 tbl_store_fpreg:
12857 short store_fpreg_0 - tbl_store_fpreg
12858 short store_fpreg_1 - tbl_store_fpreg
12859 short store_fpreg_2 - tbl_store_fpreg
12860 short store_fpreg_3 - tbl_store_fpreg
12861 short store_fpreg_4 - tbl_store_fpreg
12862 short store_fpreg_5 - tbl_store_fpreg
12863 short store_fpreg_6 - tbl_store_fpreg
12864 short store_fpreg_7 - tbl_store_fpreg
# fp0/fp1: write straight to the exception-frame save slot
12866 store_fpreg_0:
12867 fmovm.x &0x80, EXC_FP0(%a6)
12869 store_fpreg_1:
12870 fmovm.x &0x80, EXC_FP1(%a6)
# fp2-fp7: push fp0, pop into target register (pop mask selects target)
12872 store_fpreg_2:
12873 fmovm.x &0x01, -(%sp)
12874 fmovm.x (%sp)+, &0x20
12876 store_fpreg_3:
12877 fmovm.x &0x01, -(%sp)
12878 fmovm.x (%sp)+, &0x10
12880 store_fpreg_4:
12881 fmovm.x &0x01, -(%sp)
12882 fmovm.x (%sp)+, &0x08
12884 store_fpreg_5:
12885 fmovm.x &0x01, -(%sp)
12886 fmovm.x (%sp)+, &0x04
12888 store_fpreg_6:
12889 fmovm.x &0x01, -(%sp)
12890 fmovm.x (%sp)+, &0x02
12892 store_fpreg_7:
12893 fmovm.x &0x01, -(%sp)
12894 fmovm.x (%sp)+, &0x01
12897 #########################################################################
12898 # XDEF **************************************************************** #
12899 # get_packed(): fetch a packed operand from memory and then #
12900 # convert it to a floating-point binary number. #
12902 # XREF **************************************************************** #
12903 # _dcalc_ea() - calculate the correct <ea> #
12904 # _mem_read() - fetch the packed operand from memory #
12905 # facc_in_x() - the fetch failed so jump to special exit code #
12906 # decbin() - convert packed to binary extended precision #
12908 # INPUT *************************************************************** #
12909 # None #
12911 # OUTPUT ************************************************************** #
12912 # If no failure on _mem_read(): #
12913 # FP_SRC(a6) = packed operand now as a binary FP number #
12915 # ALGORITHM *********************************************************** #
12916 # Get the correct <ea> which is the value on the exception stack #
12917 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12918 # Then, fetch the operand from memory. If the fetch fails, exit #
12919 # through facc_in_x(). #
12920 # If the packed operand is a ZERO,NAN, or INF, convert it to #
12921 # its binary representation here. Else, call decbin() which will #
12922 # convert the packed value to an extended precision binary value. #
12924 #########################################################################
12926 # the stacked <ea> for packed is correct except for -(An).
12927 # the base reg must be updated for both -(An) and (An)+.
# get_packed(): fetch a 12-byte packed-decimal operand from memory into
# FP_SRC(a6) and convert it to extended-precision binary in place.
# ZERO/INF/NAN packed operands already have the correct binary bit pattern
# and are returned untouched; everything else goes through decbin().
12928 global get_packed
12929 get_packed:
12930 mov.l &0xc,%d0 # packed is 12 bytes
12931 bsr.l _dcalc_ea # fetch <ea>; correct An
12933 lea FP_SRC(%a6),%a1 # pass: ptr to super dst
12934 mov.l &0xc,%d0 # pass: 12 bytes
12935 bsr.l _dmem_read # read packed operand
# _dmem_read returns its fault status in d1; non-zero means the data
# fetch failed and we must exit through the access-error path
12937 tst.l %d1 # did dfetch fail?
12938 bne.l facc_in_x # yes
12940 # The packed operand is an INF or a NAN if the exponent field is all ones.
12941 bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
12942 cmpi.w %d0,&0x7fff # INF or NAN?
12943 bne.b gp_try_zero # no
12944 rts # operand is an INF or NAN
12946 # The packed operand is a zero if the mantissa is all zero, else it's
12947 # a normal packed op.
12948 gp_try_zero:
# a packed zero has a zero 17th (integer) digit and all-zero fraction lwords
12949 mov.b 3+FP_SRC(%a6),%d0 # get byte 4
12950 andi.b &0x0f,%d0 # clear all but last nybble
12951 bne.b gp_not_spec # not a zero
12952 tst.l FP_SRC_HI(%a6) # is lw 2 zero?
12953 bne.b gp_not_spec # not a zero
12954 tst.l FP_SRC_LO(%a6) # is lw 3 zero?
12955 bne.b gp_not_spec # not a zero
12956 rts # operand is a ZERO
12957 gp_not_spec:
12958 lea FP_SRC(%a6),%a0 # pass: ptr to packed op
12959 bsr.l decbin # convert to extended
12960 fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
12963 #########################################################################
12964 # decbin(): Converts normalized packed bcd value pointed to by register #
12965 # a0 to extended-precision value in fp0. #
12967 # INPUT *************************************************************** #
12968 # a0 = pointer to normalized packed bcd value #
12970 # OUTPUT ************************************************************** #
12971 # fp0 = exact fp representation of the packed bcd value. #
12973 # ALGORITHM *********************************************************** #
12974 # Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12975 # and NaN operands are dispatched without entering this routine) #
12976 # value in 68881/882 format at location (a0). #
12978 # A1. Convert the bcd exponent to binary by successive adds and #
12979 # muls. Set the sign according to SE. Subtract 16 to compensate #
12980 # for the mantissa which is to be interpreted as 17 integer #
12981 # digits, rather than 1 integer and 16 fraction digits. #
12982 # Note: this operation can never overflow. #
12984 # A2. Convert the bcd mantissa to binary by successive #
12985 # adds and muls in FP0. Set the sign according to SM. #
12986 # The mantissa digits will be converted with the decimal point #
12987 # assumed following the least-significant digit. #
12988 # Note: this operation can never overflow. #
12990 # A3. Count the number of leading/trailing zeros in the #
12991 # bcd string. If SE is positive, count the leading zeros; #
12992 # if negative, count the trailing zeros. Set the adjusted #
12993 # exponent equal to the exponent from A1 and the zero count #
12994 # added if SM = 1 and subtracted if SM = 0. Scale the #
12995 # mantissa the equivalent of forcing in the bcd value: #
12997 # SM = 0 a non-zero digit in the integer position #
12998 # SM = 1 a non-zero digit in Mant0, lsd of the fraction #
13000 # this will insure that any value, regardless of its #
13001 # representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
13002 # consistently. #
13004 # A4. Calculate the factor 10^exp in FP1 using a table of #
13005 # 10^(2^n) values. To reduce the error in forming factors #
13006 # greater than 10^27, a directed rounding scheme is used with #
13007 # tables rounded to RN, RM, and RP, according to the table #
13008 # in the comments of the pwrten section. #
13010 # A5. Form the final binary number by scaling the mantissa by #
13011 # the exponent factor. This is done by multiplying the #
13012 # mantissa in FP0 by the factor in FP1 if the adjusted #
13013 # exponent sign is positive, and dividing FP0 by FP1 if #
13014 # it is negative. #
13016 # Clean up and return. Check if the final mul or div was inexact. #
13017 # If so, set INEX1 in USER_FPSR. #
13019 #########################################################################
13022 # PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13023 # to nearest, minus, and plus, respectively. The tables include
13024 # 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13025 # is required until the power is greater than 27, however, all
13026 # tables include the first 5 for ease of indexing.
# RTABLE: rounding-mode translation for decbin's pwrten step. Indexed by
# {FPCR[6:5] rounding mode, SM, SE} (4 entries per mode); each byte holds
# the replacement FPCR rounding-mode bits per the table in the pwrten
# comments (0=RN, 2=RM, 3=RP).
13028 RTABLE:
13029 byte 0,0,0,0
13030 byte 2,3,2,3
13031 byte 2,3,3,2
13032 byte 3,2,2,3
# bit-field cursors for walking the packed operand:
# FNIBS/FSTRT: dbf count (7 -> 8 digits) and start offset for mantissa lwords
13034 set FNIBS,7
13035 set FSTRT,0
# ESTRT/EDIGITS: start offset and dbf count (2 -> 3 digits) for the exponent
13037 set ESTRT,4
13038 set EDIGITS,2
# decbin(): convert the normalized packed-bcd value at (a0) to an exact
# extended-precision result in fp0. Works on a scratch copy in FP_SCR0 so
# the caller's operand is untouched. Exceptional operands (zero/inf/NaN)
# are dispatched by the caller and never reach here.
13040 global decbin
13041 decbin:
13042 mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13043 mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13044 mov.l 0x8(%a0),FP_SCR0_LO(%a6)
13046 lea FP_SCR0(%a6),%a0
13048 movm.l &0x3c00,-(%sp) # save d2-d5
13049 fmovm.x &0x1,-(%sp) # save fp1
13051 # Calculate exponent:
13052 # 1. Copy bcd value in memory for use as a working copy.
13053 # 2. Calculate absolute value of exponent in d1 by mul and add.
13054 # 3. Correct for exponent sign.
13055 # 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13056 # (i.e., all digits assumed left of the decimal point.)
13058 # Register usage:
13060 # calc_e:
13061 # (*) d0: temp digit storage
13062 # (*) d1: accumulator for binary exponent
13063 # (*) d2: digit count
13064 # (*) d3: offset pointer
13065 # ( ) d4: first word of bcd
13066 # ( ) a0: pointer to working bcd value
13067 # ( ) a6: pointer to original bcd value
13068 # (*) FP_SCR1: working copy of original bcd value
13069 # (*) L_SCR1: copy of original exponent word
13071 calc_e:
13072 mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
13073 mov.l &ESTRT,%d3 # counter to pick up digits
13074 mov.l (%a0),%d4 # get first word of bcd
13075 clr.l %d1 # zero d1 for accumulator
# horner loop: d1 = d1*10 + next exponent digit (3 digits, dbf count of 2)
13076 e_gd:
13077 mulu.l &0xa,%d1 # mul partial product by one digit place
13078 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
13079 add.l %d0,%d1 # d1 = d1 + d0
13080 addq.b &4,%d3 # advance d3 to the next digit
13081 dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
13082 btst &30,%d4 # get SE
13083 beq.b e_pos # don't negate if pos
13084 neg.l %d1 # negate before subtracting
13085 e_pos:
# compensate for treating the 17-digit mantissa as a pure integer
13086 sub.l &16,%d1 # sub to compensate for shift of mant
13087 bge.b e_save # if still pos, do not neg
13088 neg.l %d1 # now negative, make pos and set SE
13089 or.l &0x40000000,%d4 # set SE in d4,
13090 or.l &0x40000000,(%a0) # and in working bcd
13091 e_save:
13092 mov.l %d1,-(%sp) # save exp on stack
13095 # Calculate mantissa:
13096 # 1. Calculate absolute value of mantissa in fp0 by mul and add.
13097 # 2. Correct for mantissa sign.
13098 # (i.e., all digits assumed left of the decimal point.)
13100 # Register usage:
13102 # calc_m:
13103 # (*) d0: temp digit storage
13104 # (*) d1: lword counter
13105 # (*) d2: digit count
13106 # (*) d3: offset pointer
13107 # ( ) d4: words 2 and 3 of bcd
13108 # ( ) a0: pointer to working bcd value
13109 # ( ) a6: pointer to original bcd value
13110 # (*) fp0: mantissa accumulator
13111 # ( ) FP_SCR1: working copy of original bcd value
13112 # ( ) L_SCR1: copy of original exponent word
# calc_m: accumulate the 17 bcd mantissa digits into fp0 via repeated
# fp0 = fp0*10 + digit. Exact because every intermediate fits in the
# 64-bit extended mantissa. m_sign then applies the SM sign bit.
13114 calc_m:
13115 mov.l &1,%d1 # word counter, init to 1
13116 fmov.s &0x00000000,%fp0 # accumulator
13119 # Since the packed number has a long word between the first & second parts,
13120 # get the integer digit then skip down & get the rest of the
13121 # mantissa. We will unroll the loop once.
13123 bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
13124 fadd.b %d0,%fp0 # add digit to sum in fp0
13127 # Get the rest of the mantissa.
13129 loadlw:
13130 mov.l (%a0,%d1.L*4),%d4 # load mantissa longword into d4
13131 mov.l &FSTRT,%d3 # counter to pick up digits
13132 mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
13133 md2b:
13134 fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
13135 bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
13136 fadd.b %d0,%fp0 # fp0 = fp0 + digit
13139 # If all the digits (8) in that long word have been converted (d2=0),
13140 # then inc d1 (=2) to point to the next long word and reset d3 to 0
13141 # to initialize the digit offset, and set d2 to 7 for the digit count;
13142 # else continue with this long word.
13144 addq.b &4,%d3 # advance d3 to the next digit
13145 dbf.w %d2,md2b # check for last digit in this lw
13146 nextlw:
13147 addq.l &1,%d1 # inc lw pointer in mantissa
13148 cmp.l %d1,&2 # test for last lw
13149 ble.b loadlw # if not, get last one
13151 # Check the sign of the mant and make the value in fp0 the same sign.
13153 m_sign:
13154 btst &31,(%a0) # test sign of the mantissa
13155 beq.b ap_st_z # if clear, go to append/strip zeros
13156 fneg.x %fp0 # if set, negate fp0
13158 # Append/strip zeros:
13160 # For adjusted exponents which have an absolute value greater than 27*,
13161 # this routine calculates the amount needed to normalize the mantissa
13162 # for the adjusted exponent. That number is subtracted from the exp
13163 # if the exp was positive, and added if it was negative. The purpose
13164 # of this is to reduce the value of the exponent and the possibility
13165 # of error in calculation of pwrten.
13167 # 1. Branch on the sign of the adjusted exponent.
13168 # 2p.(positive exp)
13169 # 2. Check M16 and the digits in lwords 2 and 3 in descending order.
13170 # 3. Add one for each zero encountered until a non-zero digit.
13171 # 4. Subtract the count from the exp.
13172 # 5. Check if the exp has crossed zero in #3 above; make the exp abs
13173 # and set SE.
13174 # 6. Multiply the mantissa by 10**count.
13175 # 2n.(negative exp)
13176 # 2. Check the digits in lwords 3 and 2 in descending order.
13177 # 3. Add one for each zero encountered until a non-zero digit.
13178 # 4. Add the count to the exp.
13179 # 5. Check if the exp has crossed zero in #3 above; clear SE.
13180 # 6. Divide the mantissa by 10**count.
13182 # *Why 27? If the adjusted exponent is within -28 < expA < 28, then
13183 # any adjustment due to append/strip zeros will drive the resultant
13184 # exponent towards zero. Since all pwrten constants with a power
13185 # of 27 or less are exact, there is no need to use this routine to
13186 # attempt to lessen the resultant exponent.
13188 # Register usage:
13190 # ap_st_z:
13191 # (*) d0: temp digit storage
13192 # (*) d1: zero count
13193 # (*) d2: digit count
13194 # (*) d3: offset pointer
13195 # ( ) d4: first word of bcd
13196 # (*) d5: lword counter
13197 # ( ) a0: pointer to working bcd value
13198 # ( ) FP_SCR1: working copy of original bcd value
13199 # ( ) L_SCR1: copy of original exponent word
13202 # First check the absolute value of the exponent to see if this
13203 # routine is necessary. If so, then check the sign of the exponent
13204 # and do append (+) or strip (-) zeros accordingly.
13205 # This section handles a positive adjusted exponent.
# ap_st_z / ap_st_n: append/strip-zeros pass. Only taken when |expA| > 27;
# counts leading (positive exp) or trailing (negative exp) zero digits and
# folds that count into the exponent, scaling the mantissa by 10**count to
# compensate. This keeps the later pwrten factor small enough to be exact.
13207 ap_st_z:
13208 mov.l (%sp),%d1 # load expA for range test
13209 cmp.l %d1,&27 # is abs(expA) <= 27?
13210 ble.w pwrten # if abs(expA) <28, skip ap/st zeros
13211 btst &30,(%a0) # check sign of exp
13212 bne.b ap_st_n # if neg, go to neg side
# positive exponent: count leading zeros starting at M16 (integer digit)
13213 clr.l %d1 # zero count reg
13214 mov.l (%a0),%d4 # load lword 1 to d4
13215 bfextu %d4{&28:&4},%d0 # get M16 in d0
13216 bne.b ap_p_fx # if M16 is non-zero, go fix exp
13217 addq.l &1,%d1 # inc zero count
13218 mov.l &1,%d5 # init lword counter
13219 mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
13220 bne.b ap_p_cl # if lw 2 is non-zero, go scan its digits
13221 addq.l &8,%d1 # and inc count by 8
13222 addq.l &1,%d5 # inc lword counter
13223 mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
13224 ap_p_cl:
13225 clr.l %d3 # init offset reg
13226 mov.l &7,%d2 # init digit counter
13227 ap_p_gd:
13228 bfextu %d4{%d3:&4},%d0 # get digit
13229 bne.b ap_p_fx # if non-zero, go to fix exp
13230 addq.l &4,%d3 # point to next digit
13231 addq.l &1,%d1 # inc digit counter
13232 dbf.w %d2,ap_p_gd # get next digit
13233 ap_p_fx:
13234 mov.l %d1,%d0 # copy zero count to d0
13235 mov.l (%sp),%d1 # get adjusted exp from memory
13236 sub.l %d0,%d1 # subtract count from exp
13237 bge.b ap_p_fm # if still pos, go to pwrten
# exponent crossed zero: store |exp| and mark it negative via SE
13238 neg.l %d1 # now its neg; get abs
13239 mov.l (%a0),%d4 # load lword 1 to d4
13240 or.l &0x40000000,%d4 # and set SE in d4
13241 or.l &0x40000000,(%a0) # and in memory
13243 # Calculate the mantissa multiplier to compensate for the stripping of
13244 # zeros from the mantissa.
13246 ap_p_fm:
# multiply fp0 by 10**count using the exact (round-to-nearest) table;
# count <= 16, and all powers through 10**27 are exact
13247 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13248 clr.l %d3 # init table index
13249 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13250 mov.l &3,%d2 # init d2 to count bits in counter
13251 ap_p_el:
13252 asr.l &1,%d0 # shift lsb into carry
13253 bcc.b ap_p_en # carry clear (bit=0): skip this factor
13254 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13255 ap_p_en:
13256 add.l &12,%d3 # inc d3 to next PTENxx table entry
13257 tst.l %d0 # check if d0 is zero
13258 bne.b ap_p_el # if not, get next bit
13259 fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
13260 bra.b pwrten # go calc pwrten
13262 # This section handles a negative adjusted exponent.
13264 ap_st_n:
# negative exponent: count trailing zeros, scanning lword 3 back to lword 2
13265 clr.l %d1 # clr counter
13266 mov.l &2,%d5 # set up d5 to point to lword 3
13267 mov.l (%a0,%d5.L*4),%d4 # get lword 3
13268 bne.b ap_n_cl # if not zero, check digits
13269 sub.l &1,%d5 # dec d5 to point to lword 2
13270 addq.l &8,%d1 # inc counter by 8
13271 mov.l (%a0,%d5.L*4),%d4 # get lword 2
13272 ap_n_cl:
13273 mov.l &28,%d3 # point to last digit
13274 mov.l &7,%d2 # init digit counter
13275 ap_n_gd:
13276 bfextu %d4{%d3:&4},%d0 # get digit
13277 bne.b ap_n_fx # if non-zero, go to exp fix
13278 subq.l &4,%d3 # point to previous digit
13279 addq.l &1,%d1 # inc digit counter
13280 dbf.w %d2,ap_n_gd # get next digit
13281 ap_n_fx:
13282 mov.l %d1,%d0 # copy counter to d0
13283 mov.l (%sp),%d1 # get adjusted exp from memory
13284 sub.l %d0,%d1 # subtract count from exp
13285 bgt.b ap_n_fm # if still pos, go fix mantissa
# exponent crossed zero: store |exp| and clear SE (now non-negative)
13286 neg.l %d1 # take abs of exp and clr SE
13287 mov.l (%a0),%d4 # load lword 1 to d4
13288 and.l &0xbfffffff,%d4 # and clr SE in d4
13289 and.l &0xbfffffff,(%a0) # and in memory
13291 # Calculate the mantissa multiplier to compensate for the appending of
13292 # zeros to the mantissa.
13294 ap_n_fm:
# divide fp0 by 10**count (exact table, count <= 16)
13295 lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13296 clr.l %d3 # init table index
13297 fmov.s &0x3f800000,%fp1 # init fp1 to 1
13298 mov.l &3,%d2 # init d2 to count bits in counter
13299 ap_n_el:
13300 asr.l &1,%d0 # shift lsb into carry
13301 bcc.b ap_n_en # carry clear (bit=0): skip this factor
13302 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13303 ap_n_en:
13304 add.l &12,%d3 # inc d3 to next PTENxx table entry
13305 tst.l %d0 # check if d0 is zero
13306 bne.b ap_n_el # if not, get next bit
13307 fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
13310 # Calculate power-of-ten factor from adjusted and shifted exponent.
13312 # Register usage:
13314 # pwrten:
13315 # (*) d0: temp
13316 # ( ) d1: exponent
13317 # (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13318 # (*) d3: FPCR work copy
13319 # ( ) d4: first word of bcd
13320 # (*) a1: RTABLE pointer
13321 # calc_p:
13322 # (*) d0: temp
13323 # ( ) d1: exponent
13324 # (*) d3: PWRTxx table index
13325 # ( ) a0: pointer to working copy of bcd
13326 # (*) a1: PWRTxx pointer
13327 # (*) fp1: power-of-ten accumulator
13329 # Pwrten calculates the exponent factor in the selected rounding mode
13330 # according to the following table:
13332 # Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
13334 # ANY ANY RN RN
13336 # + + RP RP
13337 # - + RP RM
13338 # + - RP RM
13339 # - - RP RP
13341 # + + RM RM
13342 # - + RM RP
13343 # + - RM RP
13344 # - - RM RM
13346 # + + RZ RM
13347 # - + RZ RM
13348 # + - RZ RP
13349 # - - RZ RP
# pwrten: compute 10**|exp| in fp1 using a directed-rounding power-of-ten
# table. The table (PTENRN/PTENRM/PTENRP) and rounding mode are chosen from
# RTABLE, indexed by {FPCR rounding mode, SM, SE}, per the mapping in the
# comment table above. calc_p then builds the factor one binary bit of the
# exponent at a time (12 bytes per extended-precision table entry).
13352 pwrten:
13353 mov.l USER_FPCR(%a6),%d3 # get user's FPCR
13354 bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
13355 mov.l (%a0),%d4 # reload 1st bcd word to d4
13356 asl.l &2,%d2 # format d2 to be
13357 bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
13358 add.l %d0,%d2 # in d2 as index into RTABLE
13359 lea.l RTABLE(%pc),%a1 # load rtable base
13360 mov.b (%a1,%d2),%d0 # load new rounding bits from table
13361 clr.l %d3 # clear d3 to force no exc and extended
13362 bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
13363 fmov.l %d3,%fpcr # write new FPCR
# decode the table-selector bits: shift each into carry in turn
13364 asr.l &1,%d0 # write correct PTENxx table
13365 bcc.b not_rp # to a1
13366 lea.l PTENRP(%pc),%a1 # it is RP
13367 bra.b calc_p # go to init section
13368 not_rp:
13369 asr.l &1,%d0 # keep checking
13370 bcc.b not_rm
13371 lea.l PTENRM(%pc),%a1 # it is RM
13372 bra.b calc_p # go to init section
13373 not_rm:
13374 lea.l PTENRN(%pc),%a1 # it is RN
13375 calc_p:
13376 mov.l %d1,%d0 # copy exp to d0;use d0
13377 bpl.b no_neg # if exp is negative,
13378 neg.l %d0 # invert it
13379 or.l &0x40000000,(%a0) # and set SE bit
13380 no_neg:
13381 clr.l %d3 # table index
13382 fmov.s &0x3f800000,%fp1 # init fp1 to 1
# fp1 = product of 10**(2**n) entries for each set bit n of the exponent
13383 e_loop:
13384 asr.l &1,%d0 # shift next bit into carry
13385 bcc.b e_next # if zero, skip the mul
13386 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13387 e_next:
13388 add.l &12,%d3 # inc d3 to next PTENxx table entry
13389 tst.l %d0 # check if d0 is zero
13390 bne.b e_loop # not zero, continue shifting
13393 # Check the sign of the adjusted exp and make the value in fp0 the
13394 # same sign. If the exp was pos then multiply fp1*fp0;
13395 # else divide fp0/fp1.
13397 # Register Usage:
13398 # norm:
13399 # ( ) a0: pointer to working bcd value
13400 # (*) fp0: mantissa accumulator
13401 # ( ) fp1: scaling factor - 10**(abs(exp))
# pnorm: apply the 10**|exp| factor (fp1) to the mantissa (fp0): multiply
# when the adjusted exponent is positive (SE clear), divide when negative.
# end_dec folds any inexactness into INEX1/AINEX, pops the saved exponent,
# restores fp1/d2-d5, and clears FPCR/FPSR before returning.
13403 pnorm:
13404 btst &30,(%a0) # test the sign of the exponent
13405 beq.b mul # if clear, go to multiply
13406 div:
13407 fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
13408 bra.b end_dec
13409 mul:
13410 fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
13413 # Clean up and return with result in fp0.
13415 # If the final mul/div in decbin incurred an inex exception,
13416 # it will be inex2, but will be reported as inex1 by get_op.
13418 end_dec:
13419 fmov.l %fpsr,%d0 # get status register
13420 bclr &inex2_bit+8,%d0 # test for inex2 and clear it
13421 beq.b no_exc # skip this if no exc
13422 ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13423 no_exc:
13424 add.l &0x4,%sp # clear 1 lw param
13425 fmovm.x (%sp)+,&0x40 # restore fp1
13426 movm.l (%sp)+,&0x3c # restore d2-d5
13427 fmov.l &0x0,%fpcr
13428 fmov.l &0x0,%fpsr
13431 #########################################################################
13432 # bindec(): Converts an input in extended precision format to bcd format#
13434 # INPUT *************************************************************** #
13435 # a0 = pointer to the input extended precision value in memory. #
13436 # the input may be either normalized, unnormalized, or #
13437 # denormalized. #
13438 # d0 = contains the k-factor sign-extended to 32-bits. #
13440 # OUTPUT ************************************************************** #
13441 # FP_SCR0(a6) = bcd format result on the stack. #
13443 # ALGORITHM *********************************************************** #
13445 # A1. Set RM and size ext; Set SIGMA = sign of input. #
13446 # The k-factor is saved for use in d7. Clear the #
13447 # BINDEC_FLG for separating normalized/denormalized #
13448 # input. If input is unnormalized or denormalized, #
13449 # normalize it. #
13451 # A2. Set X = abs(input). #
13453 # A3. Compute ILOG. #
13454 # ILOG is the log base 10 of the input value. It is #
13455 # approximated by adding e + 0.f when the original #
13456 # value is viewed as 2^^e * 1.f in extended precision. #
13457 # This value is stored in d6. #
13459 # A4. Clr INEX bit. #
13460 # The operation in A3 above may have set INEX2. #
13462 # A5. Set ICTR = 0; #
13463 # ICTR is a flag used in A13. It must be set before the #
13464 # loop entry A6. #
13466 # A6. Calculate LEN. #
13467 # LEN is the number of digits to be displayed. The #
13468 # k-factor can dictate either the total number of digits, #
13469 # if it is a positive number, or the number of digits #
13470 # after the decimal point which are to be included as #
13471 # significant. See the 68882 manual for examples. #
13472 # If LEN is computed to be greater than 17, set OPERR in #
13473 # USER_FPSR. LEN is stored in d4. #
13475 # A7. Calculate SCALE. #
13476 # SCALE is equal to 10^ISCALE, where ISCALE is the number #
13477 # of decimal places needed to insure LEN integer digits #
13478 # in the output before conversion to bcd. LAMBDA is the #
13479 # sign of ISCALE, used in A9. Fp1 contains #
13480 # 10^^(abs(ISCALE)) using a rounding mode which is a #
13481 # function of the original rounding mode and the signs #
13482 # of ISCALE and X. A table is given in the code. #
13484 # A8. Clr INEX; Force RZ. #
13485 # The operation in A3 above may have set INEX2. #
13486 # RZ mode is forced for the scaling operation to insure #
13487 # only one rounding error. The grs bits are collected in #
13488 # the INEX flag for use in A10. #
13490 # A9. Scale X -> Y. #
13491 # The mantissa is scaled to the desired number of #
13492 # significant digits. The excess digits are collected #
13493 # in INEX2. #
13495 # A10. Or in INEX. #
13496 # If INEX is set, round error occurred. This is #
13497 # compensated for by 'or-ing' in the INEX2 flag to #
13498 # the lsb of Y. #
13500 # A11. Restore original FPCR; set size ext. #
13501 # Perform FINT operation in the user's rounding mode. #
13502 # Keep the size to extended. #
13504 # A12. Calculate YINT = FINT(Y) according to user's rounding #
13505 # mode. The FPSP routine sintd0 is used. The output #
13506 # is in fp0. #
13508 # A13. Check for LEN digits. #
13509 # If the int operation results in more than LEN digits, #
13510 # or less than LEN -1 digits, adjust ILOG and repeat from #
13511 # A6. This test occurs only on the first pass. If the #
13512 # result is exactly 10^LEN, decrement ILOG and divide #
13513 # the mantissa by 10. #
13515 # A14. Convert the mantissa to bcd. #
13516 # The binstr routine is used to convert the LEN digit #
13517 # mantissa to bcd in memory. The input to binstr is #
13518 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13519 # such that the decimal point is to the left of bit 63. #
13520 # The bcd digits are stored in the correct position in #
13521 # the final string area in memory. #
13523 # A15. Convert the exponent to bcd. #
13524 # As in A14 above, the exp is converted to bcd and the #
13525 # digits are stored in the final string. #
13526 # Test the length of the final exponent string. If the #
13527 # length is 4, set operr. #
13529 # A16. Write sign bits to final string. #
13531 #########################################################################
13533 set BINDEC_FLG, EXC_TEMP # DENORM flag (non-zero => input was a denorm)
13535 # Constants in extended precision
13536 PLOG2:
13537 long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000 # log10(2), mantissa rounded down
13538 PLOG2UP1:
13539 long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000 # log10(2), mantissa 1 ulp up
13541 # Constants in single precision
13542 FONE:
13543 long 0x3F800000,0x00000000,0x00000000,0x00000000 # 1.0 (single)
13544 FTWO:
13545 long 0x40000000,0x00000000,0x00000000,0x00000000 # 2.0 (single)
13546 FTEN:
13547 long 0x41200000,0x00000000,0x00000000,0x00000000 # 10.0 (single)
13548 F4933:
13549 long 0x459A2800,0x00000000,0x00000000,0x00000000 # 4933.0 (single)
# Rounding-mode remap table used in A7: indexed by
# (FPCR rmode * 4) + (LAMBDA * 2) + sign(X); entry is the rmode to force.
13551 RBDTBL:
13552 byte 0,0,0,0
13553 byte 3,3,2,2
13554 byte 3,2,2,3
13555 byte 2,3,3,2
13557 # Implementation Notes:
13559 # The registers are used as follows:
13561 # d0: scratch; LEN input to binstr
13562 # d1: scratch
13563 # d2: upper 32-bits of mantissa for binstr
13564 # d3: scratch;lower 32-bits of mantissa for binstr
13565 # d4: LEN
13566 # d5: LAMBDA/ICTR
13567 # d6: ILOG
13568 # d7: k-factor
13569 # a0: ptr for original operand/final result
13570 # a1: scratch pointer
13571 # a2: pointer to FP_X; abs(original value) in ext
13572 # fp0: scratch
13573 # fp1: scratch
13574 # fp2: scratch
13575 # F_SCR1:
13576 # F_SCR2:
13577 # L_SCR1:
13578 # L_SCR2:
13580 global bindec
13581 bindec:
13582 movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13583 fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13585 # A1. Set RM and size ext. Set SIGMA = sign input;
13586 # The k-factor is saved for use in d7. Clear BINDEC_FLG for
13587 # separating normalized/denormalized input. If the input
13588 # is a denormalized number, set the BINDEC_FLG memory word
13589 # to signal denorm. If the input is unnormalized, normalize
13590 # the input and test for denormalized result.
13592 fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13593 mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
13594 mov.l %d0,%d7 # move k-factor to d7
13596 clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13597 cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13598 bne.w A2_str # no; input is a NORM
13601 # Normalize the denorm
13603 un_de_norm:
13604 mov.w (%a0),%d0
13605 and.w &0x7fff,%d0 # strip sign of normalized exp
13606 mov.l 4(%a0),%d1
13607 mov.l 8(%a0),%d2
# Shift the 64-bit mantissa d1:d2 left one bit per pass, decrementing
# the exponent, until the MSB of d1 is set (d1 tests negative).
13608 norm_loop:
13609 sub.w &1,%d0
13610 lsl.l &1,%d2
13611 roxl.l &1,%d1
13612 tst.l %d1
13613 bge.b norm_loop
13615 # Test if the normalized input is denormalized
13617 tst.w %d0
13618 bgt.b pos_exp # if greater than zero, it is a norm
13619 st BINDEC_FLG(%a6) # set flag for denorm
13620 pos_exp:
13621 and.w &0x7fff,%d0 # strip sign of normalized exp
13622 mov.w %d0,(%a0)
13623 mov.l %d1,4(%a0)
13624 mov.l %d2,8(%a0)
13626 # A2. Set X = abs(input).
13628 A2_str:
13629 mov.l (%a0),FP_SCR1(%a6) # move input to work space
13630 mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
13631 mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
13632 and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
13634 # A3. Compute ILOG.
13635 # ILOG is the log base 10 of the input value. It is approx-
13636 # imated by adding e + 0.f when the original value is viewed
13637 # as 2^^e * 1.f in extended precision. This value is stored
13638 # in d6.
13640 # Register usage:
13641 # Input/Output
13642 # d0: k-factor/exponent
13643 # d2: x/x
13644 # d3: x/x
13645 # d4: x/x
13646 # d5: x/x
13647 # d6: x/ILOG
13648 # d7: k-factor/Unchanged
13649 # a0: ptr for original operand/final result
13650 # a1: x/x
13651 # a2: x/x
13652 # fp0: x/float(ILOG)
13653 # fp1: x/x
13654 # fp2: x/x
13655 # F_SCR1:x/x
13656 # F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13657 # L_SCR1:x/x
13658 # L_SCR2:first word of X packed/Unchanged
13660 tst.b BINDEC_FLG(%a6) # check for denorm
13661 beq.b A3_cont # if clr, continue with norm
13662 mov.l &-4933,%d6 # force ILOG = -4933
13663 bra.b A4_str
13664 A3_cont:
13665 mov.w FP_SCR1(%a6),%d0 # move exp to d0
13666 mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13667 fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13668 sub.w &0x3fff,%d0 # strip off bias
13669 fadd.w %d0,%fp0 # add in exp
13670 fsub.s FONE(%pc),%fp0 # subtract off 1.0
# (e + 0.f) is scaled by log10(2): the rounded-down constant is used for
# positive values and the rounded-up constant for negative ones —
# NOTE(review): presumably so the truncating fmov.l conversion below
# biases ILOG consistently toward minus infinity; confirm against the
# 68882 bindec algorithm description.
13671 fbge.w pos_res # if pos, branch
13672 fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
13673 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13674 bra.b A4_str # go move out ILOG
13675 pos_res:
13676 fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
13677 fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13680 # A4. Clr INEX bit.
13681 # The operation in A3 above may have set INEX2.
13683 A4_str:
13684 fmov.l &0,%fpsr # zero all of fpsr - nothing needed
13687 # A5. Set ICTR = 0;
13688 # ICTR is a flag used in A13. It must be set before the
13689 # loop entry A6. The lower word of d5 is used for ICTR.
13691 clr.w %d5 # clear ICTR
13693 # A6. Calculate LEN.
13694 # LEN is the number of digits to be displayed. The k-factor
13695 # can dictate either the total number of digits, if it is
13696 # a positive number, or the number of digits after the
13697 # original decimal point which are to be included as
13698 # significant. See the 68882 manual for examples.
13699 # If LEN is computed to be greater than 17, set OPERR in
13700 # USER_FPSR. LEN is stored in d4.
13702 # Register usage:
13703 # Input/Output
13704 # d0: exponent/Unchanged
13705 # d2: x/x/scratch
13706 # d3: x/x
13707 # d4: exc picture/LEN
13708 # d5: ICTR/Unchanged
13709 # d6: ILOG/Unchanged
13710 # d7: k-factor/Unchanged
13711 # a0: ptr for original operand/final result
13712 # a1: x/x
13713 # a2: x/x
13714 # fp0: float(ILOG)/Unchanged
13715 # fp1: x/x
13716 # fp2: x/x
13717 # F_SCR1:x/x
13718 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13719 # L_SCR1:x/x
13720 # L_SCR2:first word of X packed/Unchanged
# A6_str is also the re-entry point when A13 adjusts ILOG and retries.
13722 A6_str:
13723 tst.l %d7 # branch on sign of k
13724 ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13725 mov.l %d7,%d4 # if k > 0, LEN = k
13726 bra.b len_ck # skip to LEN check
13727 k_neg:
13728 mov.l %d6,%d4 # first load ILOG to d4
13729 sub.l %d7,%d4 # subtract off k
13730 addq.l &1,%d4 # add in the 1
13731 len_ck:
13732 tst.l %d4 # LEN check: branch on sign of LEN
13733 ble.b LEN_ng # if neg, set LEN = 1
13734 cmp.l %d4,&17 # test if LEN > 17
13735 ble.b A7_str # if not, forget it
13736 mov.l &17,%d4 # set max LEN = 17
13737 tst.l %d7 # if negative, never set OPERR
13738 ble.b A7_str # if positive, continue
13739 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13740 bra.b A7_str # finished here
13741 LEN_ng:
13742 mov.l &1,%d4 # min LEN is 1
13745 # A7. Calculate SCALE.
13746 # SCALE is equal to 10^ISCALE, where ISCALE is the number
13747 # of decimal places needed to insure LEN integer digits
13748 # in the output before conversion to bcd. LAMBDA is the sign
13749 # of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13750 # the rounding mode as given in the following table (see
13751 # Coonen, p. 7.23 as ref.; however, the SCALE variable is
13752 # of opposite sign in bindec.sa from Coonen).
13754 # Initial USE
13755 # FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13756 # ----------------------------------------------
13757 # RN 00 0 0 00/0 RN
13758 # RN 00 0 1 00/0 RN
13759 # RN 00 1 0 00/0 RN
13760 # RN 00 1 1 00/0 RN
13761 # RZ 01 0 0 11/3 RP
13762 # RZ 01 0 1 11/3 RP
13763 # RZ 01 1 0 10/2 RM
13764 # RZ 01 1 1 10/2 RM
13765 # RM 10 0 0 11/3 RP
13766 # RM 10 0 1 10/2 RM
13767 # RM 10 1 0 10/2 RM
13768 # RM 10 1 1 11/3 RP
13769 # RP 11 0 0 10/2 RM
13770 # RP 11 0 1 11/3 RP
13771 # RP 11 1 0 11/3 RP
13772 # RP 11 1 1 10/2 RM
13774 # Register usage:
13775 # Input/Output
13776 # d0: exponent/scratch - final is 0
13777 # d2: x/0 or 24 for A9
13778 # d3: x/scratch - offset ptr into PTENRM array
13779 # d4: LEN/Unchanged
13780 # d5: 0/ICTR:LAMBDA
13781 # d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13782 # d7: k-factor/Unchanged
13783 # a0: ptr for original operand/final result
13784 # a1: x/ptr to PTENRM array
13785 # a2: x/x
13786 # fp0: float(ILOG)/Unchanged
13787 # fp1: x/10^ISCALE
13788 # fp2: x/x
13789 # F_SCR1:x/x
13790 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13791 # L_SCR1:x/x
13792 # L_SCR2:first word of X packed/Unchanged
13794 A7_str:
13795 tst.l %d7 # test sign of k
13796 bgt.b k_pos # if pos and > 0, skip this
13797 cmp.l %d7,%d6 # test k - ILOG
13798 blt.b k_pos # if ILOG >= k, skip this
13799 mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
13800 k_pos:
13801 mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
13802 addq.l &1,%d0 # add the 1
13803 sub.l %d4,%d0 # sub off LEN
13804 swap %d5 # use upper word of d5 for LAMBDA
13805 clr.w %d5 # set it zero initially
13806 clr.w %d2 # set up d2 for very small case
13807 tst.l %d0 # test sign of ISCALE
13808 bge.b iscale # if pos, skip next inst
13809 addq.w &1,%d5 # if neg, set LAMBDA true
13810 cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13811 bgt.b no_inf # if false, skip rest
13812 add.l &24,%d0 # add in 24 to iscale
13813 mov.l &24,%d2 # put 24 in d2 for A9
13814 no_inf:
13815 neg.l %d0 # and take abs of ISCALE
13816 iscale:
13817 fmov.s FONE(%pc),%fp1 # init fp1 to 1
# Build the RBDTBL index = rmode*4 + LAMBDA*2 + sign(X) (see table above).
13818 bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13819 lsl.w &1,%d1 # put them in bits 2:1
13820 add.w %d5,%d1 # add in LAMBDA
13821 lsl.w &1,%d1 # put them in bits 3:1
13822 tst.l L_SCR2(%a6) # test sign of original x
13823 bge.b x_pos # if pos, don't set bit 0
13824 addq.l &1,%d1 # if neg, set bit 0
13825 x_pos:
13826 lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13827 mov.b (%a2,%d1),%d3 # load d3 with new rmode
13828 lsl.l &4,%d3 # put bits in proper position
13829 fmov.l %d3,%fpcr # load bits into fpu
13830 lsr.l &4,%d3 # put bits in proper position
13831 tst.b %d3 # decode new rmode for pten table
13832 bne.b not_rn # if zero, it is RN
13833 lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13834 bra.b rmode # exit decode
13835 not_rn:
13836 lsr.b &1,%d3 # get lsb in carry
13837 bcc.b not_rp2 # if carry clear, it is RM
13838 lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13839 bra.b rmode # exit decode
13840 not_rp2:
13841 lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13842 rmode:
13843 clr.l %d3 # clr table index
# Binary exponentiation: for each set bit i of abs(ISCALE), multiply fp1
# by the table entry 10^(2^i); entries are 12 bytes of extended precision.
13844 e_loop2:
13845 lsr.l &1,%d0 # shift next bit into carry
13846 bcc.b e_next2 # if zero, skip the mul
13847 fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13848 e_next2:
13849 add.l &12,%d3 # inc d3 to next pwrten table entry
13850 tst.l %d0 # test if ISCALE is zero
13851 bne.b e_loop2 # if not, loop
13853 # A8. Clr INEX; Force RZ.
13854 # The operation in A3 above may have set INEX2.
13855 # RZ mode is forced for the scaling operation to insure
13856 # only one rounding error. The grs bits are collected in
13857 # the INEX flag for use in A10.
13859 # Register usage:
13860 # Input/Output
13862 fmov.l &0,%fpsr # clr INEX
13863 fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13865 # A9. Scale X -> Y.
13866 # The mantissa is scaled to the desired number of significant
13867 # digits. The excess digits are collected in INEX2. If mul,
13868 # Check d2 for excess 10 exponential value. If not zero,
13869 # the iscale value would have caused the pwrten calculation
13870 # to overflow. Only a negative iscale can cause this, so
13871 # multiply by 10^(d2), which is now only allowed to be 24,
13872 # with a multiply by 10^8 and 10^16, which is exact since
13873 # 10^24 is exact. If the input was denormalized, we must
13874 # create a busy stack frame with the mul command and the
13875 # two operands, and allow the fpu to complete the multiply.
13877 # Register usage:
13878 # Input/Output
13879 # d0: FPCR with RZ mode/Unchanged
13880 # d2: 0 or 24/unchanged
13881 # d3: x/x
13882 # d4: LEN/Unchanged
13883 # d5: ICTR:LAMBDA
13884 # d6: ILOG/Unchanged
13885 # d7: k-factor/Unchanged
13886 # a0: ptr for original operand/final result
13887 # a1: ptr to PTENRM array/Unchanged
13888 # a2: x/x
13889 # fp0: float(ILOG)/X adjusted for SCALE (Y)
13890 # fp1: 10^ISCALE/Unchanged
13891 # fp2: x/x
13892 # F_SCR1:x/x
13893 # F_SCR2:Abs(X) with $3fff exponent/Unchanged
13894 # L_SCR1:x/x
13895 # L_SCR2:first word of X packed/Unchanged
13897 A9_str:
13898 fmov.x (%a0),%fp0 # load X from memory
13899 fabs.x %fp0 # use abs(X)
13900 tst.w %d5 # LAMBDA is in lower word of d5
13901 bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13902 fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13903 bra.w A10_st # branch to A10
13905 sc_mul:
13906 tst.b BINDEC_FLG(%a6) # check for denorm
13907 beq.w A9_norm # if norm, continue with mul
13909 # for DENORM, we must calculate:
13910 # fp0 = input_op * 10^ISCALE * 10^24
13911 # since the input operand is a DENORM, we can't multiply it directly.
13912 # so, we do the multiplication of the exponents and mantissas separately.
13913 # in this way, we avoid underflow on intermediate stages of the
13914 # multiplication and guarantee a result without exception.
13915 fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13917 mov.w (%sp),%d3 # grab exponent
13918 andi.w &0x7fff,%d3 # clear sign
13919 ori.w &0x8000,(%a0) # make DENORM exp negative
13920 add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13921 subi.w &0x3fff,%d3 # subtract BIAS
13922 add.w 36(%a1),%d3
13923 subi.w &0x3fff,%d3 # subtract BIAS
13924 add.w 48(%a1),%d3
13925 subi.w &0x3fff,%d3 # subtract BIAS
13927 bmi.w sc_mul_err # if result is DENORM, punt!!!
13929 andi.w &0x8000,(%sp) # keep sign
13930 or.w %d3,(%sp) # insert new exponent
13931 andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13932 mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13933 mov.l 0x4(%a0),-(%sp)
13934 mov.l &0x3fff0000,-(%sp) # force exp to zero
13935 fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13936 fmul.x (%sp)+,%fp0
13938 # fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13939 # fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13940 mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13941 mov.l 36+4(%a1),-(%sp)
13942 mov.l &0x3fff0000,-(%sp) # force exp to zero
13943 mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13944 mov.l 48+4(%a1),-(%sp)
13945 mov.l &0x3fff0000,-(%sp)# force exp to zero
13946 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13947 fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13948 bra.b A10_st
# NOTE(review): deliberate self-branch — hangs on the "result would be a
# DENORM" case flagged above, which is treated as can't-happen. Verify
# that an infinite loop (rather than an exception path) is the intended
# failure behavior here.
13950 sc_mul_err:
13951 bra.b sc_mul_err
13953 A9_norm:
13954 tst.w %d2 # test for small exp case
13955 beq.b A9_con # if zero, continue as normal
13956 fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13957 fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13958 A9_con:
13959 fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13961 # A10. Or in INEX.
13962 # If INEX is set, round error occurred. This is compensated
13963 # for by 'or-ing' in the INEX2 flag to the lsb of Y.
13965 # Register usage:
13966 # Input/Output
13967 # d0: FPCR with RZ mode/FPSR with INEX2 isolated
13968 # d2: x/x
13969 # d3: x/x
13970 # d4: LEN/Unchanged
13971 # d5: ICTR:LAMBDA
13972 # d6: ILOG/Unchanged
13973 # d7: k-factor/Unchanged
13974 # a0: ptr for original operand/final result
13975 # a1: ptr to PTENxx array/Unchanged
13976 # a2: x/ptr to FP_SCR1(a6)
13977 # fp0: Y/Y with lsb adjusted
13978 # fp1: 10^ISCALE/Unchanged
13979 # fp2: x/x
13981 A10_st:
13982 fmov.l %fpsr,%d0 # get FPSR
13983 fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13984 lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13985 btst &9,%d0 # check if INEX2 set
13986 beq.b A11_st # if clear, skip rest
13987 or.l &1,8(%a2) # or in 1 to lsb of mantissa
13988 fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13991 # A11. Restore original FPCR; set size ext.
13992 # Perform FINT operation in the user's rounding mode. Keep
13993 # the size to extended. The sintdo entry point in the sint
13994 # routine expects the FPCR value to be in USER_FPCR for
13995 # mode and precision. The original FPCR is saved in L_SCR1.
13997 A11_st:
13998 mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
13999 and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
14000 # ;block exceptions
14003 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
14004 # The FPSP routine sintd0 is used. The output is in fp0.
14006 # Register usage:
14007 # Input/Output
14008 # d0: FPSR with AINEX cleared/FPCR with size set to ext
14009 # d2: x/x/scratch
14010 # d3: x/x
14011 # d4: LEN/Unchanged
14012 # d5: ICTR:LAMBDA/Unchanged
14013 # d6: ILOG/Unchanged
14014 # d7: k-factor/Unchanged
14015 # a0: ptr for original operand/src ptr for sintdo
14016 # a1: ptr to PTENxx array/Unchanged
14017 # a2: ptr to FP_SCR1(a6)/Unchanged
14018 # a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14019 # fp0: Y/YINT
14020 # fp1: 10^ISCALE/Unchanged
14021 # fp2: x/x
14022 # F_SCR1:x/x
14023 # F_SCR2:Y adjusted for inex/Y with original exponent
14024 # L_SCR1:x/original USER_FPCR
14025 # L_SCR2:first word of X packed/Unchanged
14027 A12_st:
14028 movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
14029 mov.l L_SCR1(%a6),-(%sp)
14030 mov.l L_SCR2(%a6),-(%sp)
14032 lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14033 fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14034 tst.l L_SCR2(%a6) # test sign of original operand
14035 bge.b do_fint12 # if pos, use Y
14036 or.l &0x80000000,(%a0) # if neg, use -Y
14037 do_fint12:
14038 mov.l USER_FPSR(%a6),-(%sp)
14039 # bsr sintdo # sint routine returns int in fp0
# The original bsr to sintdo (above) is replaced by an inline fint under
# the user's FPCR; the commented-out "##" lines are the remnants of the
# earlier sequence.
14041 fmov.l USER_FPCR(%a6),%fpcr
14042 fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14043 ## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14044 ## andi.l &0x00000030,%d0
14045 ## fmov.l %d0,%fpcr
14046 fint.x FP_SCR1(%a6),%fp0 # do fint()
14047 fmov.l %fpsr,%d0
14048 or.w %d0,FPSR_EXCEPT(%a6)
14049 ## fmov.l &0x0,%fpcr
14050 ## fmov.l %fpsr,%d0 # don't keep ccodes
14051 ## or.w %d0,FPSR_EXCEPT(%a6)
14053 mov.b (%sp),USER_FPSR(%a6)
14054 add.l &4,%sp
14056 mov.l (%sp)+,L_SCR2(%a6)
14057 mov.l (%sp)+,L_SCR1(%a6)
14058 movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14060 mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14061 mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14063 # A13. Check for LEN digits.
14064 # If the int operation results in more than LEN digits,
14065 # or less than LEN -1 digits, adjust ILOG and repeat from
14066 # A6. This test occurs only on the first pass. If the
14067 # result is exactly 10^LEN, decrement ILOG and divide
14068 # the mantissa by 10. The calculation of 10^LEN cannot
14069 # be inexact, since all powers of ten up to 10^27 are exact
14070 # in extended precision, so the use of a previous power-of-ten
14071 # table will introduce no error.
14074 # Register usage:
14075 # Input/Output
14076 # d0: FPCR with size set to ext/scratch final = 0
14077 # d2: x/x
14078 # d3: x/scratch final = x
14079 # d4: LEN/LEN adjusted
14080 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14081 # d6: ILOG/ILOG adjusted
14082 # d7: k-factor/Unchanged
14083 # a0: pointer into memory for packed bcd string formation
14084 # a1: ptr to PTENxx array/Unchanged
14085 # a2: ptr to FP_SCR1(a6)/Unchanged
14086 # fp0: int portion of Y/abs(YINT) adjusted
14087 # fp1: 10^ISCALE/Unchanged
14088 # fp2: x/10^LEN
14089 # F_SCR1:x/x
14090 # F_SCR2:Y with original exponent/Unchanged
14091 # L_SCR1:original USER_FPCR/Unchanged
14092 # L_SCR2:first word of X packed/Unchanged
14094 A13_st:
14095 swap %d5 # put ICTR in lower word of d5
14096 tst.w %d5 # check if ICTR = 0
14097 bne not_zr # if non-zero, go to second test
14099 # Compute 10^(LEN-1)
14101 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14102 mov.l %d4,%d0 # put LEN in d0
14103 subq.l &1,%d0 # d0 = LEN -1
14104 clr.l %d3 # clr table index
14105 l_loop:
14106 lsr.l &1,%d0 # shift next bit into carry
14107 bcc.b l_next # if zero, skip the mul
14108 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14109 l_next:
14110 add.l &12,%d3 # inc d3 to next pwrten table entry
14111 tst.l %d0 # test if LEN is zero
14112 bne.b l_loop # if not, loop
14114 # 10^LEN-1 is computed for this test and A14. If the input was
14115 # denormalized, check only the case in which YINT > 10^LEN.
14117 tst.b BINDEC_FLG(%a6) # check if input was norm
14118 beq.b A13_con # if norm, continue with checking
14119 fabs.x %fp0 # take abs of YINT
14120 bra test_2
14122 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14124 A13_con:
14125 fabs.x %fp0 # take abs of YINT
14126 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14127 fbge.w test_2 # if greater, do next test
14128 subq.l &1,%d6 # subtract 1 from ILOG
14129 mov.w &1,%d5 # set ICTR
14130 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14131 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14132 bra.w A6_str # return to A6 and recompute YINT
14133 test_2:
14134 fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14135 fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14136 fblt.w A14_st # if less, all is ok, go to A14
14137 fbgt.w fix_ex # if greater, fix and redo
14138 fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14139 addq.l &1,%d6 # and inc ILOG
14140 bra.b A14_st # and continue elsewhere
14141 fix_ex:
14142 addq.l &1,%d6 # increment ILOG by 1
14143 mov.w &1,%d5 # set ICTR
14144 fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14145 bra.w A6_str # return to A6 and recompute YINT
14147 # Since ICTR <> 0, we have already been through one adjustment,
14148 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14149 # 10^LEN is again computed using whatever table is in a1 since the
14150 # value calculated cannot be inexact.
14152 not_zr:
14153 fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14154 mov.l %d4,%d0 # put LEN in d0
14155 clr.l %d3 # clr table index
14156 z_loop:
14157 lsr.l &1,%d0 # shift next bit into carry
14158 bcc.b z_next # if zero, skip the mul
14159 fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14160 z_next:
14161 add.l &12,%d3 # inc d3 to next pwrten table entry
14162 tst.l %d0 # test if LEN is zero
14163 bne.b z_loop # if not, loop
14164 fabs.x %fp0 # get abs(YINT)
14165 fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14166 fbneq.w A14_st # if not, skip this
14167 fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14168 addq.l &1,%d6 # and inc ILOG by 1
14169 addq.l &1,%d4 # and inc LEN
14170 fmul.s FTEN(%pc),%fp2 # LEN was incremented, so get 10^LEN
14172 # A14. Convert the mantissa to bcd.
14173 # The binstr routine is used to convert the LEN digit
14174 # mantissa to bcd in memory. The input to binstr is
14175 # to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14176 # such that the decimal point is to the left of bit 63.
14177 # The bcd digits are stored in the correct position in
14178 # the final string area in memory.
14181 # Register usage:
14182 # Input/Output
14183 # d0: x/LEN call to binstr - final is 0
14184 # d1: x/0
14185 # d2: x/ms 32-bits of mant of abs(YINT)
14186 # d3: x/ls 32-bits of mant of abs(YINT)
14187 # d4: LEN/Unchanged
14188 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14189 # d6: ILOG
14190 # d7: k-factor/Unchanged
14191 # a0: pointer into memory for packed bcd string formation
14192 # /ptr to first mantissa byte in result string
14193 # a1: ptr to PTENxx array/Unchanged
14194 # a2: ptr to FP_SCR1(a6)/Unchanged
14195 # fp0: int portion of Y/abs(YINT) adjusted
14196 # fp1: 10^ISCALE/Unchanged
14197 # fp2: 10^LEN/Unchanged
14198 # F_SCR1:x/Work area for final result
14199 # F_SCR2:Y with original exponent/Unchanged
14200 # L_SCR1:original USER_FPCR/Unchanged
14201 # L_SCR2:first word of X packed/Unchanged
14203 A14_st:
14204 fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14205 fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14206 lea.l FP_SCR0(%a6),%a0
14207 fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14208 mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14209 mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14210 clr.l 4(%a0) # zero word 2 of FP_RES
14211 clr.l 8(%a0) # zero word 3 of FP_RES
14212 mov.l (%a0),%d0 # move exponent to d0
14213 swap %d0 # put exponent in lower word
14214 beq.b no_sft # if zero, don't shift
14215 sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14216 tst.l %d0 # check if > 1
14217 bgt.b no_sft # if so, don't shift
14218 neg.l %d0 # make exp positive
14219 m_loop:
14220 lsr.l &1,%d2 # shift d2:d3 right, add 0s
14221 roxr.l &1,%d3 # the number of places
14222 dbf.w %d0,m_loop # given in d0
14223 no_sft:
14224 tst.l %d2 # check for mantissa of zero
14225 bne.b no_zr # if not, go on
14226 tst.l %d3 # continue zero check
14227 beq.b zer_m # if zero, go directly to binstr
# Round the 64-bit fraction d2:d3 at bit 7 (add 0x80 with carry into d2),
# then clear the bits below it, matching the 882's usable precision.
14228 no_zr:
14229 clr.l %d1 # put zero in d1 for addx
14230 add.l &0x00000080,%d3 # inc at bit 7
14231 addx.l %d1,%d2 # continue inc
14232 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14233 zer_m:
14234 mov.l %d4,%d0 # put LEN in d0 for binstr call
14235 addq.l &3,%a0 # a0 points to M16 byte in result
14236 bsr binstr # call binstr to convert mant
14239 # A15. Convert the exponent to bcd.
14240 # As in A14 above, the exp is converted to bcd and the
14241 # digits are stored in the final string.
14243 # Digits are stored in L_SCR1(a6) on return from BINDEC as:
14245 # 32 16 15 0
14246 # -----------------------------------------
14247 # | 0 | e3 | e2 | e1 | e4 | X | X | X |
14248 # -----------------------------------------
14250 # And are moved into their proper places in FP_SCR0. If digit e4
14251 # is non-zero, OPERR is signaled. In all cases, all 4 digits are
14252 # written as specified in the 881/882 manual for packed decimal.
14254 # Register usage:
14255 # Input/Output
14256 # d0: x/LEN call to binstr - final is 0
14257 # d1: x/scratch (0);shift count for final exponent packing
14258 # d2: x/ms 32-bits of exp fraction/scratch
14259 # d3: x/ls 32-bits of exp fraction
14260 # d4: LEN/Unchanged
14261 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14262 # d6: ILOG
14263 # d7: k-factor/Unchanged
14264 # a0: ptr to result string/ptr to L_SCR1(a6)
14265 # a1: ptr to PTENxx array/Unchanged
14266 # a2: ptr to FP_SCR1(a6)/Unchanged
14267 # fp0: abs(YINT) adjusted/float(ILOG)
14268 # fp1: 10^ISCALE/Unchanged
14269 # fp2: 10^LEN/Unchanged
14270 # F_SCR1:Work area for final result/BCD result
14271 # F_SCR2:Y with original exponent/ILOG/10^4
14272 # L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14273 # L_SCR2:first word of X packed/Unchanged
14275 A15_st:
14276 tst.b BINDEC_FLG(%a6) # check for denorm
14277 beq.b not_denorm
14278 ftest.x %fp0 # test for zero
14279 fbeq.w den_zero # if zero, use k-factor or 4933
14280 fmov.l %d6,%fp0 # float ILOG
14281 fabs.x %fp0 # get abs of ILOG
14282 bra.b convrt
14283 den_zero:
14284 tst.l %d7 # check sign of the k-factor
14285 blt.b use_ilog # if negative, use ILOG
14286 fmov.s F4933(%pc),%fp0 # force exponent to 4933
14287 bra.b convrt # do it
14288 use_ilog:
14289 fmov.l %d6,%fp0 # float ILOG
14290 fabs.x %fp0 # get abs of ILOG
14291 bra.b convrt
14292 not_denorm:
14293 ftest.x %fp0 # test for zero
14294 fbneq.w not_zero # if zero, force exponent
14295 fmov.s FONE(%pc),%fp0 # force exponent to 1
14296 bra.b convrt # do it
14297 not_zero:
14298 fmov.l %d6,%fp0 # float ILOG
14299 fabs.x %fp0 # get abs of ILOG
14300 convrt:
14301 fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14302 fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14303 mov.l 4(%a2),%d2 # move word 2 to d2
14304 mov.l 8(%a2),%d3 # move word 3 to d3
14305 mov.w (%a2),%d0 # move exp to d0
14306 beq.b x_loop_fin # if zero, skip the shift
14307 sub.w &0x3ffd,%d0 # subtract off bias
14308 neg.w %d0 # make exp positive
14309 x_loop:
14310 lsr.l &1,%d2 # shift d2:d3 right
14311 roxr.l &1,%d3 # the number of places
14312 dbf.w %d0,x_loop # given in d0
14313 x_loop_fin:
14314 clr.l %d1 # put zero in d1 for addx
14315 add.l &0x00000080,%d3 # inc at bit 7
14316 addx.l %d1,%d2 # continue inc
14317 and.l &0xffffff80,%d3 # strip off lsb not used by 882
14318 mov.l &4,%d0 # put 4 in d0 for binstr call
14319 lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14320 bsr binstr # call binstr to convert exp
14321 mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14322 mov.l &12,%d1 # use d1 for shift count
14323 lsr.l %d1,%d0 # shift d0 right by 12
14324 bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14325 lsr.l %d1,%d0 # shift d0 right by 12
14326 bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14327 tst.b %d0 # check if e4 is zero
14328 beq.b A16_st # if zero, skip rest
14329 or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14332 # A16. Write sign bits to final string.
14333 # Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14335 # Register usage:
14336 # Input/Output
14337 # d0: x/scratch - final is x
14338 # d2: x/x
14339 # d3: x/x
14340 # d4: LEN/Unchanged
14341 # d5: ICTR:LAMBDA/LAMBDA:ICTR
14342 # d6: ILOG/ILOG adjusted
14343 # d7: k-factor/Unchanged
14344 # a0: ptr to L_SCR1(a6)/Unchanged
14345 # a1: ptr to PTENxx array/Unchanged
14346 # a2: ptr to FP_SCR1(a6)/Unchanged
14347 # fp0: float(ILOG)/Unchanged
14348 # fp1: 10^ISCALE/Unchanged
14349 # fp2: 10^LEN/Unchanged
14350 # F_SCR1:BCD result with correct signs
14351 # F_SCR2:ILOG/10^4
14352 # L_SCR1:Exponent digits on return from binstr
14353 # L_SCR2:first word of X packed/Unchanged
14355 A16_st:
14356 clr.l %d0 # clr d0 for collection of signs
14357 and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
14358 tst.l L_SCR2(%a6) # check sign of original mantissa
14359 bge.b mant_p # if pos, don't set SM
14360 mov.l &2,%d0 # move 2 in to d0 for SM
14361 mant_p:
14362 tst.l %d6 # check sign of ILOG
14363 bge.b wr_sgn # if pos, don't set SE
14364 addq.l &1,%d0 # set bit 0 in d0 for SE
14365 wr_sgn:
14366 bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14368 # Clean up and restore all registers used.
14370 fmov.l &0,%fpsr # clear possible inex2/ainex bits
14371 fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
14372 movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
# NOTE(review): the return instruction expected after the register
# restore (upstream lines 14373-14374 of pfpsp.s) is not visible in this
# extraction; verify against the original source that the routine ends
# with rts here.
# Powers of ten 10^(2^i), i = 0..12, one extended-precision value
# (12 bytes) per entry; this table is rounded to nearest where inexact.
14375 global PTENRN
14376 PTENRN:
14377 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14378 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14379 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14380 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14381 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14382 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14383 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14384 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14385 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14386 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14387 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14388 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14389 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Same powers of ten as PTENRN, but rounded toward +infinity where
# inexact (low-order mantissa words differ from PTENRN by +1 ulp).
14391 global PTENRP
14392 PTENRP:
14393 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14394 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14395 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14396 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14397 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14398 long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
14399 long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
14400 long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
14401 long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
14402 long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
14403 long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
14404 long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
14405 long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
# Same powers of ten as PTENRN, but rounded toward -infinity where
# inexact (low-order mantissa words differ from PTENRN by -1 ulp).
14407 global PTENRM
14408 PTENRM:
14409 long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
14410 long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
14411 long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
14412 long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
14413 long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
14414 long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
14415 long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
14416 long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
14417 long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
14418 long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
14419 long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
14420 long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
14421 long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
14423 #########################################################################
14424 # binstr(): Converts a 64-bit binary integer to bcd. #
14426 # INPUT *************************************************************** #
14427 # d2:d3 = 64-bit binary integer #
14428 # d0 = desired length (LEN) #
14429 # a0 = pointer to start in memory for bcd characters #
14430 # (This pointer must point to byte 4 of the first #
14431 # lword of the packed decimal memory string.) #
14433 # OUTPUT ************************************************************** #
14434 # a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14436 # ALGORITHM *********************************************************** #
14437 # The 64-bit binary is assumed to have a decimal point before #
14438 # bit 63. The fraction is multiplied by 10 using a mul by 2 #
14439 # shift and a mul by 8 shift. The bits shifted out of the #
14440 # msb form a decimal digit. This process is iterated until #
14441 # LEN digits are formed. #
14443 # A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14444 # digit formed will be assumed the least significant. This is #
14445 # to force the first byte formed to have a 0 in the upper 4 bits. #
14447 # A2. Beginning of the loop: #
14448 # Copy the fraction in d2:d3 to d4:d5. #
14450 # A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14451 # extracts and shifts. The three msbs from d2 will go into d1. #
14453 # A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14454 # will be collected by the carry. #
14456 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14457 # into d2:d3. D1 will contain the bcd digit formed. #
14459 # A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14460 # zero, it is the ls digit. Put the digit in its place in the #
14461 # upper word of d0. If it is the ls digit, write the word #
14462 # from d0 to memory. #
14464 # A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
14466 #########################################################################
14468 # Implementation Notes:
14470 # The registers are used as follows:
14472 # d0: LEN counter
14473 # d1: temp used to form the digit
14474 # d2: upper 32-bits of fraction for mul by 8
14475 # d3: lower 32-bits of fraction for mul by 8
14476 # d4: upper 32-bits of fraction for mul by 2
14477 # d5: lower 32-bits of fraction for mul by 2
14478 # d6: temp for bit-field extracts
14479 # d7: byte digit formation word;digit count {0,1}
14480 # a0: pointer into memory for packed bcd string formation
# binstr(): convert the 64-bit binary fraction in d2:d3 into LEN (d0)
# packed-BCD digits stored two-per-byte at (a0)+, by repeatedly
# computing fraction*10 as fraction*8 + fraction*2 and collecting the
# integer carry-out of each multiply as the next decimal digit.
# (Full algorithm notes and register assignments are in the block
# comment above.)
14483 global binstr
14484 binstr:
14485 movm.l &0xff00,-(%sp) # {%d0-%d7}
14488 # A1: Init d7
# d7 is used as two words via swap: the low word ("d7a") counts digits
# formed in the current byte {0,1}; the high word ("d7b") accumulates
# the byte.  Starting d7a at 1 forces the very first digit to be
# treated as the ls nibble, so the first byte gets 0 in its upper 4
# bits (packed-decimal strings start on a half-byte boundary here).
14490 mov.l &1,%d7 # init d7 for second digit
14491 subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
14493 # A2. Copy d2:d3 to d4:d5. Start loop.
14495 loop:
14496 mov.l %d2,%d4 # copy the fraction before muls
14497 mov.l %d3,%d5 # to d4:d5
14499 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14501 bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
14502 asl.l &3,%d2 # shift d2 left by 3 places
14503 bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
14504 asl.l &3,%d3 # shift d3 left by 3 places
14505 or.l %d6,%d2 # or in msbs from d3 into d2
14507 # A4. Multiply d4:d5 by 2; add carry out to d1.
14509 asl.l &1,%d5 # mul d5 by 2
14510 roxl.l &1,%d4 # mul d4 by 2
14511 swap %d6 # put 0 in d6 lower word
14512 addx.w %d6,%d1 # add in extend from mul by 2
14514 # A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
# The two nop/addx pairs below work around 68060 errata #13: keep the
# nops exactly where they are.
14516 add.l %d5,%d3 # add lower 32 bits
14517 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14518 addx.l %d4,%d2 # add with extend upper 32 bits
14519 nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14520 addx.w %d6,%d1 # add in extend from add to d1
14521 swap %d6 # with d6 = 0; put 0 in upper word
14523 # A6. Test d7 and branch.
14525 tst.w %d7 # any digit pending in this byte?
14526 beq.b first_d # d7a == 0: this is the ms (first) digit
# sec_d: d1 holds the second (ls) digit of the byte; combine it with
# the pending ms digit in d7b and write the completed byte out.
14527 sec_d:
14528 swap %d7 # bring first digit to word d7b
14529 asl.w &4,%d7 # first digit in upper 4 bits d7b
14530 add.w %d1,%d7 # add in ls digit to d7b
14531 mov.b %d7,(%a0)+ # store d7b byte in memory
14532 swap %d7 # put LEN counter in word d7a
14533 clr.w %d7 # set d7a to signal no digits done
14534 dbf.w %d0,loop # do loop some more!
14535 bra.b end_bstr # finished, so exit
# first_d: stash the ms digit in d7b and mark one digit pending; if
# LEN runs out here (odd digit count) the last digit is written alone
# in the upper nibble of a final byte.
14536 first_d:
14537 swap %d7 # put digit word in d7b
14538 mov.w %d1,%d7 # put new digit in d7b
14539 swap %d7 # put LEN counter in word d7a
14540 addq.w &1,%d7 # set d7a to signal first digit done
14541 dbf.w %d0,loop # do loop some more!
14542 swap %d7 # put last digit in string
14543 lsl.w &4,%d7 # move it to upper 4 bits
14544 mov.b %d7,(%a0)+ # store it in memory string
14546 # Clean up and return with result in fp0.
14548 end_bstr:
14549 movm.l (%sp)+,&0xff # {%d0-%d7}
14552 #########################################################################
14553 # XDEF **************************************************************** #
14554 # facc_in_b(): dmem_read_byte failed #
14555 # facc_in_w(): dmem_read_word failed #
14556 # facc_in_l(): dmem_read_long failed #
14557 # facc_in_d(): dmem_read of dbl prec failed #
14558 # facc_in_x(): dmem_read of ext prec failed #
14560 # facc_out_b(): dmem_write_byte failed #
14561 # facc_out_w(): dmem_write_word failed #
14562 # facc_out_l(): dmem_write_long failed #
14563 # facc_out_d(): dmem_write of dbl prec failed #
14564 # facc_out_x(): dmem_write of ext prec failed #
14566 # XREF **************************************************************** #
14567 # _real_access() - exit through access error handler #
14569 # INPUT *************************************************************** #
14570 # None #
14572 # OUTPUT ************************************************************** #
14573 # None #
14575 # ALGORITHM *********************************************************** #
14576 # Flow jumps here when an FP data fetch call gets an error #
14577 # result. This means the operating system wants an access error frame #
14578 # made out of the current exception stack frame. #
14579 # So, we first call restore() which makes sure that any updated #
14580 # -(an)+ register gets returned to its pre-exception value and then #
14581 # we change the stack to an access error stack frame. #
14583 #########################################################################
# Access-error exit stubs.  One entry point per failed data-memory
# access size.  Each loads the access size in bytes into d0, calls
# restore() to back out any (an)+/-(an) address-register update, writes
# the Fault Status Long Word (FSLW) for that access into EXC_VOFF(%a6),
# and joins facc_finish which converts the current exception frame
# into an access-error frame for _real_access().
14585 facc_in_b:
14586 movq.l &0x1,%d0 # one byte
14587 bsr.w restore # fix An
14589 mov.w &0x0121,EXC_VOFF(%a6) # set FSLW
14590 bra.w facc_finish
14592 facc_in_w:
14593 movq.l &0x2,%d0 # two bytes
14594 bsr.w restore # fix An
14596 mov.w &0x0141,EXC_VOFF(%a6) # set FSLW
14597 bra.b facc_finish
14599 facc_in_l:
14600 movq.l &0x4,%d0 # four bytes
14601 bsr.w restore # fix An
14603 mov.w &0x0101,EXC_VOFF(%a6) # set FSLW
14604 bra.b facc_finish
14606 facc_in_d:
14607 movq.l &0x8,%d0 # eight bytes
14608 bsr.w restore # fix An
14610 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14611 bra.b facc_finish
14613 facc_in_x:
14614 movq.l &0xc,%d0 # twelve bytes
14615 bsr.w restore # fix An
# NOTE(review): same FSLW value (0x0161) as the dbl-prec read above;
# presumably both report the same size class in the FSLW size field —
# confirm against the 68060 User's Manual FSLW encoding.
14617 mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
14618 bra.b facc_finish
14620 ################################################################
14622 facc_out_b:
14623 movq.l &0x1,%d0 # one byte
14624 bsr.w restore # restore An
14626 mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW
14627 bra.b facc_finish
14629 facc_out_w:
14630 movq.l &0x2,%d0 # two bytes
14631 bsr.w restore # restore An
14633 mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW
14634 bra.b facc_finish
14636 facc_out_l:
14637 movq.l &0x4,%d0 # four bytes
14638 bsr.w restore # restore An
14640 mov.w &0x0081,EXC_VOFF(%a6) # set FSLW
14641 bra.b facc_finish
14643 facc_out_d:
14644 movq.l &0x8,%d0 # eight bytes
14645 bsr.w restore # restore An
14647 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14648 bra.b facc_finish
14650 facc_out_x:
14651 mov.l &0xc,%d0 # twelve bytes (mov.l where the others use movq.l; same effect)
14652 bsr.w restore # restore An
# NOTE(review): same FSLW value (0x00e1) as the dbl-prec write above —
# confirm intended, as with the read cases.
14654 mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW
14656 # here's where we actually create the access error frame from the
14657 # current exception stack frame.
14658 facc_finish:
14659 mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
# Restore the user's FP and integer register context saved at entry,
# then tear down the a6 frame before rewriting the stack frame.
14661 fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
14662 fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14663 movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
14665 unlk %a6
# Shuffle the stacked exception frame in place into the access-error
# layout: SR/PC-hi, voff, PC-lo, EA, FSLW.  Order matters — each move
# overwrites a slot only after its old contents have been copied down.
14667 mov.l (%sp),-(%sp) # store SR, hi(PC)
14668 mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
14669 mov.l 0xc(%sp),0x8(%sp) # store EA
14670 mov.l &0x00000001,0xc(%sp) # store FSLW
14671 mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size)
14672 mov.w &0x4008,0x6(%sp) # store voff (presumably format $4, vector $008 — confirm)
14674 btst &0x5,(%sp) # supervisor or user mode?
14675 beq.b facc_out2 # user
14676 bset &0x2,0xd(%sp) # set supervisor TM bit
14678 facc_out2:
14679 bra.l _real_access
14681 ##################################################################
14683 # if the effective addressing mode was predecrement or postincrement,
14684 # the emulation has already changed its value to the correct post-
14685 # instruction value. but since we're exiting to the access error
14686 # handler, then AN must be returned to its pre-instruction value.
14687 # we do that here.
# restore(): undo the address-register update done by the emulation
# for a postincrement/predecrement <ea>.  In: d0 = access size in
# bytes.  Decodes the opmode from the stacked opword; for (an)+ the
# size is subtracted back off An, for -(an) the size is negated first
# (rest_dec) so the same subtract adds it back.
14688 restore:
14689 mov.b EXC_OPWORD+0x1(%a6),%d1
14690 andi.b &0x38,%d1 # extract opmode
14691 cmpi.b %d1,&0x18 # postinc?
14692 beq.w rest_inc
14693 cmpi.b %d1,&0x20 # predec?
14694 beq.w rest_dec
14697 rest_inc:
14698 mov.b EXC_OPWORD+0x1(%a6),%d1
14699 andi.w &0x0007,%d1 # fetch An
# PC-relative jump table: load the 16-bit offset for register An,
# then jump to the matching per-register fixup below.
14701 mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1
14702 jmp (tbl_rest_inc.b,%pc,%d1.w*1)
14704 tbl_rest_inc:
14705 short ri_a0 - tbl_rest_inc
14706 short ri_a1 - tbl_rest_inc
14707 short ri_a2 - tbl_rest_inc
14708 short ri_a3 - tbl_rest_inc
14709 short ri_a4 - tbl_rest_inc
14710 short ri_a5 - tbl_rest_inc
14711 short ri_a6 - tbl_rest_inc
14712 short ri_a7 - tbl_rest_inc
# a0/a1 (and a6, the frame pointer) were saved at exception entry, so
# their stacked copies are fixed up; a2-a5 are still live and are
# adjusted directly.
14714 ri_a0:
14715 sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
14717 ri_a1:
14718 sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
14720 ri_a2:
14721 sub.l %d0,%a2 # fix a2
14723 ri_a3:
14724 sub.l %d0,%a3 # fix a3
14726 ri_a4:
14727 sub.l %d0,%a4 # fix a4
14729 ri_a5:
14730 sub.l %d0,%a5 # fix a5
14732 ri_a6:
14733 sub.l %d0,(%a6) # fix stacked a6
14735 # if it's a fmove out instruction, we don't have to fix a7
14736 # because we hadn't changed it yet. if it's an opclass two
14737 # instruction (data moved in) and the exception was in supervisor
14738 # mode, then it also wasn't updated. if it was user mode, then
14739 # restore the correct a7 which is in the USP currently.
14740 ri_a7:
14741 cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
14742 bne.b ri_a7_done # out
14744 btst &0x5,EXC_SR(%a6) # user or supervisor?
14745 bne.b ri_a7_done # supervisor
14746 movc %usp,%a0 # restore USP
14747 sub.l %d0,%a0
14748 movc %a0,%usp
14749 ri_a7_done:
14752 # need to invert adjustment value if the <ea> was predec
14753 rest_dec:
14754 neg.l %d0
14755 bra.b rest_inc