1 * $NetBSD: round.sa,v 1.3 1994/10/26 07:49:24 cgd Exp $
3 * MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
4 * M68000 Hi-Performance Microprocessor Division
5 * M68040 Software Package
7 * M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
10 * THE SOFTWARE is provided on an "AS IS" basis and without warranty.
11 * To the maximum extent permitted by applicable law,
12 * MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
13 * INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
14 * PARTICULAR PURPOSE and any warranty against infringement with
15 * regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
16 * and any accompanying written materials.
18 * To the maximum extent permitted by applicable law,
19 * IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
20 * (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
21 * PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
22 * OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
23 * SOFTWARE. Motorola assumes no responsibility for the maintenance
24 * and support of the SOFTWARE.
26 * You are hereby granted a copyright license to use, modify, and
27 * distribute the SOFTWARE so long as this entire notice is retained
28 * without alteration in any modified and/or redistributed versions,
29 * and that such modified versions are clearly identified as such.
30 * No licenses are granted by implication, estoppel or otherwise
31 * under any patents or trademarks of Motorola, Inc.
34 * round.sa 3.4 7/29/91
36 * handle rounding and normalization tasks
39 ROUND IDNT 2,1 Motorola 040 Floating Point Software Package
46 * round --- round result according to precision/mode
48 * a0 points to the input operand in the internal extended format
49 * d1(high word) contains rounding precision:
53 * d1(low word) contains rounding mode:
58 * d0{31:29} contains the g,r,s bits (extended)
60 * On return the value pointed to by a0 is correctly rounded,
61 * a0 is preserved and the g-r-s bits in d0 are cleared.
62 * The result is not typed - the tag field is invalid. The
63 * result is still in the internal extended format.
65 * The INEX bit of USER_FPSR will be set if the rounded result was
66 * inexact (i.e. if any of the g-r-s bits were set).
71 * If g=r=s=0 then result is exact and round is done, else set
72 * the inex flag in status reg and continue.
74 bsr.b ext_grs ;this subroutine looks at the
75 * :rounding precision and sets
76 * ;the appropriate g-r-s bits.
77 tst.l d0 ;if grs are zero, go force
78 bne.w rnd_cont ;lower bits to zero for size
80 swap d1 ;set up d1.w for round prec.
85 * Use rounding mode as an index into a jump table for these modes.
87 or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
92 * Jump table indexed by rounding mode in d1.w. All following assumes
101 * ROUND PLUS INFINITY
103 * If sign of fp number = 0 (positive), then add 1 to l.
106 swap d1 ;set up d1 for round prec.
107 tst.b LOCAL_SGN(a0) ;check for sign
108 bmi.w truncate ;if positive then truncate
109 move.l #$ffffffff,d0 ;force g,r,s to be all f's
111 move.l (a1,d1.w*4),a1
114 * ROUND MINUS INFINITY
116 * If sign of fp number = 1 (negative), then add 1 to l.
119 swap d1 ;set up d1 for round prec.
120 tst.b LOCAL_SGN(a0) ;check for sign
121 bpl.w truncate ;if negative then truncate
122 move.l #$ffffffff,d0 ;force g,r,s to be all f's
124 move.l (a1,d1.w*4),a1
131 swap d1 ;set up d1 for round prec.
137 * If (g=1), then add 1 to l and if (r=s=0), then clear l
138 * Note that this will round to even in case of a tie.
141 swap d1 ;set up d1 for round prec.
142 add.l d0,d0 ;shift g-bit to c-bit
143 bcc.w truncate ;if (g=1) then
145 move.l (a1,d1.w*4),a1
149 * ext_grs --- extract guard, round and sticky bits
151 * Input: d1 = PREC:ROUND
152 * Output: d0{31:29}= guard, round, sticky
154 * The ext_grs extract the guard/round/sticky bits according to the
155 * selected rounding precision. It is called by the round subroutine
156 * only. All registers except d0 are kept intact. d0 becomes an
157 * updated guard,round,sticky in d0{31:29}
159 * Notes: the ext_grs uses the round PREC, and therefore has to swap d1
160 * prior to usage, and needs to restore d1 to original.
163 swap d1 ;have d1.w point to round precision
169 movem.l d2/d3,-(a7) ;make some temp registers
173 bfextu LOCAL_HI(a0){24:2},d3 ;sgl prec. g-r are 2 bits right
174 move.l #30,d2 ;of the sgl prec. limits
175 lsl.l d2,d3 ;shift g-r bits to MSB of d3
176 move.l LOCAL_HI(a0),d2 ;get word 2 for s-bit test
177 andi.l #$0000003f,d2 ;s bit is the or of all other
178 bne.b st_stky ;bits to the right of g-r
179 tst.l LOCAL_LO(a0) ;test lower mantissa
180 bne.b st_stky ;if any are set, set sticky
181 tst.l d0 ;test original g,r,s
182 bne.b st_stky ;if any are set, set sticky
183 bra.b end_sd ;if words 3 and 4 are clr, exit
185 bfextu LOCAL_LO(a0){21:2},d3 ;dbl-prec. g-r are 2 bits right
186 move.l #30,d2 ;of the dbl prec. limits
187 lsl.l d2,d3 ;shift g-r bits to the MSB of d3
188 move.l LOCAL_LO(a0),d2 ;get lower mantissa for s-bit test
189 andi.l #$000001ff,d2 ;s bit is the or-ing of all
190 bne.b st_stky ;other bits to the right of g-r
191 tst.l d0 ;test word original g,r,s
192 bne.b st_stky ;if any are set, set sticky
193 bra.b end_sd ;if clear, exit
195 bset #rnd_stky_bit,d3
197 move.l d3,d0 ;return grs to d0
198 movem.l (a7)+,d2/d3 ;restore scratch registers
200 swap d1 ;restore d1 to original
203 ******************** Local Equates
204 ad_1_sgl equ $00000100 constant to add 1 to l-bit in sgl prec
205 ad_1_dbl equ $00000800 constant to add 1 to l-bit in dbl prec
208 *Jump table for adding 1 to the l-bit indexed by rnd prec
219 add.l #ad_1_sgl,LOCAL_HI(a0)
220 bcc.b scc_clr ;no mantissa overflow
221 roxr.w LOCAL_HI(a0) ;shift v-bit back in
222 roxr.w LOCAL_HI+2(a0) ;shift v-bit back in
223 add.w #$1,LOCAL_EX(a0) ;and incr exponent
225 tst.l d0 ;test for rs = 0
227 andi.w #$fe00,LOCAL_HI+2(a0) ;clear the l-bit
229 andi.l #$ffffff00,LOCAL_HI(a0) ;truncate bits beyond sgl limit
230 clr.l LOCAL_LO(a0) ;clear d2
237 addq.l #1,LOCAL_LO(a0) ;add 1 to l-bit
238 bcc.b xcc_clr ;test for carry out
239 addq.l #1,LOCAL_HI(a0) ;propogate carry
241 roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit
242 roxr.w LOCAL_HI+2(a0) ;mant is 0 so restore v-bit
244 roxr.w LOCAL_LO+2(a0)
245 add.w #$1,LOCAL_EX(a0) ;and inc exp
247 tst.l d0 ;test rs = 0
249 andi.b #$fe,LOCAL_LO+3(a0) ;clear the l bit
256 add.l #ad_1_dbl,LOCAL_LO(a0)
258 addq.l #1,LOCAL_HI(a0) ;propogate carry
260 roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit
261 roxr.w LOCAL_HI+2(a0) ;mant is 0 so restore v-bit
263 roxr.w LOCAL_LO+2(a0)
264 add.w #$1,LOCAL_EX(a0) ;incr exponent
266 tst.l d0 ;test for rs = 0
268 andi.w #$f000,LOCAL_LO+2(a0) ;clear the l-bit
271 andi.l #$fffff800,LOCAL_LO(a0) ;truncate bits beyond dbl limit
277 * Truncate all other bits
287 move.l (a1,d1.w*4),a1
296 * These routines (nrm_zero & nrm_set) normalize the unnorm. This
297 * is done by shifting the mantissa left while decrementing the
300 * NRM_SET shifts and decrements until there is a 1 set in the integer
301 * bit of the mantissa (msb in d1).
303 * NRM_ZERO shifts and decrements until there is a 1 set in the integer
304 * bit of the mantissa (msb in d1) unless this would mean the exponent
305 * would go less than 0. In that case the number becomes a denorm - the
306 * exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
309 * Note that both routines have been optimized (for the worst case) and
310 * therefore do not have the easy to follow decrement/shift loop.
314 * Distance to first 1 bit in mantissa = X
315 * Distance to 0 from exponent = Y
320 * shift mantissa by Y
324 * FP_SCR1 = exponent, ms mantissa part, ls mantissa part
326 * L_SCR1{4} = fpte15 or ete15 bit
330 move.w LOCAL_EX(a0),d0
331 cmp.w #64,d0 ;see if exp > 64
333 bsr nrm_set ;exp > 64 so exp won't exceed 0
336 movem.l d2/d3/d5/d6,-(a7)
337 move.l LOCAL_HI(a0),d1
338 move.l LOCAL_LO(a0),d2
340 bfffo d1{0:32},d3 ;get the distance to the first 1
342 beq.b ms_clr ;branch if no bits were set
344 bmi.b greater ;then exp will go past 0 (neg) if
345 * ;it is just shifted
346 bsr nrm_set ;else exp won't go past 0
347 movem.l (a7)+,d2/d3/d5/d6
350 move.l d2,d6 ;save ls mant in d6
351 lsl.l d0,d2 ;shift ls mant by count
352 lsl.l d0,d1 ;shift ms mant by count
354 sub.l d0,d5 ;make op a denorm by shifting bits
355 lsr.l d5,d6 ;by the number in the exp, then
357 or.l d6,d1 ;shift the ls mant bits into the ms mant
358 clr.l d0 ;same as if decremented exp to 0
360 move.w d0,LOCAL_EX(a0)
361 move.l d1,LOCAL_HI(a0)
362 move.l d2,LOCAL_LO(a0)
363 movem.l (a7)+,d2/d3/d5/d6
366 bfffo d2{0:32},d3 ;check if any bits set in ls mant
367 beq.b all_clr ;branch if none set
370 bmi.b greater ;then branch
371 bsr nrm_set ;else exp won't go past 0
372 movem.l (a7)+,d2/d3/d5/d6
375 clr.w LOCAL_EX(a0) ;no mantissa bits set. Set exp = 0.
376 movem.l (a7)+,d2/d3/d5/d6
384 bfffo LOCAL_HI(a0){0:32},d7 ;find first 1 in ms mant to d7)
385 beq.b lower ;branch if ms mant is all 0's
389 sub.w d7,LOCAL_EX(a0) ;sub exponent by count
390 move.l LOCAL_HI(a0),d0 ;d0 has ms mant
391 move.l LOCAL_LO(a0),d1 ;d1 has ls mant
393 lsl.l d7,d0 ;shift first 1 to j bit position
394 move.l d1,d6 ;copy ls mant into d6
395 lsl.l d7,d6 ;shift ls mant by count
396 move.l d6,LOCAL_LO(a0) ;store ls mant into memory
398 sub.l d7,d6 ;continue shift
399 lsr.l d6,d1 ;shift off all bits but those that will
400 * ;be shifted into ms mant
401 or.l d1,d0 ;shift the ls mant bits into the ms mant
402 move.l d0,LOCAL_HI(a0) ;store ms mant into memory
403 movem.l (a7)+,d7/d6 ;restore registers
407 * We get here if ms mant was = 0, and we assume ls mant has bits
408 * set (otherwise this would have been tagged a zero not a denorm).
411 move.w LOCAL_EX(a0),d0 ;d0 has exponent
412 move.l LOCAL_LO(a0),d1 ;d1 has ls mant
413 sub.w #32,d0 ;account for ms mant being all zeros
414 bfffo d1{0:32},d7 ;find first 1 in ls mant to d7)
415 sub.w d7,d0 ;subtract shift count from exp
416 lsl.l d7,d1 ;shift first 1 to integer bit in ms mant
417 move.w d0,LOCAL_EX(a0) ;store ms mant
418 move.l d1,LOCAL_HI(a0) ;store exp
419 clr.l LOCAL_LO(a0) ;clear ls mant
423 * denorm --- denormalize an intermediate result
428 * a0 points to the operand to be denormalized
429 * (in the internal extended format)
431 * d0: rounding precision
433 * a0 points to the denormalized result
434 * (in the internal extended format)
436 * d0 is guard,round,sticky
438 * d0 comes into this routine with the rounding precision. It
439 * is then loaded with the denormalized exponent threshold for the
440 * rounding precision.
445 btst.b #6,LOCAL_EX(a0) ;check for exponents between $7fff-$4000
447 bset.b #7,LOCAL_EX(a0) ;sign extend if it is so
450 tst.b d0 ;if 0 then extended precision
451 bne.b not_ext ;else branch
453 clr.l d1 ;load d1 with ext threshold
454 clr.l d0 ;clear the sticky flag
455 bsr dnrm_lp ;denormalize the number
456 tst.b d1 ;check for inex
457 beq.w no_inex ;if clr, no inex
458 bra.b dnrm_inex ;if set, set inex
461 cmpi.l #1,d0 ;if 1 then single precision
462 beq.b load_sgl ;else must be 2, double prec
465 move.w #dbl_thresh,d1 ;put copy of threshold in d1
466 move.l d1,d0 ;copy d1 into d0
467 sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp
468 cmp.w #67,d0 ;if diff > 67 (mant + grs bits)
469 bpl.b chk_stky ;then branch (all bits would be
470 * ; shifted off in denorm routine)
471 clr.l d0 ;else clear the sticky flag
472 bsr dnrm_lp ;denormalize the number
474 beq.b no_inex ;if clr, no inex
475 bra.b dnrm_inex ;if set, set inex
478 move.w #sgl_thresh,d1 ;put copy of threshold in d1
479 move.l d1,d0 ;copy d1 into d0
480 sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp
481 cmp.w #67,d0 ;if diff > 67 (mant + grs bits)
482 bpl.b chk_stky ;then branch (all bits would be
483 * ; shifted off in denorm routine)
484 clr.l d0 ;else clear the sticky flag
485 bsr dnrm_lp ;denormalize the number
487 beq.b no_inex ;if clr, no inex
488 bra.b dnrm_inex ;if set, set inex
491 tst.l LOCAL_HI(a0) ;check for any bits set
493 tst.l LOCAL_LO(a0) ;check for any bits set
497 or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
498 move.l #$20000000,d0 ;set sticky bit in return value
500 move.w d1,LOCAL_EX(a0) ;load exp with threshold
501 clr.l LOCAL_HI(a0) ;set d1 = 0 (ms mantissa)
502 clr.l LOCAL_LO(a0) ;set d2 = 0 (ms mantissa)
505 or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
510 * dnrm_lp --- normalize exponent/mantissa to specified threshhold
513 * a0 points to the operand to be denormalized
514 * d0{31:29} initial guard,round,sticky
515 * d1{15:0} denormalization threshold
517 * a0 points to the denormalized operand
518 * d0{31:29} final guard,round,sticky
519 * d1.b inexact flag: all ones means inexact result
521 * The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
522 * so that bfext can be used to extract the new low part of the mantissa.
523 * Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
524 * is no LOCAL_GRS scratch word following it on the fsave frame.
528 move.l d2,-(sp) ;save d2 for temp use
529 btst.b #E3,E_BYTE(a6) ;test for type E3 exception
530 beq.b not_E3 ;not type E3 exception
531 bfextu WBTEMP_GRS(a6){6:3},d2 ;extract guard,round, sticky bit
533 lsl.l d0,d2 ;shift g,r,s to their postions
536 move.l (sp)+,d2 ;restore d2
537 move.l LOCAL_LO(a0),FP_SCR2+LOCAL_LO(a6)
538 move.l d0,FP_SCR2+LOCAL_GRS(a6)
539 move.l d1,d0 ;copy the denorm threshold
540 sub.w LOCAL_EX(a0),d1 ;d1 = threshold - uns exponent
543 blt.b case_1 ;0 = d1 < 32
545 blt.b case_2 ;32 <= d1 < 64
546 bra.w case_3 ;d1 >= 64
548 * No normalization necessary
551 clr.b d1 ;set no inex2 reported
552 move.l FP_SCR2+LOCAL_GRS(a6),d0 ;restore original g,r,s
559 move.w d0,LOCAL_EX(a0) ;exponent = denorm threshold
561 sub.w d1,d0 ;d0 = 32 - d1
562 bfextu LOCAL_EX(a0){d0:32},d2
563 bfextu d2{d1:d0},d2 ;d2 = new LOCAL_HI
564 bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new LOCAL_LO
565 bfextu FP_SCR2+LOCAL_LO(a6){d0:32},d0 ;d0 = new G,R,S
566 move.l d2,LOCAL_HI(a0) ;store new LOCAL_HI
567 move.l d1,LOCAL_LO(a0) ;store new LOCAL_LO
571 bset.l #rnd_stky_bit,d0
574 move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s
575 andi.l #$e0000000,d2 ;clear all but G,R,S
576 tst.l d2 ;test if original G,R,S are clear
578 or.l #$20000000,d0 ;set sticky bit in d0
580 andi.l #$e0000000,d0 ;clear all but G,R,S
588 move.w d0,LOCAL_EX(a0) ;unsigned exponent = threshold
589 sub.w #32,d1 ;d1 now between 0 and 32
591 sub.w d1,d0 ;d0 = 32 - d1
592 bfextu LOCAL_EX(a0){d0:32},d2
593 bfextu d2{d1:d0},d2 ;d2 = new LOCAL_LO
594 bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new G,R,S
596 bne.b c2_sstky ;bra if sticky bit to be set
597 bftst FP_SCR2+LOCAL_LO(a6){d0:32}
598 bne.b c2_sstky ;bra if sticky bit to be set
604 bset.l #rnd_stky_bit,d0
607 clr.l LOCAL_HI(a0) ;store LOCAL_HI = 0
608 move.l d2,LOCAL_LO(a0) ;store LOCAL_LO
609 move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s
610 andi.l #$e0000000,d2 ;clear all but G,R,S
611 tst.l d2 ;test if original G,R,S are clear
613 or.l #$20000000,d0 ;set sticky bit in d0
615 andi.l #$e0000000,d0 ;get rid of all but G,R,S
619 * d1 >= 64 Force the exponent to be the denorm threshold with the
623 move.w d0,LOCAL_EX(a0)
627 or.l #$80000000,LOCAL_EX(a0)
634 * Shift value is out of range. Set d1 for inex2 flag and
635 * return a zero with the given threshold.
644 move.l LOCAL_HI(a0),d0
650 move.l LOCAL_HI(a0),d0
653 lsr.l #1,d0 ;shift high bit into R bit
660 tst.b FP_SCR2+LOCAL_GRS(a6)
666 bset.l #rnd_stky_bit,d0