1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3 M68000 Hi-Performance Microprocessor Division
4 M68060 Software Package
5 Production Release P1.00 -- October 10, 1994
7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10 To the maximum extent permitted by applicable law,
11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13 and any warranty against infringement with regard to the SOFTWARE
14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
16 To the maximum extent permitted by applicable law,
17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24 so long as this entire notice is retained without alteration in any modified and/or
25 redistributed versions, and that such modified versions are clearly identified as such.
26 No licenses are granted by implication, estoppel or otherwise under any patents
27 or trademarks of Motorola, Inc.
28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
30 # This file is appended to the top of the 060FPLSP package
31 # and contains the entry points into the package. The user, in
32 # effect, branches to one of the branch table entries located here.
# Entry-point branch table. Each bra.l below transfers control to the library
# routine named in its operand.
# NOTE(review): callers presumably enter at fixed offsets into this table, so
# the order of the entries should not be changed - confirm before editing.
# 64-bit divide entry points (signed, then unsigned)
35 bra.l _060LSP__idivs64_
37 bra.l _060LSP__idivu64_
# 64-bit multiply entry points (signed, then unsigned)
40 bra.l _060LSP__imuls64_
42 bra.l _060LSP__imulu64_
# cmp2 entry points, address-register forms (byte, word, long)
45 bra.l _060LSP__cmp2_Ab_
47 bra.l _060LSP__cmp2_Aw_
49 bra.l _060LSP__cmp2_Al_
# cmp2 entry points, data-register forms (byte, word, long)
51 bra.l _060LSP__cmp2_Db_
53 bra.l _060LSP__cmp2_Dw_
55 bra.l _060LSP__cmp2_Dl_
58 # leave room for possible future additions.
61 #########################################################################
62 # XDEF **************************************************************** #
63 # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
64 # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
66 # This is the library version which is accessed as a subroutine #
67 # and therefore does not work exactly like the 680X0 div{s,u}.l #
68 # 64-bit divide instruction. #
70 # XREF **************************************************************** #
73 # INPUT *************************************************************** #
74 # 0x4(sp) = divisor #
75 # 0x8(sp) = hi(dividend) #
76 # 0xc(sp) = lo(dividend) #
77 # 0x10(sp) = pointer to location to place quotient/remainder #
79 # OUTPUT ************************************************************** #
80 # 0x10(sp) = points to location of remainder/quotient. #
81 # remainder is in first longword, quotient is in 2nd. #
83 # ALGORITHM *********************************************************** #
84 # If the operands are signed, make them unsigned and save the #
85 # sign info for later. Separate out special cases like divide-by-zero #
86 # or 32-bit divides if possible. Else, use a special math algorithm #
87 # to calculate the result. #
88 # Restore sign info if signed instruction. Set the condition #
89 # codes before performing the final "rts". If the divisor was equal to #
90 # zero, then perform a divide-by-zero using a 16-bit implemented #
91 # divide instruction. This way, the operating system can record that #
92 # the event occurred even though it may not point to the correct place. #
94 #########################################################################
# Signed 64-bit divide entry point: saves d2-d7, records the caller's
# condition codes and marks the operation as signed.
# NOTE(review): the entry label, the frame setup for %a6 and the transfer into
# the shared divide code are not visible in this excerpt.
107 global _060LSP__idivs64_
109 # PROLOGUE BEGIN ########################################################
111 movm.l &0x3f00,-(%sp) # save d2-d7
112 # fmovm.l &0x0,-(%sp) # save no fpregs
113 # PROLOGUE END ##########################################################
115 mov.w %cc,DIV64_CC(%a6) # save incoming ccodes
116 st POSNEG(%a6) # signed operation (flag byte set to all ones)
# Unsigned 64-bit divide entry point, followed by the divide body. The body
# tests POSNEG(%a6) to choose between signed and unsigned handling.
# NOTE(review): the entry label, the frame setup for %a6 and several internal
# labels, compares and branches are not visible in this excerpt; the comments
# below describe only the instructions that are shown.
122 global _060LSP__idivu64_
124 # PROLOGUE BEGIN ########################################################
126 movm.l &0x3f00,-(%sp) # save d2-d7
127 # fmovm.l &0x0,-(%sp) # save no fpregs
128 # PROLOGUE END ##########################################################
130 mov.w %cc,DIV64_CC(%a6) # save incoming ccodes
131 sf POSNEG(%a6) # unsigned operation (flag byte cleared)
134 mov.l 0x8(%a6),%d7 # fetch divisor
136 beq.w ldiv64eq0 # divisor is = 0!!!
138 mov.l 0xc(%a6), %d5 # get dividend hi
139 mov.l 0x10(%a6), %d6 # get dividend lo
141 # separate signed and unsigned divide
142 tst.b POSNEG(%a6) # signed or unsigned?
143 beq.b ldspecialcases # use positive divide
145 # save the sign of the divisor
146 # make divisor unsigned if it's negative
147 tst.l %d7 # chk sign of divisor
148 slt NDIVISOR(%a6) # save sign of divisor
# NOTE(review): presumably a branch skips the negate below when the divisor is
# non-negative; that branch is not visible in this excerpt.
150 neg.l %d7 # complement negative divisor
152 # save the sign of the dividend
153 # make dividend unsigned if it's negative
155 tst.l %d5 # chk sign of hi(dividend)
156 slt NDIVIDEND(%a6) # save sign of dividend
159 mov.w &0x0, %cc # clear 'X' cc bit
160 negx.l %d6 # complement signed dividend (lo longword shown here)
163 # extract some special cases:
164 # - is (dividend == 0) ?
165 # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
167 tst.l %d5 # is (hi(dividend) == 0)
168 bne.b ldnormaldivide # no, so try it the long way
170 tst.l %d6 # is (lo(dividend) == 0), too
171 beq.w lddone # yes, so (dividend == 0)
173 cmp.l %d7,%d6 # is (divisor <= lo(dividend))
174 bls.b ld32bitdivide # yes, so use 32 bit divide
176 exg %d5,%d6 # q = 0, r = dividend
177 bra.w ldivfinish # can't divide, we're done.
180 tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div!
186 # - is hi(dividend) >= divisor ? if yes, then overflow
188 bls.b lddovf # answer won't fit in 32 bits
190 # perform the divide algorithm:
191 bsr.l ldclassical # do int divide
193 # separate into signed and unsigned finishes.
195 tst.b POSNEG(%a6) # do divs, divu separately
196 beq.b lddone # divu has no processing!!!
198 # it was a divs.l, so ccode setting is a little more complicated...
199 tst.b NDIVIDEND(%a6) # remainder has same sign
200 beq.b ldcc # as dividend.
201 neg.l %d5 # sgn(rem) = sgn(dividend)
203 mov.b NDIVISOR(%a6), %d0
204 eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative
205 beq.b ldqpos # branch to quot positive
207 # 0x80000000 is the largest number representable as a 32-bit negative
208 # number. the negative of 0x80000000 is 0x80000000.
209 cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
212 neg.l %d6 # make (-quot) 2's comp
217 btst &0x1f, %d6 # will (+quot) fit in 32 bits?
221 # if the register numbers are the same, only the quotient gets saved.
222 # so, if we always save the quotient second, we save ourselves a cmp&beq
223 andi.w &0x10,DIV64_CC(%a6) # keep only the saved 'X' bit
224 mov.w DIV64_CC(%a6),%cc
225 tst.l %d6 # may set 'N' ccode bit
227 # here, the result is in d5 (remainder) and d6 (quotient). both are
228 # stored through the result pointer held at 0x14(%a6).
229 # use movm here to not disturb the condition codes.
231 movm.l &0x0060,([0x14,%a6]) # save result (d5 first, then d6)
233 # EPILOGUE BEGIN ########################################################
234 # fmovm.l (%sp)+,&0x0 # restore no fpregs
235 movm.l (%sp)+,&0x00fc # restore d2-d7
237 # EPILOGUE END ##########################################################
241 # the result should be the unchanged dividend
243 mov.l 0xc(%a6), %d5 # get dividend hi
244 mov.l 0x10(%a6), %d6 # get dividend lo
246 andi.w &0x1c,DIV64_CC(%a6) # keep saved 'X','N','Z' bits
247 ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
248 mov.w DIV64_CC(%a6),%cc
# presumably the divide-by-zero path (target of the beq.w near the top; the
# label is not visible): store the original dividend at the result location,
# restore the saved ccodes and registers, then force the exception.
253 mov.l 0xc(%a6),([0x14,%a6]) # dividend hi -> first result longword
254 mov.l 0x10(%a6),([0x14,%a6],0x4) # dividend lo -> second result longword
256 mov.w DIV64_CC(%a6),%cc
258 # EPILOGUE BEGIN ########################################################
259 # fmovm.l (%sp)+,&0x0 # restore no fpregs
260 movm.l (%sp)+,&0x00fc # restore d2-d7
262 # EPILOGUE END ##########################################################
264 divu.w &0x0,%d0 # force a divbyzero exception
267 ###########################################################################
268 #########################################################################
269 # This routine uses the 'classical' Algorithm D from Donald Knuth's #
270 # Art of Computer Programming, vol II, Seminumerical Algorithms. #
271 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
272 # where U,V are words of the quadword dividend and longword divisor, #
273 # and U1, V1 are the most significant words. #
275 # The most sig. longword of the 64 bit dividend must be in %d5, least #
276 # in %d6. The divisor must be in the variable ddivisor, and the #
277 # signed/unsigned flag ddusign must be set (0=unsigned,1=signed). #
278 # The quotient is returned in %d6, remainder in %d5, unless the #
279 # v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
280 # is unchanged. #
281 #########################################################################
# Body of the classical 64/32 unsigned divide helper (invoked above via
# "bsr.l ldclassical"). Dividend is in %d5:%d6 and the divisor in %d7.
# NOTE(review): many lines of this routine (labels, compares, the divide
# instructions and several branches) are missing from this excerpt; the
# comments below describe only the instructions that are shown.
283 # if the divisor msw is 0, use simpler algorithm than the full blown
287 bhi.b lddknuth # go use D. Knuth algorithm
289 # Since the divisor is only a word (and larger than the mslw of the dividend),
290 # a simpler algorithm may be used :
291 # In the general case, four quotient words would be created by
292 # dividing the divisor word into each dividend word. In this case,
293 # the first two quotient words must be zero, or overflow would occur.
294 # Since we already checked this case above, we can treat the most significant
295 # longword of the dividend as (0) remainder (see Knuth) and merely complete
296 # the last two divisions to get a quotient longword and word remainder:
299 swap %d5 # same as r*b if previous step rqd
300 swap %d6 # get u3 to lsw position
301 mov.w %d6, %d5 # rb + u3
305 mov.w %d5, %d1 # first quotient word
307 mov.w %d6, %d5 # rb + u4
312 mov.w %d5, %d1 # 2nd quotient 'digit'
314 swap %d5 # now remainder
315 mov.l %d1, %d6 # and quotient
320 # In this algorithm, the divisor is treated as a 2 digit (word) number
321 # which is divided into a 3 digit (word) dividend to get one quotient
322 # digit (word). After subtraction, the dividend is shifted and the
323 # process repeated. Before beginning, the divisor and quotient are
324 # 'normalized' so that the process of estimating the quotient digit
325 # will yield verifiably correct results..
327 clr.l DDNORMAL(%a6) # count of shifts for normalization
328 clr.b DDSECOND(%a6) # clear flag for quotient digits
329 clr.l %d1 # %d1 will hold trial quotient
331 btst &31, %d7 # must we normalize? first word of
332 bne.b lddnormalized # divisor (V1) must be >= 65536/2
333 addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
334 lsl.l &0x1, %d7 # shift the divisor
335 lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
336 roxl.l &0x1, %d5 # shift u1,u2
340 # Now calculate an estimate of the quotient words (msw first, then lsw).
341 # The comments use subscripts for the first quotient digit determination.
342 mov.l %d7, %d3 # divisor
343 mov.l %d5, %d2 # dividend mslw
346 cmp.w %d2, %d3 # V1 = U1 ?
348 mov.w &0xffff, %d1 # use max trial quotient word
353 divu.w %d3, %d1 # use quotient of mslw/msw
355 andi.l &0x0000ffff, %d1 # zero any remainder
358 # now test the trial quotient and adjust. This step plus the
359 # normalization assures (according to Knuth) that the trial
360 # quotient will be at worst 1 too large.
362 clr.w %d6 # word u3 left
363 swap %d6 # in lsw position
364 lddadj1: mov.l %d7, %d3
366 mulu.w %d7, %d2 # V2q
368 mulu.w %d1, %d3 # V1q
369 mov.l %d5, %d4 # U1U2
370 sub.l %d3, %d4 # U1U2 - V1q
375 mov.w %d6,%d4 # insert lower word (U3)
377 tst.w %d0 # is upper word set?
380 # add.l %d6, %d4 # (U1U2 - V1q) + U3
383 bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
384 subq.l &0x1, %d1 # yes, decrement and recheck
387 # now test the word by multiplying it by the divisor (V1V2) and comparing
388 # the 3 digit (word) result with the current dividend words
389 mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
391 swap %d6 # shift answer to ms 3 words
394 mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
396 mov.l (%sp)+, %d5 # restore dividend
399 subx.l %d2, %d5 # subtract double precision
400 bcc ldd2nd # no carry, do next quotient digit
401 subq.l &0x1, %d1 # q is one too large
402 # need to add back divisor longword to current ms 3 digits of dividend
403 # - according to Knuth, this is done only 2 out of 65536 times for random
404 # divisor, dividend selection.
408 clr.w %d3 # %d3 now ls word of divisor
409 add.l %d3, %d6 # aligned with 3rd word of dividend
412 clr.w %d3 # %d3 now ms word of divisor
413 swap %d3 # aligned with 2nd word of dividend
416 tst.b DDSECOND(%a6) # both q words done?
418 # first quotient digit now correct. store digit and shift the
419 # (subtracted) dividend
420 mov.w %d1, DDQUOTIENT(%a6) # upper quotient word
426 st DDSECOND(%a6) # second digit
429 # add 2nd word to quotient, get the remainder.
430 mov.w %d1, DDQUOTIENT+2(%a6) # lower quotient word
431 # shift down one word/digit to renormalize remainder.
435 mov.l DDNORMAL(%a6), %d7 # get norm shift count
437 subq.l &0x1, %d7 # set for loop count
439 lsr.l &0x1, %d5 # shift into %d6
443 mov.l %d6, %d5 # remainder
444 mov.l DDQUOTIENT(%a6), %d6 # quotient
# 32x32->64 unsigned multiply helper built from four 16x16 products.
# NOTE(review): the routine's label, the register set-up before the multiplies
# and the return are not visible in this excerpt; the carry adds below rely on
# %d4 being zero at that point - confirm against the full source.
448 # factors for the 32X32->64 multiplication are in %d5 and %d6.
449 # returns 64 bit result in %d5 (hi) %d6(lo).
450 # destroys %d2,%d3,%d4.
452 # multiply hi,lo words of each factor to get 4 intermediate products
458 mulu.w %d5, %d6 # %d6 <- lsw*lsw
459 mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
460 mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
461 mulu.w %d4, %d3 # %d3 <- msw*msw
462 # now use swap and addx to consolidate to two longwords
465 add.w %d5, %d6 # add msw of l*l to lsw of m*l product
466 addx.w %d4, %d3 # add any carry to m*m product
467 add.w %d2, %d6 # add in lsw of other m*l product
468 addx.w %d4, %d3 # add any carry to m*m product
469 swap %d6 # %d6 is low 32 bits of final product
471 clr.w %d2 # lsw of two mixed products used,
472 swap %d5 # now use msws of longwords
475 add.l %d3, %d5 # %d5 now ms 32 bits of final product
478 #########################################################################
479 # XDEF **************************************************************** #
480 # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
481 # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
483 # This is the library version which is accessed as a subroutine #
484 # and therefore does not work exactly like the 680X0 mul{s,u}.l #
485 # 64-bit multiply instruction. #
487 # XREF **************************************************************** #
490 # INPUT *************************************************************** #
491 # 0x4(sp) = multiplier #
492 # 0x8(sp) = multiplicand #
493 # 0xc(sp) = pointer to location to place 64-bit result #
495 # OUTPUT ************************************************************** #
496 # 0xc(sp) = points to location of 64-bit result #
498 # ALGORITHM *********************************************************** #
499 # Perform the multiply in pieces using 16x16->32 unsigned #
500 # multiplies and "add" instructions. #
501 # Set the condition codes as appropriate before performing an #
502 # "rts". #
504 #########################################################################
# Unsigned 32x32->64 multiply entry point. Operands are read from 0x8(%a6) and
# 0xc(%a6); the 64-bit product is stored through the pointer at 0x10(%a6).
# NOTE(review): the entry label, the frame setup for %a6, internal labels and
# the return are not visible in this excerpt.
508 global _060LSP__imulu64_
511 # PROLOGUE BEGIN ########################################################
513 movm.l &0x3800,-(%sp) # save d2-d4
514 # fmovm.l &0x0,-(%sp) # save no fpregs
515 # PROLOGUE END ##########################################################
517 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
519 mov.l 0x8(%a6),%d0 # store multiplier in d0
520 beq.w mulu64_zero # handle zero separately
522 mov.l 0xc(%a6),%d1 # get multiplicand in d1
523 beq.w mulu64_zero # handle zero separately
525 #########################################################################
527 # ---------------------------- #
528 # | hi(mplier) * hi(mplicand)| #
529 # ---------------------------- #
530 # ----------------------------- #
531 # | hi(mplier) * lo(mplicand) | #
532 # ----------------------------- #
533 # ----------------------------- #
534 # | lo(mplier) * hi(mplicand) | #
535 # ----------------------------- #
536 # | ----------------------------- #
537 # --|-- | lo(mplier) * lo(mplicand) | #
538 # | ----------------------------- #
539 # ======================================================== #
540 # -------------------------------------------------------- #
541 # | hi(result) | lo(result) | #
542 # -------------------------------------------------------- #
543 #########################################################################
545 # load temp registers with operands
546 mov.l %d0,%d2 # mr in d2
547 mov.l %d0,%d3 # mr in d3
548 mov.l %d1,%d4 # md in d4
549 swap %d3 # hi(mr) in lo d3
550 swap %d4 # hi(md) in lo d4
552 # complete necessary multiplies:
553 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
554 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
555 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
556 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
558 # add lo portions of [2],[3] to hi portion of [1].
559 # add carries produced from these adds to [4].
560 # lo([1]) is the final lo 16 bits of the result.
561 clr.l %d4 # load d4 w/ zero value
562 swap %d0 # hi([1]) <==> lo([1])
563 add.w %d1,%d0 # hi([1]) + lo([2])
564 addx.l %d4,%d3 # [4] + carry
565 add.w %d2,%d0 # hi([1]) + lo([3])
566 addx.l %d4,%d3 # [4] + carry
567 swap %d0 # lo([1]) <==> hi([1])
569 # lo portions of [2],[3] have been added in to final result.
570 # now, clear lo, put hi in lo reg, and add to [4]
571 clr.w %d1 # clear lo([2])
572 clr.w %d2 # clear lo([3])
573 swap %d1 # hi([2]) in lo d1
574 swap %d2 # hi([3]) in lo d2
575 add.l %d2,%d1 # hi([2]) + hi([3])
576 add.l %d3,%d1 # ... + [4] (incl. carries) = hi(result)
578 # now, grab the condition codes. only one that can be set is 'N'.
579 # 'N' CAN be set if the operation is unsigned if bit 63 is set.
580 mov.w MUL64_CC(%a6),%d4
581 andi.b &0x10,%d4 # keep old 'X' bit
582 tst.l %d1 # may set 'N' bit
584 ori.b &0x8,%d4 # set 'N' bit
588 # here, the result is in d1 (hi) and d0 (lo). the two registers are
589 # stored through the result pointer held at 0x10(%a6).
590 # use movm here to not disturb the condition codes.
# NOTE(review): any register swap or ccode write between the lines above and
# the store below is not visible in this excerpt.
593 movm.l &0x0003,([0x10,%a6]) # save result (d0 first, then d1)
595 # EPILOGUE BEGIN ########################################################
596 # fmovm.l (%sp)+,&0x0 # restore no fpregs
597 movm.l (%sp)+,&0x001c # restore d2-d4
599 # EPILOGUE END ##########################################################
603 # one or both of the operands is zero so the result is also zero.
604 # save the zero result to the register file and set the 'Z' ccode bit.
# NOTE(review): the instructions that clear the result and adjust the ccode
# bits in d4 are not visible in this excerpt.
609 mov.w MUL64_CC(%a6),%d4
612 mov.w %d4,%cc # set 'Z' ccode bit
# Signed 32x32->64 multiply entry point. Both operands are made positive, the
# unsigned product is formed from four 16x16 multiplies, and the product is
# negated afterwards when exactly one operand was negative (tag in d5).
# NOTE(review): the entry label, the frame setup for %a6, internal labels and
# the return are not visible in this excerpt.
619 global _060LSP__imuls64_
622 # PROLOGUE BEGIN ########################################################
624 movm.l &0x3c00,-(%sp) # save d2-d5
625 # fmovm.l &0x0,-(%sp) # save no fpregs
626 # PROLOGUE END ##########################################################
628 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
# NOTE(review): both zero checks branch to mulu64_zero, the label named for
# the unsigned routine, although this routine saved d2-d5 rather than d2-d4.
# Confirm that the target's register restore is compatible, and whether the
# zero-result code that follows this routine's epilogue is still reachable.
630 mov.l 0x8(%a6),%d0 # store multiplier in d0
631 beq.b mulu64_zero # handle zero separately
633 mov.l 0xc(%a6),%d1 # get multiplicand in d1
634 beq.b mulu64_zero # handle zero separately
636 clr.b %d5 # clear sign tag
637 tst.l %d0 # is multiplier negative?
638 bge.b muls64_chk_md_sgn # no
639 neg.l %d0 # make multiplier positive
641 ori.b &0x1,%d5 # save multiplier sgn
643 # the result sign is the exclusive or of the operand sign bits.
645 tst.l %d1 # is multiplicand negative?
646 bge.b muls64_alg # no
647 neg.l %d1 # make multiplicand positive
649 eori.b &0x1,%d5 # calculate correct sign
651 #########################################################################
653 # ---------------------------- #
654 # | hi(mplier) * hi(mplicand)| #
655 # ---------------------------- #
656 # ----------------------------- #
657 # | hi(mplier) * lo(mplicand) | #
658 # ----------------------------- #
659 # ----------------------------- #
660 # | lo(mplier) * hi(mplicand) | #
661 # ----------------------------- #
662 # | ----------------------------- #
663 # --|-- | lo(mplier) * lo(mplicand) | #
664 # | ----------------------------- #
665 # ======================================================== #
666 # -------------------------------------------------------- #
667 # | hi(result) | lo(result) | #
668 # -------------------------------------------------------- #
669 #########################################################################
671 # load temp registers with operands
672 mov.l %d0,%d2 # mr in d2
673 mov.l %d0,%d3 # mr in d3
674 mov.l %d1,%d4 # md in d4
675 swap %d3 # hi(mr) in lo d3
676 swap %d4 # hi(md) in lo d4
678 # complete necessary multiplies:
679 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
680 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
681 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
682 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
684 # add lo portions of [2],[3] to hi portion of [1].
685 # add carries produced from these adds to [4].
686 # lo([1]) is the final lo 16 bits of the result.
687 clr.l %d4 # load d4 w/ zero value
688 swap %d0 # hi([1]) <==> lo([1])
689 add.w %d1,%d0 # hi([1]) + lo([2])
690 addx.l %d4,%d3 # [4] + carry
691 add.w %d2,%d0 # hi([1]) + lo([3])
692 addx.l %d4,%d3 # [4] + carry
693 swap %d0 # lo([1]) <==> hi([1])
695 # lo portions of [2],[3] have been added in to final result.
696 # now, clear lo, put hi in lo reg, and add to [4]
697 clr.w %d1 # clear lo([2])
698 clr.w %d2 # clear lo([3])
699 swap %d1 # hi([2]) in lo d1
700 swap %d2 # hi([3]) in lo d2
701 add.l %d2,%d1 # hi([2]) + hi([3])
702 add.l %d3,%d1 # ... + [4] (incl. carries) = hi(result)
704 tst.b %d5 # should result be signed?
705 beq.b muls64_done # no
707 # result should be a signed negative number.
708 # compute 2's complement of the unsigned number:
709 # -negate all bits and add 1
711 not.l %d0 # negate lo(result) bits
712 not.l %d1 # negate hi(result) bits
713 addq.l &1,%d0 # add 1 to lo(result)
714 addx.l %d4,%d1 # add carry to hi(result) (d4 was zeroed above)
717 mov.w MUL64_CC(%a6),%d4
718 andi.b &0x10,%d4 # keep old 'X' bit
719 tst.l %d1 # may set 'N' bit
721 ori.b &0x8,%d4 # set 'N' bit
725 # here, the result is in d1 (hi) and d0 (lo). the two registers are
726 # stored through the result pointer held at 0x10(%a6).
727 # use movm here to not disturb the condition codes.
# NOTE(review): any register swap or ccode write between the lines above and
# the store below is not visible in this excerpt.
730 movm.l &0x0003,([0x10,%a6]) # save result (d0 first, then d1)
732 # EPILOGUE BEGIN ########################################################
733 # fmovm.l (%sp)+,&0x0 # restore no fpregs
734 movm.l (%sp)+,&0x003c # restore d2-d5
736 # EPILOGUE END ##########################################################
740 # one or both of the operands is zero so the result is also zero.
741 # save the zero result to the register file and set the 'Z' ccode bit.
# NOTE(review): the instructions that clear the result and adjust the ccode
# bits in d4 are not visible in this excerpt.
746 mov.w MUL64_CC(%a6),%d4
749 mov.w %d4,%cc # set 'Z' ccode bit
753 #########################################################################
754 # XDEF **************************************************************** #
755 # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
756 # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
757 # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
758 # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
759 # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
760 # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
762 # This is the library version which is accessed as a subroutine #
763 # and therefore does not work exactly like the 680X0 "cmp2" #
764 # instruction. #
766 # XREF **************************************************************** #
769 # INPUT *************************************************************** #
770 # 0x4(sp) = Rn (register value to compare) #
771 # 0x8(sp) = pointer to boundary pair #
773 # OUTPUT ************************************************************** #
774 # cc = condition codes are set correctly #
776 # ALGORITHM *********************************************************** #
777 # In the interest of simplicity, all operands are converted to #
778 # longword size whether the operation is byte, word, or long. The #
779 # bounds are sign extended accordingly. If Rn is a data register, Rn is #
780 # also sign extended. If Rn is an address register, it need not be sign #
781 # extended since the full register is always used. #
782 # The condition codes are set correctly before the final "rts". #
784 #########################################################################
# cmp2.b with an address-register operand: loads the byte bounds, sign extends
# them to longwords and branches to the shared compare. The register value is
# used as a full longword (no sign extension).
# NOTE(review): the entry label and the frame setup for %a6 are not visible in
# this excerpt.
788 global _060LSP__cmp2_Ab_
791 # PROLOGUE BEGIN ########################################################
793 movm.l &0x3800,-(%sp) # save d2-d4
794 # fmovm.l &0x0,-(%sp) # save no fpregs
795 # PROLOGUE END ##########################################################
797 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
798 mov.l 0x8(%a6), %d2 # get regval
800 mov.b ([0xc,%a6],0x0),%d0 # lo bound (first byte of the pair)
801 mov.b ([0xc,%a6],0x1),%d1 # hi bound (second byte of the pair)
803 extb.l %d0 # sign extend lo bnd
804 extb.l %d1 # sign extend hi bnd
805 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.w with an address-register operand: loads the word bounds, sign extends
# them to longwords and branches to the shared compare. The register value is
# used as a full longword (no sign extension).
# NOTE(review): the entry label and the frame setup for %a6 are not visible in
# this excerpt.
807 global _060LSP__cmp2_Aw_
810 # PROLOGUE BEGIN ########################################################
812 movm.l &0x3800,-(%sp) # save d2-d4
813 # fmovm.l &0x0,-(%sp) # save no fpregs
814 # PROLOGUE END ##########################################################
816 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
817 mov.l 0x8(%a6), %d2 # get regval
819 mov.w ([0xc,%a6],0x0),%d0 # lo bound (first word of the pair)
820 mov.w ([0xc,%a6],0x2),%d1 # hi bound (second word of the pair)
822 ext.l %d0 # sign extend lo bnd
823 ext.l %d1 # sign extend hi bnd
824 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.l with an address-register operand: loads the longword bounds and
# branches to the shared compare. No sign extension is needed at this size.
# NOTE(review): the entry label and the frame setup for %a6 are not visible in
# this excerpt.
826 global _060LSP__cmp2_Al_
829 # PROLOGUE BEGIN ########################################################
831 movm.l &0x3800,-(%sp) # save d2-d4
832 # fmovm.l &0x0,-(%sp) # save no fpregs
833 # PROLOGUE END ##########################################################
835 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
836 mov.l 0x8(%a6), %d2 # get regval
838 mov.l ([0xc,%a6],0x0),%d0 # lo bound (first longword of the pair)
839 mov.l ([0xc,%a6],0x4),%d1 # hi bound (second longword of the pair)
840 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.b with a data-register operand: loads the byte bounds, sign extends the
# bounds and the register value to longwords, then branches to the shared
# compare.
# NOTE(review): the entry label and the frame setup for %a6 are not visible in
# this excerpt.
842 global _060LSP__cmp2_Db_
845 # PROLOGUE BEGIN ########################################################
847 movm.l &0x3800,-(%sp) # save d2-d4
848 # fmovm.l &0x0,-(%sp) # save no fpregs
849 # PROLOGUE END ##########################################################
851 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
852 mov.l 0x8(%a6), %d2 # get regval
854 mov.b ([0xc,%a6],0x0),%d0 # lo bound (first byte of the pair)
855 mov.b ([0xc,%a6],0x1),%d1 # hi bound (second byte of the pair)
857 extb.l %d0 # sign extend lo bnd
858 extb.l %d1 # sign extend hi bnd
860 # operation is a data register compare.
861 # sign extend byte to long so we can do simple longword compares.
862 extb.l %d2 # sign extend data byte
863 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.w with a data-register operand: loads the word bounds, sign extends the
# bounds and the register value to longwords, then branches to the shared
# compare.
# NOTE(review): the entry label and the frame setup for %a6 are not visible in
# this excerpt.
865 global _060LSP__cmp2_Dw_
868 # PROLOGUE BEGIN ########################################################
870 movm.l &0x3800,-(%sp) # save d2-d4
871 # fmovm.l &0x0,-(%sp) # save no fpregs
872 # PROLOGUE END ##########################################################
874 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
875 mov.l 0x8(%a6), %d2 # get regval
877 mov.w ([0xc,%a6],0x0),%d0 # lo bound (first word of the pair)
878 mov.w ([0xc,%a6],0x2),%d1 # hi bound (second word of the pair)
880 ext.l %d0 # sign extend lo bnd
881 ext.l %d1 # sign extend hi bnd
883 # operation is a data register compare.
884 # sign extend word to long so we can do simple longword compares.
885 ext.l %d2 # sign extend data word
886 bra.w l_cmp2_cmp # go emulate compare
# cmp2.l with a data-register operand, followed by the compare code that the
# other cmp2 entry points reach via "bra.w l_cmp2_cmp" (presumably the code
# after the bound loads below; the label itself is not visible here).
# On entry to the compare: d0 = lo bound, d1 = hi bound, d2 = register value,
# all as longwords.
# NOTE(review): the entry label, the frame setup for %a6 and the return are
# not visible in this excerpt.
888 global _060LSP__cmp2_Dl_
891 # PROLOGUE BEGIN ########################################################
893 movm.l &0x3800,-(%sp) # save d2-d4
894 # fmovm.l &0x0,-(%sp) # save no fpregs
895 # PROLOGUE END ##########################################################
897 mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
898 mov.l 0x8(%a6), %d2 # get regval
900 mov.l ([0xc,%a6],0x0),%d0 # lo bound (first longword of the pair)
901 mov.l ([0xc,%a6],0x4),%d1 # hi bound (second longword of the pair)
904 # To set the ccodes correctly:
905 # (1) save 'Z' bit from (Rn - lo)
906 # (2) save 'Z' and 'C' bits from the compare of (hi - lo) with (Rn - lo)
907 # (3) keep 'X', 'N', and 'V' from before instruction
911 sub.l %d0, %d2 # (Rn - lo)
912 mov.w %cc, %d3 # fetch resulting ccodes
913 andi.b &0x4, %d3 # keep 'Z' bit
914 sub.l %d0, %d1 # (hi - lo)
915 cmp.l %d1,%d2 # compare (hi - lo) with (Rn - lo)
917 mov.w %cc, %d4 # fetch resulting ccodes
918 or.b %d4, %d3 # combine w/ earlier ccodes
919 andi.b &0x5, %d3 # keep 'Z' and 'C' (mask 0x4 | 0x1)
921 mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
922 andi.b &0x1a, %d4 # keep 'X','N','V' bits
923 or.b %d3, %d4 # insert new ccodes
924 mov.w %d4,%cc # save new ccodes
926 # EPILOGUE BEGIN ########################################################
927 # fmovm.l (%sp)+,&0x0 # restore no fpregs
928 movm.l (%sp)+,&0x001c # restore d2-d4
930 # EPILOGUE END ##########################################################