arch/m68k-all/m680x0/060sp/dist/ilsp.s

   1 #
   2 # $NetBSD: ilsp.s,v 1.1 2000/04/14 20:24:39 is Exp $
   3 #
   4
   5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   7 # M68000 Hi-Performance Microprocessor Division
   8 # M68060 Software Package Production Release
   9 #
  10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
  11 # All rights reserved.
  12 #
  13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  14 # To the maximum extent permitted by applicable law,
  15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
  17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
  18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
  19 # and any accompanying written materials.
  20 #
  21 # To the maximum extent permitted by applicable law,
  22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  26 #
  27 # Motorola assumes no responsibility for the maintenance and support
  28 # of the SOFTWARE.
  29 #
  30 # You are hereby granted a copyright license to use, modify, and distribute the
  31 # SOFTWARE so long as this entire notice is retained without alteration
  32 # in any modified and/or redistributed versions, and that such modified
  33 # versions are clearly identified as such.
  34 # No licenses are granted by implication, estoppel or otherwise under any
  35 # patents or trademarks of Motorola, Inc.
  36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  37
  38 #
  39 # litop.s:
  40 #       This file is appended to the top of the 060FPLSP package
  41 # and contains the entry points into the package. The user, in
  42 # effect, branches to one of the branch table entries located here.
  43 #
  44
   45         bra.l   _060LSP__idivs64_       # entry 0: signed 64-bit divide
   46         short   0x0000                  # filler word after each bra.l entry
   47         bra.l   _060LSP__idivu64_       # entry 1: unsigned 64-bit divide
   48         short   0x0000
   49
   50         bra.l   _060LSP__imuls64_       # entry 2: signed 64-bit multiply
   51         short   0x0000
   52         bra.l   _060LSP__imulu64_       # entry 3: unsigned 64-bit multiply
   53         short   0x0000
   54
   55         bra.l   _060LSP__cmp2_Ab_
   56         short   0x0000
   57         bra.l   _060LSP__cmp2_Aw_
   58         short   0x0000
   59         bra.l   _060LSP__cmp2_Al_
   60         short   0x0000
   61         bra.l   _060LSP__cmp2_Db_
   62         short   0x0000
   63         bra.l   _060LSP__cmp2_Dw_
   64         short   0x0000
   65         bra.l   _060LSP__cmp2_Dl_
   66         short   0x0000
   67
   68 # leave room for future possible additions.
   69         align   0x200
  70
  71 #########################################################################
  72 # XDEF **************************************************************** #
  73 #       _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.   #
  74 #       _060LSP__idivs64_(): Emulate 64-bit signed div instruction.     #
  75 #                                                                       #
  76 #       This is the library version which is accessed as a subroutine   #
  77 #       and therefore does not work exactly like the 680X0 div{s,u}.l   #
  78 #       64-bit divide instruction.                                      #
  79 #                                                                       #
  80 # XREF **************************************************************** #
  81 #       None.                                                           #
  82 #                                                                       #
  83 # INPUT *************************************************************** #
  84 #       0x4(sp)  = divisor                                              #
  85 #       0x8(sp)  = hi(dividend)                                         #
  86 #       0xc(sp)  = lo(dividend)                                         #
  87 #       0x10(sp) = pointer to location to place quotient/remainder      #
  88 #                                                                       #
  89 # OUTPUT ************************************************************** #
  90 #       0x10(sp) = points to location of remainder/quotient.            #
  91 #                  remainder is in first longword, quotient is in 2nd.  #
  92 #                                                                       #
  93 # ALGORITHM *********************************************************** #
  94 #       If the operands are signed, make them unsigned and save the     #
  95 # sign info for later. Separate out special cases like divide-by-zero   #
  96 # or 32-bit divides if possible. Else, use a special math algorithm     #
  97 # to calculate the result.                                              #
  98 #       Restore sign info if signed instruction. Set the condition      #
  99 # codes before performing the final "rts". If the divisor was equal to  #
 100 # zero, then perform a divide-by-zero using a 16-bit implemented        #
 101 # divide instruction. This way, the operating system can record that    #
 102 # the event occurred even though it may not point to the correct place. #
 103 #                                                                       #
 104 #########################################################################
 105
  106 set     POSNEG,         -1              # byte: set (st) for divs, clear (sf) for divu
  107 set     NDIVISOR,       -2              # byte: set if the divisor was negative
  108 set     NDIVIDEND,      -3              # byte: set if the dividend was negative
  109 set     DDSECOND,       -4              # byte: set once the 1st quotient word is done
  110 set     DDNORMAL,       -8              # long: count of normalization shifts
  111 set     DDQUOTIENT,     -12             # long: quotient, assembled one word at a time
  112 set     DIV64_CC,       -16             # word: ccodes saved at entry
 113
 114 ##########
 115 # divs.l #
 116 ##########
  117         global          _060LSP__idivs64_
  118 _060LSP__idivs64_:
  119 # PROLOGUE BEGIN ########################################################
  120         link.w          %a6,&-16                # frame for the locals defined by the "set"s above
  121         movm.l          &0x3f00,-(%sp)          # save d2-d7
  122 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
  123 # PROLOGUE END ##########################################################
  124
  125         mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
  126         st              POSNEG(%a6)             # signed operation
  127         bra.b           ldiv64_cont             # join the code shared with divu
 128
 129 ##########
 130 # divu.l #
 131 ##########
  132         global          _060LSP__idivu64_
  133 _060LSP__idivu64_:
  134 # PROLOGUE BEGIN ########################################################
  135         link.w          %a6,&-16                # frame for the locals defined by the "set"s above
  136         movm.l          &0x3f00,-(%sp)          # save d2-d7
  137 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
  138 # PROLOGUE END ##########################################################
  139
  140         mov.w           %cc,DIV64_CC(%a6)       # save incoming ccodes
  141         sf              POSNEG(%a6)             # unsigned operation; falls into ldiv64_cont
 142
  143 ldiv64_cont:
  144         mov.l           0x8(%a6),%d7            # fetch divisor
  145
  146         beq.w           ldiv64eq0               # divisor == 0: go force a divbyzero
  147
  148         mov.l           0xc(%a6), %d5           # get dividend hi
  149         mov.l           0x10(%a6), %d6          # get dividend lo
  150
  151 # separate signed and unsigned divide
  152         tst.b           POSNEG(%a6)             # signed or unsigned?
  153         beq.b           ldspecialcases          # unsigned: no sign handling needed
  154
  155 # save the sign of the divisor
  156 # make divisor unsigned if it's negative
  157         tst.l           %d7                     # chk sign of divisor
  158         slt             NDIVISOR(%a6)           # save sign of divisor
  159         bpl.b           ldsgndividend
  160         neg.l           %d7                     # complement negative divisor
  161
  162 # save the sign of the dividend
  163 # make dividend unsigned if it's negative
  164 ldsgndividend:
  165         tst.l           %d5                     # chk sign of hi(dividend)
  166         slt             NDIVIDEND(%a6)          # save sign of dividend
  167         bpl.b           ldspecialcases
  168
  169         mov.w           &0x0, %cc               # clear 'X' cc bit
  170         negx.l          %d6                     # 64-bit negate: lo longword first,
  171         negx.l          %d5                     # then hi longword with the borrow in 'X'
  172
  173 # extract some special cases:
  174 #       - is (dividend == 0) ?
  175 #       - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
  176 ldspecialcases:
  177         tst.l           %d5                     # is (hi(dividend) == 0)
  178         bne.b           ldnormaldivide          # no, so try it the long way
  179
  180         tst.l           %d6                     # is (lo(dividend) == 0), too
  181         beq.w           lddone                  # yes, so (dividend == 0)
  182
  183         cmp.l           %d7,%d6                 # is (divisor <= lo(dividend))
  184         bls.b           ld32bitdivide           # yes, so use 32 bit divide
  185
  186         exg             %d5,%d6                 # q = 0 (d6), r = dividend (d5)
  187         bra.w           ldivfinish              # can't divide, we're done.
  188
  189 ld32bitdivide:
  190         tdivu.l         %d7, %d5:%d6            # 32/32 bit div: r in d5, q in d6
  191
  192         bra.b           ldivfinish
  193
  194 ldnormaldivide:
  195 # last special case:
  196 #       - is hi(dividend) >= divisor ? if yes, then overflow
  197         cmp.l           %d7,%d5
  198         bls.b           lddovf                  # answer won't fit in 32 bits
  199
  200 # perform the divide algorithm:
  201         bsr.l           ldclassical             # do int divide: r in d5, q in d6
  202
  203 # separate into signed and unsigned finishes.
  204 ldivfinish:
  205         tst.b           POSNEG(%a6)             # do divs, divu separately
  206         beq.b           lddone                  # divu has no processing!!!
  207
  208 # it was a divs.l, so ccode setting is a little more complicated...
  209         tst.b           NDIVIDEND(%a6)          # remainder has same sign
  210         beq.b           ldcc                    # as dividend.
  211         neg.l           %d5                     # sgn(rem) = sgn(dividend)
  212 ldcc:
  213         mov.b           NDIVISOR(%a6), %d0
  214         eor.b           %d0, NDIVIDEND(%a6)     # signs differ => quotient is negative
  215         beq.b           ldqpos                  # branch to quot positive
  216
  217 # 0x80000000 is the largest magnitude representable as a 32-bit negative
  218 # number. the negative of 0x80000000 is 0x80000000.
  219         cmpi.l          %d6, &0x80000000        # will (-quot) fit in 32 bits?
  220         bhi.b           lddovf
  221
  222         neg.l           %d6                     # make (-quot) 2's comp
  223
  224         bra.b           lddone
  225
  226 ldqpos:
  227         btst            &0x1f, %d6              # will (+quot) fit in 32 bits?
  228         bne.b           lddovf
  229
  230 lddone:
  231 # if the register numbers are the same, only the quotient gets saved.
  232 # so, if we always save the quotient second, we save ourselves a cmp&beq
  233         andi.w          &0x10,DIV64_CC(%a6)     # keep only the caller's 'X' bit
  234         mov.w           DIV64_CC(%a6),%cc
  235         tst.l           %d6                     # set 'N'/'Z' from the quotient
  236
  237 # here, the remainder is in d5 and the quotient is in d6. save them
  238 # (remainder first) at the location whose address is in 0x14(%a6).
  239 # use movm here to not disturb the condition codes.
  240 ldexit:
  241         movm.l          &0x0060,([0x14,%a6])    # save result
  242
  243 # EPILOGUE BEGIN ########################################################
  244 #       fmovm.l         (%sp)+,&0x0             # restore no fpregs
  245         movm.l          (%sp)+,&0x00fc          # restore d2-d7
  246         unlk            %a6
  247 # EPILOGUE END ##########################################################
  248
  249         rts
 250
  251 # overflow: the result should be the unchanged dividend, with 'V' set
  252 lddovf:
  253         mov.l           0xc(%a6), %d5           # get dividend hi
  254         mov.l           0x10(%a6), %d6          # get dividend lo
  255
  256         andi.w          &0x1c,DIV64_CC(%a6)     # keep caller's 'X','N','Z'; clear 'V','C'
  257         ori.w           &0x02,DIV64_CC(%a6)     # set 'V' ccode bit
  258         mov.w           DIV64_CC(%a6),%cc
  259
  260         bra.b           ldexit                  # store d5/d6 and return
 261
  262 ldiv64eq0:
  263         mov.l           0xc(%a6),([0x14,%a6])   # result[0] = dividend hi (unchanged)
  264         mov.l           0x10(%a6),([0x14,%a6],0x4) # result[1] = dividend lo (unchanged)
  265
  266         mov.w           DIV64_CC(%a6),%cc       # restore the caller's ccodes
  267
  268 # EPILOGUE BEGIN ########################################################
  269 #       fmovm.l         (%sp)+,&0x0             # restore no fpregs
  270         movm.l          (%sp)+,&0x00fc          # restore d2-d7
  271         unlk            %a6
  272 # EPILOGUE END ##########################################################
  273
  274         divu.w          &0x0,%d0                # force a divbyzero exception
  275         rts                                     # reached if the exception handler returns
 276
 277 ###########################################################################
 278 #########################################################################
 279 # This routine uses the 'classical' Algorithm D from Donald Knuth's     #
 280 # Art of Computer Programming, vol II, Seminumerical Algorithms.        #
 281 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2,     #
 282 # where U,V are words of the quadword dividend and longword divisor,    #
 283 # and U1, V1 are the most significant words.                            #
 284 #                                                                       #
 285 # The most sig. longword of the 64 bit dividend must be in %d5, least   #
  286 # in %d6. The divisor must be in %d7. Both operands are treated as      #
  287 # unsigned; the caller strips and re-applies signs for divs.            #
 288 # The quotient is returned in %d6, remainder in %d5, unless the         #
 289 # v (overflow) bit is set in the saved %ccr. If overflow, the dividend  #
 290 # is unchanged.                                                         #
 291 #########################################################################
  292 ldclassical:
  293 # if the divisor msw is 0, use a simpler algorithm than the full blown
  294 # one at lddknuth:
  295
  296         cmpi.l          %d7, &0xffff
  297         bhi.b           lddknuth                # go use D. Knuth algorithm
  298
  299 # Since the divisor is only a word (and larger than the mslw of the dividend),
  300 # a simpler algorithm may be used :
  301 # In the general case, four quotient words would be created by
  302 # dividing the divisor word into each dividend word. In this case,
  303 # the first two quotient words must be zero, or overflow would occur.
  304 # Since we already checked this case above, we can treat the most significant
  305 # longword of the dividend as (0) remainder (see Knuth) and merely complete
  306 # the last two divisions to get a quotient longword and word remainder:
  307
  308         clr.l           %d1                     # %d1 collects the two quotient words
  309         swap            %d5                     # same as r*b if previous step rqd
  310         swap            %d6                     # get u3 to lsw position
  311         mov.w           %d6, %d5                # rb + u3
  312
  313         divu.w          %d7, %d5                # quotient -> lsw, remainder -> msw
  314
  315         mov.w           %d5, %d1                # first quotient word
  316         swap            %d6                     # get u4
  317         mov.w           %d6, %d5                # rb + u4
  318
  319         divu.w          %d7, %d5                # quotient -> lsw, remainder -> msw
  320
  321         swap            %d1                     # first quotient word to msw
  322         mov.w           %d5, %d1                # 2nd quotient 'digit'
  323         clr.w           %d5                     # drop the quotient word, keep remainder
  324         swap            %d5                     # now remainder
  325         mov.l           %d1, %d6                # and quotient
  326
  327         rts
 328
  329 lddknuth:
  330 # In this algorithm, the divisor is treated as a 2 digit (word) number
  331 # which is divided into a 3 digit (word) dividend to get one quotient
  332 # digit (word). After subtraction, the dividend is shifted and the
  333 # process repeated. Before beginning, the divisor and dividend are
  334 # 'normalized' so that the process of estimating the quotient digit
  335 # will yield verifiably correct results.
  336
  337         clr.l           DDNORMAL(%a6)           # count of shifts for normalization
  338         clr.b           DDSECOND(%a6)           # clear flag for quotient digits
  339         clr.l           %d1                     # %d1 will hold trial quotient
  340 lddnchk:
  341         btst            &31, %d7                # must we normalize? first word of
  342         bne.b           lddnormalized           # divisor (V1) must be >= 65536/2
  343         addq.l          &0x1, DDNORMAL(%a6)     # count normalization shifts
  344         lsl.l           &0x1, %d7               # shift the divisor
  345         lsl.l           &0x1, %d6               # shift u4,u3 with overflow to u2
  346         roxl.l          &0x1, %d5               # shift u1,u2
  347         bra.w           lddnchk
  348 lddnormalized:
  349
  350 # Now calculate an estimate of the quotient words (msw first, then lsw).
  351 # The comments use subscripts for the first quotient digit determination.
  352         mov.l           %d7, %d3                # divisor
  353         mov.l           %d5, %d2                # dividend mslw
  354         swap            %d2
  355         swap            %d3
  356         cmp.w           %d2, %d3                # V1 = U1 ?
  357         bne.b           lddqcalc1
  358         mov.w           &0xffff, %d1            # use max trial quotient word
  359         bra.b           lddadj0
  360 lddqcalc1:
  361         mov.l           %d5, %d1
  362
  363         divu.w          %d3, %d1                # use quotient of mslw/msw
  364
  365         andi.l          &0x0000ffff, %d1        # zero any remainder
  366 lddadj0:
  367
  368 # now test the trial quotient and adjust. This step plus the
  369 # normalization assures (according to Knuth) that the trial
  370 # quotient will be at worst 1 too large.
  371         mov.l           %d6, -(%sp)             # save %d6; popped again after ldmm2
  372         clr.w           %d6                     # word u3 left
  373         swap            %d6                     # in lsw position
  374 lddadj1: mov.l          %d7, %d3
  375         mov.l           %d1, %d2
  376         mulu.w          %d7, %d2                # V2q
  377         swap            %d3
  378         mulu.w          %d1, %d3                # V1q
  379         mov.l           %d5, %d4                # U1U2
  380         sub.l           %d3, %d4                # U1U2 - V1q
  381
  382         swap            %d4
  383
  384         mov.w           %d4,%d0                 # %d0 = upper word of (U1U2 - V1q)
  385         mov.w           %d6,%d4                 # insert lower word (U3)
  386
  387         tst.w           %d0                     # is upper word set?
  388         bne.w           lddadjd1                # yes: stop adjusting the trial quotient
  389
  390 #       add.l           %d6, %d4                # (U1U2 - V1q) + U3
  391
  392         cmp.l           %d2, %d4
  393         bls.b           lddadjd1                # is V2q > (U1U2-V1q) + U3 ?
  394         subq.l          &0x1, %d1               # yes, decrement and recheck
  395         bra.b           lddadj1
  396 lddadjd1:
  397 # now test the word by multiplying it by the divisor (V1V2) and comparing
  398 # the 3 digit (word) result with the current dividend words
  399         mov.l           %d5, -(%sp)             # save %d5 (%d6 already saved)
  400         mov.l           %d1, %d6
  401         swap            %d6                     # shift answer to ms 3 words
  402         mov.l           %d7, %d5
  403         bsr.l           ldmm2                   # %d5:%d6 = divisor * (trial << 16)
  404         mov.l           %d5, %d2                # now %d2,%d3 are trial*divisor
  405         mov.l           %d6, %d3
  406         mov.l           (%sp)+, %d5             # restore dividend
  407         mov.l           (%sp)+, %d6
  408         sub.l           %d3, %d6
  409         subx.l          %d2, %d5                # subtract double precision
  410         bcc             ldd2nd                  # no carry, do next quotient digit
  411         subq.l          &0x1, %d1               # q is one too large
  412 # need to add back divisor longword to current ms 3 digits of dividend
  413 # - according to Knuth, this is done only 2 out of 65536 times for random
  414 # divisor, dividend selection.
  415         clr.l           %d2
  416         mov.l           %d7, %d3
  417         swap            %d3
  418         clr.w           %d3                     # %d3 now ls word of divisor
  419         add.l           %d3, %d6                # aligned with 3rd word of dividend
  420         addx.l          %d2, %d5                # propagate the carry into %d5
  421         mov.l           %d7, %d3
  422         clr.w           %d3                     # %d3 now ms word of divisor
  423         swap            %d3                     # aligned with 2nd word of dividend
  424         add.l           %d3, %d5
  425 ldd2nd:
  426         tst.b           DDSECOND(%a6)   # both q words done?
  427         bne.b           lddremain
  428 # first quotient digit now correct. store digit and shift the
  429 # (subtracted) dividend
  430         mov.w           %d1, DDQUOTIENT(%a6)
  431         clr.l           %d1
  432         swap            %d5
  433         swap            %d6
  434         mov.w           %d6, %d5
  435         clr.w           %d6
  436         st              DDSECOND(%a6)           # second digit
  437         bra.w           lddnormalized           # repeat the estimate for the 2nd digit
  438 lddremain:
  439 # add 2nd word to quotient, get the remainder.
  440         mov.w           %d1, DDQUOTIENT+2(%a6)
  441 # shift down one word/digit to renormalize remainder.
  442         mov.w           %d5, %d6
  443         swap            %d6
  444         swap            %d5
  445         mov.l           DDNORMAL(%a6), %d7      # get norm shift count
  446         beq.b           lddrn                   # no normalization shifts to undo
  447         subq.l          &0x1, %d7               # set for loop count
  448 lddnlp:
  449         lsr.l           &0x1, %d5               # shift into %d6
  450         roxr.l          &0x1, %d6
  451         dbf             %d7, lddnlp
  452 lddrn:
  453         mov.l           %d6, %d5                # remainder
  454         mov.l           DDQUOTIENT(%a6), %d6    # quotient
  455
  456         rts
 457 ldmm2:
 458 # factors for the 32X32->64 multiplication are in %d5 and %d6.
 459 # returns 64 bit result in %d5 (hi) %d6(lo).
 460 # destroys %d2,%d3,%d4.
 461
 462 # multiply hi,lo words of each factor to get 4 intermediate products
 463         mov.l           %d6, %d2
 464         mov.l           %d6, %d3
 465         mov.l           %d5, %d4
 466         swap            %d3
 467         swap            %d4
 468         mulu.w          %d5, %d6                # %d6 <- lsw*lsw
 469         mulu.w          %d3, %d5                # %d5 <- msw-dest*lsw-source
 470         mulu.w          %d4, %d2                # %d2 <- msw-source*lsw-dest
 471         mulu.w          %d4, %d3                # %d3 <- msw*msw
 472 # now use swap and addx to consolidate to two longwords
 473         clr.l           %d4
 474         swap            %d6
 475         add.w           %d5, %d6                # add msw of l*l to lsw of m*l product
 476         addx.w          %d4, %d3                # add any carry to m*m product
 477         add.w           %d2, %d6                # add in lsw of other m*l product
 478         addx.w          %d4, %d3                # add any carry to m*m product
 479         swap            %d6                     # %d6 is low 32 bits of final product
 480         clr.w           %d5
 481         clr.w           %d2                     # lsw of two mixed products used,
 482         swap            %d5                     # now use msws of longwords
 483         swap            %d2
 484         add.l           %d2, %d5
 485         add.l           %d3, %d5        # %d5 now ms 32 bits of final product
 486         rts
 487
 488 #########################################################################
 489 # XDEF **************************************************************** #
 490 #       _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction    #
 491 #       _060LSP__imuls64_(): Emulate 64-bit signed mul instruction.     #
 492 #                                                                       #
 493 #       This is the library version which is accessed as a subroutine   #
 494 #       and therefore does not work exactly like the 680X0 mul{s,u}.l   #
 495 #       64-bit multiply instruction.                                    #
 496 #                                                                       #
 497 # XREF **************************************************************** #
 498 #       None                                                            #
 499 #                                                                       #
 500 # INPUT *************************************************************** #
 501 #       0x4(sp) = multiplier                                            #
 502 #       0x8(sp) = multiplicand                                          #
 503 #       0xc(sp) = pointer to location to place 64-bit result            #
 504 #                                                                       #
 505 # OUTPUT ************************************************************** #
 506 #       0xc(sp) = points to location of 64-bit result                   #
 507 #                                                                       #
 508 # ALGORITHM *********************************************************** #
 509 #       Perform the multiply in pieces using 16x16->32 unsigned         #
 510 # multiplies and "add" instructions.                                    #
 511 #       Set the condition codes as appropriate before performing an     #
 512 # "rts".                                                                #
 513 #                                                                       #
 514 #########################################################################
 515
  516 set MUL64_CC, -4
  517
  518         global          _060LSP__imulu64_
  519 _060LSP__imulu64_:
  520
  521 # PROLOGUE BEGIN ########################################################
  522         link.w          %a6,&-4                 # 4 bytes of locals (MUL64_CC)
  523         movm.l          &0x3800,-(%sp)          # save d2-d4
  524 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
  525 # PROLOGUE END ##########################################################
  526
  527         mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
  528
  529         mov.l           0x8(%a6),%d0            # store multiplier in d0
  530         beq.w           mulu64_zero             # handle zero separately
  531
  532         mov.l           0xc(%a6),%d1            # get multiplicand in d1
  533         beq.w           mulu64_zero             # handle zero separately
  534
  535 #########################################################################
  536 #       63                         32                           0       #
  537 #       ----------------------------                                    #
  538 #       | hi(mplier) * hi(mplicand)|                                    #
  539 #       ----------------------------                                    #
  540 #                    -----------------------------                      #
  541 #                    | hi(mplier) * lo(mplicand) |                      #
  542 #                    -----------------------------                      #
  543 #                    -----------------------------                      #
  544 #                    | lo(mplier) * hi(mplicand) |                      #
  545 #                    -----------------------------                      #
  546 #         |                        -----------------------------        #
  547 #       --|--                      | lo(mplier) * lo(mplicand) |        #
  548 #         |                        -----------------------------        #
  549 #       ========================================================        #
  550 #       --------------------------------------------------------        #
  551 #       |       hi(result)         |        lo(result)         |        #
  552 #       --------------------------------------------------------        #
  553 #########################################################################
  554 mulu64_alg:
  555 # load temp registers with operands
  556         mov.l           %d0,%d2                 # mr in d2
  557         mov.l           %d0,%d3                 # mr in d3
  558         mov.l           %d1,%d4                 # md in d4
  559         swap            %d3                     # hi(mr) in lo d3
  560         swap            %d4                     # hi(md) in lo d4
  561
  562 # complete necessary multiplies:
  563         mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
  564         mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
  565         mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
  566         mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
  567
  568 # add lo portions of [2],[3] to hi portion of [1].
  569 # add carries produced from these adds to [4].
  570 # lo([1]) is the final lo 16 bits of the result.
  571         clr.l           %d4                     # load d4 w/ zero value
  572         swap            %d0                     # hi([1]) <==> lo([1])
  573         add.w           %d1,%d0                 # hi([1]) + lo([2])
  574         addx.l          %d4,%d3                 #    [4]  + carry
  575         add.w           %d2,%d0                 # hi([1]) + lo([3])
  576         addx.l          %d4,%d3                 #    [4]  + carry
  577         swap            %d0                     # lo([1]) <==> hi([1])
  578
  579 # lo portions of [2],[3] have been added in to final result.
  580 # now, clear lo, put hi in lo reg, and add to [4]
  581         clr.w           %d1                     # clear lo([2])
  582         clr.w           %d2                     # clear lo([3])
  583         swap            %d1                     # hi([2]) in lo d1
  584         swap            %d2                     # hi([3]) in lo d2
  585         add.l           %d2,%d1                 # hi([2]) + hi([3])
  586         add.l           %d3,%d1                 #    ...  + [4] = hi(result)
  587
  588 # now, grab the condition codes. only one that can be set is 'N'.
  589 # 'N' CAN be set if the operation is unsigned if bit 63 is set.
  590         mov.w           MUL64_CC(%a6),%d4
  591         andi.b          &0x10,%d4               # keep old 'X' bit
  592         tst.l           %d1                     # may set 'N' bit
  593         bpl.b           mulu64_ddone
  594         ori.b           &0x8,%d4                # set 'N' bit
  595 mulu64_ddone:
  596         mov.w           %d4,%cc
  597
  598 # here, hi(result) is in d1 and lo(result) is in d0. exchange them so
  599 # that movm writes hi first, at the location whose address is in 0x10(%a6).
  600 # use movm here to not disturb the condition codes.
  601 mulu64_end:
  602         exg             %d1,%d0
  603         movm.l          &0x0003,([0x10,%a6])            # save result
  604
  605 # EPILOGUE BEGIN ########################################################
  606 #       fmovm.l         (%sp)+,&0x0             # restore no fpregs
  607         movm.l          (%sp)+,&0x001c          # restore d2-d4
  608         unlk            %a6
  609 # EPILOGUE END ##########################################################
  610
  611         rts
 612
  613 # one or both of the operands is zero so the result is also zero.
  614 # save the zero result to the result location and set the 'Z' ccode bit.
  615 mulu64_zero:
  616         clr.l           %d0
  617         clr.l           %d1
  618
  619         mov.w           MUL64_CC(%a6),%d4
  620         andi.b          &0x10,%d4               # keep old 'X' bit
  621         ori.b           &0x4,%d4
  622         mov.w           %d4,%cc                 # set 'Z' ccode bit
  623
  624         bra.b           mulu64_end              # store the zero result and return
 625
 626 ##########
 627 # muls.l #
 628 ##########
 629         global          _060LSP__imuls64_
 630 _060LSP__imuls64_:
 631 # signed 64-bit multiply: 0x8(a6) * 0xc(a6); result stored via the ptr at 0x10(a6).
 632 # PROLOGUE BEGIN ########################################################
 633         link.w          %a6,&-4
 634         movm.l          &0x3c00,-(%sp)          # save d2-d5
 635 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 636 # PROLOGUE END ##########################################################
 637
 638         mov.w           %cc,MUL64_CC(%a6)       # save incoming ccodes
 639
 640         mov.l           0x8(%a6),%d0            # store multiplier in d0
 641         beq.w           muls64_zero             # handle zero separately
 642
 643         mov.l           0xc(%a6),%d1            # get multiplicand in d1
 644         beq.w           muls64_zero             # handle zero separately
 645 # both operands are non-zero: record the result sign in d5, multiply magnitudes.
 646         clr.b           %d5                     # clear sign tag
 647         tst.l           %d0                     # is multiplier negative?
 648         bge.b           muls64_chk_md_sgn       # no
 649         neg.l           %d0                     # make multiplier positive
 650
 651         ori.b           &0x1,%d5                # save multiplier sgn
 652
 653 # the result sign is the exclusive or of the operand sign bits.
 654 muls64_chk_md_sgn:
 655         tst.l           %d1                     # is multiplicand negative?
 656         bge.b           muls64_alg              # no
 657         neg.l           %d1                     # make multiplicand positive
 658
 659         eori.b          &0x1,%d5                # calculate correct sign
 660
 661 #########################################################################
 662 #       63                         32                           0       #
 663 #       ----------------------------                                    #
 664 #       | hi(mplier) * hi(mplicand)|                                    #
 665 #       ----------------------------                                    #
 666 #                    -----------------------------                      #
 667 #                    | hi(mplier) * lo(mplicand) |                      #
 668 #                    -----------------------------                      #
 669 #                    -----------------------------                      #
 670 #                    | lo(mplier) * hi(mplicand) |                      #
 671 #                    -----------------------------                      #
 672 #         |                        -----------------------------        #
 673 #       --|--                      | lo(mplier) * lo(mplicand) |        #
 674 #         |                        -----------------------------        #
 675 #       ========================================================        #
 676 #       --------------------------------------------------------        #
 677 #       |       hi(result)         |        lo(result)         |        #
 678 #       --------------------------------------------------------        #
 679 #########################################################################
 680 muls64_alg:
 681 # load temp registers with operands
 682         mov.l           %d0,%d2                 # mr in d2
 683         mov.l           %d0,%d3                 # mr in d3
 684         mov.l           %d1,%d4                 # md in d4
 685         swap            %d3                     # hi(mr) in lo d3
 686         swap            %d4                     # hi(md) in lo d4
 687
 688 # complete necessary multiplies:
 689         mulu.w          %d1,%d0                 # [1] lo(mr) * lo(md)
 690         mulu.w          %d3,%d1                 # [2] hi(mr) * lo(md)
 691         mulu.w          %d4,%d2                 # [3] lo(mr) * hi(md)
 692         mulu.w          %d4,%d3                 # [4] hi(mr) * hi(md)
 693
 694 # add lo portions of [2],[3] to hi portion of [1].
 695 # add carries produced from these adds to [4].
 696 # lo([1]) is the final lo 16 bits of the result.
 697         clr.l           %d4                     # load d4 w/ zero value
 698         swap            %d0                     # hi([1]) <==> lo([1])
 699         add.w           %d1,%d0                 # hi([1]) + lo([2])
 700         addx.l          %d4,%d3                 #    [4]  + carry
 701         add.w           %d2,%d0                 # hi([1]) + lo([3])
 702         addx.l          %d4,%d3                 #    [4]  + carry
 703         swap            %d0                     # lo([1]) <==> hi([1])
 704
 705 # lo portions of [2],[3] have been added in to final result.
 706 # now, clear lo, put hi in lo reg, and add to [4]
 707         clr.w           %d1                     # clear lo([2])
 708         clr.w           %d2                     # clear lo([3])
 709         swap            %d1                     # hi([2]) in lo d1
 710         swap            %d2                     # hi([3]) in lo d2
 711         add.l           %d2,%d1                 # hi([2]) + hi([3])
 712         add.l           %d3,%d1                 #    ...  + [4] = hi(result)
 713
 714         tst.b           %d5                     # should result be signed?
 715         beq.b           muls64_done             # no
 716
 717 # result should be a signed negative number.
 718 # compute 2's complement of the unsigned number:
 719 #   -negate all bits and add 1
 720 muls64_neg:
 721         not.l           %d0                     # negate lo(result) bits
 722         not.l           %d1                     # negate hi(result) bits
 723         addq.l          &1,%d0                  # add 1 to lo(result)
 724         addx.l          %d4,%d1                 # add carry to hi(result)
 725
 726 muls64_done:
 727         mov.w           MUL64_CC(%a6),%d4
 728         andi.b          &0x10,%d4               # keep old 'X' bit
 729         tst.l           %d1                     # may set 'N' bit
 730         bpl.b           muls64_ddone
 731         ori.b           &0x8,%d4                # set 'N' bit
 732 muls64_ddone:
 733         mov.w           %d4,%cc
 734
 735 # here, the result is in d1 and d0. the current strategy is to save
 736 # the values at the location pointed to by the longword at 0x10(a6).
 737 # use movm here to not disturb the condition codes.
 738 muls64_end:
 739         exg             %d1,%d0                 # d0 = hi(result), d1 = lo(result)
 740         movm.l          &0x0003,([0x10,%a6])    # save d0,d1 via ptr
 741
 742 # EPILOGUE BEGIN ########################################################
 743 #       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 744         movm.l          (%sp)+,&0x003c          # restore d2-d5
 745         unlk            %a6
 746 # EPILOGUE END ##########################################################
 747
 748         rts
 749 # zero exit for muls: mulu64_zero must not be used, its epilogue restores only d2-d4.
 750 # one or both of the operands is zero so the result is also zero.
 751 # store the zero result through muls64_end and set the 'Z' ccode bit.
 752 muls64_zero:
 753         clr.l           %d0
 754         clr.l           %d1
 755
 756         mov.w           MUL64_CC(%a6),%d4
 757         andi.b          &0x10,%d4               # keep old 'X' bit
 758         ori.b           &0x4,%d4
 759         mov.w           %d4,%cc                 # set 'Z' ccode bit
 760
 761         bra.b           muls64_end
 762
 763 #########################################################################
 764 # XDEF **************************************************************** #
 765 #       _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".                  #
 766 #       _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".                  #
 767 #       _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".                  #
 768 #       _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".                  #
 769 #       _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".                  #
 770 #       _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".                  #
 771 #                                                                       #
 772 #       This is the library version which is accessed as a subroutine   #
 773 #       and therefore does not work exactly like the 680X0 "cmp2"       #
 774 #       instruction.                                                    #
 775 #                                                                       #
 776 # XREF **************************************************************** #
 777 #       None                                                            #
 778 #                                                                       #
 779 # INPUT *************************************************************** #
 780 #       0x4(sp) = Rn                                                    #
 781 #       0x8(sp) = pointer to boundary pair                              #
 782 #                                                                       #
 783 # OUTPUT ************************************************************** #
 784 #       cc = condition codes are set correctly                          #
 785 #                                                                       #
 786 # ALGORITHM *********************************************************** #
 787 #       In the interest of simplicity, all operands are converted to    #
 788 # longword size whether the operation is byte, word, or long. The       #
 789 # bounds are sign extended accordingly. If Rn is a data register, Rn is #
 790 # also sign extended. If Rn is an address register, it need not be sign #
 791 # extended since the full register is always used.                      #
 792 #       The condition codes are set correctly before the final "rts".   #
 793 #                                                                       #
 794 #########################################################################
 795
 796 set     CMP2_CC,        -4
 797 # CMP2_CC: offset from a6 of the word where each entry point saves the incoming ccodes.
 798         global          _060LSP__cmp2_Ab_
 799 _060LSP__cmp2_Ab_:
 800 # cmp2.b with Rn = An: byte bounds are sign extended; Rn is used as a full longword.
 801 # PROLOGUE BEGIN ########################################################
 802         link.w          %a6,&-4
 803         movm.l          &0x3800,-(%sp)          # save d2-d4
 804 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 805 # PROLOGUE END ##########################################################
 806
 807         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 808         mov.l           0x8(%a6), %d2           # get regval
 809
 810         mov.b           ([0xc,%a6],0x0),%d0     # lo bnd = byte 0 of pair
 811         mov.b           ([0xc,%a6],0x1),%d1     # hi bnd = byte 1 of pair
 812
 813         extb.l          %d0                     # sign extend lo bnd
 814         extb.l          %d1                     # sign extend hi bnd
 815         bra.w           l_cmp2_cmp              # go do the compare emulation
 816
 817         global          _060LSP__cmp2_Aw_
 818 _060LSP__cmp2_Aw_:
 819 # cmp2.w with Rn = An: word bounds are sign extended; Rn is used as a full longword.
 820 # PROLOGUE BEGIN ########################################################
 821         link.w          %a6,&-4
 822         movm.l          &0x3800,-(%sp)          # save d2-d4
 823 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 824 # PROLOGUE END ##########################################################
 825
 826         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 827         mov.l           0x8(%a6), %d2           # get regval
 828
 829         mov.w           ([0xc,%a6],0x0),%d0     # lo bnd = word at offset 0
 830         mov.w           ([0xc,%a6],0x2),%d1     # hi bnd = word at offset 2
 831
 832         ext.l           %d0                     # sign extend lo bnd
 833         ext.l           %d1                     # sign extend hi bnd
 834         bra.w           l_cmp2_cmp              # go do the compare emulation
 835
 836         global          _060LSP__cmp2_Al_
 837 _060LSP__cmp2_Al_:
 838 # cmp2.l with Rn = An: longword bounds and Rn are used as-is, no extension needed.
 839 # PROLOGUE BEGIN ########################################################
 840         link.w          %a6,&-4
 841         movm.l          &0x3800,-(%sp)          # save d2-d4
 842 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 843 # PROLOGUE END ##########################################################
 844
 845         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 846         mov.l           0x8(%a6), %d2           # get regval
 847
 848         mov.l           ([0xc,%a6],0x0),%d0     # lo bnd = long at offset 0
 849         mov.l           ([0xc,%a6],0x4),%d1     # hi bnd = long at offset 4
 850         bra.w           l_cmp2_cmp              # go do the compare emulation
 851
 852         global          _060LSP__cmp2_Db_
 853 _060LSP__cmp2_Db_:
 854 # cmp2.b with Rn = Dn: byte bounds and the low byte of Rn are all sign extended.
 855 # PROLOGUE BEGIN ########################################################
 856         link.w          %a6,&-4
 857         movm.l          &0x3800,-(%sp)          # save d2-d4
 858 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 859 # PROLOGUE END ##########################################################
 860
 861         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 862         mov.l           0x8(%a6), %d2           # get regval
 863
 864         mov.b           ([0xc,%a6],0x0),%d0     # lo bnd = byte 0 of pair
 865         mov.b           ([0xc,%a6],0x1),%d1     # hi bnd = byte 1 of pair
 866
 867         extb.l          %d0                     # sign extend lo bnd
 868         extb.l          %d1                     # sign extend hi bnd
 869
 870 # operation is a data register compare.
 871 # sign extend byte to long so we can do simple longword compares.
 872         extb.l          %d2                     # sign extend data byte
 873         bra.w           l_cmp2_cmp              # go do the compare emulation
 874
 875         global          _060LSP__cmp2_Dw_
 876 _060LSP__cmp2_Dw_:
 877 # cmp2.w with Rn = Dn: word bounds and the low word of Rn are all sign extended.
 878 # PROLOGUE BEGIN ########################################################
 879         link.w          %a6,&-4
 880         movm.l          &0x3800,-(%sp)          # save d2-d4
 881 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 882 # PROLOGUE END ##########################################################
 883
 884         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 885         mov.l           0x8(%a6), %d2           # get regval
 886
 887         mov.w           ([0xc,%a6],0x0),%d0     # lo bnd = word at offset 0
 888         mov.w           ([0xc,%a6],0x2),%d1     # hi bnd = word at offset 2
 889
 890         ext.l           %d0                     # sign extend lo bnd
 891         ext.l           %d1                     # sign extend hi bnd
 892
 893 # operation is a data register compare.
 894 # sign extend word to long so we can do simple longword compares.
 895         ext.l           %d2                     # sign extend data word
 896         bra.w           l_cmp2_cmp              # go emulate compare
 897
 898         global          _060LSP__cmp2_Dl_
 899 _060LSP__cmp2_Dl_:
 900 # cmp2.l with Rn = Dn: no extension needed; falls through into l_cmp2_cmp below.
 901 # PROLOGUE BEGIN ########################################################
 902         link.w          %a6,&-4
 903         movm.l          &0x3800,-(%sp)          # save d2-d4
 904 #       fmovm.l         &0x0,-(%sp)             # save no fpregs
 905 # PROLOGUE END ##########################################################
 906
 907         mov.w           %cc,CMP2_CC(%a6)        # save incoming ccodes
 908         mov.l           0x8(%a6), %d2           # get regval
 909
 910         mov.l           ([0xc,%a6],0x0),%d0     # lo bnd = long at offset 0
 911         mov.l           ([0xc,%a6],0x4),%d1     # hi bnd = long at offset 4
 912 # shared tail for all six entry points: d0 = lo bnd, d1 = hi bnd, d2 = Rn (longwords).
 913 #
 914 # To set the ccodes correctly:
 915 #       (1) save 'Z' bit from (Rn - lo)
 916 #       (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
 917 #       (3) keep 'X', 'N', and 'V' from before instruction
 918 #       (4) combine ccodes
 919 #
 920 l_cmp2_cmp:
 921         sub.l           %d0, %d2                # (Rn - lo)
 922         mov.w           %cc, %d3                # fetch resulting ccodes
 923         andi.b          &0x4, %d3               # keep 'Z' bit
 924         sub.l           %d0, %d1                # (hi - lo)
 925         cmp.l           %d1,%d2                 # ((hi - lo) - (Rn - lo))
 926
 927         mov.w           %cc, %d4                # fetch resulting ccodes
 928         or.b            %d4, %d3                # combine w/ earlier ccodes
 929         andi.b          &0x5, %d3               # keep 'Z' and 'C'
 930
 931         mov.w           CMP2_CC(%a6), %d4       # fetch old ccodes
 932         andi.b          &0x1a, %d4              # keep 'X','N','V' bits
 933         or.b            %d3, %d4                # insert new ccodes
 934         mov.w           %d4,%cc                 # save new ccodes
 935
 936 # EPILOGUE BEGIN ########################################################
 937 #       fmovm.l         (%sp)+,&0x0             # restore no fpregs
 938         movm.l          (%sp)+,&0x001c          # restore d2-d4
 939         unlk            %a6
 940 # EPILOGUE END ##########################################################
 941
 942         rts