make.tmpl: add missing compiler attribute to build_progs
[AROS.git] / arch / m68k-all / m680x0 / 060sp / dist / ilsp.s
blob428b37f750ee739b2e1d5768cd2a49ba33a4c248
2 # $NetBSD: ilsp.s,v 1.1 2000/04/14 20:24:39 is Exp $
5 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6 # MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
7 # M68000 Hi-Performance Microprocessor Division
8 # M68060 Software Package Production Release
9 #
10 # M68060 Software Package Copyright (C) 1993, 1994, 1995, 1996 Motorola Inc.
11 # All rights reserved.
13 # THE SOFTWARE is provided on an "AS IS" basis and without warranty.
14 # To the maximum extent permitted by applicable law,
15 # MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
16 # INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS
17 # FOR A PARTICULAR PURPOSE and any warranty against infringement with
18 # regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
19 # and any accompanying written materials.
21 # To the maximum extent permitted by applicable law,
22 # IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
23 # (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
24 # BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
25 # ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
27 # Motorola assumes no responsibility for the maintenance and support
28 # of the SOFTWARE.
30 # You are hereby granted a copyright license to use, modify, and distribute the
31 # SOFTWARE so long as this entire notice is retained without alteration
32 # in any modified and/or redistributed versions, and that such modified
33 # versions are clearly identified as such.
34 # No licenses are granted by implication, estoppel or otherwise under any
35 # patents or trademarks of Motorola, Inc.
36 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39 # litop.s:
40 # This file is appended to the top of the 060FPLSP package
41 # and contains the entry points into the package. The user, in
42 # effect, branches to one of the branch table entries located here.
# Entry-point table: one long branch per exported routine, each followed by
# a zero word. The order is: 64-bit divides, 64-bit multiplies, then the six
# cmp2 variants.
# NOTE(review): the zero word presumably pads every entry to a fixed size so
# that callers can index the table - confirm against the package docs.
45 bra.l _060LSP__idivs64_
46 short 0x0000
47 bra.l _060LSP__idivu64_
48 short 0x0000
50 bra.l _060LSP__imuls64_
51 short 0x0000
52 bra.l _060LSP__imulu64_
53 short 0x0000
55 bra.l _060LSP__cmp2_Ab_
56 short 0x0000
57 bra.l _060LSP__cmp2_Aw_
58 short 0x0000
59 bra.l _060LSP__cmp2_Al_
60 short 0x0000
61 bra.l _060LSP__cmp2_Db_
62 short 0x0000
63 bra.l _060LSP__cmp2_Dw_
64 short 0x0000
65 bra.l _060LSP__cmp2_Dl_
66 short 0x0000
68 # leave room for future possible additions.
69 align 0x200
71 #########################################################################
72 # XDEF **************************************************************** #
73 # _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
74 # _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
75 # #
76 # This is the library version which is accessed as a subroutine #
77 # and therefore does not work exactly like the 680X0 div{s,u}.l #
78 # 64-bit divide instruction. #
79 # #
80 # XREF **************************************************************** #
81 # None. #
82 # #
83 # INPUT *************************************************************** #
84 # 0x4(sp) = divisor #
85 # 0x8(sp) = hi(dividend) #
86 # 0xc(sp) = lo(dividend) #
87 # 0x10(sp) = pointer to location to place quotient/remainder #
88 # #
89 # OUTPUT ************************************************************** #
90 # 0x10(sp) = points to location of remainder/quotient. #
91 # remainder is in first longword, quotient is in 2nd. #
92 # #
93 # ALGORITHM *********************************************************** #
94 # If the operands are signed, make them unsigned and save the #
95 # sign info for later. Separate out special cases like divide-by-zero #
96 # or 32-bit divides if possible. Else, use a special math algorithm #
97 # to calculate the result. #
98 # Restore sign info if signed instruction. Set the condition #
99 # codes before performing the final "rts". If the divisor was equal to #
100 # zero, then perform a divide-by-zero using a 16-bit implemented #
101 # divide instruction. This way, the operating system can record that #
102 # the event occurred even though it may not point to the correct place. #
104 #########################################################################
# Offsets, relative to %a6, of the locals kept in the 16-byte frame that both
# divide entry points allocate with "link.w %a6,&-16".
# POSNEG     (byte) set by "st" for the signed entry, cleared by "sf" for
#            the unsigned entry.
# NDIVISOR   (byte) sign of the divisor, written by "slt".
# NDIVIDEND  (byte) sign of the dividend, written by "slt"; later XORed with
#            NDIVISOR to decide whether the quotient is negative.
# DDSECOND   (byte) flag: the first quotient word has already been produced.
# DDNORMAL   (long) number of normalization shifts applied to the divisor.
# DDQUOTIENT (long) quotient, assembled from two 16-bit words.
# DIV64_CC   (word) condition codes saved on entry.
106 set POSNEG, -1
107 set NDIVISOR, -2
108 set NDIVIDEND, -3
109 set DDSECOND, -4
110 set DDNORMAL, -8
111 set DDQUOTIENT, -12
112 set DIV64_CC, -16
114 ##########
115 # divs.l #
116 ##########
# Signed 64/32 divide entry point. It builds the same frame as the unsigned
# entry, saves the incoming condition codes, marks the operation as signed
# in POSNEG and then joins the shared code at ldiv64_cont.
117 global _060LSP__idivs64_
118 _060LSP__idivs64_:
119 # PROLOGUE BEGIN ########################################################
120 link.w %a6,&-16
121 movm.l &0x3f00,-(%sp) # save d2-d7
122 # fmovm.l &0x0,-(%sp) # save no fpregs
123 # PROLOGUE END ##########################################################
# the caller's ccodes are kept so the 'X' bit can be carried into the result.
125 mov.w %cc,DIV64_CC(%a6)
126 st POSNEG(%a6) # signed operation
127 bra.b ldiv64_cont
129 ##########
130 # divu.l #
131 ##########
# Unsigned 64/32 divide entry point. Everything from ldiv64_cont onward is
# shared with _060LSP__idivs64_, which branches there with POSNEG set.
#
# After link.w the stack arguments are:
#	0x8(%a6)  = divisor
#	0xc(%a6)  = hi(dividend)
#	0x10(%a6) = lo(dividend)
#	0x14(%a6) = pointer to the two-longword result (remainder, quotient)
# Working registers: %d7 = divisor, %d5 = hi(dividend) then remainder,
# %d6 = lo(dividend) then quotient. %d0 and %d1 are used as scratch and are
# not preserved; d2-d7 are saved and restored.
#
# Note on operand order: in this syntax "cmp.l X,Y" followed by "bls"
# branches when X <= Y (see the comments on the compares below).
132 global _060LSP__idivu64_
133 _060LSP__idivu64_:
134 # PROLOGUE BEGIN ########################################################
135 link.w %a6,&-16
136 movm.l &0x3f00,-(%sp) # save d2-d7
137 # fmovm.l &0x0,-(%sp) # save no fpregs
138 # PROLOGUE END ##########################################################
140 mov.w %cc,DIV64_CC(%a6)
141 sf POSNEG(%a6) # unsigned operation
143 ldiv64_cont:
144 mov.l 0x8(%a6),%d7 # fetch divisor
146 beq.w ldiv64eq0 # divisor is = 0!!!
148 mov.l 0xc(%a6), %d5 # get dividend hi
149 mov.l 0x10(%a6), %d6 # get dividend lo
151 # separate signed and unsigned divide
152 tst.b POSNEG(%a6) # signed or unsigned?
153 beq.b ldspecialcases # use positive divide
155 # save the sign of the divisor
156 # make divisor unsigned if it's negative
157 tst.l %d7 # chk sign of divisor
158 slt NDIVISOR(%a6) # save sign of divisor
159 bpl.b ldsgndividend
160 neg.l %d7 # complement negative divisor
162 # save the sign of the dividend
163 # make dividend unsigned if it's negative
164 ldsgndividend:
165 tst.l %d5 # chk sign of hi(dividend)
166 slt NDIVIDEND(%a6) # save sign of dividend
167 bpl.b ldspecialcases
169 mov.w &0x0, %cc # clear 'X' cc bit
170 negx.l %d6 # complement signed dividend
171 negx.l %d5
173 # extract some special cases:
174 # - is (dividend == 0) ?
175 # - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
176 ldspecialcases:
177 tst.l %d5 # is (hi(dividend) == 0)
178 bne.b ldnormaldivide # no, so try it the long way
180 tst.l %d6 # is (lo(dividend) == 0), too
181 beq.w lddone # yes, so (dividend == 0)
183 cmp.l %d7,%d6 # is (divisor <= lo(dividend))
184 bls.b ld32bitdivide # yes, so use 32 bit divide
186 exg %d5,%d6 # q = 0, r = dividend
187 bra.w ldivfinish # can't divide, we're done.
189 ld32bitdivide:
190 tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div!
192 bra.b ldivfinish
194 ldnormaldivide:
195 # last special case:
196 # - is hi(dividend) >= divisor ? if yes, then overflow
197 cmp.l %d7,%d5
198 bls.b lddovf # answer won't fit in 32 bits
200 # perform the divide algorithm:
201 bsr.l ldclassical # do int divide
203 # separate into signed and unsigned finishes.
204 ldivfinish:
205 tst.b POSNEG(%a6) # do divs, divu separately
206 beq.b lddone # divu has no processing!!!
208 # it was a divs.l, so ccode setting is a little more complicated...
209 tst.b NDIVIDEND(%a6) # remainder has same sign
210 beq.b ldcc # as dividend.
211 neg.l %d5 # sgn(rem) = sgn(dividend)
212 ldcc:
213 mov.b NDIVISOR(%a6), %d0
214 eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative
215 beq.b ldqpos # branch to quot positive
217 # 0x80000000 is the largest number representable as a 32-bit negative
218 # number. the negative of 0x80000000 is 0x80000000.
219 cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
220 bhi.b lddovf
222 neg.l %d6 # make (-quot) 2's comp
224 bra.b lddone
226 ldqpos:
227 btst &0x1f, %d6 # will (+quot) fit in 32 bits?
228 bne.b lddovf
230 lddone:
231 # if the register numbers are the same, only the quotient gets saved.
232 # so, if we always save the quotient second, we save ourselves a cmp&beq
# only the caller's 'X' bit survives; tst.l then sets 'N'/'Z' from the quotient.
233 andi.w &0x10,DIV64_CC(%a6)
234 mov.w DIV64_CC(%a6),%cc
235 tst.l %d6 # may set 'N' ccode bit
237 # here, the remainder is in d5 and the quotient in d6. they are stored,
238 # remainder first, at the location whose address is in 0x14(%a6).
239 # use movm here to not disturb the condition codes.
240 ldexit:
241 movm.l &0x0060,([0x14,%a6]) # save result
243 # EPILOGUE BEGIN ########################################################
244 # fmovm.l (%sp)+,&0x0 # restore no fpregs
245 movm.l (%sp)+,&0x00fc # restore d2-d7
246 unlk %a6
247 # EPILOGUE END ##########################################################
# return to the caller. without this the exit path runs on into lddovf,
# which branches back to ldexit and never returns.
rts
251 # the result should be the unchanged dividend
252 lddovf:
253 mov.l 0xc(%a6), %d5 # get dividend hi
254 mov.l 0x10(%a6), %d6 # get dividend lo
# keep the caller's 'X','N','Z' bits and flag the overflow with 'V'.
256 andi.w &0x1c,DIV64_CC(%a6)
257 ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
258 mov.w DIV64_CC(%a6),%cc
260 bra.b ldexit
# divisor was zero: hand back the unchanged dividend, restore the caller's
# ccodes and registers, then raise a real divide-by-zero exception.
262 ldiv64eq0:
263 mov.l 0xc(%a6),([0x14,%a6])
264 mov.l 0x10(%a6),([0x14,%a6],0x4)
266 mov.w DIV64_CC(%a6),%cc
268 # EPILOGUE BEGIN ########################################################
269 # fmovm.l (%sp)+,&0x0 # restore no fpregs
270 movm.l (%sp)+,&0x00fc # restore d2-d7
271 unlk %a6
272 # EPILOGUE END ##########################################################
274 divu.w &0x0,%d0 # force a divbyzero exception
# reached only if the exception handler resumes here; return to the caller
# instead of running on into ldclassical.
rts
277 ###########################################################################
278 #########################################################################
279 # This routine uses the 'classical' Algorithm D from Donald Knuth's #
280 # Art of Computer Programming, vol II, Seminumerical Algorithms. #
281 # For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
282 # where U,V are words of the quadword dividend and longword divisor, #
283 # and U1, V1 are the most significant words. #
285 # The most sig. longword of the 64 bit dividend must be in %d5, least #
286 # in %d6. The divisor must be in %d7; no signed/unsigned flag is read #
287 # here, so the operands must already be unsigned. #
288 # The quotient is returned in %d6, remainder in %d5. Overflow cases are #
289 # filtered out by the caller before this routine is entered, so no #
290 # overflow indication is produced here. #
291 #########################################################################
292 ldclassical:
293 # if the divisor msw is 0, use simpler algorithm than the full blown
294 # one at ddknuth:
296 cmpi.l %d7, &0xffff
297 bhi.b lddknuth # go use D. Knuth algorithm
299 # Since the divisor is only a word (and larger than the mslw of the dividend),
300 # a simpler algorithm may be used :
301 # In the general case, four quotient words would be created by
302 # dividing the divisor word into each dividend word. In this case,
303 # the first two quotient words must be zero, or overflow would occur.
304 # Since we already checked this case above, we can treat the most significant
305 # longword of the dividend as (0) remainder (see Knuth) and merely complete
306 # the last two divisions to get a quotient longword and word remainder:
308 clr.l %d1
309 swap %d5 # same as r*b if previous step rqd
310 swap %d6 # get u3 to lsw position
311 mov.w %d6, %d5 # rb + u3
313 divu.w %d7, %d5
315 mov.w %d5, %d1 # first quotient word
316 swap %d6 # get u4
317 mov.w %d6, %d5 # rb + u4
319 divu.w %d7, %d5
321 swap %d1
322 mov.w %d5, %d1 # 2nd quotient 'digit'
323 clr.w %d5
324 swap %d5 # now remainder
325 mov.l %d1, %d6 # and quotient
# the word-divisor case is complete: return here rather than running on into
# lddknuth, which would divide the finished quotient/remainder again.
rts
# Full two-word-divisor path of ldclassical.
# In:  %d5:%d6 = 64-bit dividend, %d7 = divisor (msw non-zero).
# Out: %d6 = quotient, %d5 = remainder.
# Uses %d0-%d4 as scratch and the frame locals DDNORMAL, DDSECOND and
# DDQUOTIENT. The lddnormalized..ldd2nd body runs twice, once per quotient
# word, with DDSECOND telling the two passes apart.
329 lddknuth:
330 # In this algorithm, the divisor is treated as a 2 digit (word) number
331 # which is divided into a 3 digit (word) dividend to get one quotient
332 # digit (word). After subtraction, the dividend is shifted and the
333 # process repeated. Before beginning, the divisor and quotient are
334 # 'normalized' so that the process of estimating the quotient digit
335 # will yield verifiably correct results..
337 clr.l DDNORMAL(%a6) # count of shifts for normalization
338 clr.b DDSECOND(%a6) # clear flag for quotient digits
339 clr.l %d1 # %d1 will hold trial quotient
340 lddnchk:
341 btst &31, %d7 # must we normalize? first word of
342 bne.b lddnormalized # divisor (V1) must be >= 65536/2
343 addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
344 lsl.l &0x1, %d7 # shift the divisor
345 lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
346 roxl.l &0x1, %d5 # shift u1,u2
347 bra.w lddnchk
348 lddnormalized:
350 # Now calculate an estimate of the quotient words (msw first, then lsw).
351 # The comments use subscripts for the first quotient digit determination.
352 mov.l %d7, %d3 # divisor
353 mov.l %d5, %d2 # dividend mslw
354 swap %d2
355 swap %d3
356 cmp.w %d2, %d3 # V1 = U1 ?
357 bne.b lddqcalc1
358 mov.w &0xffff, %d1 # use max trial quotient word
359 bra.b lddadj0
360 lddqcalc1:
361 mov.l %d5, %d1
363 divu.w %d3, %d1 # use quotient of mslw/msw
365 andi.l &0x0000ffff, %d1 # zero any remainder
366 lddadj0:
368 # now test the trial quotient and adjust. This step plus the
369 # normalization assures (according to Knuth) that the trial
370 # quotient will be at worst 1 too large.
371 mov.l %d6, -(%sp)
372 clr.w %d6 # word u3 left
373 swap %d6 # in lsw position
374 lddadj1: mov.l %d7, %d3
375 mov.l %d1, %d2
376 mulu.w %d7, %d2 # V2q
377 swap %d3
378 mulu.w %d1, %d3 # V1q
379 mov.l %d5, %d4 # U1U2
380 sub.l %d3, %d4 # U1U2 - V1q
382 swap %d4
384 mov.w %d4,%d0
385 mov.w %d6,%d4 # insert lower word (U3)
387 tst.w %d0 # is upper word set?
388 bne.w lddadjd1
390 # add.l %d6, %d4 # (U1U2 - V1q) + U3
392 cmp.l %d2, %d4
393 bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
394 subq.l &0x1, %d1 # yes, decrement and recheck
395 bra.b lddadj1
396 lddadjd1:
397 # now test the word by multiplying it by the divisor (V1V2) and comparing
398 # the 3 digit (word) result with the current dividend words
399 mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
400 mov.l %d1, %d6
401 swap %d6 # shift answer to ms 3 words
402 mov.l %d7, %d5
403 bsr.l ldmm2
404 mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
405 mov.l %d6, %d3
406 mov.l (%sp)+, %d5 # restore dividend
407 mov.l (%sp)+, %d6
408 sub.l %d3, %d6
409 subx.l %d2, %d5 # subtract double precision
410 bcc ldd2nd # no carry, do next quotient digit
411 subq.l &0x1, %d1 # q is one too large
412 # need to add back divisor longword to current ms 3 digits of dividend
413 # - according to Knuth, this is done only 2 out of 65536 times for random
414 # divisor, dividend selection.
415 clr.l %d2
416 mov.l %d7, %d3
417 swap %d3
418 clr.w %d3 # %d3 now ls word of divisor
419 add.l %d3, %d6 # aligned with 3rd word of dividend
420 addx.l %d2, %d5
421 mov.l %d7, %d3
422 clr.w %d3 # %d3 now ms word of divisor
423 swap %d3 # aligned with 2nd word of dividend
424 add.l %d3, %d5
425 ldd2nd:
426 tst.b DDSECOND(%a6) # both q words done?
427 bne.b lddremain
428 # first quotient digit now correct. store digit and shift the
429 # (subtracted) dividend
430 mov.w %d1, DDQUOTIENT(%a6)
431 clr.l %d1
432 swap %d5
433 swap %d6
434 mov.w %d6, %d5
435 clr.w %d6
436 st DDSECOND(%a6) # second digit
437 bra.w lddnormalized
438 lddremain:
439 # add 2nd word to quotient, get the remainder.
440 mov.w %d1, DDQUOTIENT+2(%a6)
441 # shift down one word/digit to renormalize remainder.
442 mov.w %d5, %d6
443 swap %d6
444 swap %d5
445 mov.l DDNORMAL(%a6), %d7 # get norm shift count
446 beq.b lddrn
447 subq.l &0x1, %d7 # set for loop count
448 lddnlp:
449 lsr.l &0x1, %d5 # shift into %d6
450 roxr.l &0x1, %d6
451 dbf %d7, lddnlp
452 lddrn:
453 mov.l %d6, %d5 # remainder
454 mov.l DDQUOTIENT(%a6), %d6 # quotient
# quotient and remainder are in place: return to the caller of ldclassical
# instead of running on into the ldmm2 multiply helper.
rts
# 32x32 -> 64 bit unsigned multiply helper, called with bsr from lddadjd1.
457 ldmm2:
458 # factors for the 32X32->64 multiplication are in %d5 and %d6.
459 # returns 64 bit result in %d5 (hi) %d6(lo).
460 # destroys %d2,%d3,%d4.
462 # multiply hi,lo words of each factor to get 4 intermediate products
463 mov.l %d6, %d2
464 mov.l %d6, %d3
465 mov.l %d5, %d4
466 swap %d3
467 swap %d4
468 mulu.w %d5, %d6 # %d6 <- lsw*lsw
469 mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
470 mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
471 mulu.w %d4, %d3 # %d3 <- msw*msw
472 # now use swap and addx to consolidate to two longwords
# the carries are added with a longword addx, as in mulu64_alg: the m*m
# product is a full 32 bits, and a word-sized addx would drop a carry out of
# its low word whenever that word is 0xffff.
473 clr.l %d4
474 swap %d6
475 add.w %d5, %d6 # add msw of l*l to lsw of m*l product
476 addx.l %d4, %d3 # add any carry to m*m product
477 add.w %d2, %d6 # add in lsw of other m*l product
478 addx.l %d4, %d3 # add any carry to m*m product
479 swap %d6 # %d6 is low 32 bits of final product
480 clr.w %d5
481 clr.w %d2 # lsw of two mixed products used,
482 swap %d5 # now use msws of longwords
483 swap %d2
484 add.l %d2, %d5
485 add.l %d3, %d5 # %d5 now ms 32 bits of final product
# return to the bsr in lddadjd1 rather than running on into the 64-bit
# multiply entry point that follows.
rts
488 #########################################################################
489 # XDEF **************************************************************** #
490 # _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
491 # _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
493 # This is the library version which is accessed as a subroutine #
494 # and therefore does not work exactly like the 680X0 mul{s,u}.l #
495 # 64-bit multiply instruction. #
497 # XREF **************************************************************** #
498 # None #
500 # INPUT *************************************************************** #
501 # 0x4(sp) = multiplier #
502 # 0x8(sp) = multiplicand #
503 # 0xc(sp) = pointer to location to place 64-bit result #
505 # OUTPUT ************************************************************** #
506 # 0xc(sp) = points to location of 64-bit result #
508 # ALGORITHM *********************************************************** #
509 # Perform the multiply in pieces using 16x16->32 unsigned #
510 # multiplies and "add" instructions. #
511 # Set the condition codes as appropriate before performing an #
512 # "rts". #
514 #########################################################################
# offset, relative to %a6, of the word holding the ccodes saved on entry.
516 set MUL64_CC, -4
# Unsigned 32x32 -> 64 multiply.
# After link.w the stack arguments are:
#	0x8(%a6)  = multiplier
#	0xc(%a6)  = multiplicand
#	0x10(%a6) = pointer to the two-longword result (hi first, then lo)
# %d0/%d1 carry the operands and then the result and are not preserved;
# d2-d4 are saved and restored. On exit 'X' is the caller's, 'N' reflects
# bit 63 of the product and 'Z' is set only when an operand was zero.
518 global _060LSP__imulu64_
519 _060LSP__imulu64_:
521 # PROLOGUE BEGIN ########################################################
522 link.w %a6,&-4
523 movm.l &0x3800,-(%sp) # save d2-d4
524 # fmovm.l &0x0,-(%sp) # save no fpregs
525 # PROLOGUE END ##########################################################
527 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
529 mov.l 0x8(%a6),%d0 # store multiplier in d0
530 beq.w mulu64_zero # handle zero separately
532 mov.l 0xc(%a6),%d1 # get multiplicand in d1
533 beq.w mulu64_zero # handle zero separately
535 #########################################################################
536 # 63 32 0 #
537 # ---------------------------- #
538 # | hi(mplier) * hi(mplicand)| #
539 # ---------------------------- #
540 # ----------------------------- #
541 # | hi(mplier) * lo(mplicand) | #
542 # ----------------------------- #
543 # ----------------------------- #
544 # | lo(mplier) * hi(mplicand) | #
545 # ----------------------------- #
546 # | ----------------------------- #
547 # --|-- | lo(mplier) * lo(mplicand) | #
548 # | ----------------------------- #
549 # ======================================================== #
550 # -------------------------------------------------------- #
551 # | hi(result) | lo(result) | #
552 # -------------------------------------------------------- #
553 #########################################################################
554 mulu64_alg:
555 # load temp registers with operands
556 mov.l %d0,%d2 # mr in d2
557 mov.l %d0,%d3 # mr in d3
558 mov.l %d1,%d4 # md in d4
559 swap %d3 # hi(mr) in lo d3
560 swap %d4 # hi(md) in lo d4
562 # complete necessary multiplies:
563 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
564 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
565 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
566 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
568 # add lo portions of [2],[3] to hi portion of [1].
569 # add carries produced from these adds to [4].
570 # lo([1]) is the final lo 16 bits of the result.
571 clr.l %d4 # load d4 w/ zero value
572 swap %d0 # hi([1]) <==> lo([1])
573 add.w %d1,%d0 # hi([1]) + lo([2])
574 addx.l %d4,%d3 # [4] + carry
575 add.w %d2,%d0 # hi([1]) + lo([3])
576 addx.l %d4,%d3 # [4] + carry
577 swap %d0 # lo([1]) <==> hi([1])
579 # lo portions of [2],[3] have been added in to final result.
580 # now, clear lo, put hi in lo reg, and add to [4]
581 clr.w %d1 # clear lo([2])
582 clr.w %d2 # clear lo([3])
583 swap %d1 # hi([2]) in lo d1
584 swap %d2 # hi([3]) in lo d2
585 add.l %d2,%d1 # hi([2]) + hi([3])
586 add.l %d3,%d1 # ... + [4] = hi(result)
588 # now, grab the condition codes. only one that can be set is 'N'.
589 # 'N' CAN be set if the operation is unsigned if bit 63 is set.
590 mov.w MUL64_CC(%a6),%d4
591 andi.b &0x10,%d4 # keep old 'X' bit
592 tst.l %d1 # may set 'N' bit
593 bpl.b mulu64_ddone
594 ori.b &0x8,%d4 # set 'N' bit
595 mulu64_ddone:
596 mov.w %d4,%cc
598 # here, hi(result) is in d1 and lo(result) in d0. they are swapped and
599 # stored, hi first, at the location whose address is in 0x10(%a6).
600 # use movm here to not disturb the condition codes.
601 mulu64_end:
602 exg %d1,%d0
603 movm.l &0x0003,([0x10,%a6]) # save result
605 # EPILOGUE BEGIN ########################################################
606 # fmovm.l (%sp)+,&0x0 # restore no fpregs
607 movm.l (%sp)+,&0x001c # restore d2-d4
608 unlk %a6
609 # EPILOGUE END ##########################################################
# return to the caller. without this the exit path runs on into mulu64_zero,
# which branches back to mulu64_end and never returns.
rts
613 # one or both of the operands is zero so the result is also zero.
614 # store the zero result through the result pointer and set the 'Z' ccode bit.
615 mulu64_zero:
616 clr.l %d0
617 clr.l %d1
619 mov.w MUL64_CC(%a6),%d4
620 andi.b &0x10,%d4
621 ori.b &0x4,%d4
622 mov.w %d4,%cc # set 'Z' ccode bit
624 bra.b mulu64_end
626 ##########
627 # muls.l #
628 ##########
# Signed 32x32 -> 64 multiply.
# After link.w the stack arguments are:
#	0x8(%a6)  = multiplier
#	0xc(%a6)  = multiplicand
#	0x10(%a6) = pointer to the two-longword result (hi first, then lo)
# Both operands are made positive, multiplied with the same 16x16 scheme as
# the unsigned routine, and the product is negated when exactly one operand
# was negative (%d5 bit 0 tracks that). %d0/%d1 are not preserved; d2-d5
# are saved and restored.
629 global _060LSP__imuls64_
630 _060LSP__imuls64_:
632 # PROLOGUE BEGIN ########################################################
633 link.w %a6,&-4
634 movm.l &0x3c00,-(%sp) # save d2-d5
635 # fmovm.l &0x0,-(%sp) # save no fpregs
636 # PROLOGUE END ##########################################################
638 mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
# the zero cases leave through this routine's own muls64_zero/muls64_end so
# that the epilogue restores the same d2-d5 set the prologue saved. the
# unsigned routine's exit restores d2-d4 only. the target is too far for a
# byte branch, hence beq.w.
640 mov.l 0x8(%a6),%d0 # store multiplier in d0
641 beq.w muls64_zero # handle zero separately
643 mov.l 0xc(%a6),%d1 # get multiplicand in d1
644 beq.w muls64_zero # handle zero separately
646 clr.b %d5 # clear sign tag
647 tst.l %d0 # is multiplier negative?
648 bge.b muls64_chk_md_sgn # no
649 neg.l %d0 # make multiplier positive
651 ori.b &0x1,%d5 # save multiplier sgn
653 # the result sign is the exclusive or of the operand sign bits.
654 muls64_chk_md_sgn:
655 tst.l %d1 # is multiplicand negative?
656 bge.b muls64_alg # no
657 neg.l %d1 # make multiplicand positive
659 eori.b &0x1,%d5 # calculate correct sign
661 #########################################################################
662 # 63 32 0 #
663 # ---------------------------- #
664 # | hi(mplier) * hi(mplicand)| #
665 # ---------------------------- #
666 # ----------------------------- #
667 # | hi(mplier) * lo(mplicand) | #
668 # ----------------------------- #
669 # ----------------------------- #
670 # | lo(mplier) * hi(mplicand) | #
671 # ----------------------------- #
672 # | ----------------------------- #
673 # --|-- | lo(mplier) * lo(mplicand) | #
674 # | ----------------------------- #
675 # ======================================================== #
676 # -------------------------------------------------------- #
677 # | hi(result) | lo(result) | #
678 # -------------------------------------------------------- #
679 #########################################################################
680 muls64_alg:
681 # load temp registers with operands
682 mov.l %d0,%d2 # mr in d2
683 mov.l %d0,%d3 # mr in d3
684 mov.l %d1,%d4 # md in d4
685 swap %d3 # hi(mr) in lo d3
686 swap %d4 # hi(md) in lo d4
688 # complete necessary multiplies:
689 mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
690 mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
691 mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
692 mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
694 # add lo portions of [2],[3] to hi portion of [1].
695 # add carries produced from these adds to [4].
696 # lo([1]) is the final lo 16 bits of the result.
697 clr.l %d4 # load d4 w/ zero value
698 swap %d0 # hi([1]) <==> lo([1])
699 add.w %d1,%d0 # hi([1]) + lo([2])
700 addx.l %d4,%d3 # [4] + carry
701 add.w %d2,%d0 # hi([1]) + lo([3])
702 addx.l %d4,%d3 # [4] + carry
703 swap %d0 # lo([1]) <==> hi([1])
705 # lo portions of [2],[3] have been added in to final result.
706 # now, clear lo, put hi in lo reg, and add to [4]
707 clr.w %d1 # clear lo([2])
708 clr.w %d2 # clear lo([3])
709 swap %d1 # hi([2]) in lo d1
710 swap %d2 # hi([3]) in lo d2
711 add.l %d2,%d1 # hi([2]) + hi([3])
712 add.l %d3,%d1 # ... + [4] = hi(result)
714 tst.b %d5 # should result be signed?
715 beq.b muls64_done # no
717 # result should be a signed negative number.
718 # compute 2's complement of the unsigned number:
719 # -negate all bits and add 1
# d4 is still zero from the clr.l above, so addx.l adds only the carry.
720 muls64_neg:
721 not.l %d0 # negate lo(result) bits
722 not.l %d1 # negate hi(result) bits
723 addq.l &1,%d0 # add 1 to lo(result)
724 addx.l %d4,%d1 # add carry to hi(result)
726 muls64_done:
727 mov.w MUL64_CC(%a6),%d4
728 andi.b &0x10,%d4 # keep old 'X' bit
729 tst.l %d1 # may set 'N' bit
730 bpl.b muls64_ddone
731 ori.b &0x8,%d4 # set 'N' bit
732 muls64_ddone:
733 mov.w %d4,%cc
735 # here, hi(result) is in d1 and lo(result) in d0. they are swapped and
736 # stored, hi first, at the location whose address is in 0x10(%a6).
737 # use movm here to not disturb the condition codes.
738 muls64_end:
739 exg %d1,%d0
740 movm.l &0x0003,([0x10,%a6]) # save result
742 # EPILOGUE BEGIN ########################################################
743 # fmovm.l (%sp)+,&0x0 # restore no fpregs
744 movm.l (%sp)+,&0x003c # restore d2-d5
745 unlk %a6
746 # EPILOGUE END ##########################################################
# return to the caller. without this the exit path runs on into muls64_zero,
# which branches back to muls64_end and never returns.
rts
750 # one or both of the operands is zero so the result is also zero.
751 # store the zero result through the result pointer and set the 'Z' ccode bit.
752 muls64_zero:
753 clr.l %d0
754 clr.l %d1
756 mov.w MUL64_CC(%a6),%d4
757 andi.b &0x10,%d4
758 ori.b &0x4,%d4
759 mov.w %d4,%cc # set 'Z' ccode bit
761 bra.b muls64_end
763 #########################################################################
764 # XDEF **************************************************************** #
765 # _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
766 # _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
767 # _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
768 # _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
769 # _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
770 # _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
772 # This is the library version which is accessed as a subroutine #
773 # and therefore does not work exactly like the 680X0 "cmp2" #
774 # instruction. #
776 # XREF **************************************************************** #
777 # None #
779 # INPUT *************************************************************** #
780 # 0x4(sp) = Rn #
781 # 0x8(sp) = pointer to boundary pair #
783 # OUTPUT ************************************************************** #
784 # cc = condition codes are set correctly #
786 # ALGORITHM *********************************************************** #
787 # In the interest of simplicity, all operands are converted to #
788 # longword size whether the operation is byte, word, or long. The #
789 # bounds are sign extended accordingly. If Rn is a data register, Rn is #
790 # also sign extended. If Rn is an address register, it need not be sign #
791 # extended since the full register is always used. #
792 # The condition codes are set correctly before the final "rts". #
794 #########################################################################
# offset, relative to %a6, of the word holding the ccodes saved on entry.
796 set CMP2_CC, -4
# cmp2.b with an address-register operand.
# After link.w: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 1).
# Loads %d2 = Rn (used as a full longword), %d0 = lower bound and
# %d1 = upper bound, both sign-extended to longword, then joins l_cmp2_cmp.
798 global _060LSP__cmp2_Ab_
799 _060LSP__cmp2_Ab_:
801 # PROLOGUE BEGIN ########################################################
802 link.w %a6,&-4
803 movm.l &0x3800,-(%sp) # save d2-d4
804 # fmovm.l &0x0,-(%sp) # save no fpregs
805 # PROLOGUE END ##########################################################
807 mov.w %cc,CMP2_CC(%a6)
808 mov.l 0x8(%a6), %d2 # get regval
810 mov.b ([0xc,%a6],0x0),%d0
811 mov.b ([0xc,%a6],0x1),%d1
813 extb.l %d0 # sign extend lo bnd
814 extb.l %d1 # sign extend hi bnd
815 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.w with an address-register operand.
# After link.w: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 2).
# Loads %d2 = Rn (used as a full longword), %d0 = lower bound and
# %d1 = upper bound, both sign-extended to longword, then joins l_cmp2_cmp.
817 global _060LSP__cmp2_Aw_
818 _060LSP__cmp2_Aw_:
820 # PROLOGUE BEGIN ########################################################
821 link.w %a6,&-4
822 movm.l &0x3800,-(%sp) # save d2-d4
823 # fmovm.l &0x0,-(%sp) # save no fpregs
824 # PROLOGUE END ##########################################################
826 mov.w %cc,CMP2_CC(%a6)
827 mov.l 0x8(%a6), %d2 # get regval
829 mov.w ([0xc,%a6],0x0),%d0
830 mov.w ([0xc,%a6],0x2),%d1
832 ext.l %d0 # sign extend lo bnd
833 ext.l %d1 # sign extend hi bnd
834 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.l with an address-register operand.
# After link.w: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 4).
# Loads %d2 = Rn, %d0 = lower bound, %d1 = upper bound; no extension is
# needed for longword operands. Then joins l_cmp2_cmp.
836 global _060LSP__cmp2_Al_
837 _060LSP__cmp2_Al_:
839 # PROLOGUE BEGIN ########################################################
840 link.w %a6,&-4
841 movm.l &0x3800,-(%sp) # save d2-d4
842 # fmovm.l &0x0,-(%sp) # save no fpregs
843 # PROLOGUE END ##########################################################
845 mov.w %cc,CMP2_CC(%a6)
846 mov.l 0x8(%a6), %d2 # get regval
848 mov.l ([0xc,%a6],0x0),%d0
849 mov.l ([0xc,%a6],0x4),%d1
850 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.b with a data-register operand.
# After link.w: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 1).
# Same as the address-register byte variant, except that the low byte of Rn
# is also sign-extended to a longword before joining l_cmp2_cmp.
852 global _060LSP__cmp2_Db_
853 _060LSP__cmp2_Db_:
855 # PROLOGUE BEGIN ########################################################
856 link.w %a6,&-4
857 movm.l &0x3800,-(%sp) # save d2-d4
858 # fmovm.l &0x0,-(%sp) # save no fpregs
859 # PROLOGUE END ##########################################################
861 mov.w %cc,CMP2_CC(%a6)
862 mov.l 0x8(%a6), %d2 # get regval
864 mov.b ([0xc,%a6],0x0),%d0
865 mov.b ([0xc,%a6],0x1),%d1
867 extb.l %d0 # sign extend lo bnd
868 extb.l %d1 # sign extend hi bnd
870 # operation is a data register compare.
871 # sign extend byte to long so we can do simple longword compares.
872 extb.l %d2 # sign extend data byte
873 bra.w l_cmp2_cmp # go do the compare emulation
# cmp2.w with a data-register operand.
# After link.w: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 2).
# Same as the address-register word variant, except that the low word of Rn
# is also sign-extended to a longword before joining l_cmp2_cmp.
875 global _060LSP__cmp2_Dw_
876 _060LSP__cmp2_Dw_:
878 # PROLOGUE BEGIN ########################################################
879 link.w %a6,&-4
880 movm.l &0x3800,-(%sp) # save d2-d4
881 # fmovm.l &0x0,-(%sp) # save no fpregs
882 # PROLOGUE END ##########################################################
884 mov.w %cc,CMP2_CC(%a6)
885 mov.l 0x8(%a6), %d2 # get regval
887 mov.w ([0xc,%a6],0x0),%d0
888 mov.w ([0xc,%a6],0x2),%d1
890 ext.l %d0 # sign extend lo bnd
891 ext.l %d1 # sign extend hi bnd
893 # operation is a data register compare.
894 # sign extend word to long so we can do simple longword compares.
895 ext.l %d2 # sign extend data word
896 bra.w l_cmp2_cmp # go emulate compare
# cmp2.l with a data-register operand. It loads %d2 = Rn, %d0 = lower bound
# and %d1 = upper bound, then falls straight into the shared compare code at
# l_cmp2_cmp, which the other five cmp2 entry points reach by branch.
898 global _060LSP__cmp2_Dl_
899 _060LSP__cmp2_Dl_:
901 # PROLOGUE BEGIN ########################################################
902 link.w %a6,&-4
903 movm.l &0x3800,-(%sp) # save d2-d4
904 # fmovm.l &0x0,-(%sp) # save no fpregs
905 # PROLOGUE END ##########################################################
907 mov.w %cc,CMP2_CC(%a6)
908 mov.l 0x8(%a6), %d2 # get regval
910 mov.l ([0xc,%a6],0x0),%d0
911 mov.l ([0xc,%a6],0x4),%d1
914 # To set the ccodes correctly:
915 # (1) save 'Z' bit from (Rn - lo)
916 # (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
917 # (3) keep 'X', 'N', and 'V' from before instruction
918 # (4) combine ccodes
# On entry: %d0 = lower bound, %d1 = upper bound, %d2 = Rn, all longwords.
# %d3 and %d4 are scratch for assembling the new ccodes.
920 l_cmp2_cmp:
921 sub.l %d0, %d2 # (Rn - lo)
922 mov.w %cc, %d3 # fetch resulting ccodes
923 andi.b &0x4, %d3 # keep 'Z' bit
924 sub.l %d0, %d1 # (hi - lo)
925 cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo))
927 mov.w %cc, %d4 # fetch resulting ccodes
928 or.b %d4, %d3 # combine w/ earlier ccodes
929 andi.b &0x5, %d3 # keep 'Z' and 'C'
931 mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
932 andi.b &0x1a, %d4 # keep 'X','N','V' bits
933 or.b %d3, %d4 # insert new ccodes
934 mov.w %d4,%cc # save new ccodes
936 # EPILOGUE BEGIN ########################################################
937 # fmovm.l (%sp)+,&0x0 # restore no fpregs
938 movm.l (%sp)+,&0x001c # restore d2-d4
939 unlk %a6
940 # EPILOGUE END ##########################################################