arch/.unmaintained/m68k-emul/utility/umult64.s

   1 /*
   2     Copyright © 1995-2001, The AROS Development Team. All rights reserved.
   3     $Id$
   4
   5     Desc: Utility 64 bit multiplication routines. m68k version.
   6     Lang: english
   7 */
   8
   9 #include "machine.i"
  10
  11 /* SMult64()/UMult64():
  12     These are the signed/unsigned 64 bit multiplication routines.
  13     There are two possibilities here because as of the 060 the
  14     32*32->64 bit result instructions are not supported, and I haven't
  15     quite figured out how to do this using the 32 bit ops yet (can't be
  16     that hard though).
  17
  18     Still, emulating is faster than taking an unsupported integer instruction exception.
  19
  20 */
  21     .text                                                   /* code goes into the text section */
  22     .balign 16                                              /* pad the location counter to a 16 byte boundary */
  23
  24     .globl  AROS_SLIB_ENTRY(UMult64,Utility)                /* make both entry points visible to the linker */
  25     .globl  AROS_SLIB_ENTRY(UMult64_020,Utility)
  26
  27     .type   AROS_SLIB_ENTRY(UMult64,Utility),@function      /* mark both symbols as functions */
  28     .type   AROS_SLIB_ENTRY(UMult64_020,Utility),@function
  29
  30
  31 AROS_SLIB_ENTRY(UMult64_020,Utility):   /* variant using the 32x32->64 form of mulu.l */
  32     mulu.l  %d0,%d0:%d1   /* unsigned d1 * d0: high longword -> d0, low longword -> d1 */
  33     rts                   /* no other registers are touched */
  34
  35 /*
  36     UMult64(): unsigned 32 x 32 -> 64 bit multiply built from 16 x 16 -> 32
  37     mulu.w steps, for CPUs without the 64 bit result form of mulu.l.
  38
  39     In:     d0 = x, d1 = y
  40     Out:    d0 = high 32 bits, d1 = low 32 bits of x * y
  41             (the same register layout UMult64_020 produces)
  42     Saves:  d2-d4 are preserved; the condition codes are clobbered.
  43
  44     With x = xh*2^16 + xl and y = yh*2^16 + yl (16 bit halves):
  45
  46         x * y = xh*yh * 2^32  +  (xh*yl + xl*yh) * 2^16  +  xl*yl
  47
  48     The sum of the two cross products can need 33 bits, and adding its
  49     low half into xl*yl can carry again; both carries are propagated
  50     into the high longword through the X flag.
  51 */
  52
  53 AROS_SLIB_ENTRY(UMult64,Utility):
  54     movem.l %d2-%d4,-(%sp)
  55
  56     /* Split the operands. Afterwards d0.w = d4.w = xl, d1.w = yl,
  57        d2.w = xh and d3.w = yh; mulu.w only reads the low words. */
  58     move.l  %d0,%d2
  59     move.l  %d1,%d3
  60     move.l  %d0,%d4
  61     swap    %d2
  62     swap    %d3
  63
  64     /* The four partial products. The order matters: every register still
  65        holds its operand half at the point where it is used as a source. */
  66     mulu.w  %d1,%d0     /* d0 = xl * yl */
  67     mulu.w  %d2,%d1     /* d1 = xh * yl */
  68     mulu.w  %d3,%d4     /* d4 = xl * yh */
  69     mulu.w  %d3,%d2     /* d2 = xh * yh */
  70
  71     /* mid = xh*yl + xl*yh. Bit 32 of that sum is worth 2^48 in the final
  72        result, i.e. 0x10000 in the high longword. */
  73     add.l   %d4,%d1     /* d1 = low 32 bits of mid, X = bit 32 of mid */
  74     clr.l   %d4         /* clr leaves X untouched */
  75     addx.l  %d4,%d4     /* d4 = 0 + 0 + X */
  76     swap    %d4         /* d4 = X << 16 */
  77     add.l   %d4,%d2     /* cannot overflow: the full product fits in 64 bits */
  78
  79     /* Split mid at bit 16: d1 = mid << 16, d3 = mid >> 16 (zero extended). */
  80     swap    %d1
  81     clr.l   %d3
  82     move.w  %d1,%d3
  83     clr.w   %d1
  84
  85     /* Final 64 bit addition: (d2:d0) + (d3:d1), result returned in d0:d1. */
  86     add.l   %d0,%d1     /* low longword, X = carry out */
  87     addx.l  %d3,%d2     /* high longword plus that carry */
  88     move.l  %d2,%d0
  89
  90     movem.l (%sp)+,%d2-%d4
  91     rts