1 /*-------------------------------------------------------------------------
2 _mullong.c - routine for multiplication of 32 bit (unsigned) long
4 Copyright (C) 1999, Sandeep Dutta . sandeep.dutta@usa.net
5 Copyright (C) 1999, Jean Louis VERN jlvern@writeme.com
7 This library is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this library; see the file COPYING. If not, write to the
19 Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
22 As a special exception, if you link this library with other files,
23 some of which are compiled with SDCC, to produce an executable,
24 this library does not by itself cause the resulting executable to
25 be covered by the GNU General Public License. This exception does
26 not however invalidate any other reasons why the executable file
27 might be covered by the GNU General Public License.
28 -------------------------------------------------------------------------*/
30 /* Signed and unsigned multiplication are the same - as long as the output
31 has the same precision as the input.
33 Assembler-functions are provided for:
35 mcs51 small stack-auto
40 #if !defined(__SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
41 # if defined(__SDCC_mcs51)
42 # if defined(__SDCC_MODEL_SMALL)
43 # if defined(__SDCC_STACK_AUTO) && !defined(__SDCC_PARMS_IN_BANK1)
44 # define _MULLONG_ASM_SMALL_AUTO
46 # define _MULLONG_ASM_SMALL
48 # elif defined(__SDCC_MODEL_LARGE)
49 # if !defined(__SDCC_STACK_AUTO)
50 # define _MULLONG_ASM_LARGE
56 #if defined(_MULLONG_ASM_SMALL) || defined(_MULLONG_ASM_SMALL_AUTO)
59 _mullong_dummy (void) __naked
67 ; the result c will be stored in r4
...r7
79 ; c1 a1
* b0
+ a0
* b1
80 ; c2 a2
* b0
+ a1
* b1
+ a0
* b2
81 ; c3 a3
* b0
+ a2
* b1
+ a1
* b2
+ a0
* b3
83 #if !defined(__SDCC_STACK_AUTO) || defined(__SDCC_PARMS_IN_BANK1)
84 #if defined(__SDCC_PARMS_IN_BANK1)
90 #if defined(__SDCC_NOOVERLAY)
98 .globl __mullong_PARM_2
102 b0
= __mullong_PARM_2
103 b1
= (__mullong_PARM_2
+1)
104 b2
= (__mullong_PARM_2
+2)
105 b3
= (__mullong_PARM_2
+3)
110 ; parameter a comes in a
, b
, dph
, dpl
111 mov r2
,b
; save parameter a
200 #else // __SDCC_STACK_AUTO
202 ; parameter a comes in a
, b
, dph
, dpl
203 mov r2
,b
; save parameter a
213 mov a
,#-2-3 ; 1 return address 2 bytes, b 4 bytes
215 mov r0
,a
; 1 r0 points to b0
220 mov b0
,b
; we need b0 several times
221 inc r0
; r0 points to b1
310 #endif // __SDCC_STACK_AUTO
315 #elif defined(_MULLONG_ASM_LARGE)
318 _mullong_dummy (void) __naked
326 ; the result c will be stored in r4
...r7
333 ; c1 a1
* b0
+ a0
* b1
334 ; c2 a2
* b0
+ a1
* b1
+ a0
* b2
335 ; c3 a3
* b0
+ a2
* b1
+ a1
* b2
+ a0
* b3
337 #if !defined(__SDCC_PARMS_IN_BANK1)
342 .globl __mullong_PARM_2
348 ; parameter a comes in a
, b
, dph
, dpl
349 mov r0
,dpl
; save parameter a
361 #if defined(__SDCC_PARMS_IN_BANK1)
364 mov dptr
,#__mullong_PARM_2
373 #if defined(__SDCC_PARMS_IN_BANK1)
386 #if defined(__SDCC_PARMS_IN_BANK1)
404 #if defined(__SDCC_PARMS_IN_BANK1)
417 #if defined(__SDCC_PARMS_IN_BANK1)
431 #if defined(__SDCC_PARMS_IN_BANK1)
434 mov dptr
,#__mullong_PARM_2
446 #if defined(__SDCC_PARMS_IN_BANK1)
456 #if defined(__SDCC_PARMS_IN_BANK1)
467 #if defined(__SDCC_PARMS_IN_BANK1)
478 #if defined(__SDCC_PARMS_IN_BANK1)
495 #elif defined(__SDCC_USE_XSTACK) && defined(__SDCC_STACK_AUTO)
498 _mullong_dummy (void) __naked
506 ; the result c will be stored in r4
...r7
520 ; c1 a1
* b0
+ a0
* b1
521 ; c2 a2
* b0
+ a1
* b1
+ a0
* b2
522 ; c3 a3
* b0
+ a2
* b1
+ a1
* b2
+ a0
* b3
524 ; parameter a comes in a
, b
, dph
, dpl
525 mov r2
,b
; save parameter a
528 mov a
,#-4 ; 1 b 4 bytes
530 mov r0
,a
; 1 r0 points to b0
534 mov b0
,a
; we need b0 several times
535 inc r0
; r0 points to b1
628 #else // _MULLONG_ASM
637 #if __STDC_ENDIAN_NATIVE__ == __STDC_ENDIAN_BIG__
639 struct {unsigned char b3
,b2
,b1
,b0
;} b
;
640 struct {unsigned short hi
,lo
;} i
;
642 struct { unsigned char b3
; unsigned short i12
; unsigned char b0
;} bi
;
646 struct {unsigned char b0
,b1
,b2
,b3
;} b
;
647 struct {unsigned short lo
,hi
;} i
;
649 struct { unsigned char b0
; unsigned short i12
; unsigned char b3
;} bi
;
654 #include <sdcc-lib.h>
657 #define bcast(x) ((union bil _AUTOMEM *)&(x))
662 ----------------------------
667 ----------------------------
675 |-------> only this side 32 x 32 -> 32
677 #if defined(__SDCC_USE_XSTACK)
678 // Currently the original code (the variant without the temporary u) fails
679 // with --xstack: it runs out of pointer registers.
681 _mullong (long a
, long b
)
685 t
.i
.hi
= bcast(a
)->b
.b0
* bcast(b
)->b
.b2
; // A
686 t
.i
.lo
= bcast(a
)->b
.b0
* bcast(b
)->b
.b0
; // A
687 u
.bi
.b3
= bcast(a
)->b
.b0
* bcast(b
)->b
.b3
; // B
688 u
.bi
.i12
= bcast(a
)->b
.b0
* bcast(b
)->b
.b1
; // B
692 t
.b
.b3
+= bcast(a
)->b
.b3
* bcast(b
)->b
.b0
; // G
693 t
.b
.b3
+= bcast(a
)->b
.b2
* bcast(b
)->b
.b1
; // F
694 t
.i
.hi
+= bcast(a
)->b
.b2
* bcast(b
)->b
.b0
; // E
695 t
.i
.hi
+= bcast(a
)->b
.b1
* bcast(b
)->b
.b1
; // D
697 u
.bi
.b3
= bcast(a
)->b
.b1
* bcast(b
)->b
.b2
; // C
698 u
.bi
.i12
= bcast(a
)->b
.b1
* bcast(b
)->b
.b0
; // C
704 #elif defined(__SDCC_z80) || defined(__SDCC_sm83) || defined(__SDCC_r2ka) || defined(__SDCC_r3k) || defined(__SDCC_r3ka)
705 /* 32x32->32 multiplication to be used
706 if 16x16->16 is faster than three 8x8->16.
707 2009, by M.Bodrato ( http://bodrato.it/ )
709 z80 and sm83 don't have any hardware multiplication, not even 8x8.
710 software 16x16 is nearly as efficient as software 8x8 there.
711 r2k(a) and r3k(a) have 16x16 hardware multiplication,
712 but on r2k it is affected by a hardware bug, and not used by sdcc.
715 _mullong (long a
, long b
)
719 bcast(a
)->i
.hi
*= bcast(b
)->i
.lo
;
720 bcast(a
)->i
.hi
+= bcast(b
)->i
.hi
* bcast(a
)->i
.lo
;
722 /* only (a->i.lo * b->i.lo) 16x16->32 to do. asm? */
723 bcast(a
)->i
.hi
+= bcast(a
)->b
.b1
* bcast(b
)->b
.b1
;
725 i12
= bcast(b
)->b
.b0
* bcast(a
)->b
.b1
;
726 bcast(b
)->bi
.i12
= bcast(a
)->b
.b0
* bcast(b
)->b
.b1
;
728 /* add up the two partial results, store carry in b3 */
729 bcast(b
)->b
.b3
= ((bcast(b
)->bi
.i12
+= i12
) < i12
);
731 bcast(a
)->i
.lo
= bcast(a
)->b
.b0
* bcast(b
)->b
.b0
;
739 _mullong (long a
, long b
) __SDCC_NONBANKED
743 t
.i
.hi
= bcast(a
)->b
.b0
* bcast(b
)->b
.b2
; // A
744 t
.i
.lo
= bcast(a
)->b
.b0
* bcast(b
)->b
.b0
; // A
745 t
.b
.b3
+= bcast(a
)->b
.b3
* bcast(b
)->b
.b0
; // G
746 t
.b
.b3
+= bcast(a
)->b
.b2
* bcast(b
)->b
.b1
; // F
747 t
.i
.hi
+= bcast(a
)->b
.b2
* bcast(b
)->b
.b0
; // E <- b lost in .lst
748 // bcast(a)->i.hi is free !
749 t
.i
.hi
+= bcast(a
)->b
.b1
* bcast(b
)->b
.b1
; // D <- b lost in .lst
751 bcast(a
)->bi
.b3
= bcast(a
)->b
.b1
* bcast(b
)->b
.b2
; // C
752 bcast(a
)->bi
.i12
= bcast(a
)->b
.b1
* bcast(b
)->b
.b0
; // C
754 bcast(b
)->bi
.b3
= bcast(a
)->b
.b0
* bcast(b
)->b
.b3
; // B
755 bcast(b
)->bi
.i12
= bcast(a
)->b
.b0
* bcast(b
)->b
.b1
; // B
757 bcast(b
)->bi
.b0
= 0; // B
758 bcast(a
)->bi
.b0
= 0; // C
765 #endif // _MULLONG_ASM