1 /* 128 bit integer arithmetic.
3 * Not optimized for speed.
5 * Copyright: Copyright D Language Foundation 2022.
6 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7 * Authors: Walter Bright
8 * Source: $(DRUNTIMESRC core/_int128.d)
17 private alias I
= long;
18 private alias U
= ulong;
19 enum Ubits
= uint(U
.sizeof
* 8);
23 /* The alignment should follow target.stackAlign(),
24 * which is `isXmmSupported() ? 16 : (is64bit ? 8 : 4)
27 private enum Cent_alignment
= 16;
29 private enum Cent_alignment
= 8;
31 private enum Cent_alignment
= 4;
35 version (X86_64
) private enum Cent_alignment
= 16;
36 else private enum Cent_alignment
= (size_t
.sizeof
* 2);
40 * 128 bit integer type.
41 * See_also: $(REF Int128, std,int128).
43 align(Cent_alignment
) struct Cent
45 version (LittleEndian
)
57 enum Cent One
= { lo
:1 };
58 enum Cent Zero
= { lo
:0 };
59 enum Cent MinusOne
= neg(One
);
61 /*****************************
75 /*****************************
78 * c = Cent to complement
90 /*****************************
110 /*****************************
113 * c = Cent to increment
123 /*****************************
126 * c = Cent to decrement
136 /*****************************
146 c
.hi
= (c
.hi
<< 1) |
(cast(I
)c
.lo
< 0);
151 /*****************************
152 * Unsigned shift right one bit
161 c
.lo
= (c
.lo
>> 1) |
((c
.hi
& 1) << (Ubits
- 1));
167 /*****************************
168 * Arithmetic shift right one bit
177 c
.lo
= (c
.lo
>> 1) |
((c
.hi
& 1) << (Ubits
- 1));
178 c
.hi
= cast(I
)c
.hi
>> 1;
182 /*****************************
186 * n = number of bits to shift
191 Cent
shl(Cent c
, uint n
)
198 c
.hi
= c
.lo
<< (n
- Ubits
);
203 c
.hi
= ((c
.hi
<< n
) |
(c
.lo
>> (Ubits
- n
- 1) >> 1));
209 /*****************************
210 * Unsigned shift right n bits
213 * n = number of bits to shift
218 Cent
shr(Cent c
, uint n
)
225 c
.lo
= c
.hi
>> (n
- Ubits
);
230 c
.lo
= ((c
.lo
>> n
) |
(c
.hi
<< (Ubits
- n
- 1) << 1));
236 /*****************************
237 * Arithmetic shift right n bits
240 * n = number of bits to shift
245 Cent
sar(Cent c
, uint n
)
247 const signmask
= -(c
.hi
>> (Ubits
- 1));
248 const signshift
= (Ubits
* 2) - n
;
251 // Sign extend all bits beyond the precision of Cent.
257 else if (signshift
>= Ubits
* 2)
260 else if (signshift
>= Ubits
)
262 c
.hi
&= ~(U
.max
<< (signshift
- Ubits
));
263 c
.hi |
= signmask
<< (signshift
- Ubits
);
268 c
.lo
&= ~(U
.max
<< signshift
);
269 c
.lo |
= signmask
<< signshift
;
274 /*****************************
275 * Rotate left one bit
284 int carry
= cast(I
)c
.hi
< 0;
286 c
.hi
= (c
.hi
<< 1) |
(cast(I
)c
.lo
< 0);
287 c
.lo
= (c
.lo
<< 1) | carry
;
291 /*****************************
292 * Rotate right one bit
301 int carry
= c
.lo
& 1;
302 c
.lo
= (c
.lo
>> 1) |
(cast(U
)(c
.hi
& 1) << (Ubits
- 1));
303 c
.hi
= (c
.hi
>> 1) |
(cast(U
)carry
<< (Ubits
- 1));
308 /*****************************
312 * n = number of bits to rotate
317 Cent
rol(Cent c
, uint n
)
321 Cent r
= shr(c
, Ubits
* 2 - n
);
325 /*****************************
326 * Rotate right n bits
329 * n = number of bits to rotate
334 Cent
ror(Cent c
, uint n
)
338 Cent l
= shl(c
, Ubits
* 2 - n
);
342 /****************************
351 Cent
and(Cent c1
, Cent c2
)
353 const Cent
ret = { lo
:c1
.lo
& c2
.lo
, hi
:c1
.hi
& c2
.hi
};
357 /****************************
366 Cent
or(Cent c1
, Cent c2
)
368 const Cent
ret = { lo
:c1
.lo | c2
.lo
, hi
:c1
.hi | c2
.hi
};
372 /****************************
381 Cent
xor(Cent c1
, Cent c2
)
383 const Cent
ret = { lo
:c1
.lo ^ c2
.lo
, hi
:c1
.hi ^ c2
.hi
};
387 /****************************
396 Cent
add(Cent c1
, Cent c2
)
398 U r
= cast(U
)(c1
.lo
+ c2
.lo
);
399 const Cent
ret = { lo
:r
, hi
:cast(U
)(c1
.hi
+ c2
.hi
+ (r
< c1
.lo
)) };
403 /****************************
404 * Subtract c2 from c1.
412 Cent
sub(Cent c1
, Cent c2
)
414 return add(c1
, neg(c2
));
417 /****************************
426 Cent
mul(Cent c1
, Cent c2
)
428 enum mulmask
= (1UL << (Ubits
/ 2)) - 1;
429 enum mulshift
= Ubits
/ 2;
431 // This algorithm splits the operands into 4 words, then computes and sums
432 // the partial products of each part.
433 const c2l0
= c2
.lo
& mulmask
;
434 const c2l1
= c2
.lo
>> mulshift
;
435 const c2h0
= c2
.hi
& mulmask
;
436 const c2h1
= c2
.hi
>> mulshift
;
438 const c1l0
= c1
.lo
& mulmask
;
440 U r1
= c1l0
* c2l1
+ (r0
>> mulshift
);
441 U r2
= c1l0
* c2h0
+ (r1
>> mulshift
);
442 U r3
= c1l0
* c2h1
+ (r2
>> mulshift
);
444 const c1l1
= c1
.lo
>> mulshift
;
445 r1
= c1l1
* c2l0
+ (r1
& mulmask
);
446 r2
= c1l1
* c2l1
+ (r2
& mulmask
) + (r1
>> mulshift
);
447 r3
= c1l1
* c2h0
+ (r3
& mulmask
) + (r2
>> mulshift
);
449 const c1h0
= c1
.hi
& mulmask
;
450 r2
= c1h0
* c2l0
+ (r2
& mulmask
);
451 r3
= c1h0
* c2l1
+ (r3
& mulmask
) + (r2
>> mulshift
);
453 const c1h1
= c1
.hi
>> mulshift
;
454 r3
= c1h1
* c2l0
+ (r3
& mulmask
);
456 const Cent
ret = { lo
:(r0
& mulmask
) + (r1
& mulmask
) * (mulmask
+ 1),
457 hi
:(r2
& mulmask
) + (r3
& mulmask
) * (mulmask
+ 1) };
462 /****************************
463 * Unsigned divide c1 / c2.
471 Cent
udiv(Cent c1
, Cent c2
)
474 return udivmod(c1
, c2
, modulus
);
477 /****************************
478 * Unsigned divide c1 / c2. The remainder after division is stored to modulus.
482 * modulus = set to c1 % c2
487 Cent
udivmod(Cent c1
, Cent c2
, out Cent modulus
)
489 //printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi);
490 // Based on "Unsigned Doubleword Division" in Hacker's Delight
493 // Divides a 128-bit dividend by a 64-bit divisor.
494 // The result must fit in 64 bits.
495 static U
udivmod128_64(Cent c1
, U c2
, out U modulus
)
497 // We work in base 2^^32
498 enum base
= 1UL << 32;
499 enum divmask
= (1UL << (Ubits
/ 2)) - 1;
500 enum divshift
= Ubits
/ 2;
502 // Check for overflow and divide by 0
509 // Computes [num1 num0] / den
510 static uint udiv96_64(U num1
, uint num0
, U den
)
512 // Extract both digits of the denominator
513 const den1
= cast(uint)(den
>> divshift
);
514 const den0
= cast(uint)(den
& divmask
);
515 // Estimate ret as num1 / den1, and then correct it
517 const t2
= (num1
% den1
) * base
+ num0
;
518 const t1
= ret * den0
;
520 ret -= (t1
- t2
> den
) ?
2 : 1;
521 return cast(uint)ret;
524 // Determine the normalization factor. We multiply c2 by this, so that its leading
525 // digit is at least half base. In binary this means just shifting left by the number
526 // of leading zeros, so that there's a 1 in the MSB.
527 // We also shift number by the same amount. This cannot overflow because c1.hi < c2.
528 const shift
= (Ubits
- 1) - bsr(c2
);
532 num2 |
= (c1
.lo
>> (-shift
& 63)) & (-cast(I
)shift
>> 63);
535 // Extract the low digits of the numerator (after normalizing)
536 const num1
= cast(uint)(c1
.lo
>> divshift
);
537 const num0
= cast(uint)(c1
.lo
& divmask
);
539 // Compute q1 = [num2 num1] / c2
540 const q1
= udiv96_64(num2
, num1
, c2
);
541 // Compute the true (partial) remainder
542 const rem
= num2
* base
+ num1
- q1
* c2
;
543 // Compute q0 = [rem num0] / c2
544 const q0
= udiv96_64(rem
, num0
, c2
);
546 modulus
= (rem
* base
+ num0
- q0
* c2
) >> shift
;
547 return (cast(U
)q1
<< divshift
) | q0
;
557 if (c1
.hi
== 0 && c2
.hi
== 0)
559 // Single precision divide
560 const Cent rem
= { lo
:c1
.lo
% c2
.lo
};
562 const Cent
ret = { lo
:c1
.lo
/ c2
.lo
};
567 // Numerator is smaller than the divisor
573 // Divisor is a 64-bit value, so we just need one 128/64 division.
574 // If c1 / c2 would overflow, break c1 up into two halves.
575 const q1
= (c1
.hi
< c2
.lo
) ?
0 : (c1
.hi
/ c2
.lo
);
577 c1
.hi
= c1
.hi
% c2
.lo
;
579 const q0
= udivmod128_64(c1
, c2
.lo
, rem
.lo
);
581 const Cent
ret = { lo
:q0
, hi
:q1
};
585 // Full cent precision division.
587 // We know that c2.hi != 0, so count leading zeros is OK
588 // We have 0 <= shift <= 63
589 const shift
= (Ubits
- 1) - bsr(c2
.hi
);
591 // Normalize the divisor so its MSB is 1
592 // v1 = (c2 << shift) >> 64
593 U v1
= shl(c2
, shift
).hi
;
595 // To ensure no overflow.
598 // Get quotient from divide unsigned operation.
600 const Cent q1
= { lo
:udivmod128_64(u1
, v1
, rem_ignored
) };
602 // Undo normalization and division of c1 by 2.
603 Cent quotient
= shr(shl(q1
, shift
), 63);
605 // Make quotient correct or too small by 1
607 quotient
= dec(quotient
);
609 // Now quotient is correct.
610 // Compute rem = c1 - (quotient * c2);
611 Cent rem
= sub(c1
, mul(quotient
, c2
));
613 // Check if remainder is larger than the divisor
616 // Increment quotient
617 quotient
= inc(quotient
);
618 // Subtract c2 from remainder
622 //printf("quotient "); print(quotient);
623 //printf("modulus "); print(modulus);
628 /****************************
629 * Signed divide c1 / c2.
637 Cent
div(Cent c1
, Cent c2
)
640 return divmod(c1
, c2
, modulus
);
643 /****************************
644 * Signed divide c1 / c2. The remainder after division is stored to modulus.
648 * modulus = set to c1 % c2
653 Cent
divmod(Cent c1
, Cent c2
, out Cent modulus
)
655 /* Muck about with the signs so we can use the unsigned divide
657 if (cast(I
)c1
.hi
< 0)
659 if (cast(I
)c2
.hi
< 0)
661 Cent r
= udivmod(neg(c1
), neg(c2
), modulus
);
662 modulus
= neg(modulus
);
665 Cent r
= neg(udivmod(neg(c1
), c2
, modulus
));
666 modulus
= neg(modulus
);
669 else if (cast(I
)c2
.hi
< 0)
671 return neg(udivmod(c1
, neg(c2
), modulus
));
674 return udivmod(c1
, c2
, modulus
);
677 /****************************
678 * If c1 > c2 unsigned
686 bool ugt(Cent c1
, Cent c2
)
688 return (c1
.hi
== c2
.hi
) ?
(c1
.lo
> c2
.lo
) : (c1
.hi
> c2
.hi
);
691 /****************************
692 * If c1 >= c2 unsigned
700 bool uge(Cent c1
, Cent c2
)
705 /****************************
706 * If c1 < c2 unsigned
714 bool ult(Cent c1
, Cent c2
)
719 /****************************
720 * If c1 <= c2 unsigned
728 bool ule(Cent c1
, Cent c2
)
733 /****************************
742 bool gt(Cent c1
, Cent c2
)
744 return (c1
.hi
== c2
.hi
)
746 : (cast(I
)c1
.hi
> cast(I
)c2
.hi
);
749 /****************************
758 bool ge(Cent c1
, Cent c2
)
763 /****************************
772 bool lt(Cent c1
, Cent c2
)
777 /****************************
786 bool le(Cent c1
, Cent c2
)
791 /*******************************************************/
797 import core
.stdc
.stdio
: printf
;
802 printf("%lld, %lld\n", cast(ulong)c
.lo
, cast(ulong)c
.hi
);
803 printf("x%llx, x%llx\n", cast(ulong)c
.lo
, cast(ulong)c
.hi
);
810 const Cent C0
= Zero
;
812 const Cent C2
= { lo
:2 };
813 const Cent C3
= { lo
:3 };
814 const Cent C5
= { lo
:5 };
815 const Cent C10
= { lo
:10 };
816 const Cent C20
= { lo
:20 };
817 const Cent C30
= { lo
:30 };
818 const Cent C100
= { lo
:100 };
820 const Cent Cm1
= neg(One
);
821 const Cent Cm3
= neg(C3
);
822 const Cent Cm10
= neg(C10
);
824 const Cent C3_1
= { lo
:1, hi
:3 };
825 const Cent C3_2
= { lo
:2, hi
:3 };
826 const Cent C4_8
= { lo
:8, hi
:4 };
827 const Cent C5_0
= { lo
:0, hi
:5 };
828 const Cent C7_1
= { lo
:1, hi
:7 };
829 const Cent C7_9
= { lo
:9, hi
:7 };
830 const Cent C9_3
= { lo
:3, hi
:9 };
831 const Cent C10_0
= { lo
:0, hi
:10 };
832 const Cent C10_1
= { lo
:1, hi
:10 };
833 const Cent C10_3
= { lo
:3, hi
:10 };
834 const Cent C11_3
= { lo
:3, hi
:11 };
835 const Cent C20_0
= { lo
:0, hi
:20 };
836 const Cent C90_30
= { lo
:30, hi
:90 };
838 const Cent Cm10_0
= inc(com(C10_0
)); // Cent(lo=0, hi=-10);
839 const Cent Cm10_1
= inc(com(C10_1
)); // Cent(lo=-1, hi=-11);
840 const Cent Cm10_3
= inc(com(C10_3
)); // Cent(lo=-3, hi=-11);
841 const Cent Cm20_0
= inc(com(C20_0
)); // Cent(lo=0, hi=-20);
843 enum Cent Cs_3
= { lo
:3, hi
:I
.min
};
845 const Cent Cbig_1
= { lo
:0xa3ccac1832952398, hi
:0xc3ac542864f652f8 };
846 const Cent Cbig_2
= { lo
:0x5267b85f8a42fc20, hi
:0 };
847 const Cent Cbig_3
= { lo
:0xf0000000ffffffff, hi
:0 };
849 /************************/
851 assert( ugt(C1
, C0
) );
852 assert( ult(C1
, C2
) );
853 assert( uge(C1
, C0
) );
854 assert( ule(C1
, C2
) );
856 assert( !ugt(C0
, C1
) );
857 assert( !ult(C2
, C1
) );
858 assert( !uge(C0
, C1
) );
859 assert( !ule(C2
, C1
) );
861 assert( !ugt(C1
, C1
) );
862 assert( !ult(C1
, C1
) );
863 assert( uge(C1
, C1
) );
864 assert( ule(C2
, C2
) );
866 assert( ugt(C10_3
, C10_1
) );
867 assert( ugt(C11_3
, C10_3
) );
868 assert( !ugt(C9_3
, C10_3
) );
869 assert( !ugt(C9_3
, C9_3
) );
871 assert( gt(C2
, C1
) );
872 assert( !gt(C1
, C2
) );
873 assert( !gt(C1
, C1
) );
874 assert( gt(C0
, Cm1
) );
875 assert( gt(Cm1
, neg(C10
)));
876 assert( !gt(Cm1
, Cm1
) );
877 assert( !gt(Cm1
, C0
) );
879 assert( !lt(C2
, C1
) );
880 assert( !le(C2
, C1
) );
881 assert( ge(C2
, C1
) );
883 assert(neg(C10_0
) == Cm10_0
);
884 assert(neg(C10_1
) == Cm10_1
);
885 assert(neg(C10_3
) == Cm10_3
);
887 assert(add(C7_1
,C3_2
) == C10_3
);
888 assert(sub(C1
,C2
) == Cm1
);
890 assert(inc(C3_1
) == C3_2
);
891 assert(dec(C3_2
) == C3_1
);
893 assert(shl(C10
,0) == C10
);
894 assert(shl(C10
,Ubits
) == C10_0
);
895 assert(shl(C10
,1) == C20
);
896 assert(shl(C10
,Ubits
* 2) == C0
);
897 assert(shr(C10_0
,0) == C10_0
);
898 assert(shr(C10_0
,Ubits
) == C10
);
899 assert(shr(C10_0
,Ubits
- 1) == C20
);
900 assert(shr(C10_0
,Ubits
+ 1) == C5
);
901 assert(shr(C10_0
,Ubits
* 2) == C0
);
902 assert(sar(C10_0
,0) == C10_0
);
903 assert(sar(C10_0
,Ubits
) == C10
);
904 assert(sar(C10_0
,Ubits
- 1) == C20
);
905 assert(sar(C10_0
,Ubits
+ 1) == C5
);
906 assert(sar(C10_0
,Ubits
* 2) == C0
);
907 assert(sar(Cm1
,Ubits
* 2) == Cm1
);
909 assert(shl1(C10
) == C20
);
910 assert(shr1(C10_0
) == C5_0
);
911 assert(sar1(C10_0
) == C5_0
);
912 assert(sar1(Cm1
) == Cm1
);
916 assert(udiv(C10
,C2
) == C5
);
917 assert(udivmod(C10
,C2
, modulus
) == C5
); assert(modulus
== C0
);
918 assert(udivmod(C10
,C3
, modulus
) == C3
); assert(modulus
== C1
);
919 assert(udivmod(C10
,C0
, modulus
) == Cm1
); assert(modulus
== C0
);
920 assert(udivmod(C2
,C90_30
, modulus
) == C0
); assert(modulus
== C2
);
921 assert(udiv(mul(C90_30
, C2
), C2
) == C90_30
);
922 assert(udiv(mul(C90_30
, C2
), C90_30
) == C2
);
924 assert(div(C10
,C3
) == C3
);
925 assert(divmod( C10
, C3
, modulus
) == C3
); assert(modulus
== C1
);
926 assert(divmod(Cm10
, C3
, modulus
) == Cm3
); assert(modulus
== Cm1
);
927 assert(divmod( C10
, Cm3
, modulus
) == Cm3
); assert(modulus
== C1
);
928 assert(divmod(Cm10
, Cm3
, modulus
) == C3
); assert(modulus
== Cm1
);
929 assert(divmod(C2
, C90_30
, modulus
) == C0
); assert(modulus
== C2
);
930 assert(div(mul(C90_30
, C2
), C2
) == C90_30
);
931 assert(div(mul(C90_30
, C2
), C90_30
) == C2
);
933 const Cent Cb1divb2
= { lo
:0x4496aa309d4d4a2f, hi
:U
.max
};
934 const Cent Cb1modb2
= { lo
:0xd83203d0fdc799b8, hi
:U
.max
};
935 assert(divmod(Cbig_1
, Cbig_2
, modulus
) == Cb1divb2
);
936 assert(modulus
== Cb1modb2
);
938 const Cent Cb1udivb2
= { lo
:0x5fe0e9bace2bedad, hi
:2 };
939 const Cent Cb1umodb2
= { lo
:0x2c923125a68721f8, hi
:0 };
940 assert(udivmod(Cbig_1
, Cbig_2
, modulus
) == Cb1udivb2
);
941 assert(modulus
== Cb1umodb2
);
943 const Cent Cb1divb3
= { lo
:0xbfa6c02b5aff8b86, hi
:U
.max
};
944 const Cent Cb1udivb3
= { lo
:0xd0b7d13b48cb350f, hi
:0 };
945 assert(div(Cbig_1
, Cbig_3
) == Cb1divb3
);
946 assert(udiv(Cbig_1
, Cbig_3
) == Cb1udivb3
);
948 assert(mul(Cm10
, C1
) == Cm10
);
949 assert(mul(C1
, Cm10
) == Cm10
);
950 assert(mul(C9_3
, C10
) == C90_30
);
951 assert(mul(Cs_3
, C10
) == C30
);
952 assert(mul(Cm10
, Cm10
) == C100
);
953 assert(mul(C20_0
, Cm1
) == Cm20_0
);
955 assert( or(C4_8
, C3_1
) == C7_9
);
956 assert(and(C4_8
, C7_9
) == C4_8
);
957 assert(xor(C4_8
, C7_9
) == C3_1
);
959 assert(rol(Cm1
, 1) == Cm1
);
960 assert(ror(Cm1
, 45) == Cm1
);
961 assert(rol(ror(C7_9
, 5), 5) == C7_9
);
962 assert(rol(C7_9
, 1) == rol1(C7_9
));
963 assert(ror(C7_9
, 1) == ror1(C7_9
));