//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements __udivmodti4 for the compiler_rt library.
//
//===----------------------------------------------------------------------===//

#include "int_lib.h"

#ifdef CRT_HAS_128BIT

// Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
// Remainder stored in r.
// Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
// fallback. For a correctness proof see the reference for this algorithm
// in Knuth, Volume 2, section 4.3.1, Algorithm D.
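// The routine follows the textbook scheme: normalize so the divisor's most
// significant bit is set, split the divisor and the low half of the dividend
// into 32-bit digits, then estimate and correct two 32-bit quotient digits
// (q1 and q0).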
static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
                                            du_int *r) {
  const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
  du_int un1, un0;         // Norm. dividend LSD's
  du_int vn1, vn0;         // Norm. divisor digits
  du_int q1, q0;           // Quotient digits
  du_int un64, un21, un10; // Dividend digit pairs
  du_int rhat;             // A remainder
  si_int s;                // Shift amount for normalization

  s = __builtin_clzll(v);
  if (s > 0) {
    // Normalize the divisor.
    v = v << s;
    un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
    un10 = u0 << s; // Shift dividend left
  } else {
    // Avoid undefined behavior of (u0 >> 64).
    un64 = u1;
    un10 = u0;
  }

  // Break divisor up into two 32-bit digits.
  vn1 = v >> (n_udword_bits / 2);
  vn0 = v & 0xFFFFFFFF;

  // Break right half of dividend into two digits.
  un1 = un10 >> (n_udword_bits / 2);
  un0 = un10 & 0xFFFFFFFF;

  // Compute the first quotient digit, q1.
  q1 = un64 / vn1;
  rhat = un64 - q1 * vn1;
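
  // q1 estimates the high quotient digit and rhat the corresponding partial
  // remainder. Normalization guarantees vn1 >= b / 2, which is what bounds the
  // estimate's error (see Knuth's analysis of Algorithm D).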
  // q1 has at most error 2. No more than 2 iterations.
  while (q1 >= b || q1 * vn0 > b * rhat + un1) {
    q1 = q1 - 1;
    rhat = rhat + vn1;
    if (rhat >= b)
      break;
  }

  un21 = un64 * b + un1 - q1 * v;

  // Compute the second quotient digit.
  q0 = un21 / vn1;
  rhat = un21 - q0 * vn1;

  // q0 has at most error 2. No more than 2 iterations.
  while (q0 >= b || q0 * vn0 > b * rhat + un0) {
    q0 = q0 - 1;
    rhat = rhat + vn1;
    if (rhat >= b)
      break;
  }

  *r = (un21 * b + un0 - q0 * v) >> s;
  return q1 * b + q0;
}
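
// On x86_64, divq divides the 128-bit value rdx:rax by a 64-bit operand,
// leaving the quotient in rax and the remainder in rdx. It raises a divide
// error if the quotient does not fit in 64 bits, so the same "result must fit
// in 64 bits" precondition applies as for the portable fallback above.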
static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
                                     du_int *r) {
#if defined(__x86_64__)
  du_int result;
  __asm__("divq %[v]"
          : "=a"(result), "=d"(*r)
          : [ v ] "r"(v), "a"(u0), "d"(u1));
  return result;
#else
  return udiv128by64to64default(u1, u0, v, r);
#endif
}

// Effects: if rem != 0, *rem = a % b
// Returns: a / b
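// Compilers typically lower unsigned __int128 division and modulo to
// __udivti3 and __umodti3, which in compiler_rt are thin wrappers around this
// function: __udivti3 returns __udivmodti4(a, b, 0) and __umodti3 passes &r
// and returns the remainder.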
COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
  const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
  utwords dividend;
  dividend.all = a;
  utwords divisor;
  divisor.all = b;
  utwords quotient;
  utwords remainder;
  if (divisor.all > dividend.all) {
    if (rem)
      *rem = dividend.all;
    return 0;
  }
  // When the divisor fits in 64 bits, we can use an optimized path.
  if (divisor.s.high == 0) {
    remainder.s.high = 0;
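    // Two sub-cases: when dividend.s.high < divisor.s.low the whole quotient
    // fits in 64 bits and a single 128-by-64 division suffices; otherwise the
    // high quotient digit is computed with a native 64-bit division first,
    // which also reduces dividend.s.high below divisor.s.low.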
    if (dividend.s.high < divisor.s.low) {
      // The result fits in 64 bits.
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
      quotient.s.high = 0;
    } else {
      // First, divide with the high part to get the remainder in dividend.s.high.
      // After that dividend.s.high < divisor.s.low.
      quotient.s.high = dividend.s.high / divisor.s.low;
      dividend.s.high = dividend.s.high % divisor.s.low;
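      // dividend.s.high now holds the remainder of the high-part division, so
      // the 128-by-64 division below meets the "result must fit in 64 bits"
      // precondition of udiv128by64to64.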
      quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
                                       divisor.s.low, &remainder.s.low);
    }
    if (rem)
      *rem = remainder.all;
    return quotient.all;
  }
  // 0 <= shift <= 63.
  si_int shift =
      __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
  divisor.all <<= shift;
  quotient.s.high = 0;
  quotient.s.low = 0;
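  // The shift aligns the divisor's highest set bit with the dividend's; the
  // loop then runs shift + 1 iterations of branch-free shift-and-subtract long
  // division, producing one quotient bit per iteration in quotient.s.low.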
  for (; shift >= 0; --shift) {
    quotient.s.low <<= 1;
    // Branch-free version of:
    // if (dividend.all >= divisor.all) {
    //   dividend.all -= divisor.all;
    //   carry = 1;
    // }
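    // divisor.all - dividend.all - 1 is negative exactly when dividend.all >=
    // divisor.all (the loop invariant keeps the difference's magnitude below
    // 2^127), so the arithmetic right shift by 127 smears the sign bit: s is
    // all ones when the subtraction should happen and all zeros otherwise,
    // letting the update below run unconditionally under that mask.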
    const ti_int s =
        (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
    quotient.s.low |= s & 1;
    dividend.all -= divisor.all & s;
    divisor.all >>= 1;
  }
  if (rem)
    *rem = dividend.all;
  return quotient.all;
}

#endif // CRT_HAS_128BIT