1 //===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "NormalFloat.h"
11 #include "src/__support/UInt.h"
15 namespace __llvm_libc
{
18 // Store and manipulate positive double precision numbers at |Precision| bits.
19 template <size_t Precision
> class XFloat
{
20 static constexpr uint64_t OneMask
= (uint64_t(1) << 63);
22 static constexpr uint64_t WORDCOUNT
= Precision
/ 64;
25 size_t bit_width(uint64_t x
) {
29 while ((OneMask
& x
) == 0) {
38 for (int i
= 0; i
< WORDCOUNT
; ++i
)
42 XFloat(const XFloat
&other
) : exp(other
.exp
) {
43 for (int i
= 0; i
< WORDCOUNT
; ++i
)
44 man
[i
] = other
.man
[i
];
47 explicit XFloat(double x
) {
48 auto xn
= NormalFloat
<double>(x
);
50 man
[WORDCOUNT
- 1] = xn
.mantissa
<< 11;
51 for (int i
= 0; i
< WORDCOUNT
- 1; ++i
)
55 XFloat(int e
, const UInt
<Precision
> &bits
) : exp(e
) {
56 for (size_t i
= 0; i
< WORDCOUNT
; ++i
)
60 // Multiply this number with x and store the result in this number.
62 auto xn
= NormalFloat
<double>(x
);
64 uint64_t carry
= man
.mul(xn
.mantissa
<< 11);
65 size_t carry_width
= bit_width(carry
);
66 carry_width
= (carry_width
== 64 ? 64 : 63);
67 man
.shift_right(carry_width
);
68 man
[WORDCOUNT
- 1] = man
[WORDCOUNT
- 1] + (carry
<< (64 - carry_width
));
69 exp
+= carry_width
== 64 ? 1 : 0;
76 if (exp
> int(Precision
- 1)) {
77 for (size_t i
= 0; i
< WORDCOUNT
; ++i
)
82 man
.shift_left(exp
+ 1);
83 man
.shift_right(exp
+ 1);
88 double mul(const XFloat
<Precision
> &other
) {
89 constexpr size_t row_words
= 2 * WORDCOUNT
+ 1;
90 constexpr size_t row_precision
= row_words
* 64;
91 constexpr size_t result_bits
= 2 * Precision
;
92 UInt
<row_precision
> rows
[WORDCOUNT
];
94 for (size_t r
= 0; r
< WORDCOUNT
; ++r
) {
95 for (size_t i
= 0; i
< row_words
; ++i
) {
101 rows
[r
].mul(other
.man
[r
]);
102 rows
[r
].shift_left(r
* 64);
105 for (size_t r
= 1; r
< WORDCOUNT
; ++r
) {
106 rows
[0].add(rows
[r
]);
108 int result_exp
= exp
+ other
.exp
;
109 uint64_t carry
= rows
[0][row_words
- 1];
111 size_t carry_width
= bit_width(carry
);
112 rows
[0].shift_right(carry_width
);
113 rows
[0][row_words
- 2] =
114 rows
[0][row_words
- 2] + (carry
<< (64 - carry_width
));
115 result_exp
+= carry_width
;
118 if (rows
[0][row_words
- 2] & OneMask
) {
121 rows
[0].shift_left(1);
124 UInt
<result_bits
> result_man
;
125 for (size_t i
= 0; i
< result_bits
/ 64; ++i
)
126 result_man
[i
] = rows
[0][i
];
127 XFloat
<result_bits
> result(result_exp
, result_man
);
129 return double(result
);
132 explicit operator double() {
135 constexpr uint64_t one
= uint64_t(1) << 10;
136 constexpr uint64_t excess_mask
= (one
<< 1) - 1;
137 uint64_t excess
= man
[WORDCOUNT
- 1] & excess_mask
;
138 uint64_t new_man
= man
[WORDCOUNT
- 1] >> 11;
140 // We have to round up.
142 } else if (excess
== one
) {
143 bool greater_than_one
= false;
144 for (size_t i
= 0; i
< WORDCOUNT
- 1; ++i
) {
145 greater_than_one
= (man
[i
] != 0);
146 if (greater_than_one
)
149 if (greater_than_one
|| (new_man
& 1) != 0) {
154 if (new_man
== (uint64_t(1) << 53))
157 // We use NormalFloat as it can produce subnormal numbers or underflow to 0
159 NormalFloat
<double> d(exp
, new_man
, 0);
163 // Normalizes this number.
165 uint64_t man_bits
= 0;
166 for (size_t i
= 0; i
< WORDCOUNT
; ++i
)
172 while ((man
[WORDCOUNT
- 1] & OneMask
) == 0) {
179 } // namespace fputil
180 } // namespace __llvm_libc