[HLSL] Implement RWBuffer::operator[] via __builtin_hlsl_resource_getpointer (#117017)
[llvm-project.git] / llvm / unittests / ADT / APFloatTest.cpp
blobf291c814886d353378fdb4b86abb77b9e1aeec8f
//===- llvm/unittests/ADT/APFloatTest.cpp - APFloat unit tests ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ADT/APFloat.h"
10 #include "llvm/ADT/APSInt.h"
11 #include "llvm/ADT/Hashing.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/SmallVector.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Support/Error.h"
16 #include "llvm/Support/FormatVariadic.h"
17 #include "gtest/gtest.h"
18 #include <cmath>
19 #include <ostream>
20 #include <string>
21 #include <tuple>
23 using namespace llvm;
25 static std::string convertToErrorFromString(StringRef Str) {
26 llvm::APFloat F(0.0);
27 auto StatusOrErr =
28 F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven);
29 EXPECT_TRUE(!StatusOrErr);
30 return toString(StatusOrErr.takeError());
33 static double convertToDoubleFromString(StringRef Str) {
34 llvm::APFloat F(0.0);
35 auto StatusOrErr =
36 F.convertFromString(Str, llvm::APFloat::rmNearestTiesToEven);
37 EXPECT_FALSE(!StatusOrErr);
38 consumeError(StatusOrErr.takeError());
39 return F.convertToDouble();
42 static std::string convertToString(double d, unsigned Prec, unsigned Pad,
43 bool Tr = true) {
44 llvm::SmallVector<char, 100> Buffer;
45 llvm::APFloat F(d);
46 F.toString(Buffer, Prec, Pad, Tr);
47 return std::string(Buffer.data(), Buffer.size());
50 namespace {
52 TEST(APFloatTest, isSignaling) {
53 // We test qNaN, -qNaN, +sNaN, -sNaN with and without payloads. *NOTE* The
54 // positive/negative distinction is included only since the getQNaN/getSNaN
55 // API provides the option.
56 APInt payload = APInt::getOneBitSet(4, 2);
57 APFloat QNan = APFloat::getQNaN(APFloat::IEEEsingle(), false);
58 EXPECT_FALSE(QNan.isSignaling());
59 EXPECT_EQ(fcQNan, QNan.classify());
61 EXPECT_FALSE(APFloat::getQNaN(APFloat::IEEEsingle(), true).isSignaling());
62 EXPECT_FALSE(APFloat::getQNaN(APFloat::IEEEsingle(), false, &payload).isSignaling());
63 EXPECT_FALSE(APFloat::getQNaN(APFloat::IEEEsingle(), true, &payload).isSignaling());
65 APFloat SNan = APFloat::getSNaN(APFloat::IEEEsingle(), false);
66 EXPECT_TRUE(SNan.isSignaling());
67 EXPECT_EQ(fcSNan, SNan.classify());
69 EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isSignaling());
70 EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), false, &payload).isSignaling());
71 EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), true, &payload).isSignaling());
// Exercises APFloat::next on IEEEquad values. Throughout this test
// next(false) is the "nextUp" operation and next(true) is "nextDown"; every
// case checks the returned status and compares the result bit-for-bit against
// an independently constructed expected value.
TEST(APFloatTest, next) {

  APFloat test(APFloat::IEEEquad(), APFloat::uninitialized);
  APFloat expected(APFloat::IEEEquad(), APFloat::uninitialized);

  // 1. Test Special Cases Values.

  // Test all special values for nextUp and nextDown prescribed by IEEE-754R
  // 2008. These are:
  //   1.  +inf
  //   2.  -inf
  //   3.  getLargest()
  //   4.  -getLargest()
  //   5.  getSmallest()
  //   6.  -getSmallest()
  //   7.  qNaN
  //   8.  sNaN
  //   9.  +0
  //   10. -0

  // nextUp(+inf) = +inf.
  test = APFloat::getInf(APFloat::IEEEquad(), false);
  expected = APFloat::getInf(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isInfinity());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+inf) = -nextUp(-inf) = -(-getLargest()) = getLargest()
  test = APFloat::getInf(APFloat::IEEEquad(), false);
  expected = APFloat::getLargest(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-inf) = -getLargest()
  test = APFloat::getInf(APFloat::IEEEquad(), true);
  expected = APFloat::getLargest(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf.
  test = APFloat::getInf(APFloat::IEEEquad(), true);
  expected = APFloat::getInf(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isInfinity() && test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(getLargest()) = +inf
  test = APFloat::getLargest(APFloat::IEEEquad(), false);
  expected = APFloat::getInf(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isInfinity() && !test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(getLargest()) = -nextUp(-getLargest())
  //                        = -(-getLargest() + inc)
  //                        = getLargest() - inc.
  test = APFloat::getLargest(APFloat::IEEEquad(), false);
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.fffffffffffffffffffffffffffep+16383");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(!test.isInfinity() && !test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-getLargest()) = -getLargest() + inc.
  test = APFloat::getLargest(APFloat::IEEEquad(), true);
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.fffffffffffffffffffffffffffep+16383");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-getLargest()) = -nextUp(getLargest()) = -(inf) = -inf.
  test = APFloat::getLargest(APFloat::IEEEquad(), true);
  expected = APFloat::getInf(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isInfinity() && test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(getSmallest()) = getSmallest() + inc.
  test = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x0.0000000000000000000000000002p-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(getSmallest()) = -nextUp(-getSmallest()) = -(-0) = +0.
  test = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382");
  expected = APFloat::getZero(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isPosZero());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-getSmallest()) = -0.
  test = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382");
  expected = APFloat::getZero(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isNegZero());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-getSmallest()) = -nextUp(getSmallest()) = -getSmallest() - inc.
  test = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x0.0000000000000000000000000002p-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(qNaN) = qNaN
  test = APFloat::getQNaN(APFloat::IEEEquad(), false);
  expected = APFloat::getQNaN(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(qNaN) = qNaN
  test = APFloat::getQNaN(APFloat::IEEEquad(), false);
  expected = APFloat::getQNaN(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(sNaN) = qNaN
  // Stepping a signaling NaN is the only case in this test that is expected
  // to report opInvalidOp instead of opOK.
  test = APFloat::getSNaN(APFloat::IEEEquad(), false);
  expected = APFloat::getQNaN(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opInvalidOp);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(sNaN) = qNaN
  test = APFloat::getSNaN(APFloat::IEEEquad(), false);
  expected = APFloat::getQNaN(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(true), APFloat::opInvalidOp);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(+0) = +getSmallest()
  test = APFloat::getZero(APFloat::IEEEquad(), false);
  expected = APFloat::getSmallest(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+0) = -nextUp(-0) = -getSmallest()
  test = APFloat::getZero(APFloat::IEEEquad(), false);
  expected = APFloat::getSmallest(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-0) = +getSmallest()
  test = APFloat::getZero(APFloat::IEEEquad(), true);
  expected = APFloat::getSmallest(APFloat::IEEEquad(), false);
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-0) = -nextUp(0) = -getSmallest()
  test = APFloat::getZero(APFloat::IEEEquad(), true);
  expected = APFloat::getSmallest(APFloat::IEEEquad(), true);
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // 2. Binade Boundary Tests.

  // 2a. Test denormal <-> normal binade boundaries.
  //     * nextUp(+Largest Denormal) -> +Smallest Normal.
  //     * nextDown(-Largest Denormal) -> -Smallest Normal.
  //     * nextUp(-Smallest Normal) -> -Largest Denormal.
  //     * nextDown(+Smallest Normal) -> +Largest Denormal.

  // nextUp(+Largest Denormal) -> +Smallest Normal.
  test = APFloat(APFloat::IEEEquad(), "0x0.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.0000000000000000000000000000p-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_FALSE(test.isDenormal());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-Largest Denormal) -> -Smallest Normal.
  test = APFloat(APFloat::IEEEquad(),
                 "-0x0.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.0000000000000000000000000000p-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_FALSE(test.isDenormal());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-Smallest Normal) -> -LargestDenormal.
  test = APFloat(APFloat::IEEEquad(),
                 "-0x1.0000000000000000000000000000p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x0.ffffffffffffffffffffffffffffp-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+Smallest Normal) -> +Largest Denormal.
  test = APFloat(APFloat::IEEEquad(),
                 "+0x1.0000000000000000000000000000p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "+0x0.ffffffffffffffffffffffffffffp-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // 2b. Test normal <-> normal binade boundaries.
  //     * nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1.
  //     * nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1.
  //     * nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary.
  //     * nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary.

  // nextUp(-Normal Binade Boundary) -> -Normal Binade Boundary + 1.
  test = APFloat(APFloat::IEEEquad(), "-0x1p+1");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.ffffffffffffffffffffffffffffp+0");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+Normal Binade Boundary) -> +Normal Binade Boundary - 1.
  test = APFloat(APFloat::IEEEquad(), "0x1p+1");
  expected = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp+0");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(+Normal Binade Boundary - 1) -> +Normal Binade Boundary.
  test = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp+0");
  expected = APFloat(APFloat::IEEEquad(), "0x1p+1");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-Normal Binade Boundary + 1) -> -Normal Binade Boundary.
  test = APFloat(APFloat::IEEEquad(), "-0x1.ffffffffffffffffffffffffffffp+0");
  expected = APFloat(APFloat::IEEEquad(), "-0x1p+1");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // 2c. Test using next at binade boundaries with a direction away from the
  // binade boundary. Away from denormal <-> normal boundaries.

  // This is to make sure that even though we are at a binade boundary, since
  // we are rounding away, we do not trigger the binade boundary code. Thus we
  // test:
  //   * nextUp(-Largest Denormal) -> -Largest Denormal + inc.
  //   * nextDown(+Largest Denormal) -> +Largest Denormal - inc.
  //   * nextUp(+Smallest Normal) -> +Smallest Normal + inc.
  //   * nextDown(-Smallest Normal) -> -Smallest Normal - inc.

  // nextUp(-Largest Denormal) -> -Largest Denormal + inc.
  test = APFloat(APFloat::IEEEquad(), "-0x0.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x0.fffffffffffffffffffffffffffep-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+Largest Denormal) -> +Largest Denormal - inc.
  test = APFloat(APFloat::IEEEquad(), "0x0.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x0.fffffffffffffffffffffffffffep-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(+Smallest Normal) -> +Smallest Normal + inc.
  test = APFloat(APFloat::IEEEquad(), "0x1.0000000000000000000000000000p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.0000000000000000000000000001p-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-Smallest Normal) -> -Smallest Normal - inc.
  test = APFloat(APFloat::IEEEquad(), "-0x1.0000000000000000000000000000p-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.0000000000000000000000000001p-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // 2d. Test values which cause our exponent to go to min exponent. This
  //     is to ensure that guards in the code to check for min exponent
  //     trigger properly.
  //     * nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382
  //     * nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) ->
  //         -0x1p-16381
  //     * nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381
  //     * nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382

  // nextUp(-0x1p-16381) -> -0x1.ffffffffffffffffffffffffffffp-16382
  test = APFloat(APFloat::IEEEquad(), "-0x1p-16381");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.ffffffffffffffffffffffffffffp-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-0x1.ffffffffffffffffffffffffffffp-16382) ->
  //         -0x1p-16381
  test = APFloat(APFloat::IEEEquad(), "-0x1.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(), "-0x1p-16381");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(0x1.ffffffffffffffffffffffffffffp-16382) -> 0x1p-16381
  test = APFloat(APFloat::IEEEquad(), "0x1.ffffffffffffffffffffffffffffp-16382");
  expected = APFloat(APFloat::IEEEquad(), "0x1p-16381");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(0x1p-16381) -> 0x1.ffffffffffffffffffffffffffffp-16382
  test = APFloat(APFloat::IEEEquad(), "0x1p-16381");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.ffffffffffffffffffffffffffffp-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // 3. Now we test both denormal/normal computation which will not cause us
  // to go across binade boundaries. Specifically we test:
  //   * nextUp(+Denormal) -> +Denormal.
  //   * nextDown(+Denormal) -> +Denormal.
  //   * nextUp(-Denormal) -> -Denormal.
  //   * nextDown(-Denormal) -> -Denormal.
  //   * nextUp(+Normal) -> +Normal.
  //   * nextDown(+Normal) -> +Normal.
  //   * nextUp(-Normal) -> -Normal.
  //   * nextDown(-Normal) -> -Normal.

  // nextUp(+Denormal) -> +Denormal.
  test = APFloat(APFloat::IEEEquad(),
                 "0x0.ffffffffffffffffffffffff000cp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x0.ffffffffffffffffffffffff000dp-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+Denormal) -> +Denormal.
  test = APFloat(APFloat::IEEEquad(),
                 "0x0.ffffffffffffffffffffffff000cp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x0.ffffffffffffffffffffffff000bp-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-Denormal) -> -Denormal.
  test = APFloat(APFloat::IEEEquad(),
                 "-0x0.ffffffffffffffffffffffff000cp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x0.ffffffffffffffffffffffff000bp-16382");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-Denormal) -> -Denormal
  test = APFloat(APFloat::IEEEquad(),
                 "-0x0.ffffffffffffffffffffffff000cp-16382");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x0.ffffffffffffffffffffffff000dp-16382");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(+Normal) -> +Normal.
  test = APFloat(APFloat::IEEEquad(),
                 "0x1.ffffffffffffffffffffffff000cp-16000");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.ffffffffffffffffffffffff000dp-16000");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(+Normal) -> +Normal.
  test = APFloat(APFloat::IEEEquad(),
                 "0x1.ffffffffffffffffffffffff000cp-16000");
  expected = APFloat(APFloat::IEEEquad(),
                     "0x1.ffffffffffffffffffffffff000bp-16000");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(!test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextUp(-Normal) -> -Normal.
  test = APFloat(APFloat::IEEEquad(),
                 "-0x1.ffffffffffffffffffffffff000cp-16000");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.ffffffffffffffffffffffff000bp-16000");
  EXPECT_EQ(test.next(false), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));

  // nextDown(-Normal) -> -Normal.
  test = APFloat(APFloat::IEEEquad(),
                 "-0x1.ffffffffffffffffffffffff000cp-16000");
  expected = APFloat(APFloat::IEEEquad(),
                     "-0x1.ffffffffffffffffffffffff000dp-16000");
  EXPECT_EQ(test.next(true), APFloat::opOK);
  EXPECT_TRUE(!test.isDenormal());
  EXPECT_TRUE(test.isNegative());
  EXPECT_TRUE(test.bitwiseIsEqual(expected));
}
479 TEST(APFloatTest, FMA) {
480 APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven;
483 APFloat f1(14.5f);
484 APFloat f2(-14.5f);
485 APFloat f3(225.0f);
486 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
487 EXPECT_EQ(14.75f, f1.convertToFloat());
491 APFloat Val2(2.0f);
492 APFloat f1((float)1.17549435e-38F);
493 APFloat f2((float)1.17549435e-38F);
494 f1.divide(Val2, rdmd);
495 f2.divide(Val2, rdmd);
496 APFloat f3(12.0f);
497 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
498 EXPECT_EQ(12.0f, f1.convertToFloat());
501 // Test for correct zero sign when answer is exactly zero.
502 // fma(1.0, -1.0, 1.0) -> +ve 0.
504 APFloat f1(1.0);
505 APFloat f2(-1.0);
506 APFloat f3(1.0);
507 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
508 EXPECT_TRUE(!f1.isNegative() && f1.isZero());
511 // Test for correct zero sign when answer is exactly zero and rounding towards
512 // negative.
513 // fma(1.0, -1.0, 1.0) -> +ve 0.
515 APFloat f1(1.0);
516 APFloat f2(-1.0);
517 APFloat f3(1.0);
518 f1.fusedMultiplyAdd(f2, f3, APFloat::rmTowardNegative);
519 EXPECT_TRUE(f1.isNegative() && f1.isZero());
522 // Test for correct (in this case -ve) sign when adding like signed zeros.
523 // Test fma(0.0, -0.0, -0.0) -> -ve 0.
525 APFloat f1(0.0);
526 APFloat f2(-0.0);
527 APFloat f3(-0.0);
528 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
529 EXPECT_TRUE(f1.isNegative() && f1.isZero());
532 // Test -ve sign preservation when small negative results underflow.
534 APFloat f1(APFloat::IEEEdouble(), "-0x1p-1074");
535 APFloat f2(APFloat::IEEEdouble(), "+0x1p-1074");
536 APFloat f3(0.0);
537 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
538 EXPECT_TRUE(f1.isNegative() && f1.isZero());
541 // Test x87 extended precision case from http://llvm.org/PR20728.
543 APFloat M1(APFloat::x87DoubleExtended(), 1);
544 APFloat M2(APFloat::x87DoubleExtended(), 1);
545 APFloat A(APFloat::x87DoubleExtended(), 3);
547 bool losesInfo = false;
548 M1.fusedMultiplyAdd(M1, A, APFloat::rmNearestTiesToEven);
549 M1.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
550 EXPECT_FALSE(losesInfo);
551 EXPECT_EQ(4.0f, M1.convertToFloat());
554 // Regression test that failed an assertion.
556 APFloat f1(-8.85242279E-41f);
557 APFloat f2(2.0f);
558 APFloat f3(8.85242279E-41f);
559 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
560 EXPECT_EQ(-8.85242279E-41f, f1.convertToFloat());
563 // Test using only a single instance of APFloat.
565 APFloat F(1.5);
567 F.fusedMultiplyAdd(F, F, APFloat::rmNearestTiesToEven);
568 EXPECT_EQ(3.75, F.convertToDouble());
572 TEST(APFloatTest, MinNum) {
573 APFloat f1(1.0);
574 APFloat f2(2.0);
575 APFloat nan = APFloat::getNaN(APFloat::IEEEdouble());
577 EXPECT_EQ(1.0, minnum(f1, f2).convertToDouble());
578 EXPECT_EQ(1.0, minnum(f2, f1).convertToDouble());
579 EXPECT_EQ(1.0, minnum(f1, nan).convertToDouble());
580 EXPECT_EQ(1.0, minnum(nan, f1).convertToDouble());
582 APFloat zp(0.0);
583 APFloat zn(-0.0);
584 EXPECT_EQ(-0.0, minnum(zp, zn).convertToDouble());
585 EXPECT_EQ(-0.0, minnum(zn, zp).convertToDouble());
588 TEST(APFloatTest, MaxNum) {
589 APFloat f1(1.0);
590 APFloat f2(2.0);
591 APFloat nan = APFloat::getNaN(APFloat::IEEEdouble());
593 EXPECT_EQ(2.0, maxnum(f1, f2).convertToDouble());
594 EXPECT_EQ(2.0, maxnum(f2, f1).convertToDouble());
595 EXPECT_EQ(1.0, maxnum(f1, nan).convertToDouble());
596 EXPECT_EQ(1.0, maxnum(nan, f1).convertToDouble());
598 APFloat zp(0.0);
599 APFloat zn(-0.0);
600 EXPECT_EQ(0.0, maxnum(zp, zn).convertToDouble());
601 EXPECT_EQ(0.0, maxnum(zn, zp).convertToDouble());
604 TEST(APFloatTest, Minimum) {
605 APFloat f1(1.0);
606 APFloat f2(2.0);
607 APFloat zp(0.0);
608 APFloat zn(-0.0);
609 APFloat nan = APFloat::getNaN(APFloat::IEEEdouble());
610 APFloat snan = APFloat::getSNaN(APFloat::IEEEdouble());
612 EXPECT_EQ(1.0, minimum(f1, f2).convertToDouble());
613 EXPECT_EQ(1.0, minimum(f2, f1).convertToDouble());
614 EXPECT_EQ(-0.0, minimum(zp, zn).convertToDouble());
615 EXPECT_EQ(-0.0, minimum(zn, zp).convertToDouble());
616 EXPECT_TRUE(std::isnan(minimum(f1, nan).convertToDouble()));
617 EXPECT_TRUE(std::isnan(minimum(nan, f1).convertToDouble()));
618 EXPECT_TRUE(maximum(snan, f1).isNaN());
619 EXPECT_TRUE(maximum(f1, snan).isNaN());
620 EXPECT_FALSE(maximum(snan, f1).isSignaling());
621 EXPECT_FALSE(maximum(f1, snan).isSignaling());
624 TEST(APFloatTest, Maximum) {
625 APFloat f1(1.0);
626 APFloat f2(2.0);
627 APFloat zp(0.0);
628 APFloat zn(-0.0);
629 APFloat nan = APFloat::getNaN(APFloat::IEEEdouble());
630 APFloat snan = APFloat::getSNaN(APFloat::IEEEdouble());
632 EXPECT_EQ(2.0, maximum(f1, f2).convertToDouble());
633 EXPECT_EQ(2.0, maximum(f2, f1).convertToDouble());
634 EXPECT_EQ(0.0, maximum(zp, zn).convertToDouble());
635 EXPECT_EQ(0.0, maximum(zn, zp).convertToDouble());
636 EXPECT_TRUE(std::isnan(maximum(f1, nan).convertToDouble()));
637 EXPECT_TRUE(std::isnan(maximum(nan, f1).convertToDouble()));
638 EXPECT_TRUE(maximum(snan, f1).isNaN());
639 EXPECT_TRUE(maximum(f1, snan).isNaN());
640 EXPECT_FALSE(maximum(snan, f1).isSignaling());
641 EXPECT_FALSE(maximum(f1, snan).isSignaling());
644 TEST(APFloatTest, MinimumNumber) {
645 APFloat f1(1.0);
646 APFloat f2(2.0);
647 APFloat zp(0.0);
648 APFloat zn(-0.0);
649 APInt intPayload_89ab(64, 0x89ab);
650 APInt intPayload_cdef(64, 0xcdef);
651 APFloat nan_0123[2] = {APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123),
652 APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123)};
653 APFloat mnan_4567[2] = {APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567),
654 APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567)};
655 APFloat nan_89ab[2] = {
656 APFloat::getSNaN(APFloat::IEEEdouble(), false, &intPayload_89ab),
657 APFloat::getNaN(APFloat::IEEEdouble(), false, 0x89ab)};
658 APFloat mnan_cdef[2] = {
659 APFloat::getSNaN(APFloat::IEEEdouble(), true, &intPayload_cdef),
660 APFloat::getNaN(APFloat::IEEEdouble(), true, 0xcdef)};
662 EXPECT_TRUE(f1.bitwiseIsEqual(minimumnum(f1, f2)));
663 EXPECT_TRUE(f1.bitwiseIsEqual(minimumnum(f2, f1)));
664 EXPECT_TRUE(zn.bitwiseIsEqual(minimumnum(zp, zn)));
665 EXPECT_TRUE(zn.bitwiseIsEqual(minimumnum(zn, zp)));
667 EXPECT_TRUE(minimumnum(zn, zp).isNegative());
668 EXPECT_TRUE(minimumnum(zp, zn).isNegative());
669 EXPECT_TRUE(minimumnum(zn, zn).isNegative());
670 EXPECT_FALSE(minimumnum(zp, zp).isNegative());
672 for (APFloat n : {nan_0123[0], mnan_4567[0], nan_89ab[0], mnan_cdef[0]})
673 for (APFloat f : {f1, f2, zn, zp}) {
674 APFloat res = minimumnum(f, n);
675 EXPECT_FALSE(res.isNaN());
676 EXPECT_TRUE(res.bitwiseIsEqual(f));
677 res = minimumnum(n, f);
678 EXPECT_FALSE(res.isNaN());
679 EXPECT_TRUE(res.bitwiseIsEqual(f));
682 // When NaN vs NaN, we should keep payload/sign of either one.
683 for (auto n1 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef})
684 for (auto n2 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) {
685 APFloat res = minimumnum(n1[0], n2[0]);
686 EXPECT_TRUE(res.bitwiseIsEqual(n1[1]) || res.bitwiseIsEqual(n2[1]));
687 EXPECT_FALSE(res.isSignaling());
691 TEST(APFloatTest, MaximumNumber) {
692 APFloat f1(1.0);
693 APFloat f2(2.0);
694 APFloat zp(0.0);
695 APFloat zn(-0.0);
696 APInt intPayload_89ab(64, 0x89ab);
697 APInt intPayload_cdef(64, 0xcdef);
698 APFloat nan_0123[2] = {APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123),
699 APFloat::getNaN(APFloat::IEEEdouble(), false, 0x0123)};
700 APFloat mnan_4567[2] = {APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567),
701 APFloat::getNaN(APFloat::IEEEdouble(), true, 0x4567)};
702 APFloat nan_89ab[2] = {
703 APFloat::getSNaN(APFloat::IEEEdouble(), false, &intPayload_89ab),
704 APFloat::getNaN(APFloat::IEEEdouble(), false, 0x89ab)};
705 APFloat mnan_cdef[2] = {
706 APFloat::getSNaN(APFloat::IEEEdouble(), true, &intPayload_cdef),
707 APFloat::getNaN(APFloat::IEEEdouble(), true, 0xcdef)};
709 EXPECT_TRUE(f2.bitwiseIsEqual(maximumnum(f1, f2)));
710 EXPECT_TRUE(f2.bitwiseIsEqual(maximumnum(f2, f1)));
711 EXPECT_TRUE(zp.bitwiseIsEqual(maximumnum(zp, zn)));
712 EXPECT_TRUE(zp.bitwiseIsEqual(maximumnum(zn, zp)));
714 EXPECT_FALSE(maximumnum(zn, zp).isNegative());
715 EXPECT_FALSE(maximumnum(zp, zn).isNegative());
716 EXPECT_TRUE(maximumnum(zn, zn).isNegative());
717 EXPECT_FALSE(maximumnum(zp, zp).isNegative());
719 for (APFloat n : {nan_0123[0], mnan_4567[0], nan_89ab[0], mnan_cdef[0]})
720 for (APFloat f : {f1, f2, zn, zp}) {
721 APFloat res = maximumnum(f, n);
722 EXPECT_FALSE(res.isNaN());
723 EXPECT_TRUE(res.bitwiseIsEqual(f));
724 res = maximumnum(n, f);
725 EXPECT_FALSE(res.isNaN());
726 EXPECT_TRUE(res.bitwiseIsEqual(f));
729 // When NaN vs NaN, we should keep payload/sign of either one.
730 for (auto n1 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef})
731 for (auto n2 : {nan_0123, mnan_4567, nan_89ab, mnan_cdef}) {
732 APFloat res = maximumnum(n1[0], n2[0]);
733 EXPECT_TRUE(res.bitwiseIsEqual(n1[1]) || res.bitwiseIsEqual(n2[1]));
734 EXPECT_FALSE(res.isSignaling());
738 TEST(APFloatTest, Denormal) {
739 APFloat::roundingMode rdmd = APFloat::rmNearestTiesToEven;
741 // Test single precision
743 const char *MinNormalStr = "1.17549435082228750797e-38";
744 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), MinNormalStr).isDenormal());
745 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), 0).isDenormal());
747 APFloat Val2(APFloat::IEEEsingle(), 2);
748 APFloat T(APFloat::IEEEsingle(), MinNormalStr);
749 T.divide(Val2, rdmd);
750 EXPECT_TRUE(T.isDenormal());
751 EXPECT_EQ(fcPosSubnormal, T.classify());
754 const char *NegMinNormalStr = "-1.17549435082228750797e-38";
755 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), NegMinNormalStr).isDenormal());
756 APFloat NegT(APFloat::IEEEsingle(), NegMinNormalStr);
757 NegT.divide(Val2, rdmd);
758 EXPECT_TRUE(NegT.isDenormal());
759 EXPECT_EQ(fcNegSubnormal, NegT.classify());
762 // Test double precision
764 const char *MinNormalStr = "2.22507385850720138309e-308";
765 EXPECT_FALSE(APFloat(APFloat::IEEEdouble(), MinNormalStr).isDenormal());
766 EXPECT_FALSE(APFloat(APFloat::IEEEdouble(), 0).isDenormal());
768 APFloat Val2(APFloat::IEEEdouble(), 2);
769 APFloat T(APFloat::IEEEdouble(), MinNormalStr);
770 T.divide(Val2, rdmd);
771 EXPECT_TRUE(T.isDenormal());
772 EXPECT_EQ(fcPosSubnormal, T.classify());
775 // Test Intel double-ext
777 const char *MinNormalStr = "3.36210314311209350626e-4932";
778 EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended(), MinNormalStr).isDenormal());
779 EXPECT_FALSE(APFloat(APFloat::x87DoubleExtended(), 0).isDenormal());
781 APFloat Val2(APFloat::x87DoubleExtended(), 2);
782 APFloat T(APFloat::x87DoubleExtended(), MinNormalStr);
783 T.divide(Val2, rdmd);
784 EXPECT_TRUE(T.isDenormal());
785 EXPECT_EQ(fcPosSubnormal, T.classify());
788 // Test quadruple precision
790 const char *MinNormalStr = "3.36210314311209350626267781732175260e-4932";
791 EXPECT_FALSE(APFloat(APFloat::IEEEquad(), MinNormalStr).isDenormal());
792 EXPECT_FALSE(APFloat(APFloat::IEEEquad(), 0).isDenormal());
794 APFloat Val2(APFloat::IEEEquad(), 2);
795 APFloat T(APFloat::IEEEquad(), MinNormalStr);
796 T.divide(Val2, rdmd);
797 EXPECT_TRUE(T.isDenormal());
798 EXPECT_EQ(fcPosSubnormal, T.classify());
801 // Test TF32
803 const char *MinNormalStr = "1.17549435082228750797e-38";
804 EXPECT_FALSE(APFloat(APFloat::FloatTF32(), MinNormalStr).isDenormal());
805 EXPECT_FALSE(APFloat(APFloat::FloatTF32(), 0).isDenormal());
807 APFloat Val2(APFloat::FloatTF32(), 2);
808 APFloat T(APFloat::FloatTF32(), MinNormalStr);
809 T.divide(Val2, rdmd);
810 EXPECT_TRUE(T.isDenormal());
811 EXPECT_EQ(fcPosSubnormal, T.classify());
813 const char *NegMinNormalStr = "-1.17549435082228750797e-38";
814 EXPECT_FALSE(APFloat(APFloat::FloatTF32(), NegMinNormalStr).isDenormal());
815 APFloat NegT(APFloat::FloatTF32(), NegMinNormalStr);
816 NegT.divide(Val2, rdmd);
817 EXPECT_TRUE(NegT.isDenormal());
818 EXPECT_EQ(fcNegSubnormal, NegT.classify());
822 TEST(APFloatTest, IsSmallestNormalized) {
823 for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) {
824 const fltSemantics &Semantics =
825 APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I));
827 // For Float8E8M0FNU format, the below cases are tested
828 // through Float8E8M0FNUSmallest and Float8E8M0FNUNext tests.
829 if (I == APFloat::S_Float8E8M0FNU)
830 continue;
832 EXPECT_FALSE(APFloat::getZero(Semantics, false).isSmallestNormalized());
833 EXPECT_FALSE(APFloat::getZero(Semantics, true).isSmallestNormalized());
835 if (APFloat::semanticsHasNaN(Semantics)) {
836 // Types that do not support Inf will return NaN when asked for Inf.
837 // (But only if they support NaN.)
838 EXPECT_FALSE(APFloat::getInf(Semantics, false).isSmallestNormalized());
839 EXPECT_FALSE(APFloat::getInf(Semantics, true).isSmallestNormalized());
841 EXPECT_FALSE(APFloat::getQNaN(Semantics).isSmallestNormalized());
842 EXPECT_FALSE(APFloat::getSNaN(Semantics).isSmallestNormalized());
845 EXPECT_FALSE(APFloat::getLargest(Semantics).isSmallestNormalized());
846 EXPECT_FALSE(APFloat::getLargest(Semantics, true).isSmallestNormalized());
848 EXPECT_FALSE(APFloat::getSmallest(Semantics).isSmallestNormalized());
849 EXPECT_FALSE(APFloat::getSmallest(Semantics, true).isSmallestNormalized());
851 EXPECT_FALSE(APFloat::getAllOnesValue(Semantics).isSmallestNormalized());
853 APFloat PosSmallestNormalized =
854 APFloat::getSmallestNormalized(Semantics, false);
855 APFloat NegSmallestNormalized =
856 APFloat::getSmallestNormalized(Semantics, true);
857 EXPECT_TRUE(PosSmallestNormalized.isSmallestNormalized());
858 EXPECT_TRUE(NegSmallestNormalized.isSmallestNormalized());
859 EXPECT_EQ(fcPosNormal, PosSmallestNormalized.classify());
860 EXPECT_EQ(fcNegNormal, NegSmallestNormalized.classify());
862 for (APFloat *Val : {&PosSmallestNormalized, &NegSmallestNormalized}) {
863 bool OldSign = Val->isNegative();
865 // Step down, make sure it's still not smallest normalized.
866 EXPECT_EQ(APFloat::opOK, Val->next(false));
867 EXPECT_EQ(OldSign, Val->isNegative());
868 EXPECT_FALSE(Val->isSmallestNormalized());
869 EXPECT_EQ(OldSign, Val->isNegative());
871 // Step back up should restore it to being smallest normalized.
872 EXPECT_EQ(APFloat::opOK, Val->next(true));
873 EXPECT_TRUE(Val->isSmallestNormalized());
874 EXPECT_EQ(OldSign, Val->isNegative());
876 // Step beyond should no longer smallest normalized.
877 EXPECT_EQ(APFloat::opOK, Val->next(true));
878 EXPECT_FALSE(Val->isSmallestNormalized());
879 EXPECT_EQ(OldSign, Val->isNegative());
884 TEST(APFloatTest, Zero) {
885 EXPECT_EQ(0.0f, APFloat(0.0f).convertToFloat());
886 EXPECT_EQ(-0.0f, APFloat(-0.0f).convertToFloat());
887 EXPECT_TRUE(APFloat(-0.0f).isNegative());
889 EXPECT_EQ(0.0, APFloat(0.0).convertToDouble());
890 EXPECT_EQ(-0.0, APFloat(-0.0).convertToDouble());
891 EXPECT_TRUE(APFloat(-0.0).isNegative());
893 EXPECT_EQ(fcPosZero, APFloat(0.0).classify());
894 EXPECT_EQ(fcNegZero, APFloat(-0.0).classify());
897 TEST(APFloatTest, getOne) {
898 EXPECT_EQ(APFloat::getOne(APFloat::IEEEsingle(), false).convertToFloat(),
899 1.0f);
900 EXPECT_EQ(APFloat::getOne(APFloat::IEEEsingle(), true).convertToFloat(),
901 -1.0f);
904 TEST(APFloatTest, DecimalStringsWithoutNullTerminators) {
905 // Make sure that we can parse strings without null terminators.
906 // rdar://14323230.
907 EXPECT_EQ(convertToDoubleFromString(StringRef("0.00", 3)), 0.0);
908 EXPECT_EQ(convertToDoubleFromString(StringRef("0.01", 3)), 0.0);
909 EXPECT_EQ(convertToDoubleFromString(StringRef("0.09", 3)), 0.0);
910 EXPECT_EQ(convertToDoubleFromString(StringRef("0.095", 4)), 0.09);
911 EXPECT_EQ(convertToDoubleFromString(StringRef("0.00e+3", 7)), 0.00);
912 EXPECT_EQ(convertToDoubleFromString(StringRef("0e+3", 4)), 0.00);
915 TEST(APFloatTest, fromZeroDecimalString) {
916 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0").convertToDouble());
917 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0").convertToDouble());
918 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0").convertToDouble());
920 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.").convertToDouble());
921 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.").convertToDouble());
922 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.").convertToDouble());
924 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0").convertToDouble());
925 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0").convertToDouble());
926 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0").convertToDouble());
928 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0").convertToDouble());
929 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0").convertToDouble());
930 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0").convertToDouble());
932 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "00000.").convertToDouble());
933 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+00000.").convertToDouble());
934 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-00000.").convertToDouble());
936 EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), ".00000").convertToDouble());
937 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.00000").convertToDouble());
938 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.00000").convertToDouble());
940 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0000.00000").convertToDouble());
941 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0000.00000").convertToDouble());
942 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0000.00000").convertToDouble());
945 TEST(APFloatTest, fromZeroDecimalSingleExponentString) {
946 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e1").convertToDouble());
947 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e1").convertToDouble());
948 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e1").convertToDouble());
950 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e+1").convertToDouble());
951 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e+1").convertToDouble());
952 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e+1").convertToDouble());
954 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e-1").convertToDouble());
955 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e-1").convertToDouble());
956 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e-1").convertToDouble());
959 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e1").convertToDouble());
960 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e1").convertToDouble());
961 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e1").convertToDouble());
963 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e+1").convertToDouble());
964 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e+1").convertToDouble());
965 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e+1").convertToDouble());
967 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.e-1").convertToDouble());
968 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.e-1").convertToDouble());
969 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.e-1").convertToDouble());
971 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e1").convertToDouble());
972 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e1").convertToDouble());
973 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e1").convertToDouble());
975 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e+1").convertToDouble());
976 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e+1").convertToDouble());
977 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e+1").convertToDouble());
979 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), ".0e-1").convertToDouble());
980 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+.0e-1").convertToDouble());
981 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-.0e-1").convertToDouble());
984 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e1").convertToDouble());
985 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e1").convertToDouble());
986 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e1").convertToDouble());
988 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e+1").convertToDouble());
989 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e+1").convertToDouble());
990 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e+1").convertToDouble());
992 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0.0e-1").convertToDouble());
993 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0.0e-1").convertToDouble());
994 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0.0e-1").convertToDouble());
997 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "000.0000e1").convertToDouble());
998 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+000.0000e+1").convertToDouble());
999 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-000.0000e+1").convertToDouble());
1002 TEST(APFloatTest, fromZeroDecimalLargeExponentString) {
1003 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e1234").convertToDouble());
1004 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e1234").convertToDouble());
1005 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e1234").convertToDouble());
1007 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e+1234").convertToDouble());
1008 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e+1234").convertToDouble());
1009 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e+1234").convertToDouble());
1011 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0e-1234").convertToDouble());
1012 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0e-1234").convertToDouble());
1013 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0e-1234").convertToDouble());
1015 EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), "000.0000e1234").convertToDouble());
1016 EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), "000.0000e-1234").convertToDouble());
1018 EXPECT_EQ(0.0, APFloat(APFloat::IEEEdouble(), StringRef("0e1234" "\0" "2", 6)).convertToDouble());
1021 TEST(APFloatTest, fromZeroHexadecimalString) {
1022 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p1").convertToDouble());
1023 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p1").convertToDouble());
1024 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p1").convertToDouble());
1026 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p+1").convertToDouble());
1027 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p+1").convertToDouble());
1028 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p+1").convertToDouble());
1030 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p-1").convertToDouble());
1031 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0p-1").convertToDouble());
1032 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p-1").convertToDouble());
1035 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1").convertToDouble());
1036 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p1").convertToDouble());
1037 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p1").convertToDouble());
1039 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p+1").convertToDouble());
1040 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p+1").convertToDouble());
1041 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p+1").convertToDouble());
1043 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p-1").convertToDouble());
1044 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.p-1").convertToDouble());
1045 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.p-1").convertToDouble());
1048 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p1").convertToDouble());
1049 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p1").convertToDouble());
1050 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p1").convertToDouble());
1052 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p+1").convertToDouble());
1053 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p+1").convertToDouble());
1054 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p+1").convertToDouble());
1056 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.0p-1").convertToDouble());
1057 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x.0p-1").convertToDouble());
1058 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x.0p-1").convertToDouble());
1061 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p1").convertToDouble());
1062 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p1").convertToDouble());
1063 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p1").convertToDouble());
1065 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p+1").convertToDouble());
1066 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p+1").convertToDouble());
1067 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p+1").convertToDouble());
1069 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.0p-1").convertToDouble());
1070 EXPECT_EQ(+0.0, APFloat(APFloat::IEEEdouble(), "+0x0.0p-1").convertToDouble());
1071 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0.0p-1").convertToDouble());
1074 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x00000.p1").convertToDouble());
1075 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0000.00000p1").convertToDouble());
1076 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.00000p1").convertToDouble());
1077 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1").convertToDouble());
1078 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0p1234").convertToDouble());
1079 EXPECT_EQ(-0.0, APFloat(APFloat::IEEEdouble(), "-0x0p1234").convertToDouble());
1080 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x00000.p1234").convertToDouble());
1081 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0000.00000p1234").convertToDouble());
1082 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x.00000p1234").convertToDouble());
1083 EXPECT_EQ( 0.0, APFloat(APFloat::IEEEdouble(), "0x0.p1234").convertToDouble());
1086 TEST(APFloatTest, fromDecimalString) {
1087 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1").convertToDouble());
1088 EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble(), "2.").convertToDouble());
1089 EXPECT_EQ(0.5, APFloat(APFloat::IEEEdouble(), ".5").convertToDouble());
1090 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0").convertToDouble());
1091 EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble(), "-2").convertToDouble());
1092 EXPECT_EQ(-4.0, APFloat(APFloat::IEEEdouble(), "-4.").convertToDouble());
1093 EXPECT_EQ(-0.5, APFloat(APFloat::IEEEdouble(), "-.5").convertToDouble());
1094 EXPECT_EQ(-1.5, APFloat(APFloat::IEEEdouble(), "-1.5").convertToDouble());
1095 EXPECT_EQ(1.25e12, APFloat(APFloat::IEEEdouble(), "1.25e12").convertToDouble());
1096 EXPECT_EQ(1.25e+12, APFloat(APFloat::IEEEdouble(), "1.25e+12").convertToDouble());
1097 EXPECT_EQ(1.25e-12, APFloat(APFloat::IEEEdouble(), "1.25e-12").convertToDouble());
1098 EXPECT_EQ(1024.0, APFloat(APFloat::IEEEdouble(), "1024.").convertToDouble());
1099 EXPECT_EQ(1024.05, APFloat(APFloat::IEEEdouble(), "1024.05000").convertToDouble());
1100 EXPECT_EQ(0.05, APFloat(APFloat::IEEEdouble(), ".05000").convertToDouble());
1101 EXPECT_EQ(2.0, APFloat(APFloat::IEEEdouble(), "2.").convertToDouble());
1102 EXPECT_EQ(2.0e2, APFloat(APFloat::IEEEdouble(), "2.e2").convertToDouble());
1103 EXPECT_EQ(2.0e+2, APFloat(APFloat::IEEEdouble(), "2.e+2").convertToDouble());
1104 EXPECT_EQ(2.0e-2, APFloat(APFloat::IEEEdouble(), "2.e-2").convertToDouble());
1105 EXPECT_EQ(2.05e2, APFloat(APFloat::IEEEdouble(), "002.05000e2").convertToDouble());
1106 EXPECT_EQ(2.05e+2, APFloat(APFloat::IEEEdouble(), "002.05000e+2").convertToDouble());
1107 EXPECT_EQ(2.05e-2, APFloat(APFloat::IEEEdouble(), "002.05000e-2").convertToDouble());
1108 EXPECT_EQ(2.05e12, APFloat(APFloat::IEEEdouble(), "002.05000e12").convertToDouble());
1109 EXPECT_EQ(2.05e+12, APFloat(APFloat::IEEEdouble(), "002.05000e+12").convertToDouble());
1110 EXPECT_EQ(2.05e-12, APFloat(APFloat::IEEEdouble(), "002.05000e-12").convertToDouble());
1112 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e").convertToDouble());
1113 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "+1e").convertToDouble());
1114 EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-1e").convertToDouble());
1116 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.e").convertToDouble());
1117 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "+1.e").convertToDouble());
1118 EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-1.e").convertToDouble());
1120 EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e").convertToDouble());
1121 EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), "+.1e").convertToDouble());
1122 EXPECT_EQ(-0.1, APFloat(APFloat::IEEEdouble(), "-.1e").convertToDouble());
1124 EXPECT_EQ(1.1, APFloat(APFloat::IEEEdouble(), "1.1e").convertToDouble());
1125 EXPECT_EQ(1.1, APFloat(APFloat::IEEEdouble(), "+1.1e").convertToDouble());
1126 EXPECT_EQ(-1.1, APFloat(APFloat::IEEEdouble(), "-1.1e").convertToDouble());
1128 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e+").convertToDouble());
1129 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1e-").convertToDouble());
1131 EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e").convertToDouble());
1132 EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e+").convertToDouble());
1133 EXPECT_EQ(0.1, APFloat(APFloat::IEEEdouble(), ".1e-").convertToDouble());
1135 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e").convertToDouble());
1136 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e+").convertToDouble());
1137 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "1.0e-").convertToDouble());
1139 // These are "carefully selected" to overflow the fast log-base
1140 // calculations in APFloat.cpp
1141 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "99e99999").isInfinity());
1142 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-99e99999").isInfinity());
1143 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "1e-99999").isPosZero());
1144 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-1e-99999").isNegZero());
1146 EXPECT_EQ(2.71828, convertToDoubleFromString("2.71828"));
// Checks that convertFromString() on IEEEdouble accepts the NaN and infinity
// spellings generated below (sign prefix, optional 's'/'S' prefix, optional
// payload written as a decimal or hex suffix, bare or in parentheses) and
// that the resulting NaN kind, sign and payload bits match.
1149 TEST(APFloatTest, fromStringSpecials) {
1150 const fltSemantics &Sem = APFloat::IEEEdouble();
1151 const unsigned Precision = 53;
1152 const unsigned PayloadBits = Precision - 2;
// Mask selecting the low PayloadBits bits of the bit pattern.
1153 uint64_t PayloadMask = (uint64_t(1) << PayloadBits) - uint64_t(1);
// Payload values to request; they are masked with PayloadMask further down
// before being used as expected results.
// NOTE(review): the embedded line numbering skips 1156-1157 inside this
// initializer - confirm no entries were lost from this listing.
1155 uint64_t NaNPayloads[] = {
1158 123,
1159 0xDEADBEEF,
1160 uint64_t(-2),
1161 uint64_t(1) << PayloadBits, // overflow bit
1162 uint64_t(1) << (PayloadBits - 1), // signaling bit
1163 uint64_t(1) << (PayloadBits - 2) // highest possible bit
1166 // Convert payload integer to decimal string representation.
1167 std::string NaNPayloadDecStrings[std::size(NaNPayloads)];
1168 for (size_t I = 0; I < std::size(NaNPayloads); ++I)
1169 NaNPayloadDecStrings[I] = utostr(NaNPayloads[I]);
1171 // Convert payload integer to hexadecimal string representation.
1172 std::string NaNPayloadHexStrings[std::size(NaNPayloads)];
1173 for (size_t I = 0; I < std::size(NaNPayloads); ++I)
1174 NaNPayloadHexStrings[I] = "0x" + utohexstr(NaNPayloads[I]);
1176 // Fix payloads to expected result.
// (The strings above were built from the unmasked values, so the parser is
// fed the full number while the expectation keeps only the masked bits.)
1177 for (uint64_t &Payload : NaNPayloads)
1178 Payload &= PayloadMask;
1180 // Signaling NaN must have a non-zero payload. In case a zero payload is
1181 // requested, a default arbitrary payload is set instead. Save this payload
1182 // for testing.
1183 const uint64_t SNaNDefaultPayload =
1184 APFloat::getSNaN(Sem).bitcastToAPInt().getZExtValue() & PayloadMask;
1186 // Negative sign prefix (or none - for positive).
1187 const char Signs[] = {0, '-'};
1189 // "Signaling" prefix (or none - for "Quiet").
1190 const char NaNTypes[] = {0, 's', 'S'};
1192 const StringRef NaNStrings[] = {"nan", "NaN"};
1193 for (StringRef NaNStr : NaNStrings)
1194 for (char TypeChar : NaNTypes) {
1195 bool Signaling = (TypeChar == 's' || TypeChar == 'S');
1197 for (size_t J = 0; J < std::size(NaNPayloads); ++J) {
// Expected payload: the masked request, except that a signaling NaN with a
// zero masked payload is expected to carry SNaNDefaultPayload.
1198 uint64_t Payload = (Signaling && !NaNPayloads[J]) ? SNaNDefaultPayload
1199 : NaNPayloads[J];
1200 std::string &PayloadDec = NaNPayloadDecStrings[J];
1201 std::string &PayloadHex = NaNPayloadHexStrings[J];
1203 for (char SignChar : Signs) {
1204 bool Negative = (SignChar == '-');
// At most five spellings are generated per combination: the bare prefix
// (only when the expected payload is zero) plus four payload forms.
1206 std::string TestStrings[5];
1207 size_t NumTestStrings = 0;
1209 std::string Prefix;
1210 if (SignChar)
1211 Prefix += SignChar;
1212 if (TypeChar)
1213 Prefix += TypeChar;
1214 Prefix += NaNStr;
1216 // Test without any payload.
1217 if (!Payload)
1218 TestStrings[NumTestStrings++] = Prefix;
1220 // Test with the payload as a suffix.
1221 TestStrings[NumTestStrings++] = Prefix + PayloadDec;
1222 TestStrings[NumTestStrings++] = Prefix + PayloadHex;
1224 // Test with the payload inside parentheses.
1225 TestStrings[NumTestStrings++] = Prefix + '(' + PayloadDec + ')';
1226 TestStrings[NumTestStrings++] = Prefix + '(' + PayloadHex + ')';
1228 for (size_t K = 0; K < NumTestStrings; ++K) {
1229 StringRef TestStr = TestStrings[K];
1231 APFloat F(Sem);
// The negated conversion result is used as the error indicator here.
1232 bool HasError = !F.convertFromString(
1233 TestStr, llvm::APFloat::rmNearestTiesToEven);
1234 EXPECT_FALSE(HasError);
1235 EXPECT_TRUE(F.isNaN());
1236 EXPECT_EQ(Signaling, F.isSignaling());
1237 EXPECT_EQ(Negative, F.isNegative());
1238 uint64_t PayloadResult =
1239 F.bitcastToAPInt().getZExtValue() & PayloadMask;
1240 EXPECT_EQ(Payload, PayloadResult);
// Infinity spellings: the sign is taken from a leading '-', and the bits
// selected by PayloadMask must all be zero.
1246 const StringRef InfStrings[] = {"inf", "INFINITY", "+Inf",
1247 "-inf", "-INFINITY", "-Inf"};
1248 for (StringRef InfStr : InfStrings) {
1249 bool Negative = InfStr.front() == '-';
1251 APFloat F(Sem);
1252 bool HasError =
1253 !F.convertFromString(InfStr, llvm::APFloat::rmNearestTiesToEven);
1254 EXPECT_FALSE(HasError);
1255 EXPECT_TRUE(F.isInfinity());
1256 EXPECT_EQ(Negative, F.isNegative());
1257 uint64_t PayloadResult = F.bitcastToAPInt().getZExtValue() & PayloadMask;
1258 EXPECT_EQ(UINT64_C(0), PayloadResult);
1262 TEST(APFloatTest, fromToStringSpecials) {
1263 auto expects = [] (const char *first, const char *second) {
1264 std::string roundtrip = convertToString(convertToDoubleFromString(second), 0, 3);
1265 EXPECT_STREQ(first, roundtrip.c_str());
1267 expects("+Inf", "+Inf");
1268 expects("+Inf", "INFINITY");
1269 expects("+Inf", "inf");
1270 expects("-Inf", "-Inf");
1271 expects("-Inf", "-INFINITY");
1272 expects("-Inf", "-inf");
1273 expects("NaN", "NaN");
1274 expects("NaN", "nan");
1275 expects("NaN", "-NaN");
1276 expects("NaN", "-nan");
1279 TEST(APFloatTest, fromHexadecimalString) {
1280 EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble(), "0x1p0").convertToDouble());
1281 EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble(), "+0x1p0").convertToDouble());
1282 EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-0x1p0").convertToDouble());
1284 EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble(), "0x1p+0").convertToDouble());
1285 EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble(), "+0x1p+0").convertToDouble());
1286 EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-0x1p+0").convertToDouble());
1288 EXPECT_EQ( 1.0, APFloat(APFloat::IEEEdouble(), "0x1p-0").convertToDouble());
1289 EXPECT_EQ(+1.0, APFloat(APFloat::IEEEdouble(), "+0x1p-0").convertToDouble());
1290 EXPECT_EQ(-1.0, APFloat(APFloat::IEEEdouble(), "-0x1p-0").convertToDouble());
1293 EXPECT_EQ( 2.0, APFloat(APFloat::IEEEdouble(), "0x1p1").convertToDouble());
1294 EXPECT_EQ(+2.0, APFloat(APFloat::IEEEdouble(), "+0x1p1").convertToDouble());
1295 EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble(), "-0x1p1").convertToDouble());
1297 EXPECT_EQ( 2.0, APFloat(APFloat::IEEEdouble(), "0x1p+1").convertToDouble());
1298 EXPECT_EQ(+2.0, APFloat(APFloat::IEEEdouble(), "+0x1p+1").convertToDouble());
1299 EXPECT_EQ(-2.0, APFloat(APFloat::IEEEdouble(), "-0x1p+1").convertToDouble());
1301 EXPECT_EQ( 0.5, APFloat(APFloat::IEEEdouble(), "0x1p-1").convertToDouble());
1302 EXPECT_EQ(+0.5, APFloat(APFloat::IEEEdouble(), "+0x1p-1").convertToDouble());
1303 EXPECT_EQ(-0.5, APFloat(APFloat::IEEEdouble(), "-0x1p-1").convertToDouble());
1306 EXPECT_EQ( 3.0, APFloat(APFloat::IEEEdouble(), "0x1.8p1").convertToDouble());
1307 EXPECT_EQ(+3.0, APFloat(APFloat::IEEEdouble(), "+0x1.8p1").convertToDouble());
1308 EXPECT_EQ(-3.0, APFloat(APFloat::IEEEdouble(), "-0x1.8p1").convertToDouble());
1310 EXPECT_EQ( 3.0, APFloat(APFloat::IEEEdouble(), "0x1.8p+1").convertToDouble());
1311 EXPECT_EQ(+3.0, APFloat(APFloat::IEEEdouble(), "+0x1.8p+1").convertToDouble());
1312 EXPECT_EQ(-3.0, APFloat(APFloat::IEEEdouble(), "-0x1.8p+1").convertToDouble());
1314 EXPECT_EQ( 0.75, APFloat(APFloat::IEEEdouble(), "0x1.8p-1").convertToDouble());
1315 EXPECT_EQ(+0.75, APFloat(APFloat::IEEEdouble(), "+0x1.8p-1").convertToDouble());
1316 EXPECT_EQ(-0.75, APFloat(APFloat::IEEEdouble(), "-0x1.8p-1").convertToDouble());
1319 EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble(), "0x1000.000p1").convertToDouble());
1320 EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble(), "+0x1000.000p1").convertToDouble());
1321 EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble(), "-0x1000.000p1").convertToDouble());
1323 EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble(), "0x1000.000p+1").convertToDouble());
1324 EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble(), "+0x1000.000p+1").convertToDouble());
1325 EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble(), "-0x1000.000p+1").convertToDouble());
1327 EXPECT_EQ( 2048.0, APFloat(APFloat::IEEEdouble(), "0x1000.000p-1").convertToDouble());
1328 EXPECT_EQ(+2048.0, APFloat(APFloat::IEEEdouble(), "+0x1000.000p-1").convertToDouble());
1329 EXPECT_EQ(-2048.0, APFloat(APFloat::IEEEdouble(), "-0x1000.000p-1").convertToDouble());
1332 EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble(), "0x1000p1").convertToDouble());
1333 EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble(), "+0x1000p1").convertToDouble());
1334 EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble(), "-0x1000p1").convertToDouble());
1336 EXPECT_EQ( 8192.0, APFloat(APFloat::IEEEdouble(), "0x1000p+1").convertToDouble());
1337 EXPECT_EQ(+8192.0, APFloat(APFloat::IEEEdouble(), "+0x1000p+1").convertToDouble());
1338 EXPECT_EQ(-8192.0, APFloat(APFloat::IEEEdouble(), "-0x1000p+1").convertToDouble());
1340 EXPECT_EQ( 2048.0, APFloat(APFloat::IEEEdouble(), "0x1000p-1").convertToDouble());
1341 EXPECT_EQ(+2048.0, APFloat(APFloat::IEEEdouble(), "+0x1000p-1").convertToDouble());
1342 EXPECT_EQ(-2048.0, APFloat(APFloat::IEEEdouble(), "-0x1000p-1").convertToDouble());
1345 EXPECT_EQ( 16384.0, APFloat(APFloat::IEEEdouble(), "0x10p10").convertToDouble());
1346 EXPECT_EQ(+16384.0, APFloat(APFloat::IEEEdouble(), "+0x10p10").convertToDouble());
1347 EXPECT_EQ(-16384.0, APFloat(APFloat::IEEEdouble(), "-0x10p10").convertToDouble());
1349 EXPECT_EQ( 16384.0, APFloat(APFloat::IEEEdouble(), "0x10p+10").convertToDouble());
1350 EXPECT_EQ(+16384.0, APFloat(APFloat::IEEEdouble(), "+0x10p+10").convertToDouble());
1351 EXPECT_EQ(-16384.0, APFloat(APFloat::IEEEdouble(), "-0x10p+10").convertToDouble());
1353 EXPECT_EQ( 0.015625, APFloat(APFloat::IEEEdouble(), "0x10p-10").convertToDouble());
1354 EXPECT_EQ(+0.015625, APFloat(APFloat::IEEEdouble(), "+0x10p-10").convertToDouble());
1355 EXPECT_EQ(-0.015625, APFloat(APFloat::IEEEdouble(), "-0x10p-10").convertToDouble());
1357 EXPECT_EQ(1.0625, APFloat(APFloat::IEEEdouble(), "0x1.1p0").convertToDouble());
1358 EXPECT_EQ(1.0, APFloat(APFloat::IEEEdouble(), "0x1p0").convertToDouble());
1360 EXPECT_EQ(convertToDoubleFromString("0x1p-150"),
1361 convertToDoubleFromString("+0x800000000000000001.p-221"));
1362 EXPECT_EQ(2251799813685248.5,
1363 convertToDoubleFromString("0x80000000000004000000.010p-28"));
1366 TEST(APFloatTest, toString) {
1367 ASSERT_EQ("10", convertToString(10.0, 6, 3));
1368 ASSERT_EQ("1.0E+1", convertToString(10.0, 6, 0));
1369 ASSERT_EQ("10100", convertToString(1.01E+4, 5, 2));
1370 ASSERT_EQ("1.01E+4", convertToString(1.01E+4, 4, 2));
1371 ASSERT_EQ("1.01E+4", convertToString(1.01E+4, 5, 1));
1372 ASSERT_EQ("0.0101", convertToString(1.01E-2, 5, 2));
1373 ASSERT_EQ("0.0101", convertToString(1.01E-2, 4, 2));
1374 ASSERT_EQ("1.01E-2", convertToString(1.01E-2, 5, 1));
1375 ASSERT_EQ("0.78539816339744828", convertToString(0.78539816339744830961, 0, 3));
1376 ASSERT_EQ("4.9406564584124654E-324", convertToString(4.9406564584124654e-324, 0, 3));
1377 ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1));
1378 ASSERT_EQ("8.7318340000000001E+2", convertToString(873.1834, 0, 0));
1379 ASSERT_EQ("1.7976931348623157E+308", convertToString(1.7976931348623157E+308, 0, 0));
1380 ASSERT_EQ("10", convertToString(10.0, 6, 3, false));
1381 ASSERT_EQ("1.000000e+01", convertToString(10.0, 6, 0, false));
1382 ASSERT_EQ("10100", convertToString(1.01E+4, 5, 2, false));
1383 ASSERT_EQ("1.0100e+04", convertToString(1.01E+4, 4, 2, false));
1384 ASSERT_EQ("1.01000e+04", convertToString(1.01E+4, 5, 1, false));
1385 ASSERT_EQ("0.0101", convertToString(1.01E-2, 5, 2, false));
1386 ASSERT_EQ("0.0101", convertToString(1.01E-2, 4, 2, false));
1387 ASSERT_EQ("1.01000e-02", convertToString(1.01E-2, 5, 1, false));
1388 ASSERT_EQ("0.78539816339744828",
1389 convertToString(0.78539816339744830961, 0, 3, false));
1390 ASSERT_EQ("4.94065645841246540e-324",
1391 convertToString(4.9406564584124654e-324, 0, 3, false));
1392 ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1, false));
1393 ASSERT_EQ("8.73183400000000010e+02", convertToString(873.1834, 0, 0, false));
1394 ASSERT_EQ("1.79769313486231570e+308",
1395 convertToString(1.7976931348623157E+308, 0, 0, false));
1398 SmallString<64> Str;
1399 APFloat UnnormalZero(APFloat::x87DoubleExtended(), APInt(80, {0, 1}));
1400 UnnormalZero.toString(Str);
1401 ASSERT_EQ("NaN", Str);
1405 TEST(APFloatTest, toInteger) {
1406 bool isExact = false;
1407 APSInt result(5, /*isUnsigned=*/true);
1409 EXPECT_EQ(APFloat::opOK,
1410 APFloat(APFloat::IEEEdouble(), "10")
1411 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1412 EXPECT_TRUE(isExact);
1413 EXPECT_EQ(APSInt(APInt(5, 10), true), result);
1415 EXPECT_EQ(APFloat::opInvalidOp,
1416 APFloat(APFloat::IEEEdouble(), "-10")
1417 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1418 EXPECT_FALSE(isExact);
1419 EXPECT_EQ(APSInt::getMinValue(5, true), result);
1421 EXPECT_EQ(APFloat::opInvalidOp,
1422 APFloat(APFloat::IEEEdouble(), "32")
1423 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1424 EXPECT_FALSE(isExact);
1425 EXPECT_EQ(APSInt::getMaxValue(5, true), result);
1427 EXPECT_EQ(APFloat::opInexact,
1428 APFloat(APFloat::IEEEdouble(), "7.9")
1429 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1430 EXPECT_FALSE(isExact);
1431 EXPECT_EQ(APSInt(APInt(5, 7), true), result);
1433 result.setIsUnsigned(false);
1434 EXPECT_EQ(APFloat::opOK,
1435 APFloat(APFloat::IEEEdouble(), "-10")
1436 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1437 EXPECT_TRUE(isExact);
1438 EXPECT_EQ(APSInt(APInt(5, -10, true), false), result);
1440 EXPECT_EQ(APFloat::opInvalidOp,
1441 APFloat(APFloat::IEEEdouble(), "-17")
1442 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1443 EXPECT_FALSE(isExact);
1444 EXPECT_EQ(APSInt::getMinValue(5, false), result);
1446 EXPECT_EQ(APFloat::opInvalidOp,
1447 APFloat(APFloat::IEEEdouble(), "16")
1448 .convertToInteger(result, APFloat::rmTowardZero, &isExact));
1449 EXPECT_FALSE(isExact);
1450 EXPECT_EQ(APSInt::getMaxValue(5, false), result);
1453 static APInt nanbitsFromAPInt(const fltSemantics &Sem, bool SNaN, bool Negative,
1454 uint64_t payload) {
1455 APInt appayload(64, payload);
1456 if (SNaN)
1457 return APFloat::getSNaN(Sem, Negative, &appayload).bitcastToAPInt();
1458 else
1459 return APFloat::getQNaN(Sem, Negative, &appayload).bitcastToAPInt();
1462 TEST(APFloatTest, makeNaN) {
1463 const struct {
1464 uint64_t expected;
1465 const fltSemantics &semantics;
1466 bool SNaN;
1467 bool Negative;
1468 uint64_t payload;
1469 } tests[] = {
1470 // clang-format off
1471 /* expected semantics SNaN Neg payload */
1472 { 0x7fc00000ULL, APFloat::IEEEsingle(), false, false, 0x00000000ULL },
1473 { 0xffc00000ULL, APFloat::IEEEsingle(), false, true, 0x00000000ULL },
1474 { 0x7fc0ae72ULL, APFloat::IEEEsingle(), false, false, 0x0000ae72ULL },
1475 { 0x7fffae72ULL, APFloat::IEEEsingle(), false, false, 0xffffae72ULL },
1476 { 0x7fdaae72ULL, APFloat::IEEEsingle(), false, false, 0x00daae72ULL },
1477 { 0x7fa00000ULL, APFloat::IEEEsingle(), true, false, 0x00000000ULL },
1478 { 0xffa00000ULL, APFloat::IEEEsingle(), true, true, 0x00000000ULL },
1479 { 0x7f80ae72ULL, APFloat::IEEEsingle(), true, false, 0x0000ae72ULL },
1480 { 0x7fbfae72ULL, APFloat::IEEEsingle(), true, false, 0xffffae72ULL },
1481 { 0x7f9aae72ULL, APFloat::IEEEsingle(), true, false, 0x001aae72ULL },
1482 { 0x7ff8000000000000ULL, APFloat::IEEEdouble(), false, false, 0x0000000000000000ULL },
1483 { 0xfff8000000000000ULL, APFloat::IEEEdouble(), false, true, 0x0000000000000000ULL },
1484 { 0x7ff800000000ae72ULL, APFloat::IEEEdouble(), false, false, 0x000000000000ae72ULL },
1485 { 0x7fffffffffffae72ULL, APFloat::IEEEdouble(), false, false, 0xffffffffffffae72ULL },
1486 { 0x7ffdaaaaaaaaae72ULL, APFloat::IEEEdouble(), false, false, 0x000daaaaaaaaae72ULL },
1487 { 0x7ff4000000000000ULL, APFloat::IEEEdouble(), true, false, 0x0000000000000000ULL },
1488 { 0xfff4000000000000ULL, APFloat::IEEEdouble(), true, true, 0x0000000000000000ULL },
1489 { 0x7ff000000000ae72ULL, APFloat::IEEEdouble(), true, false, 0x000000000000ae72ULL },
1490 { 0x7ff7ffffffffae72ULL, APFloat::IEEEdouble(), true, false, 0xffffffffffffae72ULL },
1491 { 0x7ff1aaaaaaaaae72ULL, APFloat::IEEEdouble(), true, false, 0x0001aaaaaaaaae72ULL },
1492 { 0x80ULL, APFloat::Float8E5M2FNUZ(), false, false, 0xaaULL },
1493 { 0x80ULL, APFloat::Float8E5M2FNUZ(), false, true, 0xaaULL },
1494 { 0x80ULL, APFloat::Float8E5M2FNUZ(), true, false, 0xaaULL },
1495 { 0x80ULL, APFloat::Float8E5M2FNUZ(), true, true, 0xaaULL },
1496 { 0x80ULL, APFloat::Float8E4M3FNUZ(), false, false, 0xaaULL },
1497 { 0x80ULL, APFloat::Float8E4M3FNUZ(), false, true, 0xaaULL },
1498 { 0x80ULL, APFloat::Float8E4M3FNUZ(), true, false, 0xaaULL },
1499 { 0x80ULL, APFloat::Float8E4M3FNUZ(), true, true, 0xaaULL },
1500 { 0x80ULL, APFloat::Float8E4M3B11FNUZ(), false, false, 0xaaULL },
1501 { 0x80ULL, APFloat::Float8E4M3B11FNUZ(), false, true, 0xaaULL },
1502 { 0x80ULL, APFloat::Float8E4M3B11FNUZ(), true, false, 0xaaULL },
1503 { 0x80ULL, APFloat::Float8E4M3B11FNUZ(), true, true, 0xaaULL },
1504 { 0x3fe00ULL, APFloat::FloatTF32(), false, false, 0x00000000ULL },
1505 { 0x7fe00ULL, APFloat::FloatTF32(), false, true, 0x00000000ULL },
1506 { 0x3feaaULL, APFloat::FloatTF32(), false, false, 0xaaULL },
1507 { 0x3ffaaULL, APFloat::FloatTF32(), false, false, 0xdaaULL },
1508 { 0x3ffaaULL, APFloat::FloatTF32(), false, false, 0xfdaaULL },
1509 { 0x3fd00ULL, APFloat::FloatTF32(), true, false, 0x00000000ULL },
1510 { 0x7fd00ULL, APFloat::FloatTF32(), true, true, 0x00000000ULL },
1511 { 0x3fcaaULL, APFloat::FloatTF32(), true, false, 0xaaULL },
1512 { 0x3fdaaULL, APFloat::FloatTF32(), true, false, 0xfaaULL },
1513 { 0x3fdaaULL, APFloat::FloatTF32(), true, false, 0x1aaULL },
1514 // clang-format on
1517 for (const auto &t : tests) {
1518 ASSERT_EQ(t.expected, nanbitsFromAPInt(t.semantics, t.SNaN, t.Negative, t.payload));
// Only built when gtest death tests are available and NDEBUG is not defined.
#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG)
TEST(APFloatTest, SemanticsDeath) {
  // Converting to a native type that cannot represent the semantics must die
  // with the matching diagnostic.
  const char *const NotDoubleMsg =
      "Float semantics is not representable by IEEEdouble";
  const char *const NotSingleMsg =
      "Float semantics is not representable by IEEEsingle";

  EXPECT_DEATH(APFloat(APFloat::IEEEquad(), 0).convertToDouble(), NotDoubleMsg);
  EXPECT_DEATH(APFloat(APFloat::IEEEdouble(), 0).convertToFloat(),
               NotSingleMsg);
}
#endif
1533 TEST(APFloatTest, StringDecimalError) {
1534 EXPECT_EQ("Invalid string length", convertToErrorFromString(""));
1535 EXPECT_EQ("String has no digits", convertToErrorFromString("+"));
1536 EXPECT_EQ("String has no digits", convertToErrorFromString("-"));
1538 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("\0", 1)));
1539 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1\0", 2)));
1540 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1" "\0" "2", 3)));
1541 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("1" "\0" "2e1", 5)));
1542 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e\0", 3)));
1543 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e1\0", 4)));
1544 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("1e1" "\0" "2", 5)));
1546 EXPECT_EQ("Invalid character in significand", convertToErrorFromString("1.0f"));
1548 EXPECT_EQ("String contains multiple dots", convertToErrorFromString(".."));
1549 EXPECT_EQ("String contains multiple dots", convertToErrorFromString("..0"));
1550 EXPECT_EQ("String contains multiple dots", convertToErrorFromString("1.0.0"));
1553 TEST(APFloatTest, StringDecimalSignificandError) {
1554 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "."));
1555 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+."));
1556 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-."));
1559 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "e"));
1560 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+e"));
1561 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-e"));
1563 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "e1"));
1564 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+e1"));
1565 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-e1"));
1567 EXPECT_EQ("Significand has no digits", convertToErrorFromString( ".e1"));
1568 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+.e1"));
1569 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-.e1"));
1572 EXPECT_EQ("Significand has no digits", convertToErrorFromString( ".e"));
1573 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+.e"));
1574 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-.e"));
1577 TEST(APFloatTest, StringHexadecimalError) {
1578 EXPECT_EQ("Invalid string", convertToErrorFromString( "0x"));
1579 EXPECT_EQ("Invalid string", convertToErrorFromString("+0x"));
1580 EXPECT_EQ("Invalid string", convertToErrorFromString("-0x"));
1582 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0"));
1583 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0"));
1584 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0"));
1586 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0."));
1587 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0."));
1588 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0."));
1590 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x.0"));
1591 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x.0"));
1592 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x.0"));
1594 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString( "0x0.0"));
1595 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("+0x0.0"));
1596 EXPECT_EQ("Hex strings require an exponent", convertToErrorFromString("-0x0.0"));
1598 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x\0", 3)));
1599 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1\0", 4)));
1600 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1" "\0" "2", 5)));
1601 EXPECT_EQ("Invalid character in significand", convertToErrorFromString(StringRef("0x1" "\0" "2p1", 7)));
1602 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p\0", 5)));
1603 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p1\0", 6)));
1604 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString(StringRef("0x1p1" "\0" "2", 7)));
1606 EXPECT_EQ("Invalid character in exponent", convertToErrorFromString("0x1p0f"));
1608 EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x..p1"));
1609 EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x..0p1"));
1610 EXPECT_EQ("String contains multiple dots", convertToErrorFromString("0x1.0.0p1"));
1613 TEST(APFloatTest, StringHexadecimalSignificandError) {
1614 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0x."));
1615 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0x."));
1616 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0x."));
1618 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0xp"));
1619 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0xp"));
1620 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0xp"));
1622 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0xp+"));
1623 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0xp+"));
1624 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0xp+"));
1626 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0xp-"));
1627 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0xp-"));
1628 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0xp-"));
1631 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0x.p"));
1632 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0x.p"));
1633 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0x.p"));
1635 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0x.p+"));
1636 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0x.p+"));
1637 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0x.p+"));
1639 EXPECT_EQ("Significand has no digits", convertToErrorFromString( "0x.p-"));
1640 EXPECT_EQ("Significand has no digits", convertToErrorFromString("+0x.p-"));
1641 EXPECT_EQ("Significand has no digits", convertToErrorFromString("-0x.p-"));
1644 TEST(APFloatTest, StringHexadecimalExponentError) {
1645 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1p"));
1646 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1p"));
1647 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1p"));
1649 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1p+"));
1650 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1p+"));
1651 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1p+"));
1653 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1p-"));
1654 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1p-"));
1655 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1p-"));
1658 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.p"));
1659 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.p"));
1660 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.p"));
1662 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.p+"));
1663 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.p+"));
1664 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.p+"));
1666 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.p-"));
1667 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.p-"));
1668 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.p-"));
1671 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x.1p"));
1672 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x.1p"));
1673 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x.1p"));
1675 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x.1p+"));
1676 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x.1p+"));
1677 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x.1p+"));
1679 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x.1p-"));
1680 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x.1p-"));
1681 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x.1p-"));
1684 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.1p"));
1685 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.1p"));
1686 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.1p"));
1688 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.1p+"));
1689 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.1p+"));
1690 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.1p+"));
1692 EXPECT_EQ("Exponent has no digits", convertToErrorFromString( "0x1.1p-"));
1693 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("+0x1.1p-"));
1694 EXPECT_EQ("Exponent has no digits", convertToErrorFromString("-0x1.1p-"));
1697 TEST(APFloatTest, exactInverse) {
1698 APFloat inv(0.0f);
1700 // Trivial operation.
1701 EXPECT_TRUE(APFloat(2.0).getExactInverse(&inv));
1702 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5)));
1703 EXPECT_TRUE(APFloat(2.0f).getExactInverse(&inv));
1704 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(0.5f)));
1705 EXPECT_TRUE(APFloat(APFloat::IEEEquad(), "2.0").getExactInverse(&inv));
1706 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::IEEEquad(), "0.5")));
1707 EXPECT_TRUE(APFloat(APFloat::PPCDoubleDouble(), "2.0").getExactInverse(&inv));
1708 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::PPCDoubleDouble(), "0.5")));
1709 EXPECT_TRUE(APFloat(APFloat::x87DoubleExtended(), "2.0").getExactInverse(&inv));
1710 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(APFloat::x87DoubleExtended(), "0.5")));
1712 // FLT_MIN
1713 EXPECT_TRUE(APFloat(1.17549435e-38f).getExactInverse(&inv));
1714 EXPECT_TRUE(inv.bitwiseIsEqual(APFloat(8.5070592e+37f)));
1716 // Large float, inverse is a denormal.
1717 EXPECT_FALSE(APFloat(1.7014118e38f).getExactInverse(nullptr));
1718 // Zero
1719 EXPECT_FALSE(APFloat(0.0).getExactInverse(nullptr));
1720 // Denormalized float
1721 EXPECT_FALSE(APFloat(1.40129846e-45f).getExactInverse(nullptr));
1724 TEST(APFloatTest, roundToIntegral) {
1725 APFloat T(-0.5), S(3.14), R(APFloat::getLargest(APFloat::IEEEdouble())), P(0.0);
1727 P = T;
1728 P.roundToIntegral(APFloat::rmTowardZero);
1729 EXPECT_EQ(-0.0, P.convertToDouble());
1730 P = T;
1731 P.roundToIntegral(APFloat::rmTowardNegative);
1732 EXPECT_EQ(-1.0, P.convertToDouble());
1733 P = T;
1734 P.roundToIntegral(APFloat::rmTowardPositive);
1735 EXPECT_EQ(-0.0, P.convertToDouble());
1736 P = T;
1737 P.roundToIntegral(APFloat::rmNearestTiesToEven);
1738 EXPECT_EQ(-0.0, P.convertToDouble());
1740 P = S;
1741 P.roundToIntegral(APFloat::rmTowardZero);
1742 EXPECT_EQ(3.0, P.convertToDouble());
1743 P = S;
1744 P.roundToIntegral(APFloat::rmTowardNegative);
1745 EXPECT_EQ(3.0, P.convertToDouble());
1746 P = S;
1747 P.roundToIntegral(APFloat::rmTowardPositive);
1748 EXPECT_EQ(4.0, P.convertToDouble());
1749 P = S;
1750 P.roundToIntegral(APFloat::rmNearestTiesToEven);
1751 EXPECT_EQ(3.0, P.convertToDouble());
1753 P = R;
1754 P.roundToIntegral(APFloat::rmTowardZero);
1755 EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
1756 P = R;
1757 P.roundToIntegral(APFloat::rmTowardNegative);
1758 EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
1759 P = R;
1760 P.roundToIntegral(APFloat::rmTowardPositive);
1761 EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
1762 P = R;
1763 P.roundToIntegral(APFloat::rmNearestTiesToEven);
1764 EXPECT_EQ(R.convertToDouble(), P.convertToDouble());
1766 P = APFloat::getZero(APFloat::IEEEdouble());
1767 P.roundToIntegral(APFloat::rmTowardZero);
1768 EXPECT_EQ(0.0, P.convertToDouble());
1769 P = APFloat::getZero(APFloat::IEEEdouble(), true);
1770 P.roundToIntegral(APFloat::rmTowardZero);
1771 EXPECT_EQ(-0.0, P.convertToDouble());
1772 P = APFloat::getNaN(APFloat::IEEEdouble());
1773 P.roundToIntegral(APFloat::rmTowardZero);
1774 EXPECT_TRUE(std::isnan(P.convertToDouble()));
1775 P = APFloat::getInf(APFloat::IEEEdouble());
1776 P.roundToIntegral(APFloat::rmTowardZero);
1777 EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() > 0.0);
1778 P = APFloat::getInf(APFloat::IEEEdouble(), true);
1779 P.roundToIntegral(APFloat::rmTowardZero);
1780 EXPECT_TRUE(std::isinf(P.convertToDouble()) && P.convertToDouble() < 0.0);
1782 APFloat::opStatus St;
1784 P = APFloat::getNaN(APFloat::IEEEdouble());
1785 St = P.roundToIntegral(APFloat::rmTowardZero);
1786 EXPECT_TRUE(P.isNaN());
1787 EXPECT_FALSE(P.isNegative());
1788 EXPECT_EQ(APFloat::opOK, St);
1790 P = APFloat::getNaN(APFloat::IEEEdouble(), true);
1791 St = P.roundToIntegral(APFloat::rmTowardZero);
1792 EXPECT_TRUE(P.isNaN());
1793 EXPECT_TRUE(P.isNegative());
1794 EXPECT_EQ(APFloat::opOK, St);
1796 P = APFloat::getSNaN(APFloat::IEEEdouble());
1797 St = P.roundToIntegral(APFloat::rmTowardZero);
1798 EXPECT_TRUE(P.isNaN());
1799 EXPECT_FALSE(P.isSignaling());
1800 EXPECT_FALSE(P.isNegative());
1801 EXPECT_EQ(APFloat::opInvalidOp, St);
1803 P = APFloat::getSNaN(APFloat::IEEEdouble(), true);
1804 St = P.roundToIntegral(APFloat::rmTowardZero);
1805 EXPECT_TRUE(P.isNaN());
1806 EXPECT_FALSE(P.isSignaling());
1807 EXPECT_TRUE(P.isNegative());
1808 EXPECT_EQ(APFloat::opInvalidOp, St);
1810 P = APFloat::getInf(APFloat::IEEEdouble());
1811 St = P.roundToIntegral(APFloat::rmTowardZero);
1812 EXPECT_TRUE(P.isInfinity());
1813 EXPECT_FALSE(P.isNegative());
1814 EXPECT_EQ(APFloat::opOK, St);
1816 P = APFloat::getInf(APFloat::IEEEdouble(), true);
1817 St = P.roundToIntegral(APFloat::rmTowardZero);
1818 EXPECT_TRUE(P.isInfinity());
1819 EXPECT_TRUE(P.isNegative());
1820 EXPECT_EQ(APFloat::opOK, St);
1822 P = APFloat::getZero(APFloat::IEEEdouble(), false);
1823 St = P.roundToIntegral(APFloat::rmTowardZero);
1824 EXPECT_TRUE(P.isZero());
1825 EXPECT_FALSE(P.isNegative());
1826 EXPECT_EQ(APFloat::opOK, St);
1828 P = APFloat::getZero(APFloat::IEEEdouble(), false);
1829 St = P.roundToIntegral(APFloat::rmTowardNegative);
1830 EXPECT_TRUE(P.isZero());
1831 EXPECT_FALSE(P.isNegative());
1832 EXPECT_EQ(APFloat::opOK, St);
1834 P = APFloat::getZero(APFloat::IEEEdouble(), true);
1835 St = P.roundToIntegral(APFloat::rmTowardZero);
1836 EXPECT_TRUE(P.isZero());
1837 EXPECT_TRUE(P.isNegative());
1838 EXPECT_EQ(APFloat::opOK, St);
1840 P = APFloat::getZero(APFloat::IEEEdouble(), true);
1841 St = P.roundToIntegral(APFloat::rmTowardNegative);
1842 EXPECT_TRUE(P.isZero());
1843 EXPECT_TRUE(P.isNegative());
1844 EXPECT_EQ(APFloat::opOK, St);
1846 P = APFloat(1E-100);
1847 St = P.roundToIntegral(APFloat::rmTowardNegative);
1848 EXPECT_TRUE(P.isZero());
1849 EXPECT_FALSE(P.isNegative());
1850 EXPECT_EQ(APFloat::opInexact, St);
1852 P = APFloat(1E-100);
1853 St = P.roundToIntegral(APFloat::rmTowardPositive);
1854 EXPECT_EQ(1.0, P.convertToDouble());
1855 EXPECT_FALSE(P.isNegative());
1856 EXPECT_EQ(APFloat::opInexact, St);
1858 P = APFloat(-1E-100);
1859 St = P.roundToIntegral(APFloat::rmTowardNegative);
1860 EXPECT_TRUE(P.isNegative());
1861 EXPECT_EQ(-1.0, P.convertToDouble());
1862 EXPECT_EQ(APFloat::opInexact, St);
1864 P = APFloat(-1E-100);
1865 St = P.roundToIntegral(APFloat::rmTowardPositive);
1866 EXPECT_TRUE(P.isZero());
1867 EXPECT_TRUE(P.isNegative());
1868 EXPECT_EQ(APFloat::opInexact, St);
1870 P = APFloat(10.0);
1871 St = P.roundToIntegral(APFloat::rmTowardZero);
1872 EXPECT_EQ(10.0, P.convertToDouble());
1873 EXPECT_EQ(APFloat::opOK, St);
1875 P = APFloat(10.5);
1876 St = P.roundToIntegral(APFloat::rmTowardZero);
1877 EXPECT_EQ(10.0, P.convertToDouble());
1878 EXPECT_EQ(APFloat::opInexact, St);
1880 P = APFloat(10.5);
1881 St = P.roundToIntegral(APFloat::rmTowardPositive);
1882 EXPECT_EQ(11.0, P.convertToDouble());
1883 EXPECT_EQ(APFloat::opInexact, St);
1885 P = APFloat(10.5);
1886 St = P.roundToIntegral(APFloat::rmTowardNegative);
1887 EXPECT_EQ(10.0, P.convertToDouble());
1888 EXPECT_EQ(APFloat::opInexact, St);
1890 P = APFloat(10.5);
1891 St = P.roundToIntegral(APFloat::rmNearestTiesToAway);
1892 EXPECT_EQ(11.0, P.convertToDouble());
1893 EXPECT_EQ(APFloat::opInexact, St);
1895 P = APFloat(10.5);
1896 St = P.roundToIntegral(APFloat::rmNearestTiesToEven);
1897 EXPECT_EQ(10.0, P.convertToDouble());
1898 EXPECT_EQ(APFloat::opInexact, St);
1901 TEST(APFloatTest, isInteger) {
1902 APFloat T(-0.0);
1903 EXPECT_TRUE(T.isInteger());
1904 T = APFloat(3.14159);
1905 EXPECT_FALSE(T.isInteger());
1906 T = APFloat::getNaN(APFloat::IEEEdouble());
1907 EXPECT_FALSE(T.isInteger());
1908 T = APFloat::getInf(APFloat::IEEEdouble());
1909 EXPECT_FALSE(T.isInteger());
1910 T = APFloat::getInf(APFloat::IEEEdouble(), true);
1911 EXPECT_FALSE(T.isInteger());
1912 T = APFloat::getLargest(APFloat::IEEEdouble());
1913 EXPECT_TRUE(T.isInteger());
1916 TEST(DoubleAPFloatTest, isInteger) {
1917 APFloat F1(-0.0);
1918 APFloat F2(-0.0);
1919 llvm::detail::DoubleAPFloat T(APFloat::PPCDoubleDouble(), std::move(F1),
1920 std::move(F2));
1921 EXPECT_TRUE(T.isInteger());
1922 APFloat F3(3.14159);
1923 APFloat F4(-0.0);
1924 llvm::detail::DoubleAPFloat T2(APFloat::PPCDoubleDouble(), std::move(F3),
1925 std::move(F4));
1926 EXPECT_FALSE(T2.isInteger());
1927 APFloat F5(-0.0);
1928 APFloat F6(3.14159);
1929 llvm::detail::DoubleAPFloat T3(APFloat::PPCDoubleDouble(), std::move(F5),
1930 std::move(F6));
1931 EXPECT_FALSE(T3.isInteger());
1934 // Test to check if the full range of Float8E8M0FNU
1935 // values are being represented correctly.
1936 TEST(APFloatTest, Float8E8M0FNUValues) {
1937 // High end of the range
1938 auto test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p127");
1939 EXPECT_EQ(0x1.0p127, test.convertToDouble());
1941 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p126");
1942 EXPECT_EQ(0x1.0p126, test.convertToDouble());
1944 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p125");
1945 EXPECT_EQ(0x1.0p125, test.convertToDouble());
1947 // tests the fix in makeLargest()
1948 test = APFloat::getLargest(APFloat::Float8E8M0FNU());
1949 EXPECT_EQ(0x1.0p127, test.convertToDouble());
1951 // tests overflow to nan
1952 APFloat nan = APFloat(APFloat::Float8E8M0FNU(), "nan");
1953 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p128");
1954 EXPECT_TRUE(test.bitwiseIsEqual(nan));
1956 // Mid of the range
1957 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p0");
1958 EXPECT_EQ(1.0, test.convertToDouble());
1960 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p1");
1961 EXPECT_EQ(2.0, test.convertToDouble());
1963 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p2");
1964 EXPECT_EQ(4.0, test.convertToDouble());
1966 // Low end of the range
1967 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-125");
1968 EXPECT_EQ(0x1.0p-125, test.convertToDouble());
1970 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-126");
1971 EXPECT_EQ(0x1.0p-126, test.convertToDouble());
1973 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127");
1974 EXPECT_EQ(0x1.0p-127, test.convertToDouble());
1976 // Smallest value
1977 test = APFloat::getSmallest(APFloat::Float8E8M0FNU());
1978 EXPECT_EQ(0x1.0p-127, test.convertToDouble());
1980 // Value below the smallest, but clamped to the smallest
1981 test = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-128");
1982 EXPECT_EQ(0x1.0p-127, test.convertToDouble());
1985 TEST(APFloatTest, getLargest) {
1986 EXPECT_EQ(3.402823466e+38f, APFloat::getLargest(APFloat::IEEEsingle()).convertToFloat());
1987 EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble()).convertToDouble());
1988 EXPECT_EQ(448, APFloat::getLargest(APFloat::Float8E4M3FN()).convertToDouble());
1989 EXPECT_EQ(240,
1990 APFloat::getLargest(APFloat::Float8E4M3FNUZ()).convertToDouble());
1991 EXPECT_EQ(57344,
1992 APFloat::getLargest(APFloat::Float8E5M2FNUZ()).convertToDouble());
1993 EXPECT_EQ(
1994 30, APFloat::getLargest(APFloat::Float8E4M3B11FNUZ()).convertToDouble());
1995 EXPECT_EQ(3.40116213421e+38f,
1996 APFloat::getLargest(APFloat::FloatTF32()).convertToFloat());
1997 EXPECT_EQ(1.701411834e+38f,
1998 APFloat::getLargest(APFloat::Float8E8M0FNU()).convertToDouble());
1999 EXPECT_EQ(28, APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble());
2000 EXPECT_EQ(7.5,
2001 APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble());
2002 EXPECT_EQ(6, APFloat::getLargest(APFloat::Float4E2M1FN()).convertToDouble());
2005 TEST(APFloatTest, getSmallest) {
2006 APFloat test = APFloat::getSmallest(APFloat::IEEEsingle(), false);
2007 APFloat expected = APFloat(APFloat::IEEEsingle(), "0x0.000002p-126");
2008 EXPECT_FALSE(test.isNegative());
2009 EXPECT_TRUE(test.isFiniteNonZero());
2010 EXPECT_TRUE(test.isDenormal());
2011 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2013 test = APFloat::getSmallest(APFloat::IEEEsingle(), true);
2014 expected = APFloat(APFloat::IEEEsingle(), "-0x0.000002p-126");
2015 EXPECT_TRUE(test.isNegative());
2016 EXPECT_TRUE(test.isFiniteNonZero());
2017 EXPECT_TRUE(test.isDenormal());
2018 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2020 test = APFloat::getSmallest(APFloat::IEEEquad(), false);
2021 expected = APFloat(APFloat::IEEEquad(), "0x0.0000000000000000000000000001p-16382");
2022 EXPECT_FALSE(test.isNegative());
2023 EXPECT_TRUE(test.isFiniteNonZero());
2024 EXPECT_TRUE(test.isDenormal());
2025 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2027 test = APFloat::getSmallest(APFloat::IEEEquad(), true);
2028 expected = APFloat(APFloat::IEEEquad(), "-0x0.0000000000000000000000000001p-16382");
2029 EXPECT_TRUE(test.isNegative());
2030 EXPECT_TRUE(test.isFiniteNonZero());
2031 EXPECT_TRUE(test.isDenormal());
2032 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2034 test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), false);
2035 expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x0.4p-15");
2036 EXPECT_FALSE(test.isNegative());
2037 EXPECT_TRUE(test.isFiniteNonZero());
2038 EXPECT_TRUE(test.isDenormal());
2039 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2041 test = APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), false);
2042 expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x0.2p-7");
2043 EXPECT_FALSE(test.isNegative());
2044 EXPECT_TRUE(test.isFiniteNonZero());
2045 EXPECT_TRUE(test.isDenormal());
2046 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2048 test = APFloat::getSmallest(APFloat::Float8E4M3B11FNUZ(), false);
2049 expected = APFloat(APFloat::Float8E4M3B11FNUZ(), "0x0.2p-10");
2050 EXPECT_FALSE(test.isNegative());
2051 EXPECT_TRUE(test.isFiniteNonZero());
2052 EXPECT_TRUE(test.isDenormal());
2053 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2055 test = APFloat::getSmallest(APFloat::FloatTF32(), true);
2056 expected = APFloat(APFloat::FloatTF32(), "-0x0.004p-126");
2057 EXPECT_TRUE(test.isNegative());
2058 EXPECT_TRUE(test.isFiniteNonZero());
2059 EXPECT_TRUE(test.isDenormal());
2060 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2062 test = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
2063 expected = APFloat(APFloat::Float6E3M2FN(), "0x0.1p0");
2064 EXPECT_FALSE(test.isNegative());
2065 EXPECT_TRUE(test.isFiniteNonZero());
2066 EXPECT_TRUE(test.isDenormal());
2067 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2069 test = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
2070 expected = APFloat(APFloat::Float6E2M3FN(), "0x0.2p0");
2071 EXPECT_FALSE(test.isNegative());
2072 EXPECT_TRUE(test.isFiniteNonZero());
2073 EXPECT_TRUE(test.isDenormal());
2074 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2076 test = APFloat::getSmallest(APFloat::Float4E2M1FN(), false);
2077 expected = APFloat(APFloat::Float4E2M1FN(), "0x0.8p0");
2078 EXPECT_FALSE(test.isNegative());
2079 EXPECT_TRUE(test.isFiniteNonZero());
2080 EXPECT_TRUE(test.isDenormal());
2081 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2083 test = APFloat::getSmallest(APFloat::Float8E8M0FNU());
2084 expected = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127");
2085 EXPECT_FALSE(test.isNegative());
2086 EXPECT_TRUE(test.isFiniteNonZero());
2087 EXPECT_FALSE(test.isDenormal());
2088 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2091 TEST(APFloatTest, getSmallestNormalized) {
2092 APFloat test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
2093 APFloat expected = APFloat(APFloat::IEEEsingle(), "0x1p-126");
2094 EXPECT_FALSE(test.isNegative());
2095 EXPECT_TRUE(test.isFiniteNonZero());
2096 EXPECT_FALSE(test.isDenormal());
2097 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2098 EXPECT_TRUE(test.isSmallestNormalized());
2100 test = APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
2101 expected = APFloat(APFloat::IEEEsingle(), "-0x1p-126");
2102 EXPECT_TRUE(test.isNegative());
2103 EXPECT_TRUE(test.isFiniteNonZero());
2104 EXPECT_FALSE(test.isDenormal());
2105 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2106 EXPECT_TRUE(test.isSmallestNormalized());
2108 test = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false);
2109 expected = APFloat(APFloat::IEEEdouble(), "0x1p-1022");
2110 EXPECT_FALSE(test.isNegative());
2111 EXPECT_TRUE(test.isFiniteNonZero());
2112 EXPECT_FALSE(test.isDenormal());
2113 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2114 EXPECT_TRUE(test.isSmallestNormalized());
2116 test = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true);
2117 expected = APFloat(APFloat::IEEEdouble(), "-0x1p-1022");
2118 EXPECT_TRUE(test.isNegative());
2119 EXPECT_TRUE(test.isFiniteNonZero());
2120 EXPECT_FALSE(test.isDenormal());
2121 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2122 EXPECT_TRUE(test.isSmallestNormalized());
2124 test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), false);
2125 expected = APFloat(APFloat::IEEEquad(), "0x1p-16382");
2126 EXPECT_FALSE(test.isNegative());
2127 EXPECT_TRUE(test.isFiniteNonZero());
2128 EXPECT_FALSE(test.isDenormal());
2129 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2130 EXPECT_TRUE(test.isSmallestNormalized());
2132 test = APFloat::getSmallestNormalized(APFloat::IEEEquad(), true);
2133 expected = APFloat(APFloat::IEEEquad(), "-0x1p-16382");
2134 EXPECT_TRUE(test.isNegative());
2135 EXPECT_TRUE(test.isFiniteNonZero());
2136 EXPECT_FALSE(test.isDenormal());
2137 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2138 EXPECT_TRUE(test.isSmallestNormalized());
2140 test = APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), false);
2141 expected = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-15");
2142 EXPECT_FALSE(test.isNegative());
2143 EXPECT_TRUE(test.isFiniteNonZero());
2144 EXPECT_FALSE(test.isDenormal());
2145 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2146 EXPECT_TRUE(test.isSmallestNormalized());
2148 test = APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), false);
2149 expected = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.0p-7");
2150 EXPECT_FALSE(test.isNegative());
2151 EXPECT_TRUE(test.isFiniteNonZero());
2152 EXPECT_FALSE(test.isDenormal());
2153 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2154 EXPECT_TRUE(test.isSmallestNormalized());
2156 test = APFloat::getSmallestNormalized(APFloat::Float8E4M3B11FNUZ(), false);
2157 expected = APFloat(APFloat::Float8E4M3B11FNUZ(), "0x1.0p-10");
2158 EXPECT_FALSE(test.isNegative());
2159 EXPECT_TRUE(test.isFiniteNonZero());
2160 EXPECT_FALSE(test.isDenormal());
2161 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2162 EXPECT_TRUE(test.isSmallestNormalized());
2164 test = APFloat::getSmallestNormalized(APFloat::FloatTF32(), false);
2165 expected = APFloat(APFloat::FloatTF32(), "0x1p-126");
2166 EXPECT_FALSE(test.isNegative());
2167 EXPECT_TRUE(test.isFiniteNonZero());
2168 EXPECT_FALSE(test.isDenormal());
2169 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2170 EXPECT_TRUE(test.isSmallestNormalized());
2172 test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
2173 expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2");
2174 EXPECT_FALSE(test.isNegative());
2175 EXPECT_TRUE(test.isFiniteNonZero());
2176 EXPECT_FALSE(test.isDenormal());
2177 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2178 EXPECT_TRUE(test.isSmallestNormalized());
2180 test = APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false);
2181 expected = APFloat(APFloat::Float4E2M1FN(), "0x1p0");
2182 EXPECT_FALSE(test.isNegative());
2183 EXPECT_TRUE(test.isFiniteNonZero());
2184 EXPECT_FALSE(test.isDenormal());
2185 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2186 EXPECT_TRUE(test.isSmallestNormalized());
2188 test = APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
2189 expected = APFloat(APFloat::Float6E2M3FN(), "0x1p0");
2190 EXPECT_FALSE(test.isNegative());
2191 EXPECT_TRUE(test.isFiniteNonZero());
2192 EXPECT_FALSE(test.isDenormal());
2193 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2194 EXPECT_TRUE(test.isSmallestNormalized());
2196 test = APFloat::getSmallestNormalized(APFloat::Float8E8M0FNU(), false);
2197 expected = APFloat(APFloat::Float8E8M0FNU(), "0x1.0p-127");
2198 EXPECT_FALSE(test.isNegative());
2199 EXPECT_TRUE(test.isFiniteNonZero());
2200 EXPECT_FALSE(test.isDenormal());
2201 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2202 EXPECT_TRUE(test.isSmallestNormalized());
2205 TEST(APFloatTest, getZero) {
2206 struct {
2207 const fltSemantics *semantics;
2208 const bool sign;
2209 const bool signedZero;
2210 const unsigned long long bitPattern[2];
2211 const unsigned bitPatternLength;
2212 } const GetZeroTest[] = {
2213 {&APFloat::IEEEhalf(), false, true, {0, 0}, 1},
2214 {&APFloat::IEEEhalf(), true, true, {0x8000ULL, 0}, 1},
2215 {&APFloat::IEEEsingle(), false, true, {0, 0}, 1},
2216 {&APFloat::IEEEsingle(), true, true, {0x80000000ULL, 0}, 1},
2217 {&APFloat::IEEEdouble(), false, true, {0, 0}, 1},
2218 {&APFloat::IEEEdouble(), true, true, {0x8000000000000000ULL, 0}, 1},
2219 {&APFloat::IEEEquad(), false, true, {0, 0}, 2},
2220 {&APFloat::IEEEquad(), true, true, {0, 0x8000000000000000ULL}, 2},
2221 {&APFloat::PPCDoubleDouble(), false, true, {0, 0}, 2},
2222 {&APFloat::PPCDoubleDouble(), true, true, {0x8000000000000000ULL, 0}, 2},
2223 {&APFloat::x87DoubleExtended(), false, true, {0, 0}, 2},
2224 {&APFloat::x87DoubleExtended(), true, true, {0, 0x8000ULL}, 2},
2225 {&APFloat::Float8E5M2(), false, true, {0, 0}, 1},
2226 {&APFloat::Float8E5M2(), true, true, {0x80ULL, 0}, 1},
2227 {&APFloat::Float8E5M2FNUZ(), false, false, {0, 0}, 1},
2228 {&APFloat::Float8E5M2FNUZ(), true, false, {0, 0}, 1},
2229 {&APFloat::Float8E4M3(), false, true, {0, 0}, 1},
2230 {&APFloat::Float8E4M3(), true, true, {0x80ULL, 0}, 1},
2231 {&APFloat::Float8E4M3FN(), false, true, {0, 0}, 1},
2232 {&APFloat::Float8E4M3FN(), true, true, {0x80ULL, 0}, 1},
2233 {&APFloat::Float8E4M3FNUZ(), false, false, {0, 0}, 1},
2234 {&APFloat::Float8E4M3FNUZ(), true, false, {0, 0}, 1},
2235 {&APFloat::Float8E4M3B11FNUZ(), false, false, {0, 0}, 1},
2236 {&APFloat::Float8E4M3B11FNUZ(), true, false, {0, 0}, 1},
2237 {&APFloat::Float8E3M4(), false, true, {0, 0}, 1},
2238 {&APFloat::Float8E3M4(), true, true, {0x80ULL, 0}, 1},
2239 {&APFloat::FloatTF32(), false, true, {0, 0}, 1},
2240 {&APFloat::FloatTF32(), true, true, {0x40000ULL, 0}, 1},
2241 {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1},
2242 {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1},
2243 {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1},
2244 {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1},
2245 {&APFloat::Float4E2M1FN(), false, true, {0, 0}, 1},
2246 {&APFloat::Float4E2M1FN(), true, true, {0x8ULL, 0}, 1}};
2247 const unsigned NumGetZeroTests = std::size(GetZeroTest);
2248 for (unsigned i = 0; i < NumGetZeroTests; ++i) {
2249 APFloat test = APFloat::getZero(*GetZeroTest[i].semantics,
2250 GetZeroTest[i].sign);
2251 const char *pattern = GetZeroTest[i].sign? "-0x0p+0" : "0x0p+0";
2252 APFloat expected = APFloat(*GetZeroTest[i].semantics,
2253 pattern);
2254 EXPECT_TRUE(test.isZero());
2255 if (GetZeroTest[i].signedZero)
2256 EXPECT_TRUE(GetZeroTest[i].sign ? test.isNegative() : !test.isNegative());
2257 else
2258 EXPECT_TRUE(!test.isNegative());
2259 EXPECT_TRUE(test.bitwiseIsEqual(expected));
2260 for (unsigned j = 0, je = GetZeroTest[i].bitPatternLength; j < je; ++j) {
2261 EXPECT_EQ(GetZeroTest[i].bitPattern[j],
2262 test.bitcastToAPInt().getRawData()[j]);
2267 TEST(APFloatTest, copySign) {
2268 EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(
2269 APFloat::copySign(APFloat(42.0), APFloat(-1.0))));
2270 EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(
2271 APFloat::copySign(APFloat(-42.0), APFloat(1.0))));
2272 EXPECT_TRUE(APFloat(-42.0).bitwiseIsEqual(
2273 APFloat::copySign(APFloat(-42.0), APFloat(-1.0))));
2274 EXPECT_TRUE(APFloat(42.0).bitwiseIsEqual(
2275 APFloat::copySign(APFloat(42.0), APFloat(1.0))));
2276 // For floating-point formats with unsigned 0, copySign() to a zero is a noop
2277 for (APFloat::Semantics S :
2278 {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) {
2279 const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S);
2280 EXPECT_TRUE(APFloat::getZero(Sem).bitwiseIsEqual(
2281 APFloat::copySign(APFloat::getZero(Sem), APFloat(-1.0))));
2282 EXPECT_TRUE(APFloat::getNaN(Sem, true).bitwiseIsEqual(
2283 APFloat::copySign(APFloat::getNaN(Sem, true), APFloat(1.0))));
2287 TEST(APFloatTest, convert) {
2288 bool losesInfo;
2289 APFloat test(APFloat::IEEEdouble(), "1.0");
2290 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2291 EXPECT_EQ(1.0f, test.convertToFloat());
2292 EXPECT_FALSE(losesInfo);
2294 test = APFloat(APFloat::x87DoubleExtended(), "0x1p-53");
2295 test.add(APFloat(APFloat::x87DoubleExtended(), "1.0"), APFloat::rmNearestTiesToEven);
2296 test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo);
2297 EXPECT_EQ(1.0, test.convertToDouble());
2298 EXPECT_TRUE(losesInfo);
2300 test = APFloat(APFloat::IEEEquad(), "0x1p-53");
2301 test.add(APFloat(APFloat::IEEEquad(), "1.0"), APFloat::rmNearestTiesToEven);
2302 test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo);
2303 EXPECT_EQ(1.0, test.convertToDouble());
2304 EXPECT_TRUE(losesInfo);
2306 test = APFloat(APFloat::x87DoubleExtended(), "0xf.fffffffp+28");
2307 test.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo);
2308 EXPECT_EQ(4294967295.0, test.convertToDouble());
2309 EXPECT_FALSE(losesInfo);
2311 test = APFloat::getSNaN(APFloat::IEEEsingle());
2312 APFloat::opStatus status = test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, &losesInfo);
2313 // Conversion quiets the SNAN, so now 2 bits of the 64-bit significand should be set.
2314 APInt topTwoBits(64, 0x6000000000000000);
2315 EXPECT_TRUE(test.bitwiseIsEqual(APFloat::getQNaN(APFloat::x87DoubleExtended(), false, &topTwoBits)));
2316 EXPECT_FALSE(losesInfo);
2317 EXPECT_EQ(status, APFloat::opInvalidOp);
2319 test = APFloat::getQNaN(APFloat::IEEEsingle());
2320 APFloat X87QNaN = APFloat::getQNaN(APFloat::x87DoubleExtended());
2321 test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
2322 &losesInfo);
2323 EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN));
2324 EXPECT_FALSE(losesInfo);
2326 test = APFloat::getSNaN(APFloat::x87DoubleExtended());
2327 test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
2328 &losesInfo);
2329 APFloat X87SNaN = APFloat::getSNaN(APFloat::x87DoubleExtended());
2330 EXPECT_TRUE(test.bitwiseIsEqual(X87SNaN));
2331 EXPECT_FALSE(losesInfo);
2333 test = APFloat::getQNaN(APFloat::x87DoubleExtended());
2334 test.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
2335 &losesInfo);
2336 EXPECT_TRUE(test.bitwiseIsEqual(X87QNaN));
2337 EXPECT_FALSE(losesInfo);
2339 // The payload is lost in truncation, but we retain NaN by setting the quiet bit.
2340 APInt payload(52, 1);
2341 test = APFloat::getSNaN(APFloat::IEEEdouble(), false, &payload);
2342 status = test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2343 EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
2344 EXPECT_TRUE(losesInfo);
2345 EXPECT_EQ(status, APFloat::opInvalidOp);
2347 // The payload is lost in truncation. QNaN remains QNaN.
2348 test = APFloat::getQNaN(APFloat::IEEEdouble(), false, &payload);
2349 status = test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2350 EXPECT_EQ(0x7fc00000, test.bitcastToAPInt());
2351 EXPECT_TRUE(losesInfo);
2352 EXPECT_EQ(status, APFloat::opOK);
2354 // Test that subnormals are handled correctly in double to float conversion
2355 test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000000p-1022");
2356 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2357 EXPECT_EQ(0.0f, test.convertToFloat());
2358 EXPECT_TRUE(losesInfo);
2360 test = APFloat(APFloat::IEEEdouble(), "0x0.0000010000001p-1022");
2361 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2362 EXPECT_EQ(0.0f, test.convertToFloat());
2363 EXPECT_TRUE(losesInfo);
2365 test = APFloat(APFloat::IEEEdouble(), "-0x0.0000010000001p-1022");
2366 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2367 EXPECT_EQ(0.0f, test.convertToFloat());
2368 EXPECT_TRUE(losesInfo);
2370 test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000000p-1022");
2371 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2372 EXPECT_EQ(0.0f, test.convertToFloat());
2373 EXPECT_TRUE(losesInfo);
2375 test = APFloat(APFloat::IEEEdouble(), "0x0.0000020000001p-1022");
2376 test.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
2377 EXPECT_EQ(0.0f, test.convertToFloat());
2378 EXPECT_TRUE(losesInfo);
2380 // Test subnormal conversion to bfloat
2381 test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
2382 test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
2383 EXPECT_EQ(0.0f, test.convertToFloat());
2384 EXPECT_TRUE(losesInfo);
2386 test = APFloat(APFloat::IEEEsingle(), "0x0.02p-126");
2387 test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
2388 EXPECT_EQ(0x01, test.bitcastToAPInt());
2389 EXPECT_FALSE(losesInfo);
2391 test = APFloat(APFloat::IEEEsingle(), "0x0.01p-126");
2392 test.convert(APFloat::BFloat(), APFloat::rmNearestTiesToAway, &losesInfo);
2393 EXPECT_EQ(0x01, test.bitcastToAPInt());
2394 EXPECT_TRUE(losesInfo);
2397 TEST(APFloatTest, Float8UZConvert) {
2398 bool losesInfo = false;
2399 std::pair<APFloat, APFloat::opStatus> toNaNTests[] = {
2400 {APFloat::getQNaN(APFloat::IEEEsingle(), false), APFloat::opOK},
2401 {APFloat::getQNaN(APFloat::IEEEsingle(), true), APFloat::opOK},
2402 {APFloat::getSNaN(APFloat::IEEEsingle(), false), APFloat::opInvalidOp},
2403 {APFloat::getSNaN(APFloat::IEEEsingle(), true), APFloat::opInvalidOp},
2404 {APFloat::getInf(APFloat::IEEEsingle(), false), APFloat::opInexact},
2405 {APFloat::getInf(APFloat::IEEEsingle(), true), APFloat::opInexact}};
2406 for (APFloat::Semantics S :
2407 {APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ,
2408 APFloat::S_Float8E4M3B11FNUZ}) {
2409 const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S);
2410 SCOPED_TRACE("Semantics = " + std::to_string(S));
2411 for (auto [toTest, expectedRes] : toNaNTests) {
2412 llvm::SmallString<16> value;
2413 toTest.toString(value);
2414 SCOPED_TRACE("toTest = " + value);
2415 losesInfo = false;
2416 APFloat test = toTest;
2417 EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo),
2418 expectedRes);
2419 EXPECT_TRUE(test.isNaN());
2420 EXPECT_TRUE(test.isNegative());
2421 EXPECT_FALSE(test.isSignaling());
2422 EXPECT_FALSE(test.isInfinity());
2423 EXPECT_EQ(0x80, test.bitcastToAPInt());
2424 EXPECT_TRUE(losesInfo);
2427 // Negative zero conversions are information losing.
2428 losesInfo = false;
2429 APFloat test = APFloat::getZero(APFloat::IEEEsingle(), true);
2430 EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo),
2431 APFloat::opInexact);
2432 EXPECT_TRUE(test.isZero());
2433 EXPECT_FALSE(test.isNegative());
2434 EXPECT_TRUE(losesInfo);
2435 EXPECT_EQ(0x0, test.bitcastToAPInt());
2437 losesInfo = true;
2438 test = APFloat::getZero(APFloat::IEEEsingle(), false);
2439 EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo),
2440 APFloat::opOK);
2441 EXPECT_TRUE(test.isZero());
2442 EXPECT_FALSE(test.isNegative());
2443 EXPECT_FALSE(losesInfo);
2444 EXPECT_EQ(0x0, test.bitcastToAPInt());
2446 // Except in casts between ourselves.
2447 losesInfo = true;
2448 test = APFloat::getZero(Sem);
2449 EXPECT_EQ(test.convert(Sem, APFloat::rmNearestTiesToAway, &losesInfo),
2450 APFloat::opOK);
2451 EXPECT_FALSE(losesInfo);
2452 EXPECT_EQ(0x0, test.bitcastToAPInt());
2456 TEST(APFloatTest, PPCDoubleDouble) {
2457 APFloat test(APFloat::PPCDoubleDouble(), "1.0");
2458 EXPECT_EQ(0x3ff0000000000000ull, test.bitcastToAPInt().getRawData()[0]);
2459 EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);
2461 // LDBL_MAX
2462 test = APFloat(APFloat::PPCDoubleDouble(), "1.79769313486231580793728971405301e+308");
2463 EXPECT_EQ(0x7fefffffffffffffull, test.bitcastToAPInt().getRawData()[0]);
2464 EXPECT_EQ(0x7c8ffffffffffffeull, test.bitcastToAPInt().getRawData()[1]);
2466 // LDBL_MIN
2467 test = APFloat(APFloat::PPCDoubleDouble(), "2.00416836000897277799610805135016e-292");
2468 EXPECT_EQ(0x0360000000000000ull, test.bitcastToAPInt().getRawData()[0]);
2469 EXPECT_EQ(0x0000000000000000ull, test.bitcastToAPInt().getRawData()[1]);
2471 // PR30869
2473 auto Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") +
2474 APFloat(APFloat::PPCDoubleDouble(), "1.0");
2475 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2477 Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") -
2478 APFloat(APFloat::PPCDoubleDouble(), "1.0");
2479 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2481 Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") *
2482 APFloat(APFloat::PPCDoubleDouble(), "1.0");
2483 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2485 Result = APFloat(APFloat::PPCDoubleDouble(), "1.0") /
2486 APFloat(APFloat::PPCDoubleDouble(), "1.0");
2487 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2489 int Exp;
2490 Result = frexp(APFloat(APFloat::PPCDoubleDouble(), "1.0"), Exp,
2491 APFloat::rmNearestTiesToEven);
2492 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2494 Result = scalbn(APFloat(APFloat::PPCDoubleDouble(), "1.0"), 1,
2495 APFloat::rmNearestTiesToEven);
2496 EXPECT_EQ(&APFloat::PPCDoubleDouble(), &Result.getSemantics());
2500 TEST(APFloatTest, isNegative) {
2501 APFloat t(APFloat::IEEEsingle(), "0x1p+0");
2502 EXPECT_FALSE(t.isNegative());
2503 t = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
2504 EXPECT_TRUE(t.isNegative());
2506 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNegative());
2507 EXPECT_TRUE(APFloat::getInf(APFloat::IEEEsingle(), true).isNegative());
2509 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNegative());
2510 EXPECT_TRUE(APFloat::getZero(APFloat::IEEEsingle(), true).isNegative());
2512 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNegative());
2513 EXPECT_TRUE(APFloat::getNaN(APFloat::IEEEsingle(), true).isNegative());
2515 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNegative());
2516 EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isNegative());
2519 TEST(APFloatTest, isNormal) {
2520 APFloat t(APFloat::IEEEsingle(), "0x1p+0");
2521 EXPECT_TRUE(t.isNormal());
2523 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNormal());
2524 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNormal());
2525 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNormal());
2526 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNormal());
2527 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isNormal());
2530 TEST(APFloatTest, isFinite) {
2531 APFloat t(APFloat::IEEEsingle(), "0x1p+0");
2532 EXPECT_TRUE(t.isFinite());
2533 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isFinite());
2534 EXPECT_TRUE(APFloat::getZero(APFloat::IEEEsingle(), false).isFinite());
2535 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isFinite());
2536 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isFinite());
2537 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isFinite());
2540 TEST(APFloatTest, isInfinity) {
2541 APFloat t(APFloat::IEEEsingle(), "0x1p+0");
2542 EXPECT_FALSE(t.isInfinity());
2544 APFloat PosInf = APFloat::getInf(APFloat::IEEEsingle(), false);
2545 APFloat NegInf = APFloat::getInf(APFloat::IEEEsingle(), true);
2547 EXPECT_TRUE(PosInf.isInfinity());
2548 EXPECT_TRUE(PosInf.isPosInfinity());
2549 EXPECT_FALSE(PosInf.isNegInfinity());
2550 EXPECT_EQ(fcPosInf, PosInf.classify());
2552 EXPECT_TRUE(NegInf.isInfinity());
2553 EXPECT_FALSE(NegInf.isPosInfinity());
2554 EXPECT_TRUE(NegInf.isNegInfinity());
2555 EXPECT_EQ(fcNegInf, NegInf.classify());
2557 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isInfinity());
2558 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isInfinity());
2559 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isInfinity());
2560 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isInfinity());
2562 for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) {
2563 const fltSemantics &Semantics =
2564 APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I));
2565 if (APFloat::semanticsHasInf(Semantics)) {
2566 EXPECT_TRUE(APFloat::getInf(Semantics).isInfinity());
2571 TEST(APFloatTest, isNaN) {
2572 APFloat t(APFloat::IEEEsingle(), "0x1p+0");
2573 EXPECT_FALSE(t.isNaN());
2574 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isNaN());
2575 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isNaN());
2576 EXPECT_TRUE(APFloat::getNaN(APFloat::IEEEsingle(), false).isNaN());
2577 EXPECT_TRUE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isNaN());
2578 EXPECT_FALSE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isNaN());
2580 for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) {
2581 const fltSemantics &Semantics =
2582 APFloat::EnumToSemantics(static_cast<APFloat::Semantics>(I));
2583 if (APFloat::semanticsHasNaN(Semantics)) {
2584 EXPECT_TRUE(APFloat::getNaN(Semantics).isNaN());
2589 TEST(APFloatTest, isFiniteNonZero) {
2590 // Test positive/negative normal value.
2591 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p+0").isFiniteNonZero());
2592 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p+0").isFiniteNonZero());
2594 // Test positive/negative denormal value.
2595 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "0x1p-149").isFiniteNonZero());
2596 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p-149").isFiniteNonZero());
2598 // Test +/- Infinity.
2599 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), false).isFiniteNonZero());
2600 EXPECT_FALSE(APFloat::getInf(APFloat::IEEEsingle(), true).isFiniteNonZero());
2602 // Test +/- Zero.
2603 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), false).isFiniteNonZero());
2604 EXPECT_FALSE(APFloat::getZero(APFloat::IEEEsingle(), true).isFiniteNonZero());
2606 // Test +/- qNaN. +/- dont mean anything with qNaN but paranoia can't hurt in
2607 // this instance.
2608 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), false).isFiniteNonZero());
2609 EXPECT_FALSE(APFloat::getNaN(APFloat::IEEEsingle(), true).isFiniteNonZero());
2611 // Test +/- sNaN. +/- dont mean anything with sNaN but paranoia can't hurt in
2612 // this instance.
2613 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), false).isFiniteNonZero());
2614 EXPECT_FALSE(APFloat::getSNaN(APFloat::IEEEsingle(), true).isFiniteNonZero());
2617 TEST(APFloatTest, add) {
2618 // Test Special Cases against each other and normal values.
2620 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
2621 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
2622 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
2623 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
2624 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
2625 APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123");
2626 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
2627 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
2628 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
2629 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
2630 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
2631 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
2632 APFloat PSmallestNormalized =
2633 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
2634 APFloat MSmallestNormalized =
2635 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
2637 const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact;
2639 struct {
2640 APFloat x;
2641 APFloat y;
2642 const char *result;
2643 int status;
2644 int category;
2645 } SpecialCaseTests[] = {
2646 { PInf, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2647 { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2648 { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity },
2649 { PInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity },
2650 { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2651 { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2652 { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2653 { PInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2654 { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2655 { PInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2656 { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2657 { PInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2658 { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
2659 { PInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
2660 { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2661 { MInf, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2662 { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
2663 { MInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
2664 { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2665 { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2666 { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2667 { MInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2668 { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2669 { MInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2670 { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2671 { MInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2672 { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
2673 { MInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
2674 { PZero, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2675 { PZero, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2676 { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2677 { PZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2678 { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2679 { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2680 { PZero, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2681 { PZero, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2682 { PZero, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2683 { PZero, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2684 { PZero, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2685 { PZero, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2686 { PZero, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
2687 { PZero, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2688 { MZero, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2689 { MZero, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2690 { MZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2691 { MZero, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
2692 { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2693 { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2694 { MZero, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2695 { MZero, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2696 { MZero, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2697 { MZero, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2698 { MZero, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2699 { MZero, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2700 { MZero, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
2701 { MZero, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2702 { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN },
2703 { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN },
2704 { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN },
2705 { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN },
2706 { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2707 { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2708 { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
2709 { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
2710 { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2711 { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2712 { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2713 { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2714 { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
2715 { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
2716 { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2717 { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2718 { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2719 { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2720 { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2721 { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2722 { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2723 { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2724 { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2725 { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2726 { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2727 { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2728 { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2729 { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2730 { PNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2731 { PNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2732 { PNormalValue, PZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2733 { PNormalValue, MZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2734 { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2735 { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2736 { PNormalValue, PNormalValue, "0x1p+1", APFloat::opOK, APFloat::fcNormal },
2737 { PNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2738 { PNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2739 { PNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2740 { PNormalValue, PSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2741 { PNormalValue, MSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2742 { PNormalValue, PSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2743 { PNormalValue, MSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2744 { MNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2745 { MNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2746 { MNormalValue, PZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2747 { MNormalValue, MZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2748 { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2749 { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2750 { MNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2751 { MNormalValue, MNormalValue, "-0x1p+1", APFloat::opOK, APFloat::fcNormal },
2752 { MNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2753 { MNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2754 { MNormalValue, PSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2755 { MNormalValue, MSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2756 { MNormalValue, PSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2757 { MNormalValue, MSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2758 { PLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2759 { PLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2760 { PLargestValue, PZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2761 { PLargestValue, MZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2762 { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2763 { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2764 { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2765 { PLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2766 { PLargestValue, PLargestValue, "inf", OverflowStatus, APFloat::fcInfinity },
2767 { PLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2768 { PLargestValue, PSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2769 { PLargestValue, MSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2770 { PLargestValue, PSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2771 { PLargestValue, MSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2772 { MLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2773 { MLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2774 { MLargestValue, PZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2775 { MLargestValue, MZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2776 { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2777 { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2778 { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2779 { MLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2780 { MLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2781 { MLargestValue, MLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
2782 { MLargestValue, PSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2783 { MLargestValue, MSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2784 { MLargestValue, PSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2785 { MLargestValue, MSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2786 { PSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2787 { PSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2788 { PSmallestValue, PZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2789 { PSmallestValue, MZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2790 { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2791 { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2792 { PSmallestValue, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2793 { PSmallestValue, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2794 { PSmallestValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2795 { PSmallestValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2796 { PSmallestValue, PSmallestValue, "0x1p-148", APFloat::opOK, APFloat::fcNormal },
2797 { PSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2798 { PSmallestValue, PSmallestNormalized, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
2799 { PSmallestValue, MSmallestNormalized, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
2800 { MSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2801 { MSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2802 { MSmallestValue, PZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2803 { MSmallestValue, MZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2804 { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2805 { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2806 { MSmallestValue, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2807 { MSmallestValue, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2808 { MSmallestValue, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2809 { MSmallestValue, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2810 { MSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2811 { MSmallestValue, MSmallestValue, "-0x1p-148", APFloat::opOK, APFloat::fcNormal },
2812 { MSmallestValue, PSmallestNormalized, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
2813 { MSmallestValue, MSmallestNormalized, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
2814 { PSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2815 { PSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2816 { PSmallestNormalized, PZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
2817 { PSmallestNormalized, MZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
2818 { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2819 { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2820 { PSmallestNormalized, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2821 { PSmallestNormalized, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2822 { PSmallestNormalized, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2823 { PSmallestNormalized, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2824 { PSmallestNormalized, PSmallestValue, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
2825 { PSmallestNormalized, MSmallestValue, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
2826 { PSmallestNormalized, PSmallestNormalized, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
2827 { PSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2828 { MSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2829 { MSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2830 { MSmallestNormalized, PZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2831 { MSmallestNormalized, MZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2832 { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2833 { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2834 { MSmallestNormalized, PNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2835 { MSmallestNormalized, MNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2836 { MSmallestNormalized, PLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2837 { MSmallestNormalized, MLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2838 { MSmallestNormalized, PSmallestValue, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
2839 { MSmallestNormalized, MSmallestValue, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
2840 { MSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2841 { MSmallestNormalized, MSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }
2844 for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) {
2845 APFloat x(SpecialCaseTests[i].x);
2846 APFloat y(SpecialCaseTests[i].y);
2847 APFloat::opStatus status = x.add(y, APFloat::rmNearestTiesToEven);
2849 APFloat result(APFloat::IEEEsingle(), SpecialCaseTests[i].result);
2851 EXPECT_TRUE(result.bitwiseIsEqual(x));
2852 EXPECT_EQ(SpecialCaseTests[i].status, (int)status);
2853 EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory());
// Table-driven check of APFloat::subtract on IEEE single precision.
//
// Fourteen operands are defined (+/-inf, +/-0, a quiet NaN, a signaling NaN
// built from "snan123", +/-1.0, +/-largest, +/-smallest and +/-smallest
// normalized) and every ordered (x, y) pair is listed: 14 * 14 = 196 rows.
// Each row gives the expected value of x - y as a string, the expected
// opStatus bits and the expected category. Every subtraction is performed
// with rmNearestTiesToEven.
2857 TEST(APFloatTest, subtract) {
2858 // Test special cases against each other and against normal values.
// Operands, all in the IEEEsingle semantics. The boolean passed to the
// get* factories selects the negative variant (the M* names).
2860 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
2861 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
2862 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
2863 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
2864 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
2865 APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123");
2866 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
2867 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
2868 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
2869 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
2870 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
2871 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
2872 APFloat PSmallestNormalized =
2873 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
2874 APFloat MSmallestNormalized =
2875 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
// Expected status for the two rows whose exact result exceeds the largest
// finite value (+largest - -largest and -largest - +largest).
2877 const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact;
// One expectation row. `status` and `category` are plain ints (the loop
// below casts the actual values to int before comparing); OverflowStatus is
// an OR of two opStatus flags. `result` is parsed as an IEEEsingle APFloat
// when the row is checked.
2879 struct {
2880 APFloat x;
2881 APFloat y;
2882 const char *result;
2883 int status;
2884 int category;
2885 } SpecialCaseTests[] = {
// x = +inf: (+inf) - (+inf) is invalid (NaN); any other non-NaN y leaves +inf.
2886 { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2887 { PInf, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2888 { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity },
2889 { PInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity },
2890 { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2891 { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2892 { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2893 { PInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2894 { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2895 { PInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2896 { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2897 { PInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
2898 { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
2899 { PInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
// x = -inf: (-inf) - (-inf) is invalid (NaN); any other non-NaN y leaves -inf.
2900 { MInf, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2901 { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2902 { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
2903 { MInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
2904 { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2905 { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2906 { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2907 { MInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2908 { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2909 { MInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2910 { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2911 { MInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
2912 { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
2913 { MInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
// x = +0: the result is -y for non-zero, non-NaN y, always exact (opOK).
// Both (+0) - (+0) and (+0) - (-0) are expected to be +0.
2914 { PZero, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2915 { PZero, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2916 { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2917 { PZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2918 { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2919 { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2920 { PZero, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2921 { PZero, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2922 { PZero, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2923 { PZero, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2924 { PZero, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2925 { PZero, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2926 { PZero, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2927 { PZero, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
// x = -0: same as the +0 rows except for the zero operands:
// (-0) - (+0) is expected to be -0, while (-0) - (-0) is +0.
2928 { MZero, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2929 { MZero, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2930 { MZero, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
2931 { MZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2932 { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2933 { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2934 { MZero, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2935 { MZero, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2936 { MZero, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2937 { MZero, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
2938 { MZero, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
2939 { MZero, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
2940 { MZero, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
2941 { MZero, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
// x = quiet NaN: the expected result is "nan" for every y. Only the row with
// the signaling-NaN y expects opInvalidOp; note its result is still "nan",
// not "nan123".
2942 { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN },
2943 { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN },
2944 { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN },
2945 { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN },
2946 { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2947 { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
2948 { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
2949 { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
2950 { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2951 { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2952 { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2953 { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
2954 { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
2955 { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
// x = signaling NaN: every row expects "nan123" with opInvalidOp. The
// expected string drops the "s" of the "snan123" operand -- presumably the
// quieted form of the same payload; confirm against APFloat's string parser.
2956 { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2957 { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2958 { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2959 { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2960 { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2961 { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2962 { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2963 { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2964 { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2965 { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2966 { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2967 { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2968 { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2969 { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
// x = +1.0: against the largest/smallest operands the expected result is the
// larger-magnitude operand's value with opInexact (the other operand is
// rounded away).
2970 { PNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2971 { PNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2972 { PNormalValue, PZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2973 { PNormalValue, MZero, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
2974 { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2975 { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2976 { PNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2977 { PNormalValue, MNormalValue, "0x1p+1", APFloat::opOK, APFloat::fcNormal },
2978 { PNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2979 { PNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2980 { PNormalValue, PSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2981 { PNormalValue, MSmallestValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2982 { PNormalValue, PSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2983 { PNormalValue, MSmallestNormalized, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
// x = -1.0.
2984 { MNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2985 { MNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
2986 { MNormalValue, PZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2987 { MNormalValue, MZero, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
2988 { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
2989 { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
2990 { MNormalValue, PNormalValue, "-0x1p+1", APFloat::opOK, APFloat::fcNormal },
2991 { MNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
2992 { MNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2993 { MNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
2994 { MNormalValue, PSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2995 { MNormalValue, MSmallestValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2996 { MNormalValue, PSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
2997 { MNormalValue, MSmallestNormalized, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
// x = +largest (0x1.fffffep+127): (+largest) - (-largest) is the overflow
// row, expecting +inf with OverflowStatus.
2998 { PLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
2999 { PLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3000 { PLargestValue, PZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3001 { PLargestValue, MZero, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3002 { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3003 { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3004 { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3005 { PLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3006 { PLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3007 { PLargestValue, MLargestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3008 { PLargestValue, PSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3009 { PLargestValue, MSmallestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3010 { PLargestValue, PSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3011 { PLargestValue, MSmallestNormalized, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
// x = -largest (-0x1.fffffep+127): (-largest) - (+largest) is the overflow
// row, expecting -inf with OverflowStatus.
3012 { MLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3013 { MLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3014 { MLargestValue, PZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3015 { MLargestValue, MZero, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3016 { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3017 { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3018 { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3019 { MLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3020 { MLargestValue, PLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3021 { MLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3022 { MLargestValue, PSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3023 { MLargestValue, MSmallestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3024 { MLargestValue, PSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3025 { MLargestValue, MSmallestNormalized, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
// x = +smallest (0x1p-149): differences with the other smallest /
// smallest-normalized operands are expected to be exact (opOK).
3026 { PSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3027 { PSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3028 { PSmallestValue, PZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3029 { PSmallestValue, MZero, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3030 { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3031 { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3032 { PSmallestValue, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3033 { PSmallestValue, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3034 { PSmallestValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3035 { PSmallestValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3036 { PSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3037 { PSmallestValue, MSmallestValue, "0x1p-148", APFloat::opOK, APFloat::fcNormal },
3038 { PSmallestValue, PSmallestNormalized, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
3039 { PSmallestValue, MSmallestNormalized, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
// x = -smallest (-0x1p-149).
3040 { MSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3041 { MSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3042 { MSmallestValue, PZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3043 { MSmallestValue, MZero, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3044 { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3045 { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3046 { MSmallestValue, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3047 { MSmallestValue, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3048 { MSmallestValue, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3049 { MSmallestValue, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3050 { MSmallestValue, PSmallestValue, "-0x1p-148", APFloat::opOK, APFloat::fcNormal },
3051 { MSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3052 { MSmallestValue, PSmallestNormalized, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
3053 { MSmallestValue, MSmallestNormalized, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
// x = +smallest normalized (0x1p-126).
3054 { PSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3055 { PSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3056 { PSmallestNormalized, PZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3057 { PSmallestNormalized, MZero, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3058 { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3059 { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3060 { PSmallestNormalized, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3061 { PSmallestNormalized, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3062 { PSmallestNormalized, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3063 { PSmallestNormalized, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3064 { PSmallestNormalized, PSmallestValue, "0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
3065 { PSmallestNormalized, MSmallestValue, "0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
3066 { PSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3067 { PSmallestNormalized, MSmallestNormalized, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
// x = -smallest normalized (-0x1p-126).
3068 { MSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3069 { MSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3070 { MSmallestNormalized, PZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3071 { MSmallestNormalized, MZero, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3072 { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3073 { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3074 { MSmallestNormalized, PNormalValue, "-0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3075 { MSmallestNormalized, MNormalValue, "0x1p+0", APFloat::opInexact, APFloat::fcNormal },
3076 { MSmallestNormalized, PLargestValue, "-0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3077 { MSmallestNormalized, MLargestValue, "0x1.fffffep+127", APFloat::opInexact, APFloat::fcNormal },
3078 { MSmallestNormalized, PSmallestValue, "-0x1.000002p-126", APFloat::opOK, APFloat::fcNormal },
3079 { MSmallestNormalized, MSmallestValue, "-0x1.fffffcp-127", APFloat::opOK, APFloat::fcNormal },
3080 { MSmallestNormalized, PSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
3081 { MSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }
// Run every row. x and y are fresh copies of the table entries; after the
// call, x is the value compared against the expected result.
3084 for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) {
3085 APFloat x(SpecialCaseTests[i].x);
3086 APFloat y(SpecialCaseTests[i].y);
3087 APFloat::opStatus status = x.subtract(y, APFloat::rmNearestTiesToEven);
// Build the expected value by parsing the row's string as an IEEEsingle.
3089 APFloat result(APFloat::IEEEsingle(), SpecialCaseTests[i].result);
// Three independent checks per row: value, status flags and category.
// NOTE(review): bitwiseIsEqual is presumably an exact bit-pattern comparison,
// which would be why the table distinguishes "0x0p+0" from "-0x0p+0" --
// confirm against the APFloat header.
3091 EXPECT_TRUE(result.bitwiseIsEqual(x));
3092 EXPECT_EQ(SpecialCaseTests[i].status, (int)status);
3093 EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory());
3097 TEST(APFloatTest, multiply) {
3098 // Test Special Cases against each other and normal values.
3100 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
3101 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
3102 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
3103 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
3104 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
3105 APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123");
3106 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
3107 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
3108 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
3109 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
3110 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
3111 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
3112 APFloat PSmallestNormalized =
3113 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
3114 APFloat MSmallestNormalized =
3115 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
3117 APFloat MaxQuad(APFloat::IEEEquad(),
3118 "0x1.ffffffffffffffffffffffffffffp+16383");
3119 APFloat MinQuad(APFloat::IEEEquad(),
3120 "0x0.0000000000000000000000000001p-16382");
3121 APFloat NMinQuad(APFloat::IEEEquad(),
3122 "-0x0.0000000000000000000000000001p-16382");
3124 const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact;
3125 const int UnderflowStatus = APFloat::opUnderflow | APFloat::opInexact;
3127 struct {
3128 APFloat x;
3129 APFloat y;
3130 const char *result;
3131 int status;
3132 int category;
3133 APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
3134 } SpecialCaseTests[] = {
3135 { PInf, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3136 { PInf, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3137 { PInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3138 { PInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3139 { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3140 { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3141 { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3142 { PInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3143 { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3144 { PInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3145 { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3146 { PInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3147 { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
3148 { PInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
3149 { MInf, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3150 { MInf, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3151 { MInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3152 { MInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3153 { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3154 { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3155 { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3156 { MInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3157 { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3158 { MInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3159 { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3160 { MInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3161 { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
3162 { MInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
3163 { PZero, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3164 { PZero, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3165 { PZero, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3166 { PZero, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3167 { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3168 { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3169 { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3170 { PZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3171 { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3172 { PZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3173 { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3174 { PZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3175 { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3176 { PZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3177 { MZero, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3178 { MZero, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3179 { MZero, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3180 { MZero, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3181 { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3182 { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3183 { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3184 { MZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3185 { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3186 { MZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3187 { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3188 { MZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3189 { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3190 { MZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3191 { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN },
3192 { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN },
3193 { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN },
3194 { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN },
3195 { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3196 { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3197 { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
3198 { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
3199 { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3200 { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3201 { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3202 { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3203 { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
3204 { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
3205 { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3206 { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3207 { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3208 { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3209 { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3210 { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3211 { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3212 { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3213 { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3214 { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3215 { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3216 { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3217 { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3218 { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3219 { PNormalValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3220 { PNormalValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3221 { PNormalValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3222 { PNormalValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3223 { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3224 { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3225 { PNormalValue, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3226 { PNormalValue, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3227 { PNormalValue, PLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3228 { PNormalValue, MLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3229 { PNormalValue, PSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3230 { PNormalValue, MSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3231 { PNormalValue, PSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3232 { PNormalValue, MSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3233 { MNormalValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3234 { MNormalValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3235 { MNormalValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3236 { MNormalValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3237 { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3238 { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3239 { MNormalValue, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3240 { MNormalValue, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3241 { MNormalValue, PLargestValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3242 { MNormalValue, MLargestValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3243 { MNormalValue, PSmallestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3244 { MNormalValue, MSmallestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3245 { MNormalValue, PSmallestNormalized, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3246 { MNormalValue, MSmallestNormalized, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3247 { PLargestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3248 { PLargestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3249 { PLargestValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3250 { PLargestValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3251 { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3252 { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3253 { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3254 { PLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3255 { PLargestValue, PLargestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3256 { PLargestValue, MLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3257 { PLargestValue, PSmallestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3258 { PLargestValue, MSmallestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3259 { PLargestValue, PSmallestNormalized, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3260 { PLargestValue, MSmallestNormalized, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3261 { MLargestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3262 { MLargestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3263 { MLargestValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3264 { MLargestValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3265 { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3266 { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3267 { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3268 { MLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3269 { MLargestValue, PLargestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3270 { MLargestValue, MLargestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3271 { MLargestValue, PSmallestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3272 { MLargestValue, MSmallestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3273 { MLargestValue, PSmallestNormalized, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3274 { MLargestValue, MSmallestNormalized, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3275 { PSmallestValue, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3276 { PSmallestValue, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3277 { PSmallestValue, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3278 { PSmallestValue, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3279 { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3280 { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3281 { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3282 { PSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3283 { PSmallestValue, PLargestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3284 { PSmallestValue, MLargestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3285 { PSmallestValue, PSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3286 { PSmallestValue, MSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3287 { PSmallestValue, PSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3288 { PSmallestValue, MSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3289 { MSmallestValue, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3290 { MSmallestValue, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3291 { MSmallestValue, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3292 { MSmallestValue, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3293 { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3294 { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3295 { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3296 { MSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3297 { MSmallestValue, PLargestValue, "-0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3298 { MSmallestValue, MLargestValue, "0x1.fffffep-22", APFloat::opOK, APFloat::fcNormal },
3299 { MSmallestValue, PSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3300 { MSmallestValue, MSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3301 { MSmallestValue, PSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3302 { MSmallestValue, MSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3303 { PSmallestNormalized, PInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3304 { PSmallestNormalized, MInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3305 { PSmallestNormalized, PZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3306 { PSmallestNormalized, MZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3307 { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3308 { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3309 { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3310 { PSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3311 { PSmallestNormalized, PLargestValue, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3312 { PSmallestNormalized, MLargestValue, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3313 { PSmallestNormalized, PSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3314 { PSmallestNormalized, MSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3315 { PSmallestNormalized, PSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3316 { PSmallestNormalized, MSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3317 { MSmallestNormalized, PInf, "-inf", APFloat::opOK, APFloat::fcInfinity },
3318 { MSmallestNormalized, MInf, "inf", APFloat::opOK, APFloat::fcInfinity },
3319 { MSmallestNormalized, PZero, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3320 { MSmallestNormalized, MZero, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3321 { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3322 { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3323 { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3324 { MSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3325 { MSmallestNormalized, PLargestValue, "-0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3326 { MSmallestNormalized, MLargestValue, "0x1.fffffep+1", APFloat::opOK, APFloat::fcNormal },
3327 { MSmallestNormalized, PSmallestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3328 { MSmallestNormalized, MSmallestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3329 { MSmallestNormalized, PSmallestNormalized, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3330 { MSmallestNormalized, MSmallestNormalized, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3332 {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3333 APFloat::fcNormal, APFloat::rmNearestTiesToEven},
3334 {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3335 APFloat::fcNormal, APFloat::rmTowardPositive},
3336 {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3337 APFloat::fcNormal, APFloat::rmTowardNegative},
3338 {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3339 APFloat::fcNormal, APFloat::rmTowardZero},
3340 {MaxQuad, MinQuad, "0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3341 APFloat::fcNormal, APFloat::rmNearestTiesToAway},
3343 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3344 APFloat::fcNormal, APFloat::rmNearestTiesToEven},
3345 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3346 APFloat::fcNormal, APFloat::rmTowardPositive},
3347 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3348 APFloat::fcNormal, APFloat::rmTowardNegative},
3349 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3350 APFloat::fcNormal, APFloat::rmTowardZero},
3351 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp-111", APFloat::opOK,
3352 APFloat::fcNormal, APFloat::rmNearestTiesToAway},
3354 {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity,
3355 APFloat::rmNearestTiesToEven},
3356 {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity,
3357 APFloat::rmTowardPositive},
3358 {MaxQuad, MaxQuad, "0x1.ffffffffffffffffffffffffffffp+16383",
3359 APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardNegative},
3360 {MaxQuad, MaxQuad, "0x1.ffffffffffffffffffffffffffffp+16383",
3361 APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardZero},
3362 {MaxQuad, MaxQuad, "inf", OverflowStatus, APFloat::fcInfinity,
3363 APFloat::rmNearestTiesToAway},
3365 {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero,
3366 APFloat::rmNearestTiesToEven},
3367 {MinQuad, MinQuad, "0x0.0000000000000000000000000001p-16382",
3368 UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardPositive},
3369 {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero,
3370 APFloat::rmTowardNegative},
3371 {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero,
3372 APFloat::rmTowardZero},
3373 {MinQuad, MinQuad, "0", UnderflowStatus, APFloat::fcZero,
3374 APFloat::rmNearestTiesToAway},
3376 {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero,
3377 APFloat::rmNearestTiesToEven},
3378 {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero,
3379 APFloat::rmTowardPositive},
3380 {MinQuad, NMinQuad, "-0x0.0000000000000000000000000001p-16382",
3381 UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardNegative},
3382 {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero,
3383 APFloat::rmTowardZero},
3384 {MinQuad, NMinQuad, "-0", UnderflowStatus, APFloat::fcZero,
3385 APFloat::rmNearestTiesToAway},
3388 for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) {
3389 APFloat x(SpecialCaseTests[i].x);
3390 APFloat y(SpecialCaseTests[i].y);
3391 APFloat::opStatus status = x.multiply(y, SpecialCaseTests[i].roundingMode);
3393 APFloat result(x.getSemantics(), SpecialCaseTests[i].result);
3395 EXPECT_TRUE(result.bitwiseIsEqual(x));
3396 EXPECT_EQ(SpecialCaseTests[i].status, (int)status);
3397 EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory());
3401 TEST(APFloatTest, divide) {
3402 // Test Special Cases against each other and normal values.
3404 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
3405 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
3406 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
3407 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
3408 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
3409 APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123");
3410 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
3411 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
3412 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
3413 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
3414 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
3415 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
3416 APFloat PSmallestNormalized =
3417 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
3418 APFloat MSmallestNormalized =
3419 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
3421 APFloat MaxQuad(APFloat::IEEEquad(),
3422 "0x1.ffffffffffffffffffffffffffffp+16383");
3423 APFloat MinQuad(APFloat::IEEEquad(),
3424 "0x0.0000000000000000000000000001p-16382");
3425 APFloat NMinQuad(APFloat::IEEEquad(),
3426 "-0x0.0000000000000000000000000001p-16382");
3428 const int OverflowStatus = APFloat::opOverflow | APFloat::opInexact;
3429 const int UnderflowStatus = APFloat::opUnderflow | APFloat::opInexact;
3431 struct {
3432 APFloat x;
3433 APFloat y;
3434 const char *result;
3435 int status;
3436 int category;
3437 APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
3438 } SpecialCaseTests[] = {
3439 { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3440 { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3441 { PInf, PZero, "inf", APFloat::opOK, APFloat::fcInfinity },
3442 { PInf, MZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
3443 { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3444 { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3445 { PInf, PNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3446 { PInf, MNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3447 { PInf, PLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3448 { PInf, MLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3449 { PInf, PSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3450 { PInf, MSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3451 { PInf, PSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
3452 { PInf, MSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
3453 { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3454 { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3455 { MInf, PZero, "-inf", APFloat::opOK, APFloat::fcInfinity },
3456 { MInf, MZero, "inf", APFloat::opOK, APFloat::fcInfinity },
3457 { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3458 { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3459 { MInf, PNormalValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3460 { MInf, MNormalValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3461 { MInf, PLargestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3462 { MInf, MLargestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3463 { MInf, PSmallestValue, "-inf", APFloat::opOK, APFloat::fcInfinity },
3464 { MInf, MSmallestValue, "inf", APFloat::opOK, APFloat::fcInfinity },
3465 { MInf, PSmallestNormalized, "-inf", APFloat::opOK, APFloat::fcInfinity },
3466 { MInf, MSmallestNormalized, "inf", APFloat::opOK, APFloat::fcInfinity },
3467 { PZero, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3468 { PZero, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3469 { PZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3470 { PZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3471 { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3472 { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3473 { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3474 { PZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3475 { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3476 { PZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3477 { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3478 { PZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3479 { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3480 { PZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3481 { MZero, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3482 { MZero, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3483 { MZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3484 { MZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3485 { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3486 { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3487 { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3488 { MZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3489 { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3490 { MZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3491 { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3492 { MZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3493 { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3494 { MZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3495 { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN },
3496 { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN },
3497 { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN },
3498 { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN },
3499 { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3500 { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
3501 { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
3502 { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
3503 { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3504 { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3505 { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3506 { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
3507 { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
3508 { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
3509 { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3510 { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3511 { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3512 { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3513 { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3514 { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3515 { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3516 { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3517 { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3518 { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3519 { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3520 { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3521 { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3522 { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3523 { PNormalValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3524 { PNormalValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3525 { PNormalValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3526 { PNormalValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3527 { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3528 { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3529 { PNormalValue, PNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3530 { PNormalValue, MNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3531 { PNormalValue, PLargestValue, "0x1p-128", UnderflowStatus, APFloat::fcNormal },
3532 { PNormalValue, MLargestValue, "-0x1p-128", UnderflowStatus, APFloat::fcNormal },
3533 { PNormalValue, PSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3534 { PNormalValue, MSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3535 { PNormalValue, PSmallestNormalized, "0x1p+126", APFloat::opOK, APFloat::fcNormal },
3536 { PNormalValue, MSmallestNormalized, "-0x1p+126", APFloat::opOK, APFloat::fcNormal },
3537 { MNormalValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3538 { MNormalValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3539 { MNormalValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3540 { MNormalValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3541 { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3542 { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3543 { MNormalValue, PNormalValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3544 { MNormalValue, MNormalValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3545 { MNormalValue, PLargestValue, "-0x1p-128", UnderflowStatus, APFloat::fcNormal },
3546 { MNormalValue, MLargestValue, "0x1p-128", UnderflowStatus, APFloat::fcNormal },
3547 { MNormalValue, PSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3548 { MNormalValue, MSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3549 { MNormalValue, PSmallestNormalized, "-0x1p+126", APFloat::opOK, APFloat::fcNormal },
3550 { MNormalValue, MSmallestNormalized, "0x1p+126", APFloat::opOK, APFloat::fcNormal },
3551 { PLargestValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3552 { PLargestValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3553 { PLargestValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3554 { PLargestValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3555 { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3556 { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3557 { PLargestValue, PNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3558 { PLargestValue, MNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3559 { PLargestValue, PLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3560 { PLargestValue, MLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3561 { PLargestValue, PSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3562 { PLargestValue, MSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3563 { PLargestValue, PSmallestNormalized, "inf", OverflowStatus, APFloat::fcInfinity },
3564 { PLargestValue, MSmallestNormalized, "-inf", OverflowStatus, APFloat::fcInfinity },
3565 { MLargestValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3566 { MLargestValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3567 { MLargestValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3568 { MLargestValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3569 { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3570 { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3571 { MLargestValue, PNormalValue, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3572 { MLargestValue, MNormalValue, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
3573 { MLargestValue, PLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3574 { MLargestValue, MLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3575 { MLargestValue, PSmallestValue, "-inf", OverflowStatus, APFloat::fcInfinity },
3576 { MLargestValue, MSmallestValue, "inf", OverflowStatus, APFloat::fcInfinity },
3577 { MLargestValue, PSmallestNormalized, "-inf", OverflowStatus, APFloat::fcInfinity },
3578 { MLargestValue, MSmallestNormalized, "inf", OverflowStatus, APFloat::fcInfinity },
3579 { PSmallestValue, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3580 { PSmallestValue, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3581 { PSmallestValue, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3582 { PSmallestValue, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3583 { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3584 { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3585 { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3586 { PSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3587 { PSmallestValue, PLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3588 { PSmallestValue, MLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3589 { PSmallestValue, PSmallestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3590 { PSmallestValue, MSmallestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3591 { PSmallestValue, PSmallestNormalized, "0x1p-23", APFloat::opOK, APFloat::fcNormal },
3592 { PSmallestValue, MSmallestNormalized, "-0x1p-23", APFloat::opOK, APFloat::fcNormal },
3593 { MSmallestValue, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3594 { MSmallestValue, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3595 { MSmallestValue, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3596 { MSmallestValue, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3597 { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3598 { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3599 { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
3600 { MSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
3601 { MSmallestValue, PLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3602 { MSmallestValue, MLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3603 { MSmallestValue, PSmallestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3604 { MSmallestValue, MSmallestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3605 { MSmallestValue, PSmallestNormalized, "-0x1p-23", APFloat::opOK, APFloat::fcNormal },
3606 { MSmallestValue, MSmallestNormalized, "0x1p-23", APFloat::opOK, APFloat::fcNormal },
3607 { PSmallestNormalized, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3608 { PSmallestNormalized, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3609 { PSmallestNormalized, PZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3610 { PSmallestNormalized, MZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3611 { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3612 { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3613 { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3614 { PSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3615 { PSmallestNormalized, PLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3616 { PSmallestNormalized, MLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3617 { PSmallestNormalized, PSmallestValue, "0x1p+23", APFloat::opOK, APFloat::fcNormal },
3618 { PSmallestNormalized, MSmallestValue, "-0x1p+23", APFloat::opOK, APFloat::fcNormal },
3619 { PSmallestNormalized, PSmallestNormalized, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3620 { PSmallestNormalized, MSmallestNormalized, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3621 { MSmallestNormalized, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
3622 { MSmallestNormalized, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
3623 { MSmallestNormalized, PZero, "-inf", APFloat::opDivByZero, APFloat::fcInfinity },
3624 { MSmallestNormalized, MZero, "inf", APFloat::opDivByZero, APFloat::fcInfinity },
3625 { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
3626 { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
3627 { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
3628 { MSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
3629 { MSmallestNormalized, PLargestValue, "-0x0p+0", UnderflowStatus, APFloat::fcZero },
3630 { MSmallestNormalized, MLargestValue, "0x0p+0", UnderflowStatus, APFloat::fcZero },
3631 { MSmallestNormalized, PSmallestValue, "-0x1p+23", APFloat::opOK, APFloat::fcNormal },
3632 { MSmallestNormalized, MSmallestValue, "0x1p+23", APFloat::opOK, APFloat::fcNormal },
3633 { MSmallestNormalized, PSmallestNormalized, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
3634 { MSmallestNormalized, MSmallestNormalized, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
3636 {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity,
3637 APFloat::rmNearestTiesToEven},
3638 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp+16383",
3639 APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardPositive},
3640 {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity,
3641 APFloat::rmTowardNegative},
3642 {MaxQuad, NMinQuad, "-0x1.ffffffffffffffffffffffffffffp+16383",
3643 APFloat::opInexact, APFloat::fcNormal, APFloat::rmTowardZero},
3644 {MaxQuad, NMinQuad, "-inf", OverflowStatus, APFloat::fcInfinity,
3645 APFloat::rmNearestTiesToAway},
3647 {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero,
3648 APFloat::rmNearestTiesToEven},
3649 {MinQuad, MaxQuad, "0x0.0000000000000000000000000001p-16382",
3650 UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardPositive},
3651 {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero,
3652 APFloat::rmTowardNegative},
3653 {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero,
3654 APFloat::rmTowardZero},
3655 {MinQuad, MaxQuad, "0", UnderflowStatus, APFloat::fcZero,
3656 APFloat::rmNearestTiesToAway},
3658 {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero,
3659 APFloat::rmNearestTiesToEven},
3660 {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero,
3661 APFloat::rmTowardPositive},
3662 {NMinQuad, MaxQuad, "-0x0.0000000000000000000000000001p-16382",
3663 UnderflowStatus, APFloat::fcNormal, APFloat::rmTowardNegative},
3664 {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero,
3665 APFloat::rmTowardZero},
3666 {NMinQuad, MaxQuad, "-0", UnderflowStatus, APFloat::fcZero,
3667 APFloat::rmNearestTiesToAway},
3670 for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) {
3671 APFloat x(SpecialCaseTests[i].x);
3672 APFloat y(SpecialCaseTests[i].y);
3673 APFloat::opStatus status = x.divide(y, SpecialCaseTests[i].roundingMode);
3675 APFloat result(x.getSemantics(), SpecialCaseTests[i].result);
3677 EXPECT_TRUE(result.bitwiseIsEqual(x));
3678 EXPECT_EQ(SpecialCaseTests[i].status, (int)status);
3679 EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory());
3683 TEST(APFloatTest, operatorOverloads) {
3684 // This is mostly testing that these operator overloads compile.
3685 APFloat One = APFloat(APFloat::IEEEsingle(), "0x1p+0");
3686 APFloat Two = APFloat(APFloat::IEEEsingle(), "0x2p+0");
3687 EXPECT_TRUE(Two.bitwiseIsEqual(One + One));
3688 EXPECT_TRUE(One.bitwiseIsEqual(Two - One));
3689 EXPECT_TRUE(Two.bitwiseIsEqual(One * Two));
3690 EXPECT_TRUE(One.bitwiseIsEqual(Two / Two));
3693 TEST(APFloatTest, Comparisons) {
3694 enum {MNan, MInf, MBig, MOne, MZer, PZer, POne, PBig, PInf, PNan, NumVals};
3695 APFloat Vals[NumVals] = {
3696 APFloat::getNaN(APFloat::IEEEsingle(), true),
3697 APFloat::getInf(APFloat::IEEEsingle(), true),
3698 APFloat::getLargest(APFloat::IEEEsingle(), true),
3699 APFloat(APFloat::IEEEsingle(), "-0x1p+0"),
3700 APFloat::getZero(APFloat::IEEEsingle(), true),
3701 APFloat::getZero(APFloat::IEEEsingle(), false),
3702 APFloat(APFloat::IEEEsingle(), "0x1p+0"),
3703 APFloat::getLargest(APFloat::IEEEsingle(), false),
3704 APFloat::getInf(APFloat::IEEEsingle(), false),
3705 APFloat::getNaN(APFloat::IEEEsingle(), false),
3707 using Relation = void (*)(const APFloat &, const APFloat &);
3708 Relation LT = [](const APFloat &LHS, const APFloat &RHS) {
3709 EXPECT_FALSE(LHS == RHS);
3710 EXPECT_TRUE(LHS != RHS);
3711 EXPECT_TRUE(LHS < RHS);
3712 EXPECT_FALSE(LHS > RHS);
3713 EXPECT_TRUE(LHS <= RHS);
3714 EXPECT_FALSE(LHS >= RHS);
3716 Relation EQ = [](const APFloat &LHS, const APFloat &RHS) {
3717 EXPECT_TRUE(LHS == RHS);
3718 EXPECT_FALSE(LHS != RHS);
3719 EXPECT_FALSE(LHS < RHS);
3720 EXPECT_FALSE(LHS > RHS);
3721 EXPECT_TRUE(LHS <= RHS);
3722 EXPECT_TRUE(LHS >= RHS);
3724 Relation GT = [](const APFloat &LHS, const APFloat &RHS) {
3725 EXPECT_FALSE(LHS == RHS);
3726 EXPECT_TRUE(LHS != RHS);
3727 EXPECT_FALSE(LHS < RHS);
3728 EXPECT_TRUE(LHS > RHS);
3729 EXPECT_FALSE(LHS <= RHS);
3730 EXPECT_TRUE(LHS >= RHS);
3732 Relation UN = [](const APFloat &LHS, const APFloat &RHS) {
3733 EXPECT_FALSE(LHS == RHS);
3734 EXPECT_TRUE(LHS != RHS);
3735 EXPECT_FALSE(LHS < RHS);
3736 EXPECT_FALSE(LHS > RHS);
3737 EXPECT_FALSE(LHS <= RHS);
3738 EXPECT_FALSE(LHS >= RHS);
3740 Relation Relations[NumVals][NumVals] = {
3741 // -N -I -B -1 -0 +0 +1 +B +I +N
3742 /* MNan */ {UN, UN, UN, UN, UN, UN, UN, UN, UN, UN},
3743 /* MInf */ {UN, EQ, LT, LT, LT, LT, LT, LT, LT, UN},
3744 /* MBig */ {UN, GT, EQ, LT, LT, LT, LT, LT, LT, UN},
3745 /* MOne */ {UN, GT, GT, EQ, LT, LT, LT, LT, LT, UN},
3746 /* MZer */ {UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN},
3747 /* PZer */ {UN, GT, GT, GT, EQ, EQ, LT, LT, LT, UN},
3748 /* POne */ {UN, GT, GT, GT, GT, GT, EQ, LT, LT, UN},
3749 /* PBig */ {UN, GT, GT, GT, GT, GT, GT, EQ, LT, UN},
3750 /* PInf */ {UN, GT, GT, GT, GT, GT, GT, GT, EQ, UN},
3751 /* PNan */ {UN, UN, UN, UN, UN, UN, UN, UN, UN, UN},
3753 for (unsigned I = 0; I < NumVals; ++I)
3754 for (unsigned J = 0; J < NumVals; ++J)
3755 Relations[I][J](Vals[I], Vals[J]);
3758 TEST(APFloatTest, abs) {
3759 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
3760 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
3761 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
3762 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
3763 APFloat PQNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
3764 APFloat MQNaN = APFloat::getNaN(APFloat::IEEEsingle(), true);
3765 APFloat PSNaN = APFloat::getSNaN(APFloat::IEEEsingle(), false);
3766 APFloat MSNaN = APFloat::getSNaN(APFloat::IEEEsingle(), true);
3767 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
3768 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
3769 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
3770 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
3771 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
3772 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
3773 APFloat PSmallestNormalized =
3774 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
3775 APFloat MSmallestNormalized =
3776 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
3778 EXPECT_TRUE(PInf.bitwiseIsEqual(abs(PInf)));
3779 EXPECT_TRUE(PInf.bitwiseIsEqual(abs(MInf)));
3780 EXPECT_TRUE(PZero.bitwiseIsEqual(abs(PZero)));
3781 EXPECT_TRUE(PZero.bitwiseIsEqual(abs(MZero)));
3782 EXPECT_TRUE(PQNaN.bitwiseIsEqual(abs(PQNaN)));
3783 EXPECT_TRUE(PQNaN.bitwiseIsEqual(abs(MQNaN)));
3784 EXPECT_TRUE(PSNaN.bitwiseIsEqual(abs(PSNaN)));
3785 EXPECT_TRUE(PSNaN.bitwiseIsEqual(abs(MSNaN)));
3786 EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(PNormalValue)));
3787 EXPECT_TRUE(PNormalValue.bitwiseIsEqual(abs(MNormalValue)));
3788 EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(PLargestValue)));
3789 EXPECT_TRUE(PLargestValue.bitwiseIsEqual(abs(MLargestValue)));
3790 EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(PSmallestValue)));
3791 EXPECT_TRUE(PSmallestValue.bitwiseIsEqual(abs(MSmallestValue)));
3792 EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(PSmallestNormalized)));
3793 EXPECT_TRUE(PSmallestNormalized.bitwiseIsEqual(abs(MSmallestNormalized)));
3796 TEST(APFloatTest, neg) {
3797 APFloat One = APFloat(APFloat::IEEEsingle(), "1.0");
3798 APFloat NegOne = APFloat(APFloat::IEEEsingle(), "-1.0");
3799 APFloat Zero = APFloat::getZero(APFloat::IEEEsingle(), false);
3800 APFloat NegZero = APFloat::getZero(APFloat::IEEEsingle(), true);
3801 APFloat Inf = APFloat::getInf(APFloat::IEEEsingle(), false);
3802 APFloat NegInf = APFloat::getInf(APFloat::IEEEsingle(), true);
3803 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
3804 APFloat NegQNaN = APFloat::getNaN(APFloat::IEEEsingle(), true);
3806 EXPECT_TRUE(NegOne.bitwiseIsEqual(neg(One)));
3807 EXPECT_TRUE(One.bitwiseIsEqual(neg(NegOne)));
3808 EXPECT_TRUE(NegZero.bitwiseIsEqual(neg(Zero)));
3809 EXPECT_TRUE(Zero.bitwiseIsEqual(neg(NegZero)));
3810 EXPECT_TRUE(NegInf.bitwiseIsEqual(neg(Inf)));
3811 EXPECT_TRUE(Inf.bitwiseIsEqual(neg(NegInf)));
3812 EXPECT_TRUE(NegInf.bitwiseIsEqual(neg(Inf)));
3813 EXPECT_TRUE(Inf.bitwiseIsEqual(neg(NegInf)));
3814 EXPECT_TRUE(NegQNaN.bitwiseIsEqual(neg(QNaN)));
3815 EXPECT_TRUE(QNaN.bitwiseIsEqual(neg(NegQNaN)));
3817 EXPECT_TRUE(NegOne.bitwiseIsEqual(-One));
3818 EXPECT_TRUE(One.bitwiseIsEqual(-NegOne));
3819 EXPECT_TRUE(NegZero.bitwiseIsEqual(-Zero));
3820 EXPECT_TRUE(Zero.bitwiseIsEqual(-NegZero));
3821 EXPECT_TRUE(NegInf.bitwiseIsEqual(-Inf));
3822 EXPECT_TRUE(Inf.bitwiseIsEqual(-NegInf));
3823 EXPECT_TRUE(NegInf.bitwiseIsEqual(-Inf));
3824 EXPECT_TRUE(Inf.bitwiseIsEqual(-NegInf));
3825 EXPECT_TRUE(NegQNaN.bitwiseIsEqual(-QNaN));
3826 EXPECT_TRUE(QNaN.bitwiseIsEqual(-NegQNaN));
3829 TEST(APFloatTest, ilogb) {
3830 EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), false)));
3831 EXPECT_EQ(-1074, ilogb(APFloat::getSmallest(APFloat::IEEEdouble(), true)));
3832 EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1024")));
3833 EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023")));
3834 EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023")));
3835 EXPECT_EQ(-51, ilogb(APFloat(APFloat::IEEEdouble(), "0x1p-51")));
3836 EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1023")));
3837 EXPECT_EQ(-2, ilogb(APFloat(APFloat::IEEEdouble(), "0x0.ffffp-1")));
3838 EXPECT_EQ(-1023, ilogb(APFloat(APFloat::IEEEdouble(), "0x1.fffep-1023")));
3839 EXPECT_EQ(1023, ilogb(APFloat::getLargest(APFloat::IEEEdouble(), false)));
3840 EXPECT_EQ(1023, ilogb(APFloat::getLargest(APFloat::IEEEdouble(), true)));
3843 EXPECT_EQ(0, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p+0")));
3844 EXPECT_EQ(0, ilogb(APFloat(APFloat::IEEEsingle(), "-0x1p+0")));
3845 EXPECT_EQ(42, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p+42")));
3846 EXPECT_EQ(-42, ilogb(APFloat(APFloat::IEEEsingle(), "0x1p-42")));
3848 EXPECT_EQ(APFloat::IEK_Inf,
3849 ilogb(APFloat::getInf(APFloat::IEEEsingle(), false)));
3850 EXPECT_EQ(APFloat::IEK_Inf,
3851 ilogb(APFloat::getInf(APFloat::IEEEsingle(), true)));
3852 EXPECT_EQ(APFloat::IEK_Zero,
3853 ilogb(APFloat::getZero(APFloat::IEEEsingle(), false)));
3854 EXPECT_EQ(APFloat::IEK_Zero,
3855 ilogb(APFloat::getZero(APFloat::IEEEsingle(), true)));
3856 EXPECT_EQ(APFloat::IEK_NaN,
3857 ilogb(APFloat::getNaN(APFloat::IEEEsingle(), false)));
3858 EXPECT_EQ(APFloat::IEK_NaN,
3859 ilogb(APFloat::getSNaN(APFloat::IEEEsingle(), false)));
3861 EXPECT_EQ(127, ilogb(APFloat::getLargest(APFloat::IEEEsingle(), false)));
3862 EXPECT_EQ(127, ilogb(APFloat::getLargest(APFloat::IEEEsingle(), true)));
3864 EXPECT_EQ(-149, ilogb(APFloat::getSmallest(APFloat::IEEEsingle(), false)));
3865 EXPECT_EQ(-149, ilogb(APFloat::getSmallest(APFloat::IEEEsingle(), true)));
3866 EXPECT_EQ(-126,
3867 ilogb(APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false)));
3868 EXPECT_EQ(-126,
3869 ilogb(APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true)));
// Exercises scalbn(value, exponent-delta, rounding-mode) under
// round-to-nearest-even for IEEE single and double values: identity scaling,
// special values, signaling-NaN quieting, overflow to infinity, underflow to
// zero, and inputs/results in the denormal range. All results are compared
// bit-for-bit or via classification predicates.
3872 TEST(APFloatTest, scalbn) {
3874 const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
// Scaling 1.0 by 0, +42 and -42 is expected to give exactly 2^0, 2^42, 2^-42.
3875 EXPECT_TRUE(
3876 APFloat(APFloat::IEEEsingle(), "0x1p+0")
3877 .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 0, RM)));
3878 EXPECT_TRUE(
3879 APFloat(APFloat::IEEEsingle(), "0x1p+42")
3880 .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 42, RM)));
3881 EXPECT_TRUE(
3882 APFloat(APFloat::IEEEsingle(), "0x1p-42")
3883 .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), -42, RM)));
// Single-precision special values used by the checks below.
3885 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
3886 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
3887 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
3888 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
3889 APFloat QPNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
3890 APFloat QMNaN = APFloat::getNaN(APFloat::IEEEsingle(), true);
3891 APFloat SNaN = APFloat::getSNaN(APFloat::IEEEsingle(), false);
// A zero scale is expected to return infinities, zeros and quiet NaNs
// unchanged, while a signaling NaN must come back non-signaling.
3893 EXPECT_TRUE(PInf.bitwiseIsEqual(scalbn(PInf, 0, RM)));
3894 EXPECT_TRUE(MInf.bitwiseIsEqual(scalbn(MInf, 0, RM)));
3895 EXPECT_TRUE(PZero.bitwiseIsEqual(scalbn(PZero, 0, RM)));
3896 EXPECT_TRUE(MZero.bitwiseIsEqual(scalbn(MZero, 0, RM)));
3897 EXPECT_TRUE(QPNaN.bitwiseIsEqual(scalbn(QPNaN, 0, RM)));
3898 EXPECT_TRUE(QMNaN.bitwiseIsEqual(scalbn(QMNaN, 0, RM)));
3899 EXPECT_FALSE(scalbn(SNaN, 0, RM).isSignaling());
3901 APFloat ScalbnSNaN = scalbn(SNaN, 1, RM);
3902 EXPECT_TRUE(ScalbnSNaN.isNaN() && !ScalbnSNaN.isSignaling());
3904 // Make sure highest bit of payload is preserved.
// NOTE(review): the final operand of this OR-chain and the closing `);` sit
// on a line that is not present in this extract -- confirm against the file.
3905 const APInt Payload(64, (UINT64_C(1) << 50) |
3906 (UINT64_C(1) << 49) |
3907 (UINT64_C(1234) << 32) |
3910 APFloat SNaNWithPayload = APFloat::getSNaN(APFloat::IEEEdouble(), false,
3911 &Payload);
// The quieted result must still carry the payload in its low 51 bits.
3912 APFloat QuietPayload = scalbn(SNaNWithPayload, 1, RM);
3913 EXPECT_TRUE(QuietPayload.isNaN() && !QuietPayload.isSignaling());
3914 EXPECT_EQ(Payload, QuietPayload.bitcastToAPInt().getLoBits(51));
// Single-precision range limits: results above the largest exponent are
// expected to be signed infinities; results below the smallest denormal
// (2^-149) are expected to be signed zeros.
3916 EXPECT_TRUE(PInf.bitwiseIsEqual(
3917 scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+0"), 128, RM)));
3918 EXPECT_TRUE(MInf.bitwiseIsEqual(
3919 scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p+0"), 128, RM)));
3920 EXPECT_TRUE(PInf.bitwiseIsEqual(
3921 scalbn(APFloat(APFloat::IEEEsingle(), "0x1p+127"), 1, RM)));
3922 EXPECT_TRUE(PZero.bitwiseIsEqual(
3923 scalbn(APFloat(APFloat::IEEEsingle(), "0x1p-127"), -127, RM)));
3924 EXPECT_TRUE(MZero.bitwiseIsEqual(
3925 scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p-127"), -127, RM)));
3926 EXPECT_TRUE(APFloat(APFloat::IEEEsingle(), "-0x1p-149").bitwiseIsEqual(
3927 scalbn(APFloat(APFloat::IEEEsingle(), "-0x1p-127"), -22, RM)));
3928 EXPECT_TRUE(PZero.bitwiseIsEqual(
3929 scalbn(APFloat(APFloat::IEEEsingle(), "0x1p-126"), -24, RM)));
// Double-precision fixtures.
3932 APFloat SmallestF64 = APFloat::getSmallest(APFloat::IEEEdouble(), false);
3933 APFloat NegSmallestF64 = APFloat::getSmallest(APFloat::IEEEdouble(), true);
3935 APFloat LargestF64 = APFloat::getLargest(APFloat::IEEEdouble(), false);
3936 APFloat NegLargestF64 = APFloat::getLargest(APFloat::IEEEdouble(), true);
// NOTE(review): the two SmallestNormalized values below are not referenced by
// any line of this test visible in this extract.
3938 APFloat SmallestNormalizedF64
3939 = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false);
3940 APFloat NegSmallestNormalizedF64
3941 = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true);
3943 APFloat LargestDenormalF64(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023");
3944 APFloat NegLargestDenormalF64(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023");
// The smallest double is checked against the literal 2^-1074.
3947 EXPECT_TRUE(SmallestF64.bitwiseIsEqual(
3948 scalbn(APFloat(APFloat::IEEEdouble(), "0x1p-1074"), 0, RM)));
3949 EXPECT_TRUE(NegSmallestF64.bitwiseIsEqual(
3950 scalbn(APFloat(APFloat::IEEEdouble(), "-0x1p-1074"), 0, RM)));
// Scaling 2^-1074 up by 2097 is expected to reach 2^1023; by 2098 or more it
// is expected to overflow. Scaling it further down is expected to give +0.
3952 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1023")
3953 .bitwiseIsEqual(scalbn(SmallestF64, 2097, RM)));
3955 EXPECT_TRUE(scalbn(SmallestF64, -2097, RM).isPosZero());
3956 EXPECT_TRUE(scalbn(SmallestF64, -2098, RM).isPosZero());
3957 EXPECT_TRUE(scalbn(SmallestF64, -2099, RM).isPosZero());
3958 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1022")
3959 .bitwiseIsEqual(scalbn(SmallestF64, 2096, RM)));
// NOTE(review): this repeats the 2097 assertion made a few statements above.
3960 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+1023")
3961 .bitwiseIsEqual(scalbn(SmallestF64, 2097, RM)));
3962 EXPECT_TRUE(scalbn(SmallestF64, 2098, RM).isInfinity());
3963 EXPECT_TRUE(scalbn(SmallestF64, 2099, RM).isInfinity());
3965 // Test for integer overflows when adding to exponent.
3966 EXPECT_TRUE(scalbn(SmallestF64, -INT_MAX, RM).isPosZero());
3967 EXPECT_TRUE(scalbn(LargestF64, INT_MAX, RM).isInfinity());
// Largest denormal: unchanged by a zero scale, and its significand is
// expected to be preserved exactly when it is scaled up into the normal
// range.
3969 EXPECT_TRUE(LargestDenormalF64
3970 .bitwiseIsEqual(scalbn(LargestDenormalF64, 0, RM)));
3971 EXPECT_TRUE(NegLargestDenormalF64
3972 .bitwiseIsEqual(scalbn(NegLargestDenormalF64, 0, RM)));
3974 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1022")
3975 .bitwiseIsEqual(scalbn(LargestDenormalF64, 1, RM)));
3976 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1021")
3977 .bitwiseIsEqual(scalbn(NegLargestDenormalF64, 2, RM)));
3979 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+1")
3980 .bitwiseIsEqual(scalbn(LargestDenormalF64, 1024, RM)));
3981 EXPECT_TRUE(scalbn(LargestDenormalF64, -1023, RM).isPosZero());
3982 EXPECT_TRUE(scalbn(LargestDenormalF64, -1024, RM).isPosZero());
3983 EXPECT_TRUE(scalbn(LargestDenormalF64, -2048, RM).isPosZero());
3984 EXPECT_TRUE(scalbn(LargestDenormalF64, 2047, RM).isInfinity());
3985 EXPECT_TRUE(scalbn(LargestDenormalF64, 2098, RM).isInfinity());
3986 EXPECT_TRUE(scalbn(LargestDenormalF64, 2099, RM).isInfinity());
3988 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-2")
3989 .bitwiseIsEqual(scalbn(LargestDenormalF64, 1021, RM)));
3990 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1")
3991 .bitwiseIsEqual(scalbn(LargestDenormalF64, 1022, RM)));
3992 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+0")
3993 .bitwiseIsEqual(scalbn(LargestDenormalF64, 1023, RM)));
3994 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep+1023")
3995 .bitwiseIsEqual(scalbn(LargestDenormalF64, 2046, RM)));
3996 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p+974")
3997 .bitwiseIsEqual(scalbn(SmallestF64, 2048, RM)));
// NOTE(review): despite its name, this value is built from a literal with
// exponent +51. The checks expect the significand to survive scaling by
// -1023, -53, -52 and -51 unchanged.
3999 APFloat RandomDenormalF64(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+51");
4000 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-972")
4001 .bitwiseIsEqual(scalbn(RandomDenormalF64, -1023, RM)));
4002 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1")
4003 .bitwiseIsEqual(scalbn(RandomDenormalF64, -52, RM)));
4004 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-2")
4005 .bitwiseIsEqual(scalbn(RandomDenormalF64, -53, RM)));
4006 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+0")
4007 .bitwiseIsEqual(scalbn(RandomDenormalF64, -51, RM)));
4009 EXPECT_TRUE(scalbn(RandomDenormalF64, -2097, RM).isPosZero());
4010 EXPECT_TRUE(scalbn(RandomDenormalF64, -2090, RM).isPosZero());
// Scaling the largest finite doubles down to the bottom of the range: the
// expected results are signed powers of two (2^-1073, 2^-1024, 2^-1074), and
// one step further the negative value is expected to become -0.
4013 EXPECT_TRUE(
4014 APFloat(APFloat::IEEEdouble(), "-0x1p-1073")
4015 .bitwiseIsEqual(scalbn(NegLargestF64, -2097, RM)));
4017 EXPECT_TRUE(
4018 APFloat(APFloat::IEEEdouble(), "-0x1p-1024")
4019 .bitwiseIsEqual(scalbn(NegLargestF64, -2048, RM)));
4021 EXPECT_TRUE(
4022 APFloat(APFloat::IEEEdouble(), "0x1p-1073")
4023 .bitwiseIsEqual(scalbn(LargestF64, -2097, RM)));
4025 EXPECT_TRUE(
4026 APFloat(APFloat::IEEEdouble(), "0x1p-1074")
4027 .bitwiseIsEqual(scalbn(LargestF64, -2098, RM)));
4028 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1074")
4029 .bitwiseIsEqual(scalbn(NegLargestF64, -2098, RM)));
4030 EXPECT_TRUE(scalbn(NegLargestF64, -2099, RM).isNegZero());
4031 EXPECT_TRUE(scalbn(LargestF64, 1, RM).isInfinity());
// Plain in-range scaling of powers of two by -52.
4034 EXPECT_TRUE(
4035 APFloat(APFloat::IEEEdouble(), "0x1p+0")
4036 .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEdouble(), "0x1p+52"), -52, RM)));
4038 EXPECT_TRUE(
4039 APFloat(APFloat::IEEEdouble(), "0x1p-103")
4040 .bitwiseIsEqual(scalbn(APFloat(APFloat::IEEEdouble(), "0x1p-51"), -52, RM)));
// Exercises frexp(value, Exp, rounding-mode) on IEEE double values. Each case
// checks both the exponent written to Exp and the returned fraction, covering
// zeros, +/-1, denormals, the largest finite values, infinities, NaNs and a
// few ordinary literals.
4043 TEST(APFloatTest, frexp) {
4044 const APFloat::roundingMode RM = APFloat::rmNearestTiesToEven;
4046 APFloat PZero = APFloat::getZero(APFloat::IEEEdouble(), false);
4047 APFloat MZero = APFloat::getZero(APFloat::IEEEdouble(), true);
4048 APFloat One(1.0);
4049 APFloat MOne(-1.0);
// NOTE(review): Two and MTwo are not referenced by any line of this test
// visible in this extract.
4050 APFloat Two(2.0);
4051 APFloat MTwo(-2.0);
4053 APFloat LargestDenormal(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1023");
4054 APFloat NegLargestDenormal(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1023");
4056 APFloat Smallest = APFloat::getSmallest(APFloat::IEEEdouble(), false);
4057 APFloat NegSmallest = APFloat::getSmallest(APFloat::IEEEdouble(), true);
4059 APFloat Largest = APFloat::getLargest(APFloat::IEEEdouble(), false);
4060 APFloat NegLargest = APFloat::getLargest(APFloat::IEEEdouble(), true);
4062 APFloat PInf = APFloat::getInf(APFloat::IEEEdouble(), false);
4063 APFloat MInf = APFloat::getInf(APFloat::IEEEdouble(), true);
4065 APFloat QPNaN = APFloat::getNaN(APFloat::IEEEdouble(), false);
4066 APFloat QMNaN = APFloat::getNaN(APFloat::IEEEdouble(), true);
4067 APFloat SNaN = APFloat::getSNaN(APFloat::IEEEdouble(), false);
4069 // Make sure highest bit of payload is preserved.
// NOTE(review): the final operand of this OR-chain and the closing `);` sit
// on a line that is not present in this extract -- confirm against the file.
4070 const APInt Payload(64, (UINT64_C(1) << 50) |
4071 (UINT64_C(1) << 49) |
4072 (UINT64_C(1234) << 32) |
4075 APFloat SNaNWithPayload = APFloat::getSNaN(APFloat::IEEEdouble(), false,
4076 &Payload);
// NOTE(review): the two SmallestNormalized values below are not referenced by
// any line of this test visible in this extract.
4078 APFloat SmallestNormalized
4079 = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false);
4080 APFloat NegSmallestNormalized
4081 = APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true);
// Exp receives the exponent from each call; Frac holds the returned fraction.
4083 int Exp;
4084 APFloat Frac(APFloat::IEEEdouble());
// Zeros: exponent 0 and a zero fraction with the input's sign.
4087 Frac = frexp(PZero, Exp, RM);
4088 EXPECT_EQ(0, Exp);
4089 EXPECT_TRUE(Frac.isPosZero());
4091 Frac = frexp(MZero, Exp, RM);
4092 EXPECT_EQ(0, Exp);
4093 EXPECT_TRUE(Frac.isNegZero());
// +/-1.0 is expected to split into +/-0.5 with exponent 1.
4096 Frac = frexp(One, Exp, RM);
4097 EXPECT_EQ(1, Exp);
4098 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac));
4100 Frac = frexp(MOne, Exp, RM);
4101 EXPECT_EQ(1, Exp);
4102 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1").bitwiseIsEqual(Frac));
// Denormal inputs: the fraction is expected to keep the same significand
// digits, with the exponent moved into Exp.
4104 Frac = frexp(LargestDenormal, Exp, RM);
4105 EXPECT_EQ(-1022, Exp);
4106 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.ffffffffffffep-1").bitwiseIsEqual(Frac));
4108 Frac = frexp(NegLargestDenormal, Exp, RM);
4109 EXPECT_EQ(-1022, Exp);
4110 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.ffffffffffffep-1").bitwiseIsEqual(Frac));
4113 Frac = frexp(Smallest, Exp, RM);
4114 EXPECT_EQ(-1073, Exp);
4115 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac));
4117 Frac = frexp(NegSmallest, Exp, RM);
4118 EXPECT_EQ(-1073, Exp);
4119 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1p-1").bitwiseIsEqual(Frac));
// Largest finite values: exponent 1024 with an all-ones significand.
4122 Frac = frexp(Largest, Exp, RM);
4123 EXPECT_EQ(1024, Exp);
4124 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.fffffffffffffp-1").bitwiseIsEqual(Frac));
4126 Frac = frexp(NegLargest, Exp, RM);
4127 EXPECT_EQ(1024, Exp);
4128 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "-0x1.fffffffffffffp-1").bitwiseIsEqual(Frac));
// Infinities: Exp is expected to be INT_MAX and the fraction an infinity of
// the same sign.
4131 Frac = frexp(PInf, Exp, RM);
4132 EXPECT_EQ(INT_MAX, Exp);
4133 EXPECT_TRUE(Frac.isInfinity() && !Frac.isNegative());
4135 Frac = frexp(MInf, Exp, RM);
4136 EXPECT_EQ(INT_MAX, Exp);
4137 EXPECT_TRUE(Frac.isInfinity() && Frac.isNegative());
// NaNs: Exp is expected to be INT_MIN and the fraction a NaN; a signaling
// input must come back quiet, with the low 51 payload bits intact.
4139 Frac = frexp(QPNaN, Exp, RM);
4140 EXPECT_EQ(INT_MIN, Exp);
4141 EXPECT_TRUE(Frac.isNaN());
4143 Frac = frexp(QMNaN, Exp, RM);
4144 EXPECT_EQ(INT_MIN, Exp);
4145 EXPECT_TRUE(Frac.isNaN());
4147 Frac = frexp(SNaN, Exp, RM);
4148 EXPECT_EQ(INT_MIN, Exp);
4149 EXPECT_TRUE(Frac.isNaN() && !Frac.isSignaling());
4151 Frac = frexp(SNaNWithPayload, Exp, RM);
4152 EXPECT_EQ(INT_MIN, Exp);
4153 EXPECT_TRUE(Frac.isNaN() && !Frac.isSignaling());
4154 EXPECT_EQ(Payload, Frac.bitcastToAPInt().getLoBits(51));
// Ordinary literals, including one written with a leading-zero significand.
4156 Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x0.ffffp-1"), Exp, RM);
4157 EXPECT_EQ(-1, Exp);
4158 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.fffep-1").bitwiseIsEqual(Frac));
4160 Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x1p-51"), Exp, RM);
4161 EXPECT_EQ(-50, Exp);
4162 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1p-1").bitwiseIsEqual(Frac));
4164 Frac = frexp(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp+51"), Exp, RM);
4165 EXPECT_EQ(52, Exp);
4166 EXPECT_TRUE(APFloat(APFloat::IEEEdouble(), "0x1.c60f120d9f87cp-1").bitwiseIsEqual(Frac));
4169 TEST(APFloatTest, mod) {
4171 APFloat f1(APFloat::IEEEdouble(), "1.5");
4172 APFloat f2(APFloat::IEEEdouble(), "1.0");
4173 APFloat expected(APFloat::IEEEdouble(), "0.5");
4174 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4175 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4178 APFloat f1(APFloat::IEEEdouble(), "0.5");
4179 APFloat f2(APFloat::IEEEdouble(), "1.0");
4180 APFloat expected(APFloat::IEEEdouble(), "0.5");
4181 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4182 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4185 APFloat f1(APFloat::IEEEdouble(), "0x1.3333333333333p-2"); // 0.3
4186 APFloat f2(APFloat::IEEEdouble(), "0x1.47ae147ae147bp-7"); // 0.01
4187 APFloat expected(APFloat::IEEEdouble(),
4188 "0x1.47ae147ae1471p-7"); // 0.009999999999999983
4189 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4190 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4193 APFloat f1(APFloat::IEEEdouble(), "0x1p64"); // 1.8446744073709552e19
4194 APFloat f2(APFloat::IEEEdouble(), "1.5");
4195 APFloat expected(APFloat::IEEEdouble(), "1.0");
4196 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4197 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4200 APFloat f1(APFloat::IEEEdouble(), "0x1p1000");
4201 APFloat f2(APFloat::IEEEdouble(), "0x1p-1000");
4202 APFloat expected(APFloat::IEEEdouble(), "0.0");
4203 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4204 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4207 APFloat f1(APFloat::IEEEdouble(), "0.0");
4208 APFloat f2(APFloat::IEEEdouble(), "1.0");
4209 APFloat expected(APFloat::IEEEdouble(), "0.0");
4210 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4211 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4214 APFloat f1(APFloat::IEEEdouble(), "1.0");
4215 APFloat f2(APFloat::IEEEdouble(), "0.0");
4216 EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp);
4217 EXPECT_TRUE(f1.isNaN());
4220 APFloat f1(APFloat::IEEEdouble(), "0.0");
4221 APFloat f2(APFloat::IEEEdouble(), "0.0");
4222 EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp);
4223 EXPECT_TRUE(f1.isNaN());
4226 APFloat f1 = APFloat::getInf(APFloat::IEEEdouble(), false);
4227 APFloat f2(APFloat::IEEEdouble(), "1.0");
4228 EXPECT_EQ(f1.mod(f2), APFloat::opInvalidOp);
4229 EXPECT_TRUE(f1.isNaN());
4232 APFloat f1(APFloat::IEEEdouble(), "-4.0");
4233 APFloat f2(APFloat::IEEEdouble(), "-2.0");
4234 APFloat expected(APFloat::IEEEdouble(), "-0.0");
4235 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4236 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4239 APFloat f1(APFloat::IEEEdouble(), "-4.0");
4240 APFloat f2(APFloat::IEEEdouble(), "2.0");
4241 APFloat expected(APFloat::IEEEdouble(), "-0.0");
4242 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4243 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4246 // Test E4M3FN mod where the LHS exponent is maxExponent (8) and the RHS is
4247 // the max value whose exponent is minExponent (-6). This requires special
4248 // logic in the mod implementation to prevent overflow to NaN.
4249 APFloat f1(APFloat::Float8E4M3FN(), "0x1p8"); // 256
4250 APFloat f2(APFloat::Float8E4M3FN(), "0x1.ep-6"); // 0.029296875
4251 APFloat expected(APFloat::Float8E4M3FN(), "0x1p-8"); // 0.00390625
4252 EXPECT_EQ(f1.mod(f2), APFloat::opOK);
4253 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4257 TEST(APFloatTest, remainder) {
4258 // Test Special Cases against each other and normal values.
4260 APFloat PInf = APFloat::getInf(APFloat::IEEEsingle(), false);
4261 APFloat MInf = APFloat::getInf(APFloat::IEEEsingle(), true);
4262 APFloat PZero = APFloat::getZero(APFloat::IEEEsingle(), false);
4263 APFloat MZero = APFloat::getZero(APFloat::IEEEsingle(), true);
4264 APFloat QNaN = APFloat::getNaN(APFloat::IEEEsingle(), false);
4265 APFloat SNaN = APFloat(APFloat::IEEEsingle(), "snan123");
4266 APFloat PNormalValue = APFloat(APFloat::IEEEsingle(), "0x1p+0");
4267 APFloat MNormalValue = APFloat(APFloat::IEEEsingle(), "-0x1p+0");
4268 APFloat PLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), false);
4269 APFloat MLargestValue = APFloat::getLargest(APFloat::IEEEsingle(), true);
4270 APFloat PSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), false);
4271 APFloat MSmallestValue = APFloat::getSmallest(APFloat::IEEEsingle(), true);
4272 APFloat PSmallestNormalized =
4273 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
4274 APFloat MSmallestNormalized =
4275 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
4277 APFloat PVal1(APFloat::IEEEsingle(), "0x1.fffffep+126");
4278 APFloat MVal1(APFloat::IEEEsingle(), "-0x1.fffffep+126");
4279 APFloat PVal2(APFloat::IEEEsingle(), "0x1.fffffep-126");
4280 APFloat MVal2(APFloat::IEEEsingle(), "-0x1.fffffep-126");
4281 APFloat PVal3(APFloat::IEEEsingle(), "0x1p-125");
4282 APFloat MVal3(APFloat::IEEEsingle(), "-0x1p-125");
4283 APFloat PVal4(APFloat::IEEEsingle(), "0x1p+127");
4284 APFloat MVal4(APFloat::IEEEsingle(), "-0x1p+127");
4285 APFloat PVal5(APFloat::IEEEsingle(), "1.5");
4286 APFloat MVal5(APFloat::IEEEsingle(), "-1.5");
4287 APFloat PVal6(APFloat::IEEEsingle(), "1");
4288 APFloat MVal6(APFloat::IEEEsingle(), "-1");
4290 struct {
4291 APFloat x;
4292 APFloat y;
4293 const char *result;
4294 int status;
4295 int category;
4296 } SpecialCaseTests[] = {
4297 { PInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4298 { PInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4299 { PInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4300 { PInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4301 { PInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4302 { PInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4303 { PInf, PNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4304 { PInf, MNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4305 { PInf, PLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4306 { PInf, MLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4307 { PInf, PSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4308 { PInf, MSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4309 { PInf, PSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4310 { PInf, MSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4311 { MInf, PInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4312 { MInf, MInf, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4313 { MInf, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4314 { MInf, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4315 { MInf, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4316 { MInf, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4317 { MInf, PNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4318 { MInf, MNormalValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4319 { MInf, PLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4320 { MInf, MLargestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4321 { MInf, PSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4322 { MInf, MSmallestValue, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4323 { MInf, PSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4324 { MInf, MSmallestNormalized, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4325 { PZero, PInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4326 { PZero, MInf, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4327 { PZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4328 { PZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4329 { PZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4330 { PZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4331 { PZero, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4332 { PZero, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4333 { PZero, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4334 { PZero, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4335 { PZero, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4336 { PZero, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4337 { PZero, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4338 { PZero, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4339 { MZero, PInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4340 { MZero, MInf, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4341 { MZero, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4342 { MZero, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4343 { MZero, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4344 { MZero, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4345 { MZero, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4346 { MZero, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4347 { MZero, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4348 { MZero, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4349 { MZero, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4350 { MZero, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4351 { MZero, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4352 { MZero, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4353 { QNaN, PInf, "nan", APFloat::opOK, APFloat::fcNaN },
4354 { QNaN, MInf, "nan", APFloat::opOK, APFloat::fcNaN },
4355 { QNaN, PZero, "nan", APFloat::opOK, APFloat::fcNaN },
4356 { QNaN, MZero, "nan", APFloat::opOK, APFloat::fcNaN },
4357 { QNaN, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4358 { QNaN, SNaN, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4359 { QNaN, PNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
4360 { QNaN, MNormalValue, "nan", APFloat::opOK, APFloat::fcNaN },
4361 { QNaN, PLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
4362 { QNaN, MLargestValue, "nan", APFloat::opOK, APFloat::fcNaN },
4363 { QNaN, PSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
4364 { QNaN, MSmallestValue, "nan", APFloat::opOK, APFloat::fcNaN },
4365 { QNaN, PSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
4366 { QNaN, MSmallestNormalized, "nan", APFloat::opOK, APFloat::fcNaN },
4367 { SNaN, PInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4368 { SNaN, MInf, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4369 { SNaN, PZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4370 { SNaN, MZero, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4371 { SNaN, QNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4372 { SNaN, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4373 { SNaN, PNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4374 { SNaN, MNormalValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4375 { SNaN, PLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4376 { SNaN, MLargestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4377 { SNaN, PSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4378 { SNaN, MSmallestValue, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4379 { SNaN, PSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4380 { SNaN, MSmallestNormalized, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4381 { PNormalValue, PInf, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4382 { PNormalValue, MInf, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4383 { PNormalValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4384 { PNormalValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4385 { PNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4386 { PNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4387 { PNormalValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4388 { PNormalValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4389 { PNormalValue, PLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4390 { PNormalValue, MLargestValue, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4391 { PNormalValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4392 { PNormalValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4393 { PNormalValue, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4394 { PNormalValue, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4395 { MNormalValue, PInf, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4396 { MNormalValue, MInf, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4397 { MNormalValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4398 { MNormalValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4399 { MNormalValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4400 { MNormalValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4401 { MNormalValue, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4402 { MNormalValue, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4403 { MNormalValue, PLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4404 { MNormalValue, MLargestValue, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4405 { MNormalValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4406 { MNormalValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4407 { MNormalValue, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4408 { MNormalValue, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4409 { PLargestValue, PInf, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
4410 { PLargestValue, MInf, "0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
4411 { PLargestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4412 { PLargestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4413 { PLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4414 { PLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4415 { PLargestValue, PNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4416 { PLargestValue, MNormalValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4417 { PLargestValue, PLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4418 { PLargestValue, MLargestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4419 { PLargestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4420 { PLargestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4421 { PLargestValue, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4422 { PLargestValue, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4423 { MLargestValue, PInf, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
4424 { MLargestValue, MInf, "-0x1.fffffep+127", APFloat::opOK, APFloat::fcNormal },
4425 { MLargestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4426 { MLargestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4427 { MLargestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4428 { MLargestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4429 { MLargestValue, PNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4430 { MLargestValue, MNormalValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4431 { MLargestValue, PLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4432 { MLargestValue, MLargestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4433 { MLargestValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4434 { MLargestValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4435 { MLargestValue, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4436 { MLargestValue, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4437 { PSmallestValue, PInf, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4438 { PSmallestValue, MInf, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4439 { PSmallestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4440 { PSmallestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4441 { PSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4442 { PSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4443 { PSmallestValue, PNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4444 { PSmallestValue, MNormalValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4445 { PSmallestValue, PLargestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4446 { PSmallestValue, MLargestValue, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4447 { PSmallestValue, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4448 { PSmallestValue, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4449 { PSmallestValue, PSmallestNormalized, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4450 { PSmallestValue, MSmallestNormalized, "0x1p-149", APFloat::opOK, APFloat::fcNormal },
4451 { MSmallestValue, PInf, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4452 { MSmallestValue, MInf, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4453 { MSmallestValue, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4454 { MSmallestValue, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4455 { MSmallestValue, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4456 { MSmallestValue, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4457 { MSmallestValue, PNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4458 { MSmallestValue, MNormalValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4459 { MSmallestValue, PLargestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4460 { MSmallestValue, MLargestValue, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4461 { MSmallestValue, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4462 { MSmallestValue, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4463 { MSmallestValue, PSmallestNormalized, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4464 { MSmallestValue, MSmallestNormalized, "-0x1p-149", APFloat::opOK, APFloat::fcNormal },
4465 { PSmallestNormalized, PInf, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4466 { PSmallestNormalized, MInf, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4467 { PSmallestNormalized, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4468 { PSmallestNormalized, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4469 { PSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4470 { PSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4471 { PSmallestNormalized, PNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4472 { PSmallestNormalized, MNormalValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4473 { PSmallestNormalized, PLargestValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4474 { PSmallestNormalized, MLargestValue, "0x1p-126", APFloat::opOK, APFloat::fcNormal },
4475 { PSmallestNormalized, PSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4476 { PSmallestNormalized, MSmallestValue, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4477 { PSmallestNormalized, PSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4478 { PSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4479 { MSmallestNormalized, PInf, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4480 { MSmallestNormalized, MInf, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4481 { MSmallestNormalized, PZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4482 { MSmallestNormalized, MZero, "nan", APFloat::opInvalidOp, APFloat::fcNaN },
4483 { MSmallestNormalized, QNaN, "nan", APFloat::opOK, APFloat::fcNaN },
4484 { MSmallestNormalized, SNaN, "nan123", APFloat::opInvalidOp, APFloat::fcNaN },
4485 { MSmallestNormalized, PNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4486 { MSmallestNormalized, MNormalValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4487 { MSmallestNormalized, PLargestValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4488 { MSmallestNormalized, MLargestValue, "-0x1p-126", APFloat::opOK, APFloat::fcNormal },
4489 { MSmallestNormalized, PSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4490 { MSmallestNormalized, MSmallestValue, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4491 { MSmallestNormalized, PSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4492 { MSmallestNormalized, MSmallestNormalized, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4494 { PVal1, PVal1, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4495 { PVal1, MVal1, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4496 { PVal1, PVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4497 { PVal1, MVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4498 { PVal1, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4499 { PVal1, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4500 { PVal1, PVal4, "-0x1p+103", APFloat::opOK, APFloat::fcNormal },
4501 { PVal1, MVal4, "-0x1p+103", APFloat::opOK, APFloat::fcNormal },
4502 { PVal1, PVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4503 { PVal1, MVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4504 { PVal1, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4505 { PVal1, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4506 { MVal1, PVal1, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4507 { MVal1, MVal1, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4508 { MVal1, PVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4509 { MVal1, MVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4510 { MVal1, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4511 { MVal1, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4512 { MVal1, PVal4, "0x1p+103", APFloat::opOK, APFloat::fcNormal },
4513 { MVal1, MVal4, "0x1p+103", APFloat::opOK, APFloat::fcNormal },
4514 { MVal1, PVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4515 { MVal1, MVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4516 { MVal1, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4517 { MVal1, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4518 { PVal2, PVal1, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4519 { PVal2, MVal1, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4520 { PVal2, PVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4521 { PVal2, MVal2, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4522 { PVal2, PVal3, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4523 { PVal2, MVal3, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4524 { PVal2, PVal4, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4525 { PVal2, MVal4, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4526 { PVal2, PVal5, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4527 { PVal2, MVal5, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4528 { PVal2, PVal6, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4529 { PVal2, MVal6, "0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4530 { MVal2, PVal1, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4531 { MVal2, MVal1, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4532 { MVal2, PVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4533 { MVal2, MVal2, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4534 { MVal2, PVal3, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4535 { MVal2, MVal3, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4536 { MVal2, PVal4, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4537 { MVal2, MVal4, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4538 { MVal2, PVal5, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4539 { MVal2, MVal5, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4540 { MVal2, PVal6, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4541 { MVal2, MVal6, "-0x1.fffffep-126", APFloat::opOK, APFloat::fcNormal },
4542 { PVal3, PVal1, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4543 { PVal3, MVal1, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4544 { PVal3, PVal2, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4545 { PVal3, MVal2, "0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4546 { PVal3, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4547 { PVal3, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4548 { PVal3, PVal4, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4549 { PVal3, MVal4, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4550 { PVal3, PVal5, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4551 { PVal3, MVal5, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4552 { PVal3, PVal6, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4553 { PVal3, MVal6, "0x1p-125", APFloat::opOK, APFloat::fcNormal },
4554 { MVal3, PVal1, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4555 { MVal3, MVal1, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4556 { MVal3, PVal2, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4557 { MVal3, MVal2, "-0x0.000002p-126", APFloat::opOK, APFloat::fcNormal },
4558 { MVal3, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4559 { MVal3, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4560 { MVal3, PVal4, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4561 { MVal3, MVal4, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4562 { MVal3, PVal5, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4563 { MVal3, MVal5, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4564 { MVal3, PVal6, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4565 { MVal3, MVal6, "-0x1p-125", APFloat::opOK, APFloat::fcNormal },
4566 { PVal4, PVal1, "0x1p+103", APFloat::opOK, APFloat::fcNormal },
4567 { PVal4, MVal1, "0x1p+103", APFloat::opOK, APFloat::fcNormal },
4568 { PVal4, PVal2, "0x0.002p-126", APFloat::opOK, APFloat::fcNormal },
4569 { PVal4, MVal2, "0x0.002p-126", APFloat::opOK, APFloat::fcNormal },
4570 { PVal4, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4571 { PVal4, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4572 { PVal4, PVal4, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4573 { PVal4, MVal4, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4574 { PVal4, PVal5, "0.5", APFloat::opOK, APFloat::fcNormal },
4575 { PVal4, MVal5, "0.5", APFloat::opOK, APFloat::fcNormal },
4576 { PVal4, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4577 { PVal4, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4578 { MVal4, PVal1, "-0x1p+103", APFloat::opOK, APFloat::fcNormal },
4579 { MVal4, MVal1, "-0x1p+103", APFloat::opOK, APFloat::fcNormal },
4580 { MVal4, PVal2, "-0x0.002p-126", APFloat::opOK, APFloat::fcNormal },
4581 { MVal4, MVal2, "-0x0.002p-126", APFloat::opOK, APFloat::fcNormal },
4582 { MVal4, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4583 { MVal4, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4584 { MVal4, PVal4, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4585 { MVal4, MVal4, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4586 { MVal4, PVal5, "-0.5", APFloat::opOK, APFloat::fcNormal },
4587 { MVal4, MVal5, "-0.5", APFloat::opOK, APFloat::fcNormal },
4588 { MVal4, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4589 { MVal4, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4590 { PVal5, PVal1, "1.5", APFloat::opOK, APFloat::fcNormal },
4591 { PVal5, MVal1, "1.5", APFloat::opOK, APFloat::fcNormal },
4592 { PVal5, PVal2, "0x0.00006p-126", APFloat::opOK, APFloat::fcNormal },
4593 { PVal5, MVal2, "0x0.00006p-126", APFloat::opOK, APFloat::fcNormal },
4594 { PVal5, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4595 { PVal5, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4596 { PVal5, PVal4, "1.5", APFloat::opOK, APFloat::fcNormal },
4597 { PVal5, MVal4, "1.5", APFloat::opOK, APFloat::fcNormal },
4598 { PVal5, PVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4599 { PVal5, MVal5, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4600 { PVal5, PVal6, "-0.5", APFloat::opOK, APFloat::fcNormal },
4601 { PVal5, MVal6, "-0.5", APFloat::opOK, APFloat::fcNormal },
4602 { MVal5, PVal1, "-1.5", APFloat::opOK, APFloat::fcNormal },
4603 { MVal5, MVal1, "-1.5", APFloat::opOK, APFloat::fcNormal },
4604 { MVal5, PVal2, "-0x0.00006p-126", APFloat::opOK, APFloat::fcNormal },
4605 { MVal5, MVal2, "-0x0.00006p-126", APFloat::opOK, APFloat::fcNormal },
4606 { MVal5, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4607 { MVal5, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4608 { MVal5, PVal4, "-1.5", APFloat::opOK, APFloat::fcNormal },
4609 { MVal5, MVal4, "-1.5", APFloat::opOK, APFloat::fcNormal },
4610 { MVal5, PVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4611 { MVal5, MVal5, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4612 { MVal5, PVal6, "0.5", APFloat::opOK, APFloat::fcNormal },
4613 { MVal5, MVal6, "0.5", APFloat::opOK, APFloat::fcNormal },
4614 { PVal6, PVal1, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4615 { PVal6, MVal1, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4616 { PVal6, PVal2, "0x0.00004p-126", APFloat::opOK, APFloat::fcNormal },
4617 { PVal6, MVal2, "0x0.00004p-126", APFloat::opOK, APFloat::fcNormal },
4618 { PVal6, PVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4619 { PVal6, MVal3, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4620 { PVal6, PVal4, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4621 { PVal6, MVal4, "0x1p+0", APFloat::opOK, APFloat::fcNormal },
4622 { PVal6, PVal5, "-0.5", APFloat::opOK, APFloat::fcNormal },
4623 { PVal6, MVal5, "-0.5", APFloat::opOK, APFloat::fcNormal },
4624 { PVal6, PVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4625 { PVal6, MVal6, "0x0p+0", APFloat::opOK, APFloat::fcZero },
4626 { MVal6, PVal1, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4627 { MVal6, MVal1, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4628 { MVal6, PVal2, "-0x0.00004p-126", APFloat::opOK, APFloat::fcNormal },
4629 { MVal6, MVal2, "-0x0.00004p-126", APFloat::opOK, APFloat::fcNormal },
4630 { MVal6, PVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4631 { MVal6, MVal3, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4632 { MVal6, PVal4, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4633 { MVal6, MVal4, "-0x1p+0", APFloat::opOK, APFloat::fcNormal },
4634 { MVal6, PVal5, "0.5", APFloat::opOK, APFloat::fcNormal },
4635 { MVal6, MVal5, "0.5", APFloat::opOK, APFloat::fcNormal },
4636 { MVal6, PVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4637 { MVal6, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
4640 for (size_t i = 0; i < std::size(SpecialCaseTests); ++i) {
4641 APFloat x(SpecialCaseTests[i].x);
4642 APFloat y(SpecialCaseTests[i].y);
4643 APFloat::opStatus status = x.remainder(y);
4645 APFloat result(x.getSemantics(), SpecialCaseTests[i].result);
4647 EXPECT_TRUE(result.bitwiseIsEqual(x));
4648 EXPECT_EQ(SpecialCaseTests[i].status, (int)status);
4649 EXPECT_EQ(SpecialCaseTests[i].category, (int)x.getCategory());
4653 APFloat f1(APFloat::IEEEdouble(), "0x1.3333333333333p-2"); // 0.3
4654 APFloat f2(APFloat::IEEEdouble(), "0x1.47ae147ae147bp-7"); // 0.01
4655 APFloat expected(APFloat::IEEEdouble(), "-0x1.4p-56");
4656 EXPECT_EQ(APFloat::opOK, f1.remainder(f2));
4657 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4660 APFloat f1(APFloat::IEEEdouble(), "0x1p64"); // 1.8446744073709552e19
4661 APFloat f2(APFloat::IEEEdouble(), "1.5");
4662 APFloat expected(APFloat::IEEEdouble(), "-0.5");
4663 EXPECT_EQ(APFloat::opOK, f1.remainder(f2));
4664 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4667 APFloat f1(APFloat::IEEEdouble(), "0x1p1000");
4668 APFloat f2(APFloat::IEEEdouble(), "0x1p-1000");
4669 APFloat expected(APFloat::IEEEdouble(), "0.0");
4670 EXPECT_EQ(APFloat::opOK, f1.remainder(f2));
4671 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4674 APFloat f1 = APFloat::getInf(APFloat::IEEEdouble(), false);
4675 APFloat f2(APFloat::IEEEdouble(), "1.0");
4676 EXPECT_EQ(f1.remainder(f2), APFloat::opInvalidOp);
4677 EXPECT_TRUE(f1.isNaN());
4680 APFloat f1(APFloat::IEEEdouble(), "-4.0");
4681 APFloat f2(APFloat::IEEEdouble(), "-2.0");
4682 APFloat expected(APFloat::IEEEdouble(), "-0.0");
4683 EXPECT_EQ(APFloat::opOK, f1.remainder(f2));
4684 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4687 APFloat f1(APFloat::IEEEdouble(), "-4.0");
4688 APFloat f2(APFloat::IEEEdouble(), "2.0");
4689 APFloat expected(APFloat::IEEEdouble(), "-0.0");
4690 EXPECT_EQ(APFloat::opOK, f1.remainder(f2));
4691 EXPECT_TRUE(f1.bitwiseIsEqual(expected));
4695 TEST(APFloatTest, PPCDoubleDoubleAddSpecial) {
4696 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t,
4697 APFloat::fltCategory, APFloat::roundingMode>;
4698 DataType Data[] = {
4699 // (1 + 0) + (-1 + 0) = fcZero
4700 std::make_tuple(0x3ff0000000000000ull, 0, 0xbff0000000000000ull, 0,
4701 APFloat::fcZero, APFloat::rmNearestTiesToEven),
4702 // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity
4703 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4704 0x7948000000000000ull, 0ull, APFloat::fcInfinity,
4705 APFloat::rmNearestTiesToEven),
4706 // TODO: change the 4th 0x75effffffffffffe to 0x75efffffffffffff when
4707 // semPPCDoubleDoubleLegacy is gone.
4708 // LDBL_MAX + (1.011111... >> (1023 - 106) + (1.1111111...0 >> (1023 -
4709 // 160))) = fcNormal
4710 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4711 0x7947ffffffffffffull, 0x75effffffffffffeull,
4712 APFloat::fcNormal, APFloat::rmNearestTiesToEven),
4713 // LDBL_MAX + (1.1 >> (1023 - 106) + 0)) = fcInfinity
4714 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4715 0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4716 APFloat::fcInfinity, APFloat::rmNearestTiesToEven),
4717 // NaN + (1 + 0) = fcNaN
4718 std::make_tuple(0x7ff8000000000000ull, 0, 0x3ff0000000000000ull, 0,
4719 APFloat::fcNaN, APFloat::rmNearestTiesToEven),
4722 for (auto Tp : Data) {
4723 uint64_t Op1[2], Op2[2];
4724 APFloat::fltCategory Expected;
4725 APFloat::roundingMode RM;
4726 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp;
4729 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4730 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4731 A1.add(A2, RM);
4733 EXPECT_EQ(Expected, A1.getCategory())
4734 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1],
4735 Op2[0], Op2[1])
4736 .str();
4739 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4740 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4741 A2.add(A1, RM);
4743 EXPECT_EQ(Expected, A2.getCategory())
4744 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1],
4745 Op1[0], Op1[1])
4746 .str();
4751 TEST(APFloatTest, PPCDoubleDoubleAdd) {
4752 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
4753 uint64_t, APFloat::roundingMode>;
4754 DataType Data[] = {
4755 // (1 + 0) + (1e-105 + 0) = (1 + 1e-105)
4756 std::make_tuple(0x3ff0000000000000ull, 0, 0x3960000000000000ull, 0,
4757 0x3ff0000000000000ull, 0x3960000000000000ull,
4758 APFloat::rmNearestTiesToEven),
4759 // (1 + 0) + (1e-106 + 0) = (1 + 1e-106)
4760 std::make_tuple(0x3ff0000000000000ull, 0, 0x3950000000000000ull, 0,
4761 0x3ff0000000000000ull, 0x3950000000000000ull,
4762 APFloat::rmNearestTiesToEven),
4763 // (1 + 1e-106) + (1e-106 + 0) = (1 + 1e-105)
4764 std::make_tuple(0x3ff0000000000000ull, 0x3950000000000000ull,
4765 0x3950000000000000ull, 0, 0x3ff0000000000000ull,
4766 0x3960000000000000ull, APFloat::rmNearestTiesToEven),
4767 // (1 + 0) + (epsilon + 0) = (1 + epsilon)
4768 std::make_tuple(0x3ff0000000000000ull, 0, 0x0000000000000001ull, 0,
4769 0x3ff0000000000000ull, 0x0000000000000001ull,
4770 APFloat::rmNearestTiesToEven),
4771 // TODO: change 0xf950000000000000 to 0xf940000000000000, when
4772 // semPPCDoubleDoubleLegacy is gone.
4773 // (DBL_MAX - 1 << (1023 - 105)) + (1 << (1023 - 53) + 0) = DBL_MAX +
4774 // 1.11111... << (1023 - 52)
4775 std::make_tuple(0x7fefffffffffffffull, 0xf950000000000000ull,
4776 0x7c90000000000000ull, 0, 0x7fefffffffffffffull,
4777 0x7c8ffffffffffffeull, APFloat::rmNearestTiesToEven),
4778 // TODO: change 0xf950000000000000 to 0xf940000000000000, when
4779 // semPPCDoubleDoubleLegacy is gone.
4780 // (1 << (1023 - 53) + 0) + (DBL_MAX - 1 << (1023 - 105)) = DBL_MAX +
4781 // 1.11111... << (1023 - 52)
4782 std::make_tuple(0x7c90000000000000ull, 0, 0x7fefffffffffffffull,
4783 0xf950000000000000ull, 0x7fefffffffffffffull,
4784 0x7c8ffffffffffffeull, APFloat::rmNearestTiesToEven),
4787 for (auto Tp : Data) {
4788 uint64_t Op1[2], Op2[2], Expected[2];
4789 APFloat::roundingMode RM;
4790 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
4793 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4794 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4795 A1.add(A2, RM);
4797 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
4798 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1],
4799 Op2[0], Op2[1])
4800 .str();
4801 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
4802 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op1[0], Op1[1],
4803 Op2[0], Op2[1])
4804 .str();
4807 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4808 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4809 A2.add(A1, RM);
4811 EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0])
4812 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1],
4813 Op1[0], Op1[1])
4814 .str();
4815 EXPECT_EQ(Expected[1], A2.bitcastToAPInt().getRawData()[1])
4816 << formatv("({0:x} + {1:x}) + ({2:x} + {3:x})", Op2[0], Op2[1],
4817 Op1[0], Op1[1])
4818 .str();
4823 TEST(APFloatTest, PPCDoubleDoubleSubtract) {
4824 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
4825 uint64_t, APFloat::roundingMode>;
4826 DataType Data[] = {
4827 // (1 + 0) - (-1e-105 + 0) = (1 + 1e-105)
4828 std::make_tuple(0x3ff0000000000000ull, 0, 0xb960000000000000ull, 0,
4829 0x3ff0000000000000ull, 0x3960000000000000ull,
4830 APFloat::rmNearestTiesToEven),
4831 // (1 + 0) - (-1e-106 + 0) = (1 + 1e-106)
4832 std::make_tuple(0x3ff0000000000000ull, 0, 0xb950000000000000ull, 0,
4833 0x3ff0000000000000ull, 0x3950000000000000ull,
4834 APFloat::rmNearestTiesToEven),
4837 for (auto Tp : Data) {
4838 uint64_t Op1[2], Op2[2], Expected[2];
4839 APFloat::roundingMode RM;
4840 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
4842 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4843 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4844 A1.subtract(A2, RM);
4846 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
4847 << formatv("({0:x} + {1:x}) - ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
4848 Op2[1])
4849 .str();
4850 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
4851 << formatv("({0:x} + {1:x}) - ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
4852 Op2[1])
4853 .str();
4857 TEST(APFloatTest, PPCDoubleDoubleMultiplySpecial) {
4858 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t,
4859 APFloat::fltCategory, APFloat::roundingMode>;
4860 DataType Data[] = {
4861 // fcNaN * fcNaN = fcNaN
4862 std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0,
4863 APFloat::fcNaN, APFloat::rmNearestTiesToEven),
4864 // fcNaN * fcZero = fcNaN
4865 std::make_tuple(0x7ff8000000000000ull, 0, 0, 0, APFloat::fcNaN,
4866 APFloat::rmNearestTiesToEven),
4867 // fcNaN * fcInfinity = fcNaN
4868 std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff0000000000000ull, 0,
4869 APFloat::fcNaN, APFloat::rmNearestTiesToEven),
4870 // fcNaN * fcNormal = fcNaN
4871 std::make_tuple(0x7ff8000000000000ull, 0, 0x3ff0000000000000ull, 0,
4872 APFloat::fcNaN, APFloat::rmNearestTiesToEven),
4873 // fcInfinity * fcInfinity = fcInfinity
4874 std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0,
4875 APFloat::fcInfinity, APFloat::rmNearestTiesToEven),
4876 // fcInfinity * fcZero = fcNaN
4877 std::make_tuple(0x7ff0000000000000ull, 0, 0, 0, APFloat::fcNaN,
4878 APFloat::rmNearestTiesToEven),
4879 // fcInfinity * fcNormal = fcInfinity
4880 std::make_tuple(0x7ff0000000000000ull, 0, 0x3ff0000000000000ull, 0,
4881 APFloat::fcInfinity, APFloat::rmNearestTiesToEven),
4882 // fcZero * fcZero = fcZero
4883 std::make_tuple(0, 0, 0, 0, APFloat::fcZero,
4884 APFloat::rmNearestTiesToEven),
4885 // fcZero * fcNormal = fcZero
4886 std::make_tuple(0, 0, 0x3ff0000000000000ull, 0, APFloat::fcZero,
4887 APFloat::rmNearestTiesToEven),
4890 for (auto Tp : Data) {
4891 uint64_t Op1[2], Op2[2];
4892 APFloat::fltCategory Expected;
4893 APFloat::roundingMode RM;
4894 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected, RM) = Tp;
4897 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4898 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4899 A1.multiply(A2, RM);
4901 EXPECT_EQ(Expected, A1.getCategory())
4902 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1],
4903 Op2[0], Op2[1])
4904 .str();
4907 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4908 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4909 A2.multiply(A1, RM);
4911 EXPECT_EQ(Expected, A2.getCategory())
4912 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1],
4913 Op1[0], Op1[1])
4914 .str();
4919 TEST(APFloatTest, PPCDoubleDoubleMultiply) {
4920 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
4921 uint64_t, APFloat::roundingMode>;
4922 DataType Data[] = {
4923 // 1/3 * 3 = 1.0
4924 std::make_tuple(0x3fd5555555555555ull, 0x3c75555555555556ull,
4925 0x4008000000000000ull, 0, 0x3ff0000000000000ull, 0,
4926 APFloat::rmNearestTiesToEven),
4927 // (1 + epsilon) * (1 + 0) = fcZero
4928 std::make_tuple(0x3ff0000000000000ull, 0x0000000000000001ull,
4929 0x3ff0000000000000ull, 0, 0x3ff0000000000000ull,
4930 0x0000000000000001ull, APFloat::rmNearestTiesToEven),
4931 // (1 + epsilon) * (1 + epsilon) = 1 + 2 * epsilon
4932 std::make_tuple(0x3ff0000000000000ull, 0x0000000000000001ull,
4933 0x3ff0000000000000ull, 0x0000000000000001ull,
4934 0x3ff0000000000000ull, 0x0000000000000002ull,
4935 APFloat::rmNearestTiesToEven),
4936 // -(1 + epsilon) * (1 + epsilon) = -1
4937 std::make_tuple(0xbff0000000000000ull, 0x0000000000000001ull,
4938 0x3ff0000000000000ull, 0x0000000000000001ull,
4939 0xbff0000000000000ull, 0, APFloat::rmNearestTiesToEven),
4940 // (0.5 + 0) * (1 + 2 * epsilon) = 0.5 + epsilon
4941 std::make_tuple(0x3fe0000000000000ull, 0, 0x3ff0000000000000ull,
4942 0x0000000000000002ull, 0x3fe0000000000000ull,
4943 0x0000000000000001ull, APFloat::rmNearestTiesToEven),
4944 // (0.5 + 0) * (1 + epsilon) = 0.5
4945 std::make_tuple(0x3fe0000000000000ull, 0, 0x3ff0000000000000ull,
4946 0x0000000000000001ull, 0x3fe0000000000000ull, 0,
4947 APFloat::rmNearestTiesToEven),
4948 // __LDBL_MAX__ * (1 + 1 << 106) = inf
4949 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4950 0x3ff0000000000000ull, 0x3950000000000000ull,
4951 0x7ff0000000000000ull, 0, APFloat::rmNearestTiesToEven),
4952 // __LDBL_MAX__ * (1 + 1 << 107) > __LDBL_MAX__, but not inf, yes =_=|||
4953 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4954 0x3ff0000000000000ull, 0x3940000000000000ull,
4955 0x7fefffffffffffffull, 0x7c8fffffffffffffull,
4956 APFloat::rmNearestTiesToEven),
4957 // __LDBL_MAX__ * (1 + 1 << 108) = __LDBL_MAX__
4958 std::make_tuple(0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4959 0x3ff0000000000000ull, 0x3930000000000000ull,
4960 0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
4961 APFloat::rmNearestTiesToEven),
4964 for (auto Tp : Data) {
4965 uint64_t Op1[2], Op2[2], Expected[2];
4966 APFloat::roundingMode RM;
4967 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
4970 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4971 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4972 A1.multiply(A2, RM);
4974 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
4975 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1],
4976 Op2[0], Op2[1])
4977 .str();
4978 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
4979 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op1[0], Op1[1],
4980 Op2[0], Op2[1])
4981 .str();
4984 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
4985 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
4986 A2.multiply(A1, RM);
4988 EXPECT_EQ(Expected[0], A2.bitcastToAPInt().getRawData()[0])
4989 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1],
4990 Op1[0], Op1[1])
4991 .str();
4992 EXPECT_EQ(Expected[1], A2.bitcastToAPInt().getRawData()[1])
4993 << formatv("({0:x} + {1:x}) * ({2:x} + {3:x})", Op2[0], Op2[1],
4994 Op1[0], Op1[1])
4995 .str();
5000 TEST(APFloatTest, PPCDoubleDoubleDivide) {
5001 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t,
5002 uint64_t, APFloat::roundingMode>;
5003 // TODO: Only a sanity check for now. Add more edge cases when the
5004 // double-double algorithm is implemented.
5005 DataType Data[] = {
5006 // 1 / 3 = 1/3
5007 std::make_tuple(0x3ff0000000000000ull, 0, 0x4008000000000000ull, 0,
5008 0x3fd5555555555555ull, 0x3c75555555555556ull,
5009 APFloat::rmNearestTiesToEven),
5012 for (auto Tp : Data) {
5013 uint64_t Op1[2], Op2[2], Expected[2];
5014 APFloat::roundingMode RM;
5015 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1], RM) = Tp;
5017 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
5018 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
5019 A1.divide(A2, RM);
5021 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
5022 << formatv("({0:x} + {1:x}) / ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
5023 Op2[1])
5024 .str();
5025 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
5026 << formatv("({0:x} + {1:x}) / ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
5027 Op2[1])
5028 .str();
5032 TEST(APFloatTest, PPCDoubleDoubleRemainder) {
5033 using DataType =
5034 std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>;
5035 DataType Data[] = {
5036 // remainder(3.0 + 3.0 << 53, 1.25 + 1.25 << 53) = (0.5 + 0.5 << 53)
5037 std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull,
5038 0x3ff4000000000000ull, 0x3ca4000000000000ull,
5039 0x3fe0000000000000ull, 0x3c90000000000000ull),
5040 // remainder(3.0 + 3.0 << 53, 1.75 + 1.75 << 53) = (-0.5 - 0.5 << 53)
5041 std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull,
5042 0x3ffc000000000000ull, 0x3cac000000000000ull,
5043 0xbfe0000000000000ull, 0xbc90000000000000ull),
5046 for (auto Tp : Data) {
5047 uint64_t Op1[2], Op2[2], Expected[2];
5048 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp;
5050 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
5051 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
5052 A1.remainder(A2);
5054 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
5055 << formatv("remainder({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1],
5056 Op2[0], Op2[1])
5057 .str();
5058 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
5059 << formatv("remainder(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0],
5060 Op1[1], Op2[0], Op2[1])
5061 .str();
5065 TEST(APFloatTest, PPCDoubleDoubleMod) {
5066 using DataType =
5067 std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>;
5068 DataType Data[] = {
5069 // mod(3.0 + 3.0 << 53, 1.25 + 1.25 << 53) = (0.5 + 0.5 << 53)
5070 std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull,
5071 0x3ff4000000000000ull, 0x3ca4000000000000ull,
5072 0x3fe0000000000000ull, 0x3c90000000000000ull),
5073 // mod(3.0 + 3.0 << 53, 1.75 + 1.75 << 53) = (1.25 + 1.25 << 53)
5074 // 0xbc98000000000000 doesn't seem right, but it's what we currently have.
5075 // TODO: investigate
5076 std::make_tuple(0x4008000000000000ull, 0x3cb8000000000000ull,
5077 0x3ffc000000000000ull, 0x3cac000000000000ull,
5078 0x3ff4000000000001ull, 0xbc98000000000000ull),
5081 for (auto Tp : Data) {
5082 uint64_t Op1[2], Op2[2], Expected[2];
5083 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected[0], Expected[1]) = Tp;
5085 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
5086 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
5087 A1.mod(A2);
5089 EXPECT_EQ(Expected[0], A1.bitcastToAPInt().getRawData()[0])
5090 << formatv("fmod(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1],
5091 Op2[0], Op2[1])
5092 .str();
5093 EXPECT_EQ(Expected[1], A1.bitcastToAPInt().getRawData()[1])
5094 << formatv("fmod(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1],
5095 Op2[0], Op2[1])
5096 .str();
5100 TEST(APFloatTest, PPCDoubleDoubleFMA) {
5101 // Sanity check for now.
5102 APFloat A(APFloat::PPCDoubleDouble(), "2");
5103 A.fusedMultiplyAdd(APFloat(APFloat::PPCDoubleDouble(), "3"),
5104 APFloat(APFloat::PPCDoubleDouble(), "4"),
5105 APFloat::rmNearestTiesToEven);
5106 EXPECT_EQ(APFloat::cmpEqual,
5107 APFloat(APFloat::PPCDoubleDouble(), "10").compare(A));
5110 TEST(APFloatTest, PPCDoubleDoubleRoundToIntegral) {
5112 APFloat A(APFloat::PPCDoubleDouble(), "1.5");
5113 A.roundToIntegral(APFloat::rmNearestTiesToEven);
5114 EXPECT_EQ(APFloat::cmpEqual,
5115 APFloat(APFloat::PPCDoubleDouble(), "2").compare(A));
5118 APFloat A(APFloat::PPCDoubleDouble(), "2.5");
5119 A.roundToIntegral(APFloat::rmNearestTiesToEven);
5120 EXPECT_EQ(APFloat::cmpEqual,
5121 APFloat(APFloat::PPCDoubleDouble(), "2").compare(A));
5125 TEST(APFloatTest, PPCDoubleDoubleCompare) {
5126 using DataType =
5127 std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, APFloat::cmpResult>;
5129 DataType Data[] = {
5130 // (1 + 0) = (1 + 0)
5131 std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000000ull, 0,
5132 APFloat::cmpEqual),
5133 // (1 + 0) < (1.00...1 + 0)
5134 std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull, 0,
5135 APFloat::cmpLessThan),
5136 // (1.00...1 + 0) > (1 + 0)
5137 std::make_tuple(0x3ff0000000000001ull, 0, 0x3ff0000000000000ull, 0,
5138 APFloat::cmpGreaterThan),
5139 // (1 + 0) < (1 + epsilon)
5140 std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull,
5141 0x0000000000000001ull, APFloat::cmpLessThan),
5142 // NaN != NaN
5143 std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0,
5144 APFloat::cmpUnordered),
5145 // (1 + 0) != NaN
5146 std::make_tuple(0x3ff0000000000000ull, 0, 0x7ff8000000000000ull, 0,
5147 APFloat::cmpUnordered),
5148 // Inf = Inf
5149 std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0,
5150 APFloat::cmpEqual),
5153 for (auto Tp : Data) {
5154 uint64_t Op1[2], Op2[2];
5155 APFloat::cmpResult Expected;
5156 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp;
5158 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
5159 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
5160 EXPECT_EQ(Expected, A1.compare(A2))
5161 << formatv("compare(({0:x} + {1:x}), ({2:x} + {3:x}))", Op1[0], Op1[1],
5162 Op2[0], Op2[1])
5163 .str();
5167 TEST(APFloatTest, PPCDoubleDoubleBitwiseIsEqual) {
5168 using DataType = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, bool>;
5170 DataType Data[] = {
5171 // (1 + 0) = (1 + 0)
5172 std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000000ull, 0, true),
5173 // (1 + 0) != (1.00...1 + 0)
5174 std::make_tuple(0x3ff0000000000000ull, 0, 0x3ff0000000000001ull, 0,
5175 false),
5176 // NaN = NaN
5177 std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull, 0, true),
5178 // NaN != NaN with a different bit pattern
5179 std::make_tuple(0x7ff8000000000000ull, 0, 0x7ff8000000000000ull,
5180 0x3ff0000000000000ull, false),
5181 // Inf = Inf
5182 std::make_tuple(0x7ff0000000000000ull, 0, 0x7ff0000000000000ull, 0, true),
5185 for (auto Tp : Data) {
5186 uint64_t Op1[2], Op2[2];
5187 bool Expected;
5188 std::tie(Op1[0], Op1[1], Op2[0], Op2[1], Expected) = Tp;
5190 APFloat A1(APFloat::PPCDoubleDouble(), APInt(128, 2, Op1));
5191 APFloat A2(APFloat::PPCDoubleDouble(), APInt(128, 2, Op2));
5192 EXPECT_EQ(Expected, A1.bitwiseIsEqual(A2))
5193 << formatv("({0:x} + {1:x}) = ({2:x} + {3:x})", Op1[0], Op1[1], Op2[0],
5194 Op2[1])
5195 .str();
5199 TEST(APFloatTest, PPCDoubleDoubleHashValue) {
5200 uint64_t Data1[] = {0x3ff0000000000001ull, 0x0000000000000001ull};
5201 uint64_t Data2[] = {0x3ff0000000000001ull, 0};
5202 // The hash values are *hopefully* different.
5203 EXPECT_NE(
5204 hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data1))),
5205 hash_value(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data2))));
5208 TEST(APFloatTest, PPCDoubleDoubleChangeSign) {
5209 uint64_t Data[] = {
5210 0x400f000000000000ull, 0xbcb0000000000000ull,
5212 APFloat Float(APFloat::PPCDoubleDouble(), APInt(128, 2, Data));
5214 APFloat Actual =
5215 APFloat::copySign(Float, APFloat(APFloat::IEEEdouble(), "1"));
5216 EXPECT_EQ(0x400f000000000000ull, Actual.bitcastToAPInt().getRawData()[0]);
5217 EXPECT_EQ(0xbcb0000000000000ull, Actual.bitcastToAPInt().getRawData()[1]);
5220 APFloat Actual =
5221 APFloat::copySign(Float, APFloat(APFloat::IEEEdouble(), "-1"));
5222 EXPECT_EQ(0xc00f000000000000ull, Actual.bitcastToAPInt().getRawData()[0]);
5223 EXPECT_EQ(0x3cb0000000000000ull, Actual.bitcastToAPInt().getRawData()[1]);
5227 TEST(APFloatTest, PPCDoubleDoubleFactories) {
5229 uint64_t Data[] = {
5230 0, 0,
5232 EXPECT_EQ(APInt(128, 2, Data),
5233 APFloat::getZero(APFloat::PPCDoubleDouble()).bitcastToAPInt());
5236 uint64_t Data[] = {
5237 0x7fefffffffffffffull, 0x7c8ffffffffffffeull,
5239 EXPECT_EQ(APInt(128, 2, Data),
5240 APFloat::getLargest(APFloat::PPCDoubleDouble()).bitcastToAPInt());
5243 uint64_t Data[] = {
5244 0x0000000000000001ull, 0,
5246 EXPECT_EQ(
5247 APInt(128, 2, Data),
5248 APFloat::getSmallest(APFloat::PPCDoubleDouble()).bitcastToAPInt());
5251 uint64_t Data[] = {0x0360000000000000ull, 0};
5252 EXPECT_EQ(APInt(128, 2, Data),
5253 APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble())
5254 .bitcastToAPInt());
5257 uint64_t Data[] = {
5258 0x8000000000000000ull, 0x0000000000000000ull,
5260 EXPECT_EQ(
5261 APInt(128, 2, Data),
5262 APFloat::getZero(APFloat::PPCDoubleDouble(), true).bitcastToAPInt());
5265 uint64_t Data[] = {
5266 0xffefffffffffffffull, 0xfc8ffffffffffffeull,
5268 EXPECT_EQ(
5269 APInt(128, 2, Data),
5270 APFloat::getLargest(APFloat::PPCDoubleDouble(), true).bitcastToAPInt());
5273 uint64_t Data[] = {
5274 0x8000000000000001ull, 0x0000000000000000ull,
5276 EXPECT_EQ(APInt(128, 2, Data),
5277 APFloat::getSmallest(APFloat::PPCDoubleDouble(), true)
5278 .bitcastToAPInt());
5281 uint64_t Data[] = {
5282 0x8360000000000000ull, 0x0000000000000000ull,
5284 EXPECT_EQ(APInt(128, 2, Data),
5285 APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble(), true)
5286 .bitcastToAPInt());
5288 EXPECT_TRUE(APFloat::getSmallest(APFloat::PPCDoubleDouble()).isSmallest());
5289 EXPECT_TRUE(APFloat::getLargest(APFloat::PPCDoubleDouble()).isLargest());
5292 TEST(APFloatTest, PPCDoubleDoubleIsDenormal) {
5293 EXPECT_TRUE(APFloat::getSmallest(APFloat::PPCDoubleDouble()).isDenormal());
5294 EXPECT_FALSE(APFloat::getLargest(APFloat::PPCDoubleDouble()).isDenormal());
5295 EXPECT_FALSE(
5296 APFloat::getSmallestNormalized(APFloat::PPCDoubleDouble()).isDenormal());
5298 // (4 + 3) is not normalized
5299 uint64_t Data[] = {
5300 0x4010000000000000ull, 0x4008000000000000ull,
5302 EXPECT_TRUE(
5303 APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Data)).isDenormal());
5307 TEST(APFloatTest, PPCDoubleDoubleScalbn) {
5308 // 3.0 + 3.0 << 53
5309 uint64_t Input[] = {
5310 0x4008000000000000ull, 0x3cb8000000000000ull,
5312 APFloat Result =
5313 scalbn(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Input)), 1,
5314 APFloat::rmNearestTiesToEven);
5315 // 6.0 + 6.0 << 53
5316 EXPECT_EQ(0x4018000000000000ull, Result.bitcastToAPInt().getRawData()[0]);
5317 EXPECT_EQ(0x3cc8000000000000ull, Result.bitcastToAPInt().getRawData()[1]);
5320 TEST(APFloatTest, PPCDoubleDoubleFrexp) {
5321 // 3.0 + 3.0 << 53
5322 uint64_t Input[] = {
5323 0x4008000000000000ull, 0x3cb8000000000000ull,
5325 int Exp;
5326 // 0.75 + 0.75 << 53
5327 APFloat Result =
5328 frexp(APFloat(APFloat::PPCDoubleDouble(), APInt(128, 2, Input)), Exp,
5329 APFloat::rmNearestTiesToEven);
5330 EXPECT_EQ(2, Exp);
5331 EXPECT_EQ(0x3fe8000000000000ull, Result.bitcastToAPInt().getRawData()[0]);
5332 EXPECT_EQ(0x3c98000000000000ull, Result.bitcastToAPInt().getRawData()[1]);
5335 TEST(APFloatTest, x87Largest) {
5336 APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended());
5337 EXPECT_TRUE(MaxX87Val.isLargest());
5340 TEST(APFloatTest, x87Next) {
5341 APFloat F(APFloat::x87DoubleExtended(), "-1.0");
5342 F.next(false);
5343 EXPECT_TRUE(ilogb(F) == -1);
5346 TEST(APFloatTest, Float8ExhaustivePair) {
5347 // Test each pair of 8-bit floats with non-standard semantics
5348 for (APFloat::Semantics Sem :
5349 {APFloat::S_Float8E4M3FN, APFloat::S_Float8E5M2FNUZ,
5350 APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) {
5351 const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
5352 for (int i = 0; i < 256; i++) {
5353 for (int j = 0; j < 256; j++) {
5354 SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
5355 ",j=" + std::to_string(j));
5356 APFloat x(S, APInt(8, i));
5357 APFloat y(S, APInt(8, j));
5359 bool losesInfo;
5360 APFloat x16 = x;
5361 x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5362 &losesInfo);
5363 EXPECT_FALSE(losesInfo);
5364 APFloat y16 = y;
5365 y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5366 &losesInfo);
5367 EXPECT_FALSE(losesInfo);
5369 // Add
5370 APFloat z = x;
5371 z.add(y, APFloat::rmNearestTiesToEven);
5372 APFloat z16 = x16;
5373 z16.add(y16, APFloat::rmNearestTiesToEven);
5374 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5375 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5376 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5378 // Subtract
5379 z = x;
5380 z.subtract(y, APFloat::rmNearestTiesToEven);
5381 z16 = x16;
5382 z16.subtract(y16, APFloat::rmNearestTiesToEven);
5383 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5384 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5385 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5387 // Multiply
5388 z = x;
5389 z.multiply(y, APFloat::rmNearestTiesToEven);
5390 z16 = x16;
5391 z16.multiply(y16, APFloat::rmNearestTiesToEven);
5392 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5393 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5394 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5396 // Divide
5397 z = x;
5398 z.divide(y, APFloat::rmNearestTiesToEven);
5399 z16 = x16;
5400 z16.divide(y16, APFloat::rmNearestTiesToEven);
5401 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5402 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5403 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5405 // Mod
5406 z = x;
5407 z.mod(y);
5408 z16 = x16;
5409 z16.mod(y16);
5410 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5411 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5412 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5414 // Remainder
5415 z = x;
5416 z.remainder(y);
5417 z16 = x16;
5418 z16.remainder(y16);
5419 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5420 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5421 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5427 TEST(APFloatTest, Float8E8M0FNUExhaustivePair) {
5428 // Test each pair of 8-bit values for Float8E8M0FNU format
5429 APFloat::Semantics Sem = APFloat::S_Float8E8M0FNU;
5430 const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
5431 for (int i = 0; i < 256; i++) {
5432 for (int j = 0; j < 256; j++) {
5433 SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
5434 ",j=" + std::to_string(j));
5435 APFloat x(S, APInt(8, i));
5436 APFloat y(S, APInt(8, j));
5438 bool losesInfo;
5439 APFloat xd = x;
5440 xd.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
5441 &losesInfo);
5442 EXPECT_FALSE(losesInfo);
5443 APFloat yd = y;
5444 yd.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
5445 &losesInfo);
5446 EXPECT_FALSE(losesInfo);
5448 // Add
5449 APFloat z = x;
5450 z.add(y, APFloat::rmNearestTiesToEven);
5451 APFloat zd = xd;
5452 zd.add(yd, APFloat::rmNearestTiesToEven);
5453 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5454 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5455 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5457 // Subtract
5458 if (i >= j) {
5459 z = x;
5460 z.subtract(y, APFloat::rmNearestTiesToEven);
5461 zd = xd;
5462 zd.subtract(yd, APFloat::rmNearestTiesToEven);
5463 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5464 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5465 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5468 // Multiply
5469 z = x;
5470 z.multiply(y, APFloat::rmNearestTiesToEven);
5471 zd = xd;
5472 zd.multiply(yd, APFloat::rmNearestTiesToEven);
5473 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5474 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5475 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5477 // Divide
5478 z = x;
5479 z.divide(y, APFloat::rmNearestTiesToEven);
5480 zd = xd;
5481 zd.divide(yd, APFloat::rmNearestTiesToEven);
5482 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5483 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5484 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5486 // Mod
5487 z = x;
5488 z.mod(y);
5489 zd = xd;
5490 zd.mod(yd);
5491 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5492 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5493 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5494 APFloat mod_cached = z;
5495 // When one of them is a NaN, the result is a NaN.
5496 // When i < j, the mod is 'i' since it is the smaller
5497 // number. Otherwise the mod is always zero since
5498 // both x and y are powers-of-two in this format.
5499 // Since this format does not support zero and it is
5500 // represented as the smallest normalized value, we
5501 // test for isSmallestNormalized().
5502 if (i == 255 || j == 255)
5503 EXPECT_TRUE(z.isNaN());
5504 else if (i >= j)
5505 EXPECT_TRUE(z.isSmallestNormalized());
5506 else
5507 EXPECT_TRUE(z.bitwiseIsEqual(x));
5509 // Remainder
5510 z = x;
5511 z.remainder(y);
5512 zd = xd;
5513 zd.remainder(yd);
5514 zd.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5515 EXPECT_TRUE(z.bitwiseIsEqual(zd))
5516 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5517 // Since this format has only exponents (i.e. no precision)
5518 // we expect the remainder and mod to provide the same results.
5519 EXPECT_TRUE(z.bitwiseIsEqual(mod_cached))
5520 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5525 TEST(APFloatTest, Float6ExhaustivePair) {
5526 // Test each pair of 6-bit floats with non-standard semantics
5527 for (APFloat::Semantics Sem :
5528 {APFloat::S_Float6E3M2FN, APFloat::S_Float6E2M3FN}) {
5529 const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
5530 for (int i = 1; i < 64; i++) {
5531 for (int j = 1; j < 64; j++) {
5532 SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
5533 ",j=" + std::to_string(j));
5534 APFloat x(S, APInt(6, i));
5535 APFloat y(S, APInt(6, j));
5537 bool losesInfo;
5538 APFloat x16 = x;
5539 x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5540 &losesInfo);
5541 EXPECT_FALSE(losesInfo);
5542 APFloat y16 = y;
5543 y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5544 &losesInfo);
5545 EXPECT_FALSE(losesInfo);
5547 // Add
5548 APFloat z = x;
5549 z.add(y, APFloat::rmNearestTiesToEven);
5550 APFloat z16 = x16;
5551 z16.add(y16, APFloat::rmNearestTiesToEven);
5552 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5553 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5554 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5556 // Subtract
5557 z = x;
5558 z.subtract(y, APFloat::rmNearestTiesToEven);
5559 z16 = x16;
5560 z16.subtract(y16, APFloat::rmNearestTiesToEven);
5561 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5562 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5563 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5565 // Multiply
5566 z = x;
5567 z.multiply(y, APFloat::rmNearestTiesToEven);
5568 z16 = x16;
5569 z16.multiply(y16, APFloat::rmNearestTiesToEven);
5570 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5571 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5572 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5574 // Skip divide by 0
5575 if (j == 0 || j == 32)
5576 continue;
5578 // Divide
5579 z = x;
5580 z.divide(y, APFloat::rmNearestTiesToEven);
5581 z16 = x16;
5582 z16.divide(y16, APFloat::rmNearestTiesToEven);
5583 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5584 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5585 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5587 // Mod
5588 z = x;
5589 z.mod(y);
5590 z16 = x16;
5591 z16.mod(y16);
5592 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5593 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5594 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5596 // Remainder
5597 z = x;
5598 z.remainder(y);
5599 z16 = x16;
5600 z16.remainder(y16);
5601 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5602 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5603 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5609 TEST(APFloatTest, Float4ExhaustivePair) {
5610 // Test each pair of 4-bit floats with non-standard semantics
5611 for (APFloat::Semantics Sem : {APFloat::S_Float4E2M1FN}) {
5612 const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem);
5613 for (int i = 0; i < 16; i++) {
5614 for (int j = 0; j < 16; j++) {
5615 SCOPED_TRACE("sem=" + std::to_string(Sem) + ",i=" + std::to_string(i) +
5616 ",j=" + std::to_string(j));
5617 APFloat x(S, APInt(4, i));
5618 APFloat y(S, APInt(4, j));
5620 bool losesInfo;
5621 APFloat x16 = x;
5622 x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5623 &losesInfo);
5624 EXPECT_FALSE(losesInfo);
5625 APFloat y16 = y;
5626 y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
5627 &losesInfo);
5628 EXPECT_FALSE(losesInfo);
5630 // Add
5631 APFloat z = x;
5632 z.add(y, APFloat::rmNearestTiesToEven);
5633 APFloat z16 = x16;
5634 z16.add(y16, APFloat::rmNearestTiesToEven);
5635 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5636 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5637 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5639 // Subtract
5640 z = x;
5641 z.subtract(y, APFloat::rmNearestTiesToEven);
5642 z16 = x16;
5643 z16.subtract(y16, APFloat::rmNearestTiesToEven);
5644 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5645 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5646 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5648 // Multiply
5649 z = x;
5650 z.multiply(y, APFloat::rmNearestTiesToEven);
5651 z16 = x16;
5652 z16.multiply(y16, APFloat::rmNearestTiesToEven);
5653 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5654 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5655 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5657 // Skip divide by 0
5658 if (j == 0 || j == 8)
5659 continue;
5661 // Divide
5662 z = x;
5663 z.divide(y, APFloat::rmNearestTiesToEven);
5664 z16 = x16;
5665 z16.divide(y16, APFloat::rmNearestTiesToEven);
5666 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5667 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5668 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5670 // Mod
5671 z = x;
5672 z.mod(y);
5673 z16 = x16;
5674 z16.mod(y16);
5675 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5676 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5677 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5679 // Remainder
5680 z = x;
5681 z.remainder(y);
5682 z16 = x16;
5683 z16.remainder(y16);
5684 z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo);
5685 EXPECT_TRUE(z.bitwiseIsEqual(z16))
5686 << "sem=" << Sem << ", i=" << i << ", j=" << j;
5692 TEST(APFloatTest, ConvertE4M3FNToE5M2) {
5693 bool losesInfo;
5694 APFloat test(APFloat::Float8E4M3FN(), "1.0");
5695 APFloat::opStatus status = test.convert(
5696 APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven, &losesInfo);
5697 EXPECT_EQ(1.0f, test.convertToFloat());
5698 EXPECT_FALSE(losesInfo);
5699 EXPECT_EQ(status, APFloat::opOK);
5701 test = APFloat(APFloat::Float8E4M3FN(), "0.0");
5702 status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
5703 &losesInfo);
5704 EXPECT_EQ(0.0f, test.convertToFloat());
5705 EXPECT_FALSE(losesInfo);
5706 EXPECT_EQ(status, APFloat::opOK);
5708 test = APFloat(APFloat::Float8E4M3FN(), "0x1.2p0"); // 1.125
5709 status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
5710 &losesInfo);
5711 EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat());
5712 EXPECT_TRUE(losesInfo);
5713 EXPECT_EQ(status, APFloat::opInexact);
5715 test = APFloat(APFloat::Float8E4M3FN(), "0x1.6p0"); // 1.375
5716 status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
5717 &losesInfo);
5718 EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat());
5719 EXPECT_TRUE(losesInfo);
5720 EXPECT_EQ(status, APFloat::opInexact);
5722 // Convert E4M3FN denormal to E5M2 normal. Should not be truncated, despite
5723 // the destination format having one fewer significand bit
5724 test = APFloat(APFloat::Float8E4M3FN(), "0x1.Cp-7");
5725 status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
5726 &losesInfo);
5727 EXPECT_EQ(0x1.Cp-7, test.convertToFloat());
5728 EXPECT_FALSE(losesInfo);
5729 EXPECT_EQ(status, APFloat::opOK);
5731 // Test convert from NaN
5732 test = APFloat(APFloat::Float8E4M3FN(), "nan");
5733 status = test.convert(APFloat::Float8E5M2(), APFloat::rmNearestTiesToEven,
5734 &losesInfo);
5735 EXPECT_TRUE(std::isnan(test.convertToFloat()));
5736 EXPECT_FALSE(losesInfo);
5737 EXPECT_EQ(status, APFloat::opOK);
5740 TEST(APFloatTest, ConvertE5M2ToE4M3FN) {
5741 bool losesInfo;
5742 APFloat test(APFloat::Float8E5M2(), "1.0");
5743 APFloat::opStatus status = test.convert(
5744 APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven, &losesInfo);
5745 EXPECT_EQ(1.0f, test.convertToFloat());
5746 EXPECT_FALSE(losesInfo);
5747 EXPECT_EQ(status, APFloat::opOK);
5749 test = APFloat(APFloat::Float8E5M2(), "0.0");
5750 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5751 &losesInfo);
5752 EXPECT_EQ(0.0f, test.convertToFloat());
5753 EXPECT_FALSE(losesInfo);
5754 EXPECT_EQ(status, APFloat::opOK);
5756 test = APFloat(APFloat::Float8E5M2(), "0x1.Cp8"); // 448
5757 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5758 &losesInfo);
5759 EXPECT_EQ(0x1.Cp8 /* 448 */, test.convertToFloat());
5760 EXPECT_FALSE(losesInfo);
5761 EXPECT_EQ(status, APFloat::opOK);
5763 // Test overflow
5764 test = APFloat(APFloat::Float8E5M2(), "0x1.0p9"); // 512
5765 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5766 &losesInfo);
5767 EXPECT_TRUE(std::isnan(test.convertToFloat()));
5768 EXPECT_TRUE(losesInfo);
5769 EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact);
5771 // Test underflow
5772 test = APFloat(APFloat::Float8E5M2(), "0x1.0p-10");
5773 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5774 &losesInfo);
5775 EXPECT_EQ(0., test.convertToFloat());
5776 EXPECT_TRUE(losesInfo);
5777 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
5779 // Test rounding up to smallest denormal number
5780 test = APFloat(APFloat::Float8E5M2(), "0x1.8p-10");
5781 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5782 &losesInfo);
5783 EXPECT_EQ(0x1.0p-9, test.convertToFloat());
5784 EXPECT_TRUE(losesInfo);
5785 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
5787 // Testing inexact rounding to denormal number
5788 test = APFloat(APFloat::Float8E5M2(), "0x1.8p-9");
5789 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5790 &losesInfo);
5791 EXPECT_EQ(0x1.0p-8, test.convertToFloat());
5792 EXPECT_TRUE(losesInfo);
5793 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
5795 APFloat nan = APFloat(APFloat::Float8E4M3FN(), "nan");
5797 // Testing convert from Inf
5798 test = APFloat(APFloat::Float8E5M2(), "inf");
5799 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5800 &losesInfo);
5801 EXPECT_TRUE(std::isnan(test.convertToFloat()));
5802 EXPECT_TRUE(losesInfo);
5803 EXPECT_EQ(status, APFloat::opInexact);
5804 EXPECT_TRUE(test.bitwiseIsEqual(nan));
5806 // Testing convert from quiet NaN
5807 test = APFloat(APFloat::Float8E5M2(), "nan");
5808 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5809 &losesInfo);
5810 EXPECT_TRUE(std::isnan(test.convertToFloat()));
5811 EXPECT_TRUE(losesInfo);
5812 EXPECT_EQ(status, APFloat::opOK);
5813 EXPECT_TRUE(test.bitwiseIsEqual(nan));
5815 // Testing convert from signaling NaN
5816 test = APFloat(APFloat::Float8E5M2(), "snan");
5817 status = test.convert(APFloat::Float8E4M3FN(), APFloat::rmNearestTiesToEven,
5818 &losesInfo);
5819 EXPECT_TRUE(std::isnan(test.convertToFloat()));
5820 EXPECT_TRUE(losesInfo);
5821 EXPECT_EQ(status, APFloat::opInvalidOp);
5822 EXPECT_TRUE(test.bitwiseIsEqual(nan));
5825 TEST(APFloatTest, Float8E4M3FNGetInf) {
5826 APFloat t = APFloat::getInf(APFloat::Float8E4M3FN());
5827 EXPECT_TRUE(t.isNaN());
5828 EXPECT_FALSE(t.isInfinity());
5831 TEST(APFloatTest, Float8E4M3FNFromString) {
5832 // Exactly representable
5833 EXPECT_EQ(448, APFloat(APFloat::Float8E4M3FN(), "448").convertToDouble());
5834 // Round down to maximum value
5835 EXPECT_EQ(448, APFloat(APFloat::Float8E4M3FN(), "464").convertToDouble());
5836 // Round up, causing overflow to NaN
5837 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "465").isNaN());
5838 // Overflow without rounding
5839 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "480").isNaN());
5840 // Inf converted to NaN
5841 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "inf").isNaN());
5842 // NaN converted to NaN
5843 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FN(), "nan").isNaN());
5846 TEST(APFloatTest, Float8E4M3FNAdd) {
5847 APFloat QNaN = APFloat::getNaN(APFloat::Float8E4M3FN(), false);
5849 auto FromStr = [](StringRef S) {
5850 return APFloat(APFloat::Float8E4M3FN(), S);
5853 struct {
5854 APFloat x;
5855 APFloat y;
5856 const char *result;
5857 int status;
5858 int category;
5859 APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
5860 } AdditionTests[] = {
5861 // Test addition operations involving NaN, overflow, and the max E4M3FN
5862 // value (448) because E4M3FN differs from IEEE-754 types in these regards
5863 {FromStr("448"), FromStr("16"), "448", APFloat::opInexact,
5864 APFloat::fcNormal},
5865 {FromStr("448"), FromStr("18"), "NaN",
5866 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
5867 {FromStr("448"), FromStr("32"), "NaN",
5868 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
5869 {FromStr("-448"), FromStr("-32"), "-NaN",
5870 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
5871 {QNaN, FromStr("-448"), "NaN", APFloat::opOK, APFloat::fcNaN},
5872 {FromStr("448"), FromStr("-32"), "416", APFloat::opOK, APFloat::fcNormal},
5873 {FromStr("448"), FromStr("0"), "448", APFloat::opOK, APFloat::fcNormal},
5874 {FromStr("448"), FromStr("32"), "448", APFloat::opInexact,
5875 APFloat::fcNormal, APFloat::rmTowardZero},
5876 {FromStr("448"), FromStr("448"), "448", APFloat::opInexact,
5877 APFloat::fcNormal, APFloat::rmTowardZero},
5880 for (size_t i = 0; i < std::size(AdditionTests); ++i) {
5881 APFloat x(AdditionTests[i].x);
5882 APFloat y(AdditionTests[i].y);
5883 APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode);
5885 APFloat result(APFloat::Float8E4M3FN(), AdditionTests[i].result);
5887 EXPECT_TRUE(result.bitwiseIsEqual(x));
5888 EXPECT_EQ(AdditionTests[i].status, (int)status);
5889 EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory());
5893 TEST(APFloatTest, Float8E4M3FNDivideByZero) {
5894 APFloat x(APFloat::Float8E4M3FN(), "1");
5895 APFloat zero(APFloat::Float8E4M3FN(), "0");
5896 EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero);
5897 EXPECT_TRUE(x.isNaN());
5900 TEST(APFloatTest, Float8E4M3FNNext) {
5901 APFloat test(APFloat::Float8E4M3FN(), APFloat::uninitialized);
5902 APFloat expected(APFloat::Float8E4M3FN(), APFloat::uninitialized);
5904 // nextUp on positive numbers
5905 for (int i = 0; i < 127; i++) {
5906 test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i));
5907 expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i + 1));
5908 EXPECT_EQ(test.next(false), APFloat::opOK);
5909 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5912 // nextUp on negative zero
5913 test = APFloat::getZero(APFloat::Float8E4M3FN(), true);
5914 expected = APFloat::getSmallest(APFloat::Float8E4M3FN(), false);
5915 EXPECT_EQ(test.next(false), APFloat::opOK);
5916 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5918 // nextUp on negative nonzero numbers
5919 for (int i = 129; i < 255; i++) {
5920 test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i));
5921 expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i - 1));
5922 EXPECT_EQ(test.next(false), APFloat::opOK);
5923 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5926 // nextUp on NaN
5927 test = APFloat::getQNaN(APFloat::Float8E4M3FN(), false);
5928 expected = APFloat::getQNaN(APFloat::Float8E4M3FN(), false);
5929 EXPECT_EQ(test.next(false), APFloat::opOK);
5930 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5932 // nextDown on positive nonzero finite numbers
5933 for (int i = 1; i < 127; i++) {
5934 test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i));
5935 expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i - 1));
5936 EXPECT_EQ(test.next(true), APFloat::opOK);
5937 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5940 // nextDown on positive zero
5941 test = APFloat::getZero(APFloat::Float8E4M3FN(), true);
5942 expected = APFloat::getSmallest(APFloat::Float8E4M3FN(), true);
5943 EXPECT_EQ(test.next(true), APFloat::opOK);
5944 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5946 // nextDown on negative finite numbers
5947 for (int i = 128; i < 255; i++) {
5948 test = APFloat(APFloat::Float8E4M3FN(), APInt(8, i));
5949 expected = APFloat(APFloat::Float8E4M3FN(), APInt(8, i + 1));
5950 EXPECT_EQ(test.next(true), APFloat::opOK);
5951 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5954 // nextDown on NaN
5955 test = APFloat::getQNaN(APFloat::Float8E4M3FN(), false);
5956 expected = APFloat::getQNaN(APFloat::Float8E4M3FN(), false);
5957 EXPECT_EQ(test.next(true), APFloat::opOK);
5958 EXPECT_TRUE(test.bitwiseIsEqual(expected));
5961 TEST(APFloatTest, Float8E4M3FNExhaustive) {
5962 // Test each of the 256 Float8E4M3FN values.
5963 for (int i = 0; i < 256; i++) {
5964 APFloat test(APFloat::Float8E4M3FN(), APInt(8, i));
5965 SCOPED_TRACE("i=" + std::to_string(i));
5967 // isLargest
5968 if (i == 126 || i == 254) {
5969 EXPECT_TRUE(test.isLargest());
5970 EXPECT_EQ(abs(test).convertToDouble(), 448.);
5971 } else {
5972 EXPECT_FALSE(test.isLargest());
5975 // isSmallest
5976 if (i == 1 || i == 129) {
5977 EXPECT_TRUE(test.isSmallest());
5978 EXPECT_EQ(abs(test).convertToDouble(), 0x1p-9);
5979 } else {
5980 EXPECT_FALSE(test.isSmallest());
5983 // convert to BFloat
5984 APFloat test2 = test;
5985 bool losesInfo;
5986 APFloat::opStatus status = test2.convert(
5987 APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
5988 EXPECT_EQ(status, APFloat::opOK);
5989 EXPECT_FALSE(losesInfo);
5990 if (i == 127 || i == 255)
5991 EXPECT_TRUE(test2.isNaN());
5992 else
5993 EXPECT_EQ(test.convertToFloat(), test2.convertToFloat());
5995 // bitcastToAPInt
5996 EXPECT_EQ(i, test.bitcastToAPInt());
6000 TEST(APFloatTest, Float8E8M0FNUExhaustive) {
6001 // Test each of the 256 Float8E8M0FNU values.
6002 for (int i = 0; i < 256; i++) {
6003 APFloat test(APFloat::Float8E8M0FNU(), APInt(8, i));
6004 SCOPED_TRACE("i=" + std::to_string(i));
6006 // bitcastToAPInt
6007 EXPECT_EQ(i, test.bitcastToAPInt());
6009 // isLargest
6010 if (i == 254) {
6011 EXPECT_TRUE(test.isLargest());
6012 EXPECT_EQ(abs(test).convertToDouble(), 0x1.0p127);
6013 } else {
6014 EXPECT_FALSE(test.isLargest());
6017 // isSmallest
6018 if (i == 0) {
6019 EXPECT_TRUE(test.isSmallest());
6020 EXPECT_EQ(abs(test).convertToDouble(), 0x1.0p-127);
6021 } else {
6022 EXPECT_FALSE(test.isSmallest());
6025 // convert to Double
6026 bool losesInfo;
6027 std::string val = std::to_string(i - 127); // 127 is the bias
6028 llvm::SmallString<16> str("0x1.0p");
6029 str += val;
6030 APFloat test2(APFloat::IEEEdouble(), str);
6032 APFloat::opStatus status = test.convert(
6033 APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo);
6034 EXPECT_EQ(status, APFloat::opOK);
6035 EXPECT_FALSE(losesInfo);
6036 if (i == 255)
6037 EXPECT_TRUE(test.isNaN());
6038 else
6039 EXPECT_EQ(test.convertToDouble(), test2.convertToDouble());
6043 TEST(APFloatTest, Float8E5M2FNUZNext) {
6044 APFloat test(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized);
6045 APFloat expected(APFloat::Float8E5M2FNUZ(), APFloat::uninitialized);
6047 // 1. NextUp of largest bit pattern is nan
6048 test = APFloat::getLargest(APFloat::Float8E5M2FNUZ());
6049 expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
6050 EXPECT_EQ(test.next(false), APFloat::opOK);
6051 EXPECT_FALSE(test.isInfinity());
6052 EXPECT_FALSE(test.isZero());
6053 EXPECT_TRUE(test.isNaN());
6054 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6056 // 2. NextUp of smallest negative denormal is +0
6057 test = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true);
6058 expected = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false);
6059 EXPECT_EQ(test.next(false), APFloat::opOK);
6060 EXPECT_FALSE(test.isNegZero());
6061 EXPECT_TRUE(test.isPosZero());
6062 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6064 // 3. nextDown of negative of largest value is NaN
6065 test = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), true);
6066 expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
6067 EXPECT_EQ(test.next(true), APFloat::opOK);
6068 EXPECT_FALSE(test.isInfinity());
6069 EXPECT_FALSE(test.isZero());
6070 EXPECT_TRUE(test.isNaN());
6071 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6073 // 4. nextDown of +0 is smallest negative denormal
6074 test = APFloat::getZero(APFloat::Float8E5M2FNUZ(), false);
6075 expected = APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), true);
6076 EXPECT_EQ(test.next(true), APFloat::opOK);
6077 EXPECT_FALSE(test.isZero());
6078 EXPECT_TRUE(test.isDenormal());
6079 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6081 // 5. nextUp of NaN is NaN
6082 test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);
6083 expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true);
6084 EXPECT_EQ(test.next(false), APFloat::opOK);
6085 EXPECT_TRUE(test.isNaN());
6087 // 6. nextDown of NaN is NaN
6088 test = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);
6089 expected = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), true);
6090 EXPECT_EQ(test.next(true), APFloat::opOK);
6091 EXPECT_TRUE(test.isNaN());
6094 TEST(APFloatTest, Float8E5M2FNUZChangeSign) {
6095 APFloat test = APFloat(APFloat::Float8E5M2FNUZ(), "1.0");
6096 APFloat expected = APFloat(APFloat::Float8E5M2FNUZ(), "-1.0");
6097 test.changeSign();
6098 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6100 test = APFloat::getZero(APFloat::Float8E5M2FNUZ());
6101 expected = test;
6102 test.changeSign();
6103 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6105 test = APFloat::getNaN(APFloat::Float8E5M2FNUZ());
6106 expected = test;
6107 test.changeSign();
6108 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6111 TEST(APFloatTest, Float8E5M2FNUZFromString) {
6112 // Exactly representable
6113 EXPECT_EQ(57344,
6114 APFloat(APFloat::Float8E5M2FNUZ(), "57344").convertToDouble());
6115 // Round down to maximum value
6116 EXPECT_EQ(57344,
6117 APFloat(APFloat::Float8E5M2FNUZ(), "59392").convertToDouble());
6118 // Round up, causing overflow to NaN
6119 EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "61440").isNaN());
6120 // Overflow without rounding
6121 EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "131072").isNaN());
6122 // Inf converted to NaN
6123 EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "inf").isNaN());
6124 // NaN converted to NaN
6125 EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "nan").isNaN());
6126 // Negative zero converted to positive zero
6127 EXPECT_TRUE(APFloat(APFloat::Float8E5M2FNUZ(), "-0").isPosZero());
6130 TEST(APFloatTest, UnsignedZeroArithmeticSpecial) {
6131 // Float semantics with only unsigned zero (ex. Float8E4M3FNUZ) violate the
6132 // IEEE rules about signs in arithmetic operations when producing zeros,
6133 // because they only have one zero. Most of the rest of the complexities of
6134 // arithmetic on these values are covered by the other Float8 types' test
6135 // cases and so are not repeated here.
6137 // The IEEE round towards negative rule doesn't apply
6138 for (APFloat::Semantics S :
6139 {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) {
6140 const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S);
6141 APFloat test = APFloat::getSmallest(Sem);
6142 APFloat rhs = test;
6143 EXPECT_EQ(test.subtract(rhs, APFloat::rmTowardNegative), APFloat::opOK);
6144 EXPECT_TRUE(test.isZero());
6145 EXPECT_FALSE(test.isNegative());
6147 // Multiplication of (small) * (-small) is +0
6148 test = APFloat::getSmallestNormalized(Sem);
6149 rhs = -test;
6150 EXPECT_EQ(test.multiply(rhs, APFloat::rmNearestTiesToAway),
6151 APFloat::opInexact | APFloat::opUnderflow);
6152 EXPECT_TRUE(test.isZero());
6153 EXPECT_FALSE(test.isNegative());
6155 // Dividing the negatize float_min by anything gives +0
6156 test = APFloat::getSmallest(Sem, true);
6157 rhs = APFloat(Sem, "2.0");
6158 EXPECT_EQ(test.divide(rhs, APFloat::rmNearestTiesToEven),
6159 APFloat::opInexact | APFloat::opUnderflow);
6160 EXPECT_TRUE(test.isZero());
6161 EXPECT_FALSE(test.isNegative());
6163 // Remainder can't copy sign because there's only one zero
6164 test = APFloat(Sem, "-4.0");
6165 rhs = APFloat(Sem, "2.0");
6166 EXPECT_EQ(test.remainder(rhs), APFloat::opOK);
6167 EXPECT_TRUE(test.isZero());
6168 EXPECT_FALSE(test.isNegative());
6170 // And same for mod
6171 test = APFloat(Sem, "-4.0");
6172 rhs = APFloat(Sem, "2.0");
6173 EXPECT_EQ(test.mod(rhs), APFloat::opOK);
6174 EXPECT_TRUE(test.isZero());
6175 EXPECT_FALSE(test.isNegative());
6177 // FMA correctly handles both the multiply and add parts of all this
6178 test = APFloat(Sem, "2.0");
6179 rhs = test;
6180 APFloat addend = APFloat(Sem, "-4.0");
6181 EXPECT_EQ(test.fusedMultiplyAdd(rhs, addend, APFloat::rmTowardNegative),
6182 APFloat::opOK);
6183 EXPECT_TRUE(test.isZero());
6184 EXPECT_FALSE(test.isNegative());
6188 TEST(APFloatTest, Float8E5M2FNUZAdd) {
6189 APFloat QNaN = APFloat::getNaN(APFloat::Float8E5M2FNUZ(), false);
6191 auto FromStr = [](StringRef S) {
6192 return APFloat(APFloat::Float8E5M2FNUZ(), S);
6195 struct {
6196 APFloat x;
6197 APFloat y;
6198 const char *result;
6199 int status;
6200 int category;
6201 APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
6202 } AdditionTests[] = {
6203 // Test addition operations involving NaN, overflow, and the max E5M2FNUZ
6204 // value (57344) because E5M2FNUZ differs from IEEE-754 types in these
6205 // regards
6206 {FromStr("57344"), FromStr("2048"), "57344", APFloat::opInexact,
6207 APFloat::fcNormal},
6208 {FromStr("57344"), FromStr("4096"), "NaN",
6209 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
6210 {FromStr("-57344"), FromStr("-4096"), "NaN",
6211 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
6212 {QNaN, FromStr("-57344"), "NaN", APFloat::opOK, APFloat::fcNaN},
6213 {FromStr("57344"), FromStr("-8192"), "49152", APFloat::opOK,
6214 APFloat::fcNormal},
6215 {FromStr("57344"), FromStr("0"), "57344", APFloat::opOK,
6216 APFloat::fcNormal},
6217 {FromStr("57344"), FromStr("4096"), "57344", APFloat::opInexact,
6218 APFloat::fcNormal, APFloat::rmTowardZero},
6219 {FromStr("57344"), FromStr("57344"), "57344", APFloat::opInexact,
6220 APFloat::fcNormal, APFloat::rmTowardZero},
6223 for (size_t i = 0; i < std::size(AdditionTests); ++i) {
6224 APFloat x(AdditionTests[i].x);
6225 APFloat y(AdditionTests[i].y);
6226 APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode);
6228 APFloat result(APFloat::Float8E5M2FNUZ(), AdditionTests[i].result);
6230 EXPECT_TRUE(result.bitwiseIsEqual(x));
6231 EXPECT_EQ(AdditionTests[i].status, (int)status);
6232 EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory());
6236 TEST(APFloatTest, Float8E5M2FNUZDivideByZero) {
6237 APFloat x(APFloat::Float8E5M2FNUZ(), "1");
6238 APFloat zero(APFloat::Float8E5M2FNUZ(), "0");
6239 EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero);
6240 EXPECT_TRUE(x.isNaN());
6243 TEST(APFloatTest, Float8UnsignedZeroExhaustive) {
6244 struct {
6245 const fltSemantics *semantics;
6246 const double largest;
6247 const double smallest;
6248 } const exhaustiveTests[] = {{&APFloat::Float8E5M2FNUZ(), 57344., 0x1.0p-17},
6249 {&APFloat::Float8E4M3FNUZ(), 240., 0x1.0p-10},
6250 {&APFloat::Float8E4M3B11FNUZ(), 30., 0x1.0p-13}};
6251 for (const auto &testInfo : exhaustiveTests) {
6252 const fltSemantics &sem = *testInfo.semantics;
6253 SCOPED_TRACE("Semantics=" + std::to_string(APFloat::SemanticsToEnum(sem)));
6254 // Test each of the 256 values.
6255 for (int i = 0; i < 256; i++) {
6256 SCOPED_TRACE("i=" + std::to_string(i));
6257 APFloat test(sem, APInt(8, i));
6259 // isLargest
6260 if (i == 127 || i == 255) {
6261 EXPECT_TRUE(test.isLargest());
6262 EXPECT_EQ(abs(test).convertToDouble(), testInfo.largest);
6263 } else {
6264 EXPECT_FALSE(test.isLargest());
6267 // isSmallest
6268 if (i == 1 || i == 129) {
6269 EXPECT_TRUE(test.isSmallest());
6270 EXPECT_EQ(abs(test).convertToDouble(), testInfo.smallest);
6271 } else {
6272 EXPECT_FALSE(test.isSmallest());
6275 // convert to BFloat
6276 APFloat test2 = test;
6277 bool losesInfo;
6278 APFloat::opStatus status = test2.convert(
6279 APFloat::BFloat(), APFloat::rmNearestTiesToEven, &losesInfo);
6280 EXPECT_EQ(status, APFloat::opOK);
6281 EXPECT_FALSE(losesInfo);
6282 if (i == 128)
6283 EXPECT_TRUE(test2.isNaN());
6284 else
6285 EXPECT_EQ(test.convertToFloat(), test2.convertToFloat());
6287 // bitcastToAPInt
6288 EXPECT_EQ(i, test.bitcastToAPInt());
6293 TEST(APFloatTest, Float8E4M3FNUZNext) {
6294 for (APFloat::Semantics S :
6295 {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) {
6296 const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S);
6297 APFloat test(Sem, APFloat::uninitialized);
6298 APFloat expected(Sem, APFloat::uninitialized);
6300 // 1. NextUp of largest bit pattern is nan
6301 test = APFloat::getLargest(Sem);
6302 expected = APFloat::getNaN(Sem);
6303 EXPECT_EQ(test.next(false), APFloat::opOK);
6304 EXPECT_FALSE(test.isInfinity());
6305 EXPECT_FALSE(test.isZero());
6306 EXPECT_TRUE(test.isNaN());
6307 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6309 // 2. NextUp of smallest negative denormal is +0
6310 test = APFloat::getSmallest(Sem, true);
6311 expected = APFloat::getZero(Sem, false);
6312 EXPECT_EQ(test.next(false), APFloat::opOK);
6313 EXPECT_FALSE(test.isNegZero());
6314 EXPECT_TRUE(test.isPosZero());
6315 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6317 // 3. nextDown of negative of largest value is NaN
6318 test = APFloat::getLargest(Sem, true);
6319 expected = APFloat::getNaN(Sem);
6320 EXPECT_EQ(test.next(true), APFloat::opOK);
6321 EXPECT_FALSE(test.isInfinity());
6322 EXPECT_FALSE(test.isZero());
6323 EXPECT_TRUE(test.isNaN());
6324 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6326 // 4. nextDown of +0 is smallest negative denormal
6327 test = APFloat::getZero(Sem, false);
6328 expected = APFloat::getSmallest(Sem, true);
6329 EXPECT_EQ(test.next(true), APFloat::opOK);
6330 EXPECT_FALSE(test.isZero());
6331 EXPECT_TRUE(test.isDenormal());
6332 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6334 // 5. nextUp of NaN is NaN
6335 test = APFloat::getNaN(Sem, false);
6336 expected = APFloat::getNaN(Sem, true);
6337 EXPECT_EQ(test.next(false), APFloat::opOK);
6338 EXPECT_TRUE(test.isNaN());
6340 // 6. nextDown of NaN is NaN
6341 test = APFloat::getNaN(Sem, false);
6342 expected = APFloat::getNaN(Sem, true);
6343 EXPECT_EQ(test.next(true), APFloat::opOK);
6344 EXPECT_TRUE(test.isNaN());
6348 TEST(APFloatTest, Float8E4M3FNUZChangeSign) {
6349 for (APFloat::Semantics S :
6350 {APFloat::S_Float8E4M3FNUZ, APFloat::S_Float8E4M3B11FNUZ}) {
6351 const llvm::fltSemantics &Sem = APFloat::EnumToSemantics(S);
6352 APFloat test = APFloat(Sem, "1.0");
6353 APFloat expected = APFloat(Sem, "-1.0");
6354 test.changeSign();
6355 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6357 test = APFloat::getZero(Sem);
6358 expected = test;
6359 test.changeSign();
6360 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6362 test = APFloat::getNaN(Sem);
6363 expected = test;
6364 test.changeSign();
6365 EXPECT_TRUE(test.bitwiseIsEqual(expected));
6369 TEST(APFloatTest, Float8E4M3FNUZFromString) {
6370 // Exactly representable
6371 EXPECT_EQ(240, APFloat(APFloat::Float8E4M3FNUZ(), "240").convertToDouble());
6372 // Round down to maximum value
6373 EXPECT_EQ(240, APFloat(APFloat::Float8E4M3FNUZ(), "247").convertToDouble());
6374 // Round up, causing overflow to NaN
6375 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FNUZ(), "248").isNaN());
6376 // Overflow without rounding
6377 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FNUZ(), "480").isNaN());
6378 // Inf converted to NaN
6379 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FNUZ(), "inf").isNaN());
6380 // NaN converted to NaN
6381 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FNUZ(), "nan").isNaN());
6382 // Negative zero converted to positive zero
6383 EXPECT_TRUE(APFloat(APFloat::Float8E4M3FNUZ(), "-0").isPosZero());
6386 TEST(APFloatTest, Float8E4M3FNUZAdd) {
6387 APFloat QNaN = APFloat::getNaN(APFloat::Float8E4M3FNUZ(), false);
6389 auto FromStr = [](StringRef S) {
6390 return APFloat(APFloat::Float8E4M3FNUZ(), S);
6393 struct {
6394 APFloat x;
6395 APFloat y;
6396 const char *result;
6397 int status;
6398 int category;
6399 APFloat::roundingMode roundingMode = APFloat::rmNearestTiesToEven;
6400 } AdditionTests[] = {
6401 // Test addition operations involving NaN, overflow, and the max E4M3FNUZ
6402 // value (240) because E4M3FNUZ differs from IEEE-754 types in these
6403 // regards
6404 {FromStr("240"), FromStr("4"), "240", APFloat::opInexact,
6405 APFloat::fcNormal},
6406 {FromStr("240"), FromStr("8"), "NaN",
6407 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
6408 {FromStr("240"), FromStr("16"), "NaN",
6409 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
6410 {FromStr("-240"), FromStr("-16"), "NaN",
6411 APFloat::opOverflow | APFloat::opInexact, APFloat::fcNaN},
6412 {QNaN, FromStr("-240"), "NaN", APFloat::opOK, APFloat::fcNaN},
6413 {FromStr("240"), FromStr("-16"), "224", APFloat::opOK, APFloat::fcNormal},
6414 {FromStr("240"), FromStr("0"), "240", APFloat::opOK, APFloat::fcNormal},
6415 {FromStr("240"), FromStr("32"), "240", APFloat::opInexact,
6416 APFloat::fcNormal, APFloat::rmTowardZero},
6417 {FromStr("240"), FromStr("240"), "240", APFloat::opInexact,
6418 APFloat::fcNormal, APFloat::rmTowardZero},
6421 for (size_t i = 0; i < std::size(AdditionTests); ++i) {
6422 APFloat x(AdditionTests[i].x);
6423 APFloat y(AdditionTests[i].y);
6424 APFloat::opStatus status = x.add(y, AdditionTests[i].roundingMode);
6426 APFloat result(APFloat::Float8E4M3FNUZ(), AdditionTests[i].result);
6428 EXPECT_TRUE(result.bitwiseIsEqual(x));
6429 EXPECT_EQ(AdditionTests[i].status, (int)status);
6430 EXPECT_EQ(AdditionTests[i].category, (int)x.getCategory());
6434 TEST(APFloatTest, Float8E4M3FNUZDivideByZero) {
6435 APFloat x(APFloat::Float8E4M3FNUZ(), "1");
6436 APFloat zero(APFloat::Float8E4M3FNUZ(), "0");
6437 EXPECT_EQ(x.divide(zero, APFloat::rmNearestTiesToEven), APFloat::opDivByZero);
6438 EXPECT_TRUE(x.isNaN());
6441 TEST(APFloatTest, ConvertE5M2FNUZToE4M3FNUZ) {
6442 bool losesInfo;
6443 APFloat test(APFloat::Float8E5M2FNUZ(), "1.0");
6444 APFloat::opStatus status = test.convert(
6445 APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo);
6446 EXPECT_EQ(1.0f, test.convertToFloat());
6447 EXPECT_FALSE(losesInfo);
6448 EXPECT_EQ(status, APFloat::opOK);
6450 losesInfo = true;
6451 test = APFloat(APFloat::Float8E5M2FNUZ(), "0.0");
6452 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6453 &losesInfo);
6454 EXPECT_EQ(0.0f, test.convertToFloat());
6455 EXPECT_FALSE(losesInfo);
6456 EXPECT_EQ(status, APFloat::opOK);
6458 losesInfo = true;
6459 test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.Cp7"); // 224
6460 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6461 &losesInfo);
6462 EXPECT_EQ(0x1.Cp7 /* 224 */, test.convertToFloat());
6463 EXPECT_FALSE(losesInfo);
6464 EXPECT_EQ(status, APFloat::opOK);
6466 // Test overflow
6467 losesInfo = false;
6468 test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p8"); // 256
6469 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6470 &losesInfo);
6471 EXPECT_TRUE(std::isnan(test.convertToFloat()));
6472 EXPECT_TRUE(losesInfo);
6473 EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact);
6475 // Test underflow
6476 test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.0p-11");
6477 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6478 &losesInfo);
6479 EXPECT_EQ(0., test.convertToFloat());
6480 EXPECT_TRUE(losesInfo);
6481 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
6483 // Test rounding up to smallest denormal number
6484 losesInfo = false;
6485 test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-11");
6486 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6487 &losesInfo);
6488 EXPECT_EQ(0x1.0p-10, test.convertToFloat());
6489 EXPECT_TRUE(losesInfo);
6490 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
6492 // Testing inexact rounding to denormal number
6493 losesInfo = false;
6494 test = APFloat(APFloat::Float8E5M2FNUZ(), "0x1.8p-10");
6495 status = test.convert(APFloat::Float8E4M3FNUZ(), APFloat::rmNearestTiesToEven,
6496 &losesInfo);
6497 EXPECT_EQ(0x1.0p-9, test.convertToFloat());
6498 EXPECT_TRUE(losesInfo);
6499 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
6502 TEST(APFloatTest, ConvertE4M3FNUZToE5M2FNUZ) {
6503 bool losesInfo;
6504 APFloat test(APFloat::Float8E4M3FNUZ(), "1.0");
6505 APFloat::opStatus status = test.convert(
6506 APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven, &losesInfo);
6507 EXPECT_EQ(1.0f, test.convertToFloat());
6508 EXPECT_FALSE(losesInfo);
6509 EXPECT_EQ(status, APFloat::opOK);
6511 losesInfo = true;
6512 test = APFloat(APFloat::Float8E4M3FNUZ(), "0.0");
6513 status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
6514 &losesInfo);
6515 EXPECT_EQ(0.0f, test.convertToFloat());
6516 EXPECT_FALSE(losesInfo);
6517 EXPECT_EQ(status, APFloat::opOK);
6519 losesInfo = false;
6520 test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.2p0"); // 1.125
6521 status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
6522 &losesInfo);
6523 EXPECT_EQ(0x1.0p0 /* 1.0 */, test.convertToFloat());
6524 EXPECT_TRUE(losesInfo);
6525 EXPECT_EQ(status, APFloat::opInexact);
6527 losesInfo = false;
6528 test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.6p0"); // 1.375
6529 status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
6530 &losesInfo);
6531 EXPECT_EQ(0x1.8p0 /* 1.5 */, test.convertToFloat());
6532 EXPECT_TRUE(losesInfo);
6533 EXPECT_EQ(status, APFloat::opInexact);
6535 // Convert E4M3FNUZ denormal to E5M2 normal. Should not be truncated, despite
6536 // the destination format having one fewer significand bit
6537 losesInfo = true;
6538 test = APFloat(APFloat::Float8E4M3FNUZ(), "0x1.Cp-8");
6539 status = test.convert(APFloat::Float8E5M2FNUZ(), APFloat::rmNearestTiesToEven,
6540 &losesInfo);
6541 EXPECT_EQ(0x1.Cp-8, test.convertToFloat());
6542 EXPECT_FALSE(losesInfo);
6543 EXPECT_EQ(status, APFloat::opOK);
6546 TEST(APFloatTest, F8ToString) {
6547 for (APFloat::Semantics S :
6548 {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN,
6549 APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ,
6550 APFloat::S_Float8E4M3B11FNUZ}) {
6551 SCOPED_TRACE("Semantics=" + std::to_string(S));
6552 for (int i = 0; i < 256; i++) {
6553 SCOPED_TRACE("i=" + std::to_string(i));
6554 APFloat test(APFloat::EnumToSemantics(S), APInt(8, i));
6555 llvm::SmallString<128> str;
6556 test.toString(str);
6558 if (test.isNaN()) {
6559 EXPECT_EQ(str, "NaN");
6560 } else {
6561 APFloat test2(APFloat::EnumToSemantics(S), str);
6562 EXPECT_TRUE(test.bitwiseIsEqual(test2));
6568 TEST(APFloatTest, BitsToF8ToBits) {
6569 for (APFloat::Semantics S :
6570 {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN,
6571 APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ,
6572 APFloat::S_Float8E4M3B11FNUZ}) {
6573 SCOPED_TRACE("Semantics=" + std::to_string(S));
6574 for (int i = 0; i < 256; i++) {
6575 SCOPED_TRACE("i=" + std::to_string(i));
6576 APInt bits_in = APInt(8, i);
6577 APFloat test(APFloat::EnumToSemantics(S), bits_in);
6578 APInt bits_out = test.bitcastToAPInt();
6579 EXPECT_EQ(bits_in, bits_out);
6584 TEST(APFloatTest, F8ToBitsToF8) {
6585 for (APFloat::Semantics S :
6586 {APFloat::S_Float8E5M2, APFloat::S_Float8E4M3FN,
6587 APFloat::S_Float8E5M2FNUZ, APFloat::S_Float8E4M3FNUZ,
6588 APFloat::S_Float8E4M3B11FNUZ}) {
6589 SCOPED_TRACE("Semantics=" + std::to_string(S));
6590 auto &Sem = APFloat::EnumToSemantics(S);
6591 for (bool negative : {false, true}) {
6592 SCOPED_TRACE("negative=" + std::to_string(negative));
6593 APFloat test = APFloat::getZero(Sem, /*Negative=*/negative);
6594 for (int i = 0; i < 128; i++, test.next(/*nextDown=*/negative)) {
6595 SCOPED_TRACE("i=" + std::to_string(i));
6596 APInt bits = test.bitcastToAPInt();
6597 APFloat test2 = APFloat(Sem, bits);
6598 if (test.isNaN()) {
6599 EXPECT_TRUE(test2.isNaN());
6600 } else {
6601 EXPECT_TRUE(test.bitwiseIsEqual(test2));
6608 TEST(APFloatTest, IEEEdoubleToDouble) {
6609 APFloat DPosZero(0.0);
6610 APFloat DPosZeroToDouble(DPosZero.convertToDouble());
6611 EXPECT_TRUE(DPosZeroToDouble.isPosZero());
6612 APFloat DNegZero(-0.0);
6613 APFloat DNegZeroToDouble(DNegZero.convertToDouble());
6614 EXPECT_TRUE(DNegZeroToDouble.isNegZero());
6616 APFloat DOne(1.0);
6617 EXPECT_EQ(1.0, DOne.convertToDouble());
6618 APFloat DPosLargest = APFloat::getLargest(APFloat::IEEEdouble(), false);
6619 EXPECT_EQ(std::numeric_limits<double>::max(), DPosLargest.convertToDouble());
6620 APFloat DNegLargest = APFloat::getLargest(APFloat::IEEEdouble(), true);
6621 EXPECT_EQ(-std::numeric_limits<double>::max(), DNegLargest.convertToDouble());
6622 APFloat DPosSmallest =
6623 APFloat::getSmallestNormalized(APFloat::IEEEdouble(), false);
6624 EXPECT_EQ(std::numeric_limits<double>::min(), DPosSmallest.convertToDouble());
6625 APFloat DNegSmallest =
6626 APFloat::getSmallestNormalized(APFloat::IEEEdouble(), true);
6627 EXPECT_EQ(-std::numeric_limits<double>::min(),
6628 DNegSmallest.convertToDouble());
6630 APFloat DSmallestDenorm = APFloat::getSmallest(APFloat::IEEEdouble(), false);
6631 EXPECT_EQ(std::numeric_limits<double>::denorm_min(),
6632 DSmallestDenorm.convertToDouble());
6633 APFloat DLargestDenorm(APFloat::IEEEdouble(), "0x0.FFFFFFFFFFFFFp-1022");
6634 EXPECT_EQ(/*0x0.FFFFFFFFFFFFFp-1022*/ 2.225073858507201e-308,
6635 DLargestDenorm.convertToDouble());
6637 APFloat DPosInf = APFloat::getInf(APFloat::IEEEdouble());
6638 EXPECT_EQ(std::numeric_limits<double>::infinity(), DPosInf.convertToDouble());
6639 APFloat DNegInf = APFloat::getInf(APFloat::IEEEdouble(), true);
6640 EXPECT_EQ(-std::numeric_limits<double>::infinity(),
6641 DNegInf.convertToDouble());
6642 APFloat DQNaN = APFloat::getQNaN(APFloat::IEEEdouble());
6643 EXPECT_TRUE(std::isnan(DQNaN.convertToDouble()));
6646 TEST(APFloatTest, IEEEsingleToDouble) {
6647 APFloat FPosZero(0.0F);
6648 APFloat FPosZeroToDouble(FPosZero.convertToDouble());
6649 EXPECT_TRUE(FPosZeroToDouble.isPosZero());
6650 APFloat FNegZero(-0.0F);
6651 APFloat FNegZeroToDouble(FNegZero.convertToDouble());
6652 EXPECT_TRUE(FNegZeroToDouble.isNegZero());
6654 APFloat FOne(1.0F);
6655 EXPECT_EQ(1.0, FOne.convertToDouble());
6656 APFloat FPosLargest = APFloat::getLargest(APFloat::IEEEsingle(), false);
6657 EXPECT_EQ(std::numeric_limits<float>::max(), FPosLargest.convertToDouble());
6658 APFloat FNegLargest = APFloat::getLargest(APFloat::IEEEsingle(), true);
6659 EXPECT_EQ(-std::numeric_limits<float>::max(), FNegLargest.convertToDouble());
6660 APFloat FPosSmallest =
6661 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
6662 EXPECT_EQ(std::numeric_limits<float>::min(), FPosSmallest.convertToDouble());
6663 APFloat FNegSmallest =
6664 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
6665 EXPECT_EQ(-std::numeric_limits<float>::min(), FNegSmallest.convertToDouble());
6667 APFloat FSmallestDenorm = APFloat::getSmallest(APFloat::IEEEsingle(), false);
6668 EXPECT_EQ(std::numeric_limits<float>::denorm_min(),
6669 FSmallestDenorm.convertToDouble());
6670 APFloat FLargestDenorm(APFloat::IEEEdouble(), "0x0.FFFFFEp-126");
6671 EXPECT_EQ(/*0x0.FFFFFEp-126*/ 1.1754942106924411e-38,
6672 FLargestDenorm.convertToDouble());
6674 APFloat FPosInf = APFloat::getInf(APFloat::IEEEsingle());
6675 EXPECT_EQ(std::numeric_limits<double>::infinity(), FPosInf.convertToDouble());
6676 APFloat FNegInf = APFloat::getInf(APFloat::IEEEsingle(), true);
6677 EXPECT_EQ(-std::numeric_limits<double>::infinity(),
6678 FNegInf.convertToDouble());
6679 APFloat FQNaN = APFloat::getQNaN(APFloat::IEEEsingle());
6680 EXPECT_TRUE(std::isnan(FQNaN.convertToDouble()));
6683 TEST(APFloatTest, IEEEhalfToDouble) {
6684 APFloat HPosZero = APFloat::getZero(APFloat::IEEEhalf());
6685 APFloat HPosZeroToDouble(HPosZero.convertToDouble());
6686 EXPECT_TRUE(HPosZeroToDouble.isPosZero());
6687 APFloat HNegZero = APFloat::getZero(APFloat::IEEEhalf(), true);
6688 APFloat HNegZeroToDouble(HNegZero.convertToDouble());
6689 EXPECT_TRUE(HNegZeroToDouble.isNegZero());
6691 APFloat HOne(APFloat::IEEEhalf(), "1.0");
6692 EXPECT_EQ(1.0, HOne.convertToDouble());
6693 APFloat HPosLargest = APFloat::getLargest(APFloat::IEEEhalf(), false);
6694 EXPECT_EQ(65504.0, HPosLargest.convertToDouble());
6695 APFloat HNegLargest = APFloat::getLargest(APFloat::IEEEhalf(), true);
6696 EXPECT_EQ(-65504.0, HNegLargest.convertToDouble());
6697 APFloat HPosSmallest =
6698 APFloat::getSmallestNormalized(APFloat::IEEEhalf(), false);
6699 EXPECT_EQ(/*0x1.p-14*/ 6.103515625e-05, HPosSmallest.convertToDouble());
6700 APFloat HNegSmallest =
6701 APFloat::getSmallestNormalized(APFloat::IEEEhalf(), true);
6702 EXPECT_EQ(/*-0x1.p-14*/ -6.103515625e-05, HNegSmallest.convertToDouble());
6704 APFloat HSmallestDenorm = APFloat::getSmallest(APFloat::IEEEhalf(), false);
6705 EXPECT_EQ(/*0x1.p-24*/ 5.960464477539063e-08,
6706 HSmallestDenorm.convertToDouble());
6707 APFloat HLargestDenorm(APFloat::IEEEhalf(), "0x1.FFCp-14");
6708 EXPECT_EQ(/*0x1.FFCp-14*/ 0.00012201070785522461,
6709 HLargestDenorm.convertToDouble());
6711 APFloat HPosInf = APFloat::getInf(APFloat::IEEEhalf());
6712 EXPECT_EQ(std::numeric_limits<double>::infinity(), HPosInf.convertToDouble());
6713 APFloat HNegInf = APFloat::getInf(APFloat::IEEEhalf(), true);
6714 EXPECT_EQ(-std::numeric_limits<double>::infinity(),
6715 HNegInf.convertToDouble());
6716 APFloat HQNaN = APFloat::getQNaN(APFloat::IEEEhalf());
6717 EXPECT_TRUE(std::isnan(HQNaN.convertToDouble()));
6719 APFloat BPosZero = APFloat::getZero(APFloat::IEEEhalf());
6720 APFloat BPosZeroToDouble(BPosZero.convertToDouble());
6721 EXPECT_TRUE(BPosZeroToDouble.isPosZero());
6722 APFloat BNegZero = APFloat::getZero(APFloat::IEEEhalf(), true);
6723 APFloat BNegZeroToDouble(BNegZero.convertToDouble());
6724 EXPECT_TRUE(BNegZeroToDouble.isNegZero());
6727 TEST(APFloatTest, BFloatToDouble) {
6728 APFloat BOne(APFloat::BFloat(), "1.0");
6729 EXPECT_EQ(1.0, BOne.convertToDouble());
6730 APFloat BPosLargest = APFloat::getLargest(APFloat::BFloat(), false);
6731 EXPECT_EQ(/*0x1.FEp127*/ 3.3895313892515355e+38,
6732 BPosLargest.convertToDouble());
6733 APFloat BNegLargest = APFloat::getLargest(APFloat::BFloat(), true);
6734 EXPECT_EQ(/*-0x1.FEp127*/ -3.3895313892515355e+38,
6735 BNegLargest.convertToDouble());
6736 APFloat BPosSmallest =
6737 APFloat::getSmallestNormalized(APFloat::BFloat(), false);
6738 EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38,
6739 BPosSmallest.convertToDouble());
6740 APFloat BNegSmallest =
6741 APFloat::getSmallestNormalized(APFloat::BFloat(), true);
6742 EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38,
6743 BNegSmallest.convertToDouble());
6745 APFloat BSmallestDenorm = APFloat::getSmallest(APFloat::BFloat(), false);
6746 EXPECT_EQ(/*0x1.p-133*/ 9.183549615799121e-41,
6747 BSmallestDenorm.convertToDouble());
6748 APFloat BLargestDenorm(APFloat::BFloat(), "0x1.FCp-127");
6749 EXPECT_EQ(/*0x1.FCp-127*/ 1.1663108012064884e-38,
6750 BLargestDenorm.convertToDouble());
6752 APFloat BPosInf = APFloat::getInf(APFloat::BFloat());
6753 EXPECT_EQ(std::numeric_limits<double>::infinity(), BPosInf.convertToDouble());
6754 APFloat BNegInf = APFloat::getInf(APFloat::BFloat(), true);
6755 EXPECT_EQ(-std::numeric_limits<double>::infinity(),
6756 BNegInf.convertToDouble());
6757 APFloat BQNaN = APFloat::getQNaN(APFloat::BFloat());
6758 EXPECT_TRUE(std::isnan(BQNaN.convertToDouble()));
6761 TEST(APFloatTest, Float8E5M2ToDouble) {
6762 APFloat One(APFloat::Float8E5M2(), "1.0");
6763 EXPECT_EQ(1.0, One.convertToDouble());
6764 APFloat Two(APFloat::Float8E5M2(), "2.0");
6765 EXPECT_EQ(2.0, Two.convertToDouble());
6766 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E5M2(), false);
6767 EXPECT_EQ(5.734400e+04, PosLargest.convertToDouble());
6768 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E5M2(), true);
6769 EXPECT_EQ(-5.734400e+04, NegLargest.convertToDouble());
6770 APFloat PosSmallest =
6771 APFloat::getSmallestNormalized(APFloat::Float8E5M2(), false);
6772 EXPECT_EQ(0x1.p-14, PosSmallest.convertToDouble());
6773 APFloat NegSmallest =
6774 APFloat::getSmallestNormalized(APFloat::Float8E5M2(), true);
6775 EXPECT_EQ(-0x1.p-14, NegSmallest.convertToDouble());
6777 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E5M2(), false);
6778 EXPECT_TRUE(SmallestDenorm.isDenormal());
6779 EXPECT_EQ(0x1p-16, SmallestDenorm.convertToDouble());
6781 APFloat PosInf = APFloat::getInf(APFloat::Float8E5M2());
6782 EXPECT_EQ(std::numeric_limits<double>::infinity(), PosInf.convertToDouble());
6783 APFloat NegInf = APFloat::getInf(APFloat::Float8E5M2(), true);
6784 EXPECT_EQ(-std::numeric_limits<double>::infinity(), NegInf.convertToDouble());
6785 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E5M2());
6786 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6789 TEST(APFloatTest, Float8E4M3ToDouble) {
6790 APFloat One(APFloat::Float8E4M3(), "1.0");
6791 EXPECT_EQ(1.0, One.convertToDouble());
6792 APFloat Two(APFloat::Float8E4M3(), "2.0");
6793 EXPECT_EQ(2.0, Two.convertToDouble());
6794 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3(), false);
6795 EXPECT_EQ(240.0F, PosLargest.convertToDouble());
6796 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3(), true);
6797 EXPECT_EQ(-240.0F, NegLargest.convertToDouble());
6798 APFloat PosSmallest =
6799 APFloat::getSmallestNormalized(APFloat::Float8E4M3(), false);
6800 EXPECT_EQ(0x1.p-6, PosSmallest.convertToDouble());
6801 APFloat NegSmallest =
6802 APFloat::getSmallestNormalized(APFloat::Float8E4M3(), true);
6803 EXPECT_EQ(-0x1.p-6, NegSmallest.convertToDouble());
6805 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3(), false);
6806 EXPECT_TRUE(SmallestDenorm.isDenormal());
6807 EXPECT_EQ(0x1.p-9, SmallestDenorm.convertToDouble());
6809 APFloat PosInf = APFloat::getInf(APFloat::Float8E4M3());
6810 EXPECT_EQ(std::numeric_limits<double>::infinity(), PosInf.convertToDouble());
6811 APFloat NegInf = APFloat::getInf(APFloat::Float8E4M3(), true);
6812 EXPECT_EQ(-std::numeric_limits<double>::infinity(), NegInf.convertToDouble());
6813 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3());
6814 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6817 TEST(APFloatTest, Float8E4M3FNToDouble) {
6818 APFloat One(APFloat::Float8E4M3FN(), "1.0");
6819 EXPECT_EQ(1.0, One.convertToDouble());
6820 APFloat Two(APFloat::Float8E4M3FN(), "2.0");
6821 EXPECT_EQ(2.0, Two.convertToDouble());
6822 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3FN(), false);
6823 EXPECT_EQ(448., PosLargest.convertToDouble());
6824 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3FN(), true);
6825 EXPECT_EQ(-448., NegLargest.convertToDouble());
6826 APFloat PosSmallest =
6827 APFloat::getSmallestNormalized(APFloat::Float8E4M3FN(), false);
6828 EXPECT_EQ(0x1.p-6, PosSmallest.convertToDouble());
6829 APFloat NegSmallest =
6830 APFloat::getSmallestNormalized(APFloat::Float8E4M3FN(), true);
6831 EXPECT_EQ(-0x1.p-6, NegSmallest.convertToDouble());
6833 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3FN(), false);
6834 EXPECT_TRUE(SmallestDenorm.isDenormal());
6835 EXPECT_EQ(0x1p-9, SmallestDenorm.convertToDouble());
6837 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FN());
6838 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6841 TEST(APFloatTest, Float8E5M2FNUZToDouble) {
6842 APFloat One(APFloat::Float8E5M2FNUZ(), "1.0");
6843 EXPECT_EQ(1.0, One.convertToDouble());
6844 APFloat Two(APFloat::Float8E5M2FNUZ(), "2.0");
6845 EXPECT_EQ(2.0, Two.convertToDouble());
6846 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), false);
6847 EXPECT_EQ(57344., PosLargest.convertToDouble());
6848 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), true);
6849 EXPECT_EQ(-57344., NegLargest.convertToDouble());
6850 APFloat PosSmallest =
6851 APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), false);
6852 EXPECT_EQ(0x1.p-15, PosSmallest.convertToDouble());
6853 APFloat NegSmallest =
6854 APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), true);
6855 EXPECT_EQ(-0x1.p-15, NegSmallest.convertToDouble());
6857 APFloat SmallestDenorm =
6858 APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), false);
6859 EXPECT_TRUE(SmallestDenorm.isDenormal());
6860 EXPECT_EQ(0x1p-17, SmallestDenorm.convertToDouble());
6862 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E5M2FNUZ());
6863 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6866 TEST(APFloatTest, Float8E4M3FNUZToDouble) {
6867 APFloat One(APFloat::Float8E4M3FNUZ(), "1.0");
6868 EXPECT_EQ(1.0, One.convertToDouble());
6869 APFloat Two(APFloat::Float8E4M3FNUZ(), "2.0");
6870 EXPECT_EQ(2.0, Two.convertToDouble());
6871 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3FNUZ(), false);
6872 EXPECT_EQ(240., PosLargest.convertToDouble());
6873 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3FNUZ(), true);
6874 EXPECT_EQ(-240., NegLargest.convertToDouble());
6875 APFloat PosSmallest =
6876 APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), false);
6877 EXPECT_EQ(0x1.p-7, PosSmallest.convertToDouble());
6878 APFloat NegSmallest =
6879 APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), true);
6880 EXPECT_EQ(-0x1.p-7, NegSmallest.convertToDouble());
6882 APFloat SmallestDenorm =
6883 APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), false);
6884 EXPECT_TRUE(SmallestDenorm.isDenormal());
6885 EXPECT_EQ(0x1p-10, SmallestDenorm.convertToDouble());
6887 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FNUZ());
6888 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6891 TEST(APFloatTest, Float8E3M4ToDouble) {
6892 APFloat PosZero = APFloat::getZero(APFloat::Float8E3M4(), false);
6893 APFloat PosZeroToDouble(PosZero.convertToDouble());
6894 EXPECT_TRUE(PosZeroToDouble.isPosZero());
6895 APFloat NegZero = APFloat::getZero(APFloat::Float8E3M4(), true);
6896 APFloat NegZeroToDouble(NegZero.convertToDouble());
6897 EXPECT_TRUE(NegZeroToDouble.isNegZero());
6899 APFloat One(APFloat::Float8E3M4(), "1.0");
6900 EXPECT_EQ(1.0, One.convertToDouble());
6901 APFloat Two(APFloat::Float8E3M4(), "2.0");
6902 EXPECT_EQ(2.0, Two.convertToDouble());
6903 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E3M4(), false);
6904 EXPECT_EQ(15.5F, PosLargest.convertToDouble());
6905 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E3M4(), true);
6906 EXPECT_EQ(-15.5F, NegLargest.convertToDouble());
6907 APFloat PosSmallest =
6908 APFloat::getSmallestNormalized(APFloat::Float8E3M4(), false);
6909 EXPECT_EQ(0x1.p-2, PosSmallest.convertToDouble());
6910 APFloat NegSmallest =
6911 APFloat::getSmallestNormalized(APFloat::Float8E3M4(), true);
6912 EXPECT_EQ(-0x1.p-2, NegSmallest.convertToDouble());
6914 APFloat PosSmallestDenorm =
6915 APFloat::getSmallest(APFloat::Float8E3M4(), false);
6916 EXPECT_TRUE(PosSmallestDenorm.isDenormal());
6917 EXPECT_EQ(0x1.p-6, PosSmallestDenorm.convertToDouble());
6918 APFloat NegSmallestDenorm = APFloat::getSmallest(APFloat::Float8E3M4(), true);
6919 EXPECT_TRUE(NegSmallestDenorm.isDenormal());
6920 EXPECT_EQ(-0x1.p-6, NegSmallestDenorm.convertToDouble());
6922 APFloat PosInf = APFloat::getInf(APFloat::Float8E3M4());
6923 EXPECT_EQ(std::numeric_limits<double>::infinity(), PosInf.convertToDouble());
6924 APFloat NegInf = APFloat::getInf(APFloat::Float8E3M4(), true);
6925 EXPECT_EQ(-std::numeric_limits<double>::infinity(), NegInf.convertToDouble());
6926 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E3M4());
6927 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6930 TEST(APFloatTest, FloatTF32ToDouble) {
6931 APFloat One(APFloat::FloatTF32(), "1.0");
6932 EXPECT_EQ(1.0, One.convertToDouble());
6933 APFloat PosLargest = APFloat::getLargest(APFloat::FloatTF32(), false);
6934 EXPECT_EQ(3.401162134214653489792616e+38, PosLargest.convertToDouble());
6935 APFloat NegLargest = APFloat::getLargest(APFloat::FloatTF32(), true);
6936 EXPECT_EQ(-3.401162134214653489792616e+38, NegLargest.convertToDouble());
6937 APFloat PosSmallest =
6938 APFloat::getSmallestNormalized(APFloat::FloatTF32(), false);
6939 EXPECT_EQ(1.1754943508222875079687e-38, PosSmallest.convertToDouble());
6940 APFloat NegSmallest =
6941 APFloat::getSmallestNormalized(APFloat::FloatTF32(), true);
6942 EXPECT_EQ(-1.1754943508222875079687e-38, NegSmallest.convertToDouble());
6944 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::FloatTF32(), false);
6945 EXPECT_EQ(1.1479437019748901445007e-41, SmallestDenorm.convertToDouble());
6946 APFloat LargestDenorm(APFloat::FloatTF32(), "0x1.FF8p-127");
6947 EXPECT_EQ(/*0x1.FF8p-127*/ 1.1743464071203126178242e-38,
6948 LargestDenorm.convertToDouble());
6950 APFloat PosInf = APFloat::getInf(APFloat::FloatTF32());
6951 EXPECT_EQ(std::numeric_limits<double>::infinity(), PosInf.convertToDouble());
6952 APFloat NegInf = APFloat::getInf(APFloat::FloatTF32(), true);
6953 EXPECT_EQ(-std::numeric_limits<double>::infinity(), NegInf.convertToDouble());
6954 APFloat QNaN = APFloat::getQNaN(APFloat::FloatTF32());
6955 EXPECT_TRUE(std::isnan(QNaN.convertToDouble()));
6958 TEST(APFloatTest, Float8E5M2FNUZToFloat) {
6959 APFloat PosZero = APFloat::getZero(APFloat::Float8E5M2FNUZ());
6960 APFloat PosZeroToFloat(PosZero.convertToFloat());
6961 EXPECT_TRUE(PosZeroToFloat.isPosZero());
6962 // Negative zero is not supported
6963 APFloat NegZero = APFloat::getZero(APFloat::Float8E5M2FNUZ(), true);
6964 APFloat NegZeroToFloat(NegZero.convertToFloat());
6965 EXPECT_TRUE(NegZeroToFloat.isPosZero());
6966 APFloat One(APFloat::Float8E5M2FNUZ(), "1.0");
6967 EXPECT_EQ(1.0F, One.convertToFloat());
6968 APFloat Two(APFloat::Float8E5M2FNUZ(), "2.0");
6969 EXPECT_EQ(2.0F, Two.convertToFloat());
6970 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), false);
6971 EXPECT_EQ(57344.F, PosLargest.convertToFloat());
6972 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E5M2FNUZ(), true);
6973 EXPECT_EQ(-57344.F, NegLargest.convertToFloat());
6974 APFloat PosSmallest =
6975 APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), false);
6976 EXPECT_EQ(0x1.p-15F, PosSmallest.convertToFloat());
6977 APFloat NegSmallest =
6978 APFloat::getSmallestNormalized(APFloat::Float8E5M2FNUZ(), true);
6979 EXPECT_EQ(-0x1.p-15F, NegSmallest.convertToFloat());
6981 APFloat SmallestDenorm =
6982 APFloat::getSmallest(APFloat::Float8E5M2FNUZ(), false);
6983 EXPECT_TRUE(SmallestDenorm.isDenormal());
6984 EXPECT_EQ(0x1p-17F, SmallestDenorm.convertToFloat());
6986 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E5M2FNUZ());
6987 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
6990 TEST(APFloatTest, Float8E4M3FNUZToFloat) {
6991 APFloat PosZero = APFloat::getZero(APFloat::Float8E4M3FNUZ());
6992 APFloat PosZeroToFloat(PosZero.convertToFloat());
6993 EXPECT_TRUE(PosZeroToFloat.isPosZero());
6994 // Negative zero is not supported
6995 APFloat NegZero = APFloat::getZero(APFloat::Float8E4M3FNUZ(), true);
6996 APFloat NegZeroToFloat(NegZero.convertToFloat());
6997 EXPECT_TRUE(NegZeroToFloat.isPosZero());
6998 APFloat One(APFloat::Float8E4M3FNUZ(), "1.0");
6999 EXPECT_EQ(1.0F, One.convertToFloat());
7000 APFloat Two(APFloat::Float8E4M3FNUZ(), "2.0");
7001 EXPECT_EQ(2.0F, Two.convertToFloat());
7002 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3FNUZ(), false);
7003 EXPECT_EQ(240.F, PosLargest.convertToFloat());
7004 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3FNUZ(), true);
7005 EXPECT_EQ(-240.F, NegLargest.convertToFloat());
7006 APFloat PosSmallest =
7007 APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), false);
7008 EXPECT_EQ(0x1.p-7F, PosSmallest.convertToFloat());
7009 APFloat NegSmallest =
7010 APFloat::getSmallestNormalized(APFloat::Float8E4M3FNUZ(), true);
7011 EXPECT_EQ(-0x1.p-7F, NegSmallest.convertToFloat());
7013 APFloat SmallestDenorm =
7014 APFloat::getSmallest(APFloat::Float8E4M3FNUZ(), false);
7015 EXPECT_TRUE(SmallestDenorm.isDenormal());
7016 EXPECT_EQ(0x1p-10F, SmallestDenorm.convertToFloat());
7018 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FNUZ());
7019 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7022 TEST(APFloatTest, IEEEsingleToFloat) {
7023 APFloat FPosZero(0.0F);
7024 APFloat FPosZeroToFloat(FPosZero.convertToFloat());
7025 EXPECT_TRUE(FPosZeroToFloat.isPosZero());
7026 APFloat FNegZero(-0.0F);
7027 APFloat FNegZeroToFloat(FNegZero.convertToFloat());
7028 EXPECT_TRUE(FNegZeroToFloat.isNegZero());
7030 APFloat FOne(1.0F);
7031 EXPECT_EQ(1.0F, FOne.convertToFloat());
7032 APFloat FPosLargest = APFloat::getLargest(APFloat::IEEEsingle(), false);
7033 EXPECT_EQ(std::numeric_limits<float>::max(), FPosLargest.convertToFloat());
7034 APFloat FNegLargest = APFloat::getLargest(APFloat::IEEEsingle(), true);
7035 EXPECT_EQ(-std::numeric_limits<float>::max(), FNegLargest.convertToFloat());
7036 APFloat FPosSmallest =
7037 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), false);
7038 EXPECT_EQ(std::numeric_limits<float>::min(), FPosSmallest.convertToFloat());
7039 APFloat FNegSmallest =
7040 APFloat::getSmallestNormalized(APFloat::IEEEsingle(), true);
7041 EXPECT_EQ(-std::numeric_limits<float>::min(), FNegSmallest.convertToFloat());
7043 APFloat FSmallestDenorm = APFloat::getSmallest(APFloat::IEEEsingle(), false);
7044 EXPECT_EQ(std::numeric_limits<float>::denorm_min(),
7045 FSmallestDenorm.convertToFloat());
7046 APFloat FLargestDenorm(APFloat::IEEEsingle(), "0x1.FFFFFEp-126");
7047 EXPECT_EQ(/*0x1.FFFFFEp-126*/ 2.3509885615147286e-38F,
7048 FLargestDenorm.convertToFloat());
7050 APFloat FPosInf = APFloat::getInf(APFloat::IEEEsingle());
7051 EXPECT_EQ(std::numeric_limits<float>::infinity(), FPosInf.convertToFloat());
7052 APFloat FNegInf = APFloat::getInf(APFloat::IEEEsingle(), true);
7053 EXPECT_EQ(-std::numeric_limits<float>::infinity(), FNegInf.convertToFloat());
7054 APFloat FQNaN = APFloat::getQNaN(APFloat::IEEEsingle());
7055 EXPECT_TRUE(std::isnan(FQNaN.convertToFloat()));
7058 TEST(APFloatTest, IEEEhalfToFloat) {
7059 APFloat HPosZero = APFloat::getZero(APFloat::IEEEhalf());
7060 APFloat HPosZeroToFloat(HPosZero.convertToFloat());
7061 EXPECT_TRUE(HPosZeroToFloat.isPosZero());
7062 APFloat HNegZero = APFloat::getZero(APFloat::IEEEhalf(), true);
7063 APFloat HNegZeroToFloat(HNegZero.convertToFloat());
7064 EXPECT_TRUE(HNegZeroToFloat.isNegZero());
7066 APFloat HOne(APFloat::IEEEhalf(), "1.0");
7067 EXPECT_EQ(1.0F, HOne.convertToFloat());
7068 APFloat HPosLargest = APFloat::getLargest(APFloat::IEEEhalf(), false);
7069 EXPECT_EQ(/*0x1.FFCp15*/ 65504.0F, HPosLargest.convertToFloat());
7070 APFloat HNegLargest = APFloat::getLargest(APFloat::IEEEhalf(), true);
7071 EXPECT_EQ(/*-0x1.FFCp15*/ -65504.0F, HNegLargest.convertToFloat());
7072 APFloat HPosSmallest =
7073 APFloat::getSmallestNormalized(APFloat::IEEEhalf(), false);
7074 EXPECT_EQ(/*0x1.p-14*/ 6.103515625e-05F, HPosSmallest.convertToFloat());
7075 APFloat HNegSmallest =
7076 APFloat::getSmallestNormalized(APFloat::IEEEhalf(), true);
7077 EXPECT_EQ(/*-0x1.p-14*/ -6.103515625e-05F, HNegSmallest.convertToFloat());
7079 APFloat HSmallestDenorm = APFloat::getSmallest(APFloat::IEEEhalf(), false);
7080 EXPECT_EQ(/*0x1.p-24*/ 5.960464477539063e-08F,
7081 HSmallestDenorm.convertToFloat());
7082 APFloat HLargestDenorm(APFloat::IEEEhalf(), "0x1.FFCp-14");
7083 EXPECT_EQ(/*0x1.FFCp-14*/ 0.00012201070785522461F,
7084 HLargestDenorm.convertToFloat());
7086 APFloat HPosInf = APFloat::getInf(APFloat::IEEEhalf());
7087 EXPECT_EQ(std::numeric_limits<float>::infinity(), HPosInf.convertToFloat());
7088 APFloat HNegInf = APFloat::getInf(APFloat::IEEEhalf(), true);
7089 EXPECT_EQ(-std::numeric_limits<float>::infinity(), HNegInf.convertToFloat());
7090 APFloat HQNaN = APFloat::getQNaN(APFloat::IEEEhalf());
7091 EXPECT_TRUE(std::isnan(HQNaN.convertToFloat()));
7094 TEST(APFloatTest, BFloatToFloat) {
7095 APFloat BPosZero = APFloat::getZero(APFloat::BFloat());
7096 APFloat BPosZeroToDouble(BPosZero.convertToFloat());
7097 EXPECT_TRUE(BPosZeroToDouble.isPosZero());
7098 APFloat BNegZero = APFloat::getZero(APFloat::BFloat(), true);
7099 APFloat BNegZeroToDouble(BNegZero.convertToFloat());
7100 EXPECT_TRUE(BNegZeroToDouble.isNegZero());
7102 APFloat BOne(APFloat::BFloat(), "1.0");
7103 EXPECT_EQ(1.0F, BOne.convertToFloat());
7104 APFloat BPosLargest = APFloat::getLargest(APFloat::BFloat(), false);
7105 EXPECT_EQ(/*0x1.FEp127*/ 3.3895313892515355e+38F,
7106 BPosLargest.convertToFloat());
7107 APFloat BNegLargest = APFloat::getLargest(APFloat::BFloat(), true);
7108 EXPECT_EQ(/*-0x1.FEp127*/ -3.3895313892515355e+38F,
7109 BNegLargest.convertToFloat());
7110 APFloat BPosSmallest =
7111 APFloat::getSmallestNormalized(APFloat::BFloat(), false);
7112 EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38F,
7113 BPosSmallest.convertToFloat());
7114 APFloat BNegSmallest =
7115 APFloat::getSmallestNormalized(APFloat::BFloat(), true);
7116 EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38F,
7117 BNegSmallest.convertToFloat());
7119 APFloat BSmallestDenorm = APFloat::getSmallest(APFloat::BFloat(), false);
7120 EXPECT_EQ(/*0x1.p-133*/ 9.183549615799121e-41F,
7121 BSmallestDenorm.convertToFloat());
7122 APFloat BLargestDenorm(APFloat::BFloat(), "0x1.FCp-127");
7123 EXPECT_EQ(/*0x1.FCp-127*/ 1.1663108012064884e-38F,
7124 BLargestDenorm.convertToFloat());
7126 APFloat BPosInf = APFloat::getInf(APFloat::BFloat());
7127 EXPECT_EQ(std::numeric_limits<float>::infinity(), BPosInf.convertToFloat());
7128 APFloat BNegInf = APFloat::getInf(APFloat::BFloat(), true);
7129 EXPECT_EQ(-std::numeric_limits<float>::infinity(), BNegInf.convertToFloat());
7130 APFloat BQNaN = APFloat::getQNaN(APFloat::BFloat());
7131 EXPECT_TRUE(std::isnan(BQNaN.convertToFloat()));
7134 TEST(APFloatTest, Float8E5M2ToFloat) {
7135 APFloat PosZero = APFloat::getZero(APFloat::Float8E5M2());
7136 APFloat PosZeroToFloat(PosZero.convertToFloat());
7137 EXPECT_TRUE(PosZeroToFloat.isPosZero());
7138 APFloat NegZero = APFloat::getZero(APFloat::Float8E5M2(), true);
7139 APFloat NegZeroToFloat(NegZero.convertToFloat());
7140 EXPECT_TRUE(NegZeroToFloat.isNegZero());
7142 APFloat One(APFloat::Float8E5M2(), "1.0");
7143 EXPECT_EQ(1.0F, One.convertToFloat());
7144 APFloat Two(APFloat::Float8E5M2(), "2.0");
7145 EXPECT_EQ(2.0F, Two.convertToFloat());
7147 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E5M2(), false);
7148 EXPECT_EQ(5.734400e+04, PosLargest.convertToFloat());
7149 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E5M2(), true);
7150 EXPECT_EQ(-5.734400e+04, NegLargest.convertToFloat());
7151 APFloat PosSmallest =
7152 APFloat::getSmallestNormalized(APFloat::Float8E5M2(), false);
7153 EXPECT_EQ(0x1.p-14, PosSmallest.convertToFloat());
7154 APFloat NegSmallest =
7155 APFloat::getSmallestNormalized(APFloat::Float8E5M2(), true);
7156 EXPECT_EQ(-0x1.p-14, NegSmallest.convertToFloat());
7158 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E5M2(), false);
7159 EXPECT_TRUE(SmallestDenorm.isDenormal());
7160 EXPECT_EQ(0x1.p-16, SmallestDenorm.convertToFloat());
7162 APFloat PosInf = APFloat::getInf(APFloat::Float8E5M2());
7163 EXPECT_EQ(std::numeric_limits<float>::infinity(), PosInf.convertToFloat());
7164 APFloat NegInf = APFloat::getInf(APFloat::Float8E5M2(), true);
7165 EXPECT_EQ(-std::numeric_limits<float>::infinity(), NegInf.convertToFloat());
7166 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E5M2());
7167 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7170 TEST(APFloatTest, Float8E4M3ToFloat) {
7171 APFloat PosZero = APFloat::getZero(APFloat::Float8E4M3());
7172 APFloat PosZeroToFloat(PosZero.convertToFloat());
7173 EXPECT_TRUE(PosZeroToFloat.isPosZero());
7174 APFloat NegZero = APFloat::getZero(APFloat::Float8E4M3(), true);
7175 APFloat NegZeroToFloat(NegZero.convertToFloat());
7176 EXPECT_TRUE(NegZeroToFloat.isNegZero());
7178 APFloat One(APFloat::Float8E4M3(), "1.0");
7179 EXPECT_EQ(1.0F, One.convertToFloat());
7180 APFloat Two(APFloat::Float8E4M3(), "2.0");
7181 EXPECT_EQ(2.0F, Two.convertToFloat());
7183 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3(), false);
7184 EXPECT_EQ(240.0F, PosLargest.convertToFloat());
7185 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3(), true);
7186 EXPECT_EQ(-240.0F, NegLargest.convertToFloat());
7187 APFloat PosSmallest =
7188 APFloat::getSmallestNormalized(APFloat::Float8E4M3(), false);
7189 EXPECT_EQ(0x1.p-6, PosSmallest.convertToFloat());
7190 APFloat NegSmallest =
7191 APFloat::getSmallestNormalized(APFloat::Float8E4M3(), true);
7192 EXPECT_EQ(-0x1.p-6, NegSmallest.convertToFloat());
7194 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3(), false);
7195 EXPECT_TRUE(SmallestDenorm.isDenormal());
7196 EXPECT_EQ(0x1.p-9, SmallestDenorm.convertToFloat());
7198 APFloat PosInf = APFloat::getInf(APFloat::Float8E4M3());
7199 EXPECT_EQ(std::numeric_limits<float>::infinity(), PosInf.convertToFloat());
7200 APFloat NegInf = APFloat::getInf(APFloat::Float8E4M3(), true);
7201 EXPECT_EQ(-std::numeric_limits<float>::infinity(), NegInf.convertToFloat());
7202 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3());
7203 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7206 TEST(APFloatTest, Float8E4M3FNToFloat) {
7207 APFloat PosZero = APFloat::getZero(APFloat::Float8E4M3FN());
7208 APFloat PosZeroToFloat(PosZero.convertToFloat());
7209 EXPECT_TRUE(PosZeroToFloat.isPosZero());
7210 APFloat NegZero = APFloat::getZero(APFloat::Float8E4M3FN(), true);
7211 APFloat NegZeroToFloat(NegZero.convertToFloat());
7212 EXPECT_TRUE(NegZeroToFloat.isNegZero());
7214 APFloat One(APFloat::Float8E4M3FN(), "1.0");
7215 EXPECT_EQ(1.0F, One.convertToFloat());
7216 APFloat Two(APFloat::Float8E4M3FN(), "2.0");
7217 EXPECT_EQ(2.0F, Two.convertToFloat());
7219 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E4M3FN(), false);
7220 EXPECT_EQ(448., PosLargest.convertToFloat());
7221 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E4M3FN(), true);
7222 EXPECT_EQ(-448, NegLargest.convertToFloat());
7223 APFloat PosSmallest =
7224 APFloat::getSmallestNormalized(APFloat::Float8E4M3FN(), false);
7225 EXPECT_EQ(0x1.p-6, PosSmallest.convertToFloat());
7226 APFloat NegSmallest =
7227 APFloat::getSmallestNormalized(APFloat::Float8E4M3FN(), true);
7228 EXPECT_EQ(-0x1.p-6, NegSmallest.convertToFloat());
7230 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float8E4M3FN(), false);
7231 EXPECT_TRUE(SmallestDenorm.isDenormal());
7232 EXPECT_EQ(0x1.p-9, SmallestDenorm.convertToFloat());
7234 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E4M3FN());
7235 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7238 TEST(APFloatTest, Float8E3M4ToFloat) {
7239 APFloat PosZero = APFloat::getZero(APFloat::Float8E3M4(), false);
7240 APFloat PosZeroToFloat(PosZero.convertToFloat());
7241 EXPECT_TRUE(PosZeroToFloat.isPosZero());
7242 APFloat NegZero = APFloat::getZero(APFloat::Float8E3M4(), true);
7243 APFloat NegZeroToFloat(NegZero.convertToFloat());
7244 EXPECT_TRUE(NegZeroToFloat.isNegZero());
7246 APFloat One(APFloat::Float8E3M4(), "1.0");
7247 EXPECT_EQ(1.0F, One.convertToFloat());
7248 APFloat Two(APFloat::Float8E3M4(), "2.0");
7249 EXPECT_EQ(2.0F, Two.convertToFloat());
7251 APFloat PosLargest = APFloat::getLargest(APFloat::Float8E3M4(), false);
7252 EXPECT_EQ(15.5F, PosLargest.convertToFloat());
7253 APFloat NegLargest = APFloat::getLargest(APFloat::Float8E3M4(), true);
7254 EXPECT_EQ(-15.5F, NegLargest.convertToFloat());
7255 APFloat PosSmallest =
7256 APFloat::getSmallestNormalized(APFloat::Float8E3M4(), false);
7257 EXPECT_EQ(0x1.p-2, PosSmallest.convertToFloat());
7258 APFloat NegSmallest =
7259 APFloat::getSmallestNormalized(APFloat::Float8E3M4(), true);
7260 EXPECT_EQ(-0x1.p-2, NegSmallest.convertToFloat());
7262 APFloat PosSmallestDenorm =
7263 APFloat::getSmallest(APFloat::Float8E3M4(), false);
7264 EXPECT_TRUE(PosSmallestDenorm.isDenormal());
7265 EXPECT_EQ(0x1.p-6, PosSmallestDenorm.convertToFloat());
7266 APFloat NegSmallestDenorm = APFloat::getSmallest(APFloat::Float8E3M4(), true);
7267 EXPECT_TRUE(NegSmallestDenorm.isDenormal());
7268 EXPECT_EQ(-0x1.p-6, NegSmallestDenorm.convertToFloat());
7270 APFloat PosInf = APFloat::getInf(APFloat::Float8E3M4());
7271 EXPECT_EQ(std::numeric_limits<float>::infinity(), PosInf.convertToFloat());
7272 APFloat NegInf = APFloat::getInf(APFloat::Float8E3M4(), true);
7273 EXPECT_EQ(-std::numeric_limits<float>::infinity(), NegInf.convertToFloat());
7274 APFloat QNaN = APFloat::getQNaN(APFloat::Float8E3M4());
7275 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7278 TEST(APFloatTest, FloatTF32ToFloat) {
7279 APFloat PosZero = APFloat::getZero(APFloat::FloatTF32());
7280 APFloat PosZeroToFloat(PosZero.convertToFloat());
7281 EXPECT_TRUE(PosZeroToFloat.isPosZero());
7282 APFloat NegZero = APFloat::getZero(APFloat::FloatTF32(), true);
7283 APFloat NegZeroToFloat(NegZero.convertToFloat());
7284 EXPECT_TRUE(NegZeroToFloat.isNegZero());
7286 APFloat One(APFloat::FloatTF32(), "1.0");
7287 EXPECT_EQ(1.0F, One.convertToFloat());
7288 APFloat Two(APFloat::FloatTF32(), "2.0");
7289 EXPECT_EQ(2.0F, Two.convertToFloat());
7291 APFloat PosLargest = APFloat::getLargest(APFloat::FloatTF32(), false);
7292 EXPECT_EQ(3.40116213421e+38F, PosLargest.convertToFloat());
7294 APFloat NegLargest = APFloat::getLargest(APFloat::FloatTF32(), true);
7295 EXPECT_EQ(-3.40116213421e+38F, NegLargest.convertToFloat());
7297 APFloat PosSmallest =
7298 APFloat::getSmallestNormalized(APFloat::FloatTF32(), false);
7299 EXPECT_EQ(/*0x1.p-126*/ 1.1754943508222875e-38F,
7300 PosSmallest.convertToFloat());
7301 APFloat NegSmallest =
7302 APFloat::getSmallestNormalized(APFloat::FloatTF32(), true);
7303 EXPECT_EQ(/*-0x1.p-126*/ -1.1754943508222875e-38F,
7304 NegSmallest.convertToFloat());
7306 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::FloatTF32(), false);
7307 EXPECT_TRUE(SmallestDenorm.isDenormal());
7308 EXPECT_EQ(0x0.004p-126, SmallestDenorm.convertToFloat());
7310 APFloat QNaN = APFloat::getQNaN(APFloat::FloatTF32());
7311 EXPECT_TRUE(std::isnan(QNaN.convertToFloat()));
7314 TEST(APFloatTest, getExactLog2) {
7315 for (unsigned I = 0; I != APFloat::S_MaxSemantics + 1; ++I) {
7316 auto SemEnum = static_cast<APFloat::Semantics>(I);
7317 const fltSemantics &Semantics = APFloat::EnumToSemantics(SemEnum);
7319 // For the Float8E8M0FNU format, the below cases along
7320 // with some more corner cases are tested through
7321 // Float8E8M0FNUGetExactLog2.
7322 if (I == APFloat::S_Float8E8M0FNU)
7323 continue;
7325 APFloat One(Semantics, "1.0");
7327 if (I == APFloat::S_PPCDoubleDouble) {
7328 // Not implemented
7329 EXPECT_EQ(INT_MIN, One.getExactLog2());
7330 EXPECT_EQ(INT_MIN, One.getExactLog2Abs());
7331 continue;
7334 int MinExp = APFloat::semanticsMinExponent(Semantics);
7335 int MaxExp = APFloat::semanticsMaxExponent(Semantics);
7336 int Precision = APFloat::semanticsPrecision(Semantics);
7338 EXPECT_EQ(0, One.getExactLog2());
7339 EXPECT_EQ(INT_MIN, APFloat(Semantics, "3.0").getExactLog2());
7340 EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2());
7341 EXPECT_EQ(INT_MIN, APFloat(Semantics, "3.0").getExactLog2Abs());
7342 EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2Abs());
7344 if (I == APFloat::S_Float6E2M3FN || I == APFloat::S_Float4E2M1FN) {
7345 EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2());
7346 EXPECT_EQ(INT_MIN, APFloat(Semantics, "-4.0").getExactLog2());
7347 EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs());
7348 EXPECT_EQ(2, APFloat(Semantics, "-4.0").getExactLog2Abs());
7349 } else {
7350 EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
7351 EXPECT_EQ(INT_MIN, APFloat(Semantics, "-8.0").getExactLog2());
7352 EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
7353 EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
7354 EXPECT_EQ(INT_MIN, APFloat(Semantics, "-0.25").getExactLog2());
7355 EXPECT_EQ(-2, APFloat(Semantics, "-0.25").getExactLog2Abs());
7356 EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
7357 EXPECT_EQ(3, APFloat(Semantics, "-8.0").getExactLog2Abs());
7360 EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2());
7361 EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2());
7362 EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, false).getExactLog2Abs());
7363 EXPECT_EQ(INT_MIN, APFloat::getZero(Semantics, true).getExactLog2Abs());
7365 if (APFloat::semanticsHasNaN(Semantics)) {
7366 // Types that do not support Inf will return NaN when asked for Inf.
7367 // (But only if they support NaN.)
7368 EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2());
7369 EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2());
7370 EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2());
7371 EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2());
7373 EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics).getExactLog2Abs());
7374 EXPECT_EQ(INT_MIN, APFloat::getInf(Semantics, true).getExactLog2Abs());
7375 EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, false).getExactLog2Abs());
7376 EXPECT_EQ(INT_MIN, APFloat::getNaN(Semantics, true).getExactLog2Abs());
7379 EXPECT_EQ(INT_MIN,
7380 scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven)
7381 .getExactLog2());
7382 EXPECT_EQ(INT_MIN,
7383 scalbn(One, MinExp - Precision, APFloat::rmNearestTiesToEven)
7384 .getExactLog2());
7386 EXPECT_EQ(
7387 INT_MIN,
7388 scalbn(One, MaxExp + 1, APFloat::rmNearestTiesToEven).getExactLog2());
7390 for (int i = MinExp - Precision + 1; i <= MaxExp; ++i) {
7391 EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2());
7396 TEST(APFloatTest, Float8E8M0FNUGetZero) {
7397 #ifdef GTEST_HAS_DEATH_TEST
7398 #ifndef NDEBUG
7399 EXPECT_DEATH(APFloat::getZero(APFloat::Float8E8M0FNU(), false),
7400 "This floating point format does not support Zero");
7401 EXPECT_DEATH(APFloat::getZero(APFloat::Float8E8M0FNU(), true),
7402 "This floating point format does not support Zero");
7403 #endif
7404 #endif
7407 TEST(APFloatTest, Float8E8M0FNUGetSignedValues) {
7408 #ifdef GTEST_HAS_DEATH_TEST
7409 #ifndef NDEBUG
7410 EXPECT_DEATH(APFloat(APFloat::Float8E8M0FNU(), "-64"),
7411 "This floating point format does not support signed values");
7412 EXPECT_DEATH(APFloat(APFloat::Float8E8M0FNU(), "-0x1.0p128"),
7413 "This floating point format does not support signed values");
7414 EXPECT_DEATH(APFloat(APFloat::Float8E8M0FNU(), "-inf"),
7415 "This floating point format does not support signed values");
7416 EXPECT_DEATH(APFloat::getNaN(APFloat::Float8E8M0FNU(), true),
7417 "This floating point format does not support signed values");
7418 EXPECT_DEATH(APFloat::getInf(APFloat::Float8E8M0FNU(), true),
7419 "This floating point format does not support signed values");
7420 EXPECT_DEATH(APFloat::getSmallest(APFloat::Float8E8M0FNU(), true),
7421 "This floating point format does not support signed values");
7422 EXPECT_DEATH(APFloat::getSmallestNormalized(APFloat::Float8E8M0FNU(), true),
7423 "This floating point format does not support signed values");
7424 EXPECT_DEATH(APFloat::getLargest(APFloat::Float8E8M0FNU(), true),
7425 "This floating point format does not support signed values");
7426 APFloat x = APFloat(APFloat::Float8E8M0FNU(), "4");
7427 APFloat y = APFloat(APFloat::Float8E8M0FNU(), "8");
7428 EXPECT_DEATH(x.subtract(y, APFloat::rmNearestTiesToEven),
7429 "This floating point format does not support signed values");
7430 #endif
7431 #endif
7434 TEST(APFloatTest, Float8E8M0FNUGetInf) {
7435 // The E8M0 format does not support infinity and the
7436 // all ones representation is treated as NaN.
7437 APFloat t = APFloat::getInf(APFloat::Float8E8M0FNU());
7438 EXPECT_TRUE(t.isNaN());
7439 EXPECT_FALSE(t.isInfinity());
7442 TEST(APFloatTest, Float8E8M0FNUFromString) {
7443 // Exactly representable
7444 EXPECT_EQ(64, APFloat(APFloat::Float8E8M0FNU(), "64").convertToDouble());
7445 // Overflow to NaN
7446 EXPECT_TRUE(APFloat(APFloat::Float8E8M0FNU(), "0x1.0p128").isNaN());
7447 // Inf converted to NaN
7448 EXPECT_TRUE(APFloat(APFloat::Float8E8M0FNU(), "inf").isNaN());
7449 // NaN converted to NaN
7450 EXPECT_TRUE(APFloat(APFloat::Float8E8M0FNU(), "nan").isNaN());
7453 TEST(APFloatTest, Float8E8M0FNUDivideByZero) {
7454 APFloat x(APFloat::Float8E8M0FNU(), "1");
7455 APFloat zero(APFloat::Float8E8M0FNU(), "0");
7456 x.divide(zero, APFloat::rmNearestTiesToEven);
7458 // Zero is represented as the smallest normalized value
7459 // in this format i.e 2^-127.
7460 // This tests the fix in convertFromDecimalString() function.
7461 EXPECT_EQ(0x1.0p-127, zero.convertToDouble());
7463 // [1 / (2^-127)] = 2^127
7464 EXPECT_EQ(0x1.0p127, x.convertToDouble());
7467 TEST(APFloatTest, Float8E8M0FNUGetExactLog2) {
7468 const fltSemantics &Semantics = APFloat::Float8E8M0FNU();
7469 APFloat One(Semantics, "1.0");
7470 EXPECT_EQ(0, One.getExactLog2());
7472 // In the Float8E8M0FNU format, 3 is rounded-up to 4.
7473 // So, we expect 2 as the result.
7474 EXPECT_EQ(2, APFloat(Semantics, "3.0").getExactLog2());
7475 EXPECT_EQ(2, APFloat(Semantics, "3.0").getExactLog2Abs());
7477 // In the Float8E8M0FNU format, 5 is rounded-down to 4.
7478 // So, we expect 2 as the result.
7479 EXPECT_EQ(2, APFloat(Semantics, "5.0").getExactLog2());
7480 EXPECT_EQ(2, APFloat(Semantics, "5.0").getExactLog2Abs());
7482 // Exact power-of-two value.
7483 EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2());
7484 EXPECT_EQ(3, APFloat(Semantics, "8.0").getExactLog2Abs());
7486 // Negative exponent value.
7487 EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2());
7488 EXPECT_EQ(-2, APFloat(Semantics, "0.25").getExactLog2Abs());
7490 int MinExp = APFloat::semanticsMinExponent(Semantics);
7491 int MaxExp = APFloat::semanticsMaxExponent(Semantics);
7492 int Precision = APFloat::semanticsPrecision(Semantics);
7494 // Values below the minExp getting capped to minExp.
7495 EXPECT_EQ(-127,
7496 scalbn(One, MinExp - Precision - 1, APFloat::rmNearestTiesToEven)
7497 .getExactLog2());
7498 EXPECT_EQ(-127, scalbn(One, MinExp - Precision, APFloat::rmNearestTiesToEven)
7499 .getExactLog2());
7501 // Values above the maxExp overflow to NaN, and getExactLog2() returns
7502 // INT_MIN for these cases.
7503 EXPECT_EQ(
7504 INT_MIN,
7505 scalbn(One, MaxExp + 1, APFloat::rmNearestTiesToEven).getExactLog2());
7507 // This format can represent [minExp, maxExp].
7508 // So, the result is the same as the 'Exp' of the scalbn.
7509 for (int i = MinExp - Precision + 1; i <= MaxExp; ++i) {
7510 EXPECT_EQ(i, scalbn(One, i, APFloat::rmNearestTiesToEven).getExactLog2());
7514 TEST(APFloatTest, Float8E8M0FNUSmallest) {
7515 APFloat test(APFloat::getSmallest(APFloat::Float8E8M0FNU()));
7516 EXPECT_EQ(0x1.0p-127, test.convertToDouble());
7518 // For E8M0 format, there are no denorms.
7519 // So, getSmallest is equal to isSmallestNormalized().
7520 EXPECT_TRUE(test.isSmallestNormalized());
7521 EXPECT_EQ(fcPosNormal, test.classify());
7523 test = APFloat::getAllOnesValue(APFloat::Float8E8M0FNU());
7524 EXPECT_FALSE(test.isSmallestNormalized());
7525 EXPECT_TRUE(test.isNaN());
7528 TEST(APFloatTest, Float8E8M0FNUNext) {
7529 APFloat test(APFloat::getSmallest(APFloat::Float8E8M0FNU()));
7530 // Increment of 1 should reach 2^-126
7531 EXPECT_EQ(APFloat::opOK, test.next(false));
7532 EXPECT_FALSE(test.isSmallestNormalized());
7533 EXPECT_EQ(0x1.0p-126, test.convertToDouble());
7535 // Decrement of 1, again, should reach 2^-127
7536 // i.e. smallest normalized
7537 EXPECT_EQ(APFloat::opOK, test.next(true));
7538 EXPECT_TRUE(test.isSmallestNormalized());
7540 // Decrement again, but gets capped at the smallest normalized
7541 EXPECT_EQ(APFloat::opOK, test.next(true));
7542 EXPECT_TRUE(test.isSmallestNormalized());
7545 TEST(APFloatTest, Float8E8M0FNUFMA) {
7546 APFloat f1(APFloat::Float8E8M0FNU(), "4.0");
7547 APFloat f2(APFloat::Float8E8M0FNU(), "2.0");
7548 APFloat f3(APFloat::Float8E8M0FNU(), "8.0");
7550 // Exact value: 4*2 + 8 = 16.
7551 f1.fusedMultiplyAdd(f2, f3, APFloat::rmNearestTiesToEven);
7552 EXPECT_EQ(16.0, f1.convertToDouble());
7554 // 4*2 + 4 = 12 but it gets rounded-up to 16.
7555 f1 = APFloat(APFloat::Float8E8M0FNU(), "4.0");
7556 f1.fusedMultiplyAdd(f2, f1, APFloat::rmNearestTiesToEven);
7557 EXPECT_EQ(16.0, f1.convertToDouble());
7559 // 4*2 + 2 = 10 but it gets rounded-down to 8.
7560 f1 = APFloat(APFloat::Float8E8M0FNU(), "4.0");
7561 f1.fusedMultiplyAdd(f2, f2, APFloat::rmNearestTiesToEven);
7562 EXPECT_EQ(8.0, f1.convertToDouble());
7564 // All of them using the same value.
7565 f1 = APFloat(APFloat::Float8E8M0FNU(), "1.0");
7566 f1.fusedMultiplyAdd(f1, f1, APFloat::rmNearestTiesToEven);
7567 EXPECT_EQ(2.0, f1.convertToDouble());
7570 TEST(APFloatTest, ConvertDoubleToE8M0FNU) {
7571 bool losesInfo;
7572 APFloat test(APFloat::IEEEdouble(), "1.0");
7573 APFloat::opStatus status = test.convert(
7574 APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven, &losesInfo);
7575 EXPECT_EQ(1.0, test.convertToDouble());
7576 EXPECT_FALSE(losesInfo);
7577 EXPECT_EQ(status, APFloat::opOK);
7579 // For E8M0, zero encoding is represented as the smallest normalized value.
7580 test = APFloat(APFloat::IEEEdouble(), "0.0");
7581 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7582 &losesInfo);
7583 EXPECT_TRUE(test.isSmallestNormalized());
7584 EXPECT_EQ(0x1.0p-127, test.convertToDouble());
7585 EXPECT_FALSE(losesInfo);
7586 EXPECT_EQ(status, APFloat::opOK);
7588 // Test that the conversion of a power-of-two value is precise.
7589 test = APFloat(APFloat::IEEEdouble(), "8.0");
7590 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7591 &losesInfo);
7592 EXPECT_EQ(8.0f, test.convertToDouble());
7593 EXPECT_FALSE(losesInfo);
7594 EXPECT_EQ(status, APFloat::opOK);
7596 // Test to check round-down conversion to power-of-two.
7597 // The fractional part of 9 is "001" (i.e. 1.125x2^3=9).
7598 test = APFloat(APFloat::IEEEdouble(), "9.0");
7599 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7600 &losesInfo);
7601 EXPECT_EQ(8.0f, test.convertToDouble());
7602 EXPECT_TRUE(losesInfo);
7603 EXPECT_EQ(status, APFloat::opInexact);
7605 // Test to check round-up conversion to power-of-two.
7606 // The fractional part of 13 is "101" (i.e. 1.625x2^3=13).
7607 test = APFloat(APFloat::IEEEdouble(), "13.0");
7608 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7609 &losesInfo);
7610 EXPECT_EQ(16.0f, test.convertToDouble());
7611 EXPECT_TRUE(losesInfo);
7612 EXPECT_EQ(status, APFloat::opInexact);
7614 // Test to check round-up conversion to power-of-two.
7615 // The fractional part of 12 is "100" (i.e. 1.5x2^3=12).
7616 test = APFloat(APFloat::IEEEdouble(), "12.0");
7617 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7618 &losesInfo);
7619 EXPECT_EQ(16.0f, test.convertToDouble());
7620 EXPECT_TRUE(losesInfo);
7621 EXPECT_EQ(status, APFloat::opInexact);
7623 // Overflow to NaN.
7624 test = APFloat(APFloat::IEEEdouble(), "0x1.0p128");
7625 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7626 &losesInfo);
7627 EXPECT_TRUE(test.isNaN());
7628 EXPECT_TRUE(losesInfo);
7629 EXPECT_EQ(status, APFloat::opOverflow | APFloat::opInexact);
7631 // Underflow to smallest normalized value.
7632 test = APFloat(APFloat::IEEEdouble(), "0x1.0p-128");
7633 status = test.convert(APFloat::Float8E8M0FNU(), APFloat::rmNearestTiesToEven,
7634 &losesInfo);
7635 EXPECT_TRUE(test.isSmallestNormalized());
7636 EXPECT_TRUE(losesInfo);
7637 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
7640 TEST(APFloatTest, Float6E3M2FNFromString) {
7641 // Exactly representable
7642 EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "28").convertToDouble());
7643 // Round down to maximum value
7644 EXPECT_EQ(28, APFloat(APFloat::Float6E3M2FN(), "32").convertToDouble());
7646 #ifdef GTEST_HAS_DEATH_TEST
7647 #ifndef NDEBUG
7648 EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "inf"),
7649 "This floating point format does not support Inf");
7650 EXPECT_DEATH(APFloat(APFloat::Float6E3M2FN(), "nan"),
7651 "This floating point format does not support NaN");
7652 #endif
7653 #endif
7655 EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "0").isPosZero());
7656 EXPECT_TRUE(APFloat(APFloat::Float6E3M2FN(), "-0").isNegZero());
7659 TEST(APFloatTest, Float6E2M3FNFromString) {
7660 // Exactly representable
7661 EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "7.5").convertToDouble());
7662 // Round down to maximum value
7663 EXPECT_EQ(7.5, APFloat(APFloat::Float6E2M3FN(), "32").convertToDouble());
7665 #ifdef GTEST_HAS_DEATH_TEST
7666 #ifndef NDEBUG
7667 EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "inf"),
7668 "This floating point format does not support Inf");
7669 EXPECT_DEATH(APFloat(APFloat::Float6E2M3FN(), "nan"),
7670 "This floating point format does not support NaN");
7671 #endif
7672 #endif
7674 EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "0").isPosZero());
7675 EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "-0").isNegZero());
7678 TEST(APFloatTest, Float4E2M1FNFromString) {
7679 // Exactly representable
7680 EXPECT_EQ(6, APFloat(APFloat::Float4E2M1FN(), "6").convertToDouble());
7681 // Round down to maximum value
7682 EXPECT_EQ(6, APFloat(APFloat::Float4E2M1FN(), "32").convertToDouble());
7684 #ifdef GTEST_HAS_DEATH_TEST
7685 #ifndef NDEBUG
7686 EXPECT_DEATH(APFloat(APFloat::Float4E2M1FN(), "inf"),
7687 "This floating point format does not support Inf");
7688 EXPECT_DEATH(APFloat(APFloat::Float4E2M1FN(), "nan"),
7689 "This floating point format does not support NaN");
7690 #endif
7691 #endif
7693 EXPECT_TRUE(APFloat(APFloat::Float4E2M1FN(), "0").isPosZero());
7694 EXPECT_TRUE(APFloat(APFloat::Float4E2M1FN(), "-0").isNegZero());
7697 TEST(APFloatTest, ConvertE3M2FToE2M3F) {
7698 bool losesInfo;
7699 APFloat test(APFloat::Float6E3M2FN(), "1.0");
7700 APFloat::opStatus status = test.convert(
7701 APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo);
7702 EXPECT_EQ(1.0f, test.convertToFloat());
7703 EXPECT_FALSE(losesInfo);
7704 EXPECT_EQ(status, APFloat::opOK);
7706 test = APFloat(APFloat::Float6E3M2FN(), "0.0");
7707 status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
7708 &losesInfo);
7709 EXPECT_EQ(0.0f, test.convertToFloat());
7710 EXPECT_FALSE(losesInfo);
7711 EXPECT_EQ(status, APFloat::opOK);
7713 // Test overflow
7714 test = APFloat(APFloat::Float6E3M2FN(), "28");
7715 status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
7716 &losesInfo);
7717 EXPECT_EQ(7.5f, test.convertToFloat());
7718 EXPECT_TRUE(losesInfo);
7719 EXPECT_EQ(status, APFloat::opInexact);
7721 // Test underflow
7722 test = APFloat(APFloat::Float6E3M2FN(), ".0625");
7723 status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
7724 &losesInfo);
7725 EXPECT_EQ(0., test.convertToFloat());
7726 EXPECT_TRUE(losesInfo);
7727 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
7729 // Testing inexact rounding to denormal number
7730 test = APFloat(APFloat::Float6E3M2FN(), "0.1875");
7731 status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven,
7732 &losesInfo);
7733 EXPECT_EQ(0.25, test.convertToFloat());
7734 EXPECT_TRUE(losesInfo);
7735 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
7738 TEST(APFloatTest, ConvertE2M3FToE3M2F) {
7739 bool losesInfo;
7740 APFloat test(APFloat::Float6E2M3FN(), "1.0");
7741 APFloat::opStatus status = test.convert(
7742 APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, &losesInfo);
7743 EXPECT_EQ(1.0f, test.convertToFloat());
7744 EXPECT_FALSE(losesInfo);
7745 EXPECT_EQ(status, APFloat::opOK);
7747 test = APFloat(APFloat::Float6E2M3FN(), "0.0");
7748 status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
7749 &losesInfo);
7750 EXPECT_EQ(0.0f, test.convertToFloat());
7751 EXPECT_FALSE(losesInfo);
7752 EXPECT_EQ(status, APFloat::opOK);
7754 test = APFloat(APFloat::Float6E2M3FN(), ".125");
7755 status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
7756 &losesInfo);
7757 EXPECT_EQ(.125, test.convertToFloat());
7758 EXPECT_FALSE(losesInfo);
7759 EXPECT_EQ(status, APFloat::opOK);
7761 // Test inexact rounding
7762 test = APFloat(APFloat::Float6E2M3FN(), "7.5");
7763 status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven,
7764 &losesInfo);
7765 EXPECT_EQ(8, test.convertToFloat());
7766 EXPECT_TRUE(losesInfo);
7767 EXPECT_EQ(status, APFloat::opInexact);
7770 TEST(APFloatTest, ConvertDoubleToE2M1F) {
7771 bool losesInfo;
7772 APFloat test(APFloat::IEEEdouble(), "1.0");
7773 APFloat::opStatus status = test.convert(
7774 APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven, &losesInfo);
7775 EXPECT_EQ(1.0, test.convertToDouble());
7776 EXPECT_FALSE(losesInfo);
7777 EXPECT_EQ(status, APFloat::opOK);
7779 test = APFloat(APFloat::IEEEdouble(), "0.0");
7780 status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven,
7781 &losesInfo);
7782 EXPECT_EQ(0.0f, test.convertToDouble());
7783 EXPECT_FALSE(losesInfo);
7784 EXPECT_EQ(status, APFloat::opOK);
7786 // Test overflow
7787 test = APFloat(APFloat::IEEEdouble(), "8");
7788 status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven,
7789 &losesInfo);
7790 EXPECT_EQ(6, test.convertToDouble());
7791 EXPECT_TRUE(losesInfo);
7792 EXPECT_EQ(status, APFloat::opInexact);
7794 // Test underflow
7795 test = APFloat(APFloat::IEEEdouble(), "0.25");
7796 status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven,
7797 &losesInfo);
7798 EXPECT_EQ(0., test.convertToDouble());
7799 EXPECT_TRUE(losesInfo);
7800 EXPECT_FALSE(test.isDenormal());
7801 EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact);
7804 TEST(APFloatTest, Float6E3M2FNNext) {
7805 APFloat test(APFloat::Float6E3M2FN(), APFloat::uninitialized);
7806 APFloat expected(APFloat::Float6E3M2FN(), APFloat::uninitialized);
7808 // 1. NextUp of largest bit pattern is the same
7809 test = APFloat::getLargest(APFloat::Float6E3M2FN());
7810 expected = APFloat::getLargest(APFloat::Float6E3M2FN());
7811 EXPECT_EQ(test.next(false), APFloat::opOK);
7812 EXPECT_FALSE(test.isInfinity());
7813 EXPECT_FALSE(test.isZero());
7814 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7816 // 2. NextUp of smallest negative denormal is -0
7817 test = APFloat::getSmallest(APFloat::Float6E3M2FN(), true);
7818 expected = APFloat::getZero(APFloat::Float6E3M2FN(), true);
7819 EXPECT_EQ(test.next(false), APFloat::opOK);
7820 EXPECT_TRUE(test.isNegZero());
7821 EXPECT_FALSE(test.isPosZero());
7822 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7824 // 3. nextDown of negative of largest value is the same
7825 test = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
7826 expected = test;
7827 EXPECT_EQ(test.next(true), APFloat::opOK);
7828 EXPECT_FALSE(test.isInfinity());
7829 EXPECT_FALSE(test.isZero());
7830 EXPECT_FALSE(test.isNaN());
7831 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7833 // 4. nextDown of +0 is smallest negative denormal
7834 test = APFloat::getZero(APFloat::Float6E3M2FN(), false);
7835 expected = APFloat::getSmallest(APFloat::Float6E3M2FN(), true);
7836 EXPECT_EQ(test.next(true), APFloat::opOK);
7837 EXPECT_FALSE(test.isZero());
7838 EXPECT_TRUE(test.isDenormal());
7839 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7842 TEST(APFloatTest, Float6E2M3FNNext) {
7843 APFloat test(APFloat::Float6E2M3FN(), APFloat::uninitialized);
7844 APFloat expected(APFloat::Float6E2M3FN(), APFloat::uninitialized);
7846 // 1. NextUp of largest bit pattern is the same
7847 test = APFloat::getLargest(APFloat::Float6E2M3FN());
7848 expected = APFloat::getLargest(APFloat::Float6E2M3FN());
7849 EXPECT_EQ(test.next(false), APFloat::opOK);
7850 EXPECT_FALSE(test.isInfinity());
7851 EXPECT_FALSE(test.isZero());
7852 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7854 // 2. NextUp of smallest negative denormal is -0
7855 test = APFloat::getSmallest(APFloat::Float6E2M3FN(), true);
7856 expected = APFloat::getZero(APFloat::Float6E2M3FN(), true);
7857 EXPECT_EQ(test.next(false), APFloat::opOK);
7858 EXPECT_TRUE(test.isNegZero());
7859 EXPECT_FALSE(test.isPosZero());
7860 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7862 // 3. nextDown of negative of largest value is the same
7863 test = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
7864 expected = test;
7865 EXPECT_EQ(test.next(true), APFloat::opOK);
7866 EXPECT_FALSE(test.isInfinity());
7867 EXPECT_FALSE(test.isZero());
7868 EXPECT_FALSE(test.isNaN());
7869 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7871 // 4. nextDown of +0 is smallest negative denormal
7872 test = APFloat::getZero(APFloat::Float6E2M3FN(), false);
7873 expected = APFloat::getSmallest(APFloat::Float6E2M3FN(), true);
7874 EXPECT_EQ(test.next(true), APFloat::opOK);
7875 EXPECT_FALSE(test.isZero());
7876 EXPECT_TRUE(test.isDenormal());
7877 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7880 TEST(APFloatTest, Float4E2M1FNNext) {
7881 APFloat test(APFloat::Float4E2M1FN(), APFloat::uninitialized);
7882 APFloat expected(APFloat::Float4E2M1FN(), APFloat::uninitialized);
7884 // 1. NextUp of largest bit pattern is the same
7885 test = APFloat::getLargest(APFloat::Float4E2M1FN());
7886 expected = APFloat::getLargest(APFloat::Float4E2M1FN());
7887 EXPECT_EQ(test.next(false), APFloat::opOK);
7888 EXPECT_FALSE(test.isInfinity());
7889 EXPECT_FALSE(test.isZero());
7890 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7892 // 2. NextUp of smallest negative denormal is -0
7893 test = APFloat::getSmallest(APFloat::Float4E2M1FN(), true);
7894 expected = APFloat::getZero(APFloat::Float4E2M1FN(), true);
7895 EXPECT_EQ(test.next(false), APFloat::opOK);
7896 EXPECT_TRUE(test.isNegZero());
7897 EXPECT_FALSE(test.isPosZero());
7898 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7900 // 3. nextDown of negative of largest value is the same
7901 test = APFloat::getLargest(APFloat::Float4E2M1FN(), true);
7902 expected = test;
7903 EXPECT_EQ(test.next(true), APFloat::opOK);
7904 EXPECT_FALSE(test.isInfinity());
7905 EXPECT_FALSE(test.isZero());
7906 EXPECT_FALSE(test.isNaN());
7907 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7909 // 4. nextDown of +0 is smallest negative denormal
7910 test = APFloat::getZero(APFloat::Float4E2M1FN(), false);
7911 expected = APFloat::getSmallest(APFloat::Float4E2M1FN(), true);
7912 EXPECT_EQ(test.next(true), APFloat::opOK);
7913 EXPECT_FALSE(test.isZero());
7914 EXPECT_TRUE(test.isDenormal());
7915 EXPECT_TRUE(test.bitwiseIsEqual(expected));
7918 #ifdef GTEST_HAS_DEATH_TEST
7919 #ifndef NDEBUG
7920 TEST(APFloatTest, Float6E3M2FNGetInfNaN) {
7921 EXPECT_DEATH(APFloat::getInf(APFloat::Float6E3M2FN()),
7922 "This floating point format does not support Inf");
7923 EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E3M2FN()),
7924 "This floating point format does not support NaN");
7927 TEST(APFloatTest, Float6E2M3FNGetInfNaN) {
7928 EXPECT_DEATH(APFloat::getInf(APFloat::Float6E2M3FN()),
7929 "This floating point format does not support Inf");
7930 EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E2M3FN()),
7931 "This floating point format does not support NaN");
7934 TEST(APFloatTest, Float4E2M1FNGetInfNaN) {
7935 EXPECT_DEATH(APFloat::getInf(APFloat::Float4E2M1FN()),
7936 "This floating point format does not support Inf");
7937 EXPECT_DEATH(APFloat::getNaN(APFloat::Float4E2M1FN()),
7938 "This floating point format does not support NaN");
7940 #endif
7941 #endif
7943 TEST(APFloatTest, Float6E3M2FNToDouble) {
7944 APFloat One(APFloat::Float6E3M2FN(), "1.0");
7945 EXPECT_EQ(1.0, One.convertToDouble());
7946 APFloat Two(APFloat::Float6E3M2FN(), "2.0");
7947 EXPECT_EQ(2.0, Two.convertToDouble());
7948 APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false);
7949 EXPECT_EQ(28., PosLargest.convertToDouble());
7950 APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
7951 EXPECT_EQ(-28., NegLargest.convertToDouble());
7952 APFloat PosSmallest =
7953 APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
7954 EXPECT_EQ(0x1p-2, PosSmallest.convertToDouble());
7955 APFloat NegSmallest =
7956 APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true);
7957 EXPECT_EQ(-0x1p-2, NegSmallest.convertToDouble());
7959 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
7960 EXPECT_TRUE(SmallestDenorm.isDenormal());
7961 EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToDouble());
7964 TEST(APFloatTest, Float6E2M3FNToDouble) {
7965 APFloat One(APFloat::Float6E2M3FN(), "1.0");
7966 EXPECT_EQ(1.0, One.convertToDouble());
7967 APFloat Two(APFloat::Float6E2M3FN(), "2.0");
7968 EXPECT_EQ(2.0, Two.convertToDouble());
7969 APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false);
7970 EXPECT_EQ(7.5, PosLargest.convertToDouble());
7971 APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
7972 EXPECT_EQ(-7.5, NegLargest.convertToDouble());
7973 APFloat PosSmallest =
7974 APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
7975 EXPECT_EQ(0x1p0, PosSmallest.convertToDouble());
7976 APFloat NegSmallest =
7977 APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true);
7978 EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble());
7980 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
7981 EXPECT_TRUE(SmallestDenorm.isDenormal());
7982 EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToDouble());
7985 TEST(APFloatTest, Float4E2M1FNToDouble) {
7986 APFloat One(APFloat::Float4E2M1FN(), "1.0");
7987 EXPECT_EQ(1.0, One.convertToDouble());
7988 APFloat Two(APFloat::Float4E2M1FN(), "2.0");
7989 EXPECT_EQ(2.0, Two.convertToDouble());
7990 APFloat PosLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), false);
7991 EXPECT_EQ(6, PosLargest.convertToDouble());
7992 APFloat NegLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), true);
7993 EXPECT_EQ(-6, NegLargest.convertToDouble());
7994 APFloat PosSmallest =
7995 APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false);
7996 EXPECT_EQ(0x1p0, PosSmallest.convertToDouble());
7997 APFloat NegSmallest =
7998 APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), true);
7999 EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble());
8001 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float4E2M1FN(), false);
8002 EXPECT_TRUE(SmallestDenorm.isDenormal());
8003 EXPECT_EQ(0x0.8p0, SmallestDenorm.convertToDouble());
8006 TEST(APFloatTest, Float6E3M2FNToFloat) {
8007 APFloat PosZero = APFloat::getZero(APFloat::Float6E3M2FN());
8008 APFloat PosZeroToFloat(PosZero.convertToFloat());
8009 EXPECT_TRUE(PosZeroToFloat.isPosZero());
8010 APFloat NegZero = APFloat::getZero(APFloat::Float6E3M2FN(), true);
8011 APFloat NegZeroToFloat(NegZero.convertToFloat());
8012 EXPECT_TRUE(NegZeroToFloat.isNegZero());
8014 APFloat One(APFloat::Float6E3M2FN(), "1.0");
8015 EXPECT_EQ(1.0F, One.convertToFloat());
8016 APFloat Two(APFloat::Float6E3M2FN(), "2.0");
8017 EXPECT_EQ(2.0F, Two.convertToFloat());
8019 APFloat PosLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), false);
8020 EXPECT_EQ(28., PosLargest.convertToFloat());
8021 APFloat NegLargest = APFloat::getLargest(APFloat::Float6E3M2FN(), true);
8022 EXPECT_EQ(-28, NegLargest.convertToFloat());
8023 APFloat PosSmallest =
8024 APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false);
8025 EXPECT_EQ(0x1p-2, PosSmallest.convertToFloat());
8026 APFloat NegSmallest =
8027 APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), true);
8028 EXPECT_EQ(-0x1p-2, NegSmallest.convertToFloat());
8030 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E3M2FN(), false);
8031 EXPECT_TRUE(SmallestDenorm.isDenormal());
8032 EXPECT_EQ(0x0.1p0, SmallestDenorm.convertToFloat());
8035 TEST(APFloatTest, Float6E2M3FNToFloat) {
8036 APFloat PosZero = APFloat::getZero(APFloat::Float6E2M3FN());
8037 APFloat PosZeroToFloat(PosZero.convertToFloat());
8038 EXPECT_TRUE(PosZeroToFloat.isPosZero());
8039 APFloat NegZero = APFloat::getZero(APFloat::Float6E2M3FN(), true);
8040 APFloat NegZeroToFloat(NegZero.convertToFloat());
8041 EXPECT_TRUE(NegZeroToFloat.isNegZero());
8043 APFloat One(APFloat::Float6E2M3FN(), "1.0");
8044 EXPECT_EQ(1.0F, One.convertToFloat());
8045 APFloat Two(APFloat::Float6E2M3FN(), "2.0");
8046 EXPECT_EQ(2.0F, Two.convertToFloat());
8048 APFloat PosLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), false);
8049 EXPECT_EQ(7.5, PosLargest.convertToFloat());
8050 APFloat NegLargest = APFloat::getLargest(APFloat::Float6E2M3FN(), true);
8051 EXPECT_EQ(-7.5, NegLargest.convertToFloat());
8052 APFloat PosSmallest =
8053 APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), false);
8054 EXPECT_EQ(0x1p0, PosSmallest.convertToFloat());
8055 APFloat NegSmallest =
8056 APFloat::getSmallestNormalized(APFloat::Float6E2M3FN(), true);
8057 EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat());
8059 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float6E2M3FN(), false);
8060 EXPECT_TRUE(SmallestDenorm.isDenormal());
8061 EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToFloat());
8064 TEST(APFloatTest, Float4E2M1FNToFloat) {
8065 APFloat PosZero = APFloat::getZero(APFloat::Float4E2M1FN());
8066 APFloat PosZeroToFloat(PosZero.convertToFloat());
8067 EXPECT_TRUE(PosZeroToFloat.isPosZero());
8068 APFloat NegZero = APFloat::getZero(APFloat::Float4E2M1FN(), true);
8069 APFloat NegZeroToFloat(NegZero.convertToFloat());
8070 EXPECT_TRUE(NegZeroToFloat.isNegZero());
8072 APFloat One(APFloat::Float4E2M1FN(), "1.0");
8073 EXPECT_EQ(1.0F, One.convertToFloat());
8074 APFloat Two(APFloat::Float4E2M1FN(), "2.0");
8075 EXPECT_EQ(2.0F, Two.convertToFloat());
8077 APFloat PosLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), false);
8078 EXPECT_EQ(6, PosLargest.convertToFloat());
8079 APFloat NegLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), true);
8080 EXPECT_EQ(-6, NegLargest.convertToFloat());
8081 APFloat PosSmallest =
8082 APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false);
8083 EXPECT_EQ(0x1p0, PosSmallest.convertToFloat());
8084 APFloat NegSmallest =
8085 APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), true);
8086 EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat());
8088 APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float4E2M1FN(), false);
8089 EXPECT_TRUE(SmallestDenorm.isDenormal());
8090 EXPECT_EQ(0x0.8p0, SmallestDenorm.convertToFloat());
8092 } // namespace