1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
33 #define DEBUG_TYPE "apint"
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords
) {
38 uint64_t *result
= new uint64_t[numWords
];
39 memset(result
, 0, numWords
* sizeof(uint64_t));
43 /// A utility function for allocating memory and checking for allocation
44 /// failure. The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords
) {
46 return new uint64_t[numWords
];
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit
, uint8_t radix
) {
53 if (radix
== 16 || radix
== 36) {
77 void APInt::initSlowCase(uint64_t val
, bool isSigned
) {
78 U
.pVal
= getClearedMemory(getNumWords());
80 if (isSigned
&& int64_t(val
) < 0)
81 for (unsigned i
= 1; i
< getNumWords(); ++i
)
82 U
.pVal
[i
] = WORDTYPE_MAX
;
86 void APInt::initSlowCase(const APInt
& that
) {
87 U
.pVal
= getMemory(getNumWords());
88 memcpy(U
.pVal
, that
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
91 void APInt::initFromArray(ArrayRef
<uint64_t> bigVal
) {
92 assert(BitWidth
&& "Bitwidth too small");
93 assert(bigVal
.data() && "Null pointer detected!");
97 // Get memory, cleared to 0
98 U
.pVal
= getClearedMemory(getNumWords());
99 // Calculate the number of words to copy
100 unsigned words
= std::min
<unsigned>(bigVal
.size(), getNumWords());
101 // Copy the words from bigVal to pVal
102 memcpy(U
.pVal
, bigVal
.data(), words
* APINT_WORD_SIZE
);
104 // Make sure unused high bits are cleared
108 APInt::APInt(unsigned numBits
, ArrayRef
<uint64_t> bigVal
)
109 : BitWidth(numBits
) {
110 initFromArray(bigVal
);
113 APInt::APInt(unsigned numBits
, unsigned numWords
, const uint64_t bigVal
[])
114 : BitWidth(numBits
) {
115 initFromArray(makeArrayRef(bigVal
, numWords
));
118 APInt::APInt(unsigned numbits
, StringRef Str
, uint8_t radix
)
119 : BitWidth(numbits
) {
120 assert(BitWidth
&& "Bitwidth too small");
121 fromString(numbits
, Str
, radix
);
124 void APInt::reallocate(unsigned NewBitWidth
) {
125 // If the number of words is the same we can just change the width and stop.
126 if (getNumWords() == getNumWords(NewBitWidth
)) {
127 BitWidth
= NewBitWidth
;
131 // If we have an allocation, delete it.
136 BitWidth
= NewBitWidth
;
138 // If we are supposed to have an allocation, create it.
140 U
.pVal
= getMemory(getNumWords());
143 void APInt::AssignSlowCase(const APInt
& RHS
) {
144 // Don't do anything for X = X
148 // Adjust the bit width and handle allocations as necessary.
149 reallocate(RHS
.getBitWidth());
155 memcpy(U
.pVal
, RHS
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID
& ID
) const {
160 ID
.AddInteger(BitWidth
);
162 if (isSingleWord()) {
163 ID
.AddInteger(U
.VAL
);
167 unsigned NumWords
= getNumWords();
168 for (unsigned i
= 0; i
< NumWords
; ++i
)
169 ID
.AddInteger(U
.pVal
[i
]);
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt
& APInt::operator++() {
177 tcIncrement(U
.pVal
, getNumWords());
178 return clearUnusedBits();
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt
& APInt::operator--() {
186 tcDecrement(U
.pVal
, getNumWords());
187 return clearUnusedBits();
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt
& APInt::operator+=(const APInt
& RHS
) {
194 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
198 tcAdd(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
199 return clearUnusedBits();
202 APInt
& APInt::operator+=(uint64_t RHS
) {
206 tcAddPart(U
.pVal
, RHS
, getNumWords());
207 return clearUnusedBits();
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt
& APInt::operator-=(const APInt
& RHS
) {
214 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
218 tcSubtract(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
219 return clearUnusedBits();
222 APInt
& APInt::operator-=(uint64_t RHS
) {
226 tcSubtractPart(U
.pVal
, RHS
, getNumWords());
227 return clearUnusedBits();
230 APInt
APInt::operator*(const APInt
& RHS
) const {
231 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
233 return APInt(BitWidth
, U
.VAL
* RHS
.U
.VAL
);
235 APInt
Result(getMemory(getNumWords()), getBitWidth());
237 tcMultiply(Result
.U
.pVal
, U
.pVal
, RHS
.U
.pVal
, getNumWords());
239 Result
.clearUnusedBits();
243 void APInt::AndAssignSlowCase(const APInt
& RHS
) {
244 tcAnd(U
.pVal
, RHS
.U
.pVal
, getNumWords());
247 void APInt::OrAssignSlowCase(const APInt
& RHS
) {
248 tcOr(U
.pVal
, RHS
.U
.pVal
, getNumWords());
251 void APInt::XorAssignSlowCase(const APInt
& RHS
) {
252 tcXor(U
.pVal
, RHS
.U
.pVal
, getNumWords());
255 APInt
& APInt::operator*=(const APInt
& RHS
) {
256 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
261 APInt
& APInt::operator*=(uint64_t RHS
) {
262 if (isSingleWord()) {
265 unsigned NumWords
= getNumWords();
266 tcMultiplyPart(U
.pVal
, U
.pVal
, RHS
, 0, NumWords
, NumWords
, false);
268 return clearUnusedBits();
271 bool APInt::EqualSlowCase(const APInt
& RHS
) const {
272 return std::equal(U
.pVal
, U
.pVal
+ getNumWords(), RHS
.U
.pVal
);
275 int APInt::compare(const APInt
& RHS
) const {
276 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
278 return U
.VAL
< RHS
.U
.VAL
? -1 : U
.VAL
> RHS
.U
.VAL
;
280 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
283 int APInt::compareSigned(const APInt
& RHS
) const {
284 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
285 if (isSingleWord()) {
286 int64_t lhsSext
= SignExtend64(U
.VAL
, BitWidth
);
287 int64_t rhsSext
= SignExtend64(RHS
.U
.VAL
, BitWidth
);
288 return lhsSext
< rhsSext
? -1 : lhsSext
> rhsSext
;
291 bool lhsNeg
= isNegative();
292 bool rhsNeg
= RHS
.isNegative();
294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295 if (lhsNeg
!= rhsNeg
)
296 return lhsNeg
? -1 : 1;
298 // Otherwise we can just use an unsigned comparison, because even negative
299 // numbers compare correctly this way if both have the same signed-ness.
300 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
303 void APInt::setBitsSlowCase(unsigned loBit
, unsigned hiBit
) {
304 unsigned loWord
= whichWord(loBit
);
305 unsigned hiWord
= whichWord(hiBit
);
307 // Create an initial mask for the low word with zeros below loBit.
308 uint64_t loMask
= WORDTYPE_MAX
<< whichBit(loBit
);
310 // If hiBit is not aligned, we need a high mask.
311 unsigned hiShiftAmt
= whichBit(hiBit
);
312 if (hiShiftAmt
!= 0) {
313 // Create a high mask with zeros above hiBit.
314 uint64_t hiMask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- hiShiftAmt
);
315 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316 // set the bits in hiWord.
317 if (hiWord
== loWord
)
320 U
.pVal
[hiWord
] |= hiMask
;
322 // Apply the mask to the low word.
323 U
.pVal
[loWord
] |= loMask
;
325 // Fill any words between loWord and hiWord with all ones.
326 for (unsigned word
= loWord
+ 1; word
< hiWord
; ++word
)
327 U
.pVal
[word
] = WORDTYPE_MAX
;
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332 tcComplement(U
.pVal
, getNumWords());
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition
) {
340 assert(bitPosition
< BitWidth
&& "Out of the bit-width range!");
341 if ((*this)[bitPosition
]) clearBit(bitPosition
);
342 else setBit(bitPosition
);
345 void APInt::insertBits(const APInt
&subBits
, unsigned bitPosition
) {
346 unsigned subBitWidth
= subBits
.getBitWidth();
347 assert(0 < subBitWidth
&& (subBitWidth
+ bitPosition
) <= BitWidth
&&
348 "Illegal bit insertion");
350 // Insertion is a direct copy.
351 if (subBitWidth
== BitWidth
) {
356 // Single word result can be done as a direct bitmask.
357 if (isSingleWord()) {
358 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
359 U
.VAL
&= ~(mask
<< bitPosition
);
360 U
.VAL
|= (subBits
.U
.VAL
<< bitPosition
);
364 unsigned loBit
= whichBit(bitPosition
);
365 unsigned loWord
= whichWord(bitPosition
);
366 unsigned hi1Word
= whichWord(bitPosition
+ subBitWidth
- 1);
368 // Insertion within a single word can be done as a direct bitmask.
369 if (loWord
== hi1Word
) {
370 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
371 U
.pVal
[loWord
] &= ~(mask
<< loBit
);
372 U
.pVal
[loWord
] |= (subBits
.U
.VAL
<< loBit
);
376 // Insert on word boundaries.
378 // Direct copy whole words.
379 unsigned numWholeSubWords
= subBitWidth
/ APINT_BITS_PER_WORD
;
380 memcpy(U
.pVal
+ loWord
, subBits
.getRawData(),
381 numWholeSubWords
* APINT_WORD_SIZE
);
383 // Mask+insert remaining bits.
384 unsigned remainingBits
= subBitWidth
% APINT_BITS_PER_WORD
;
385 if (remainingBits
!= 0) {
386 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- remainingBits
);
387 U
.pVal
[hi1Word
] &= ~mask
;
388 U
.pVal
[hi1Word
] |= subBits
.getWord(subBitWidth
- 1);
393 // General case - set/clear individual bits in dst based on src.
394 // TODO - there is scope for optimization here, but at the moment this code
395 // path is barely used so prefer readability over performance.
396 for (unsigned i
= 0; i
!= subBitWidth
; ++i
) {
398 setBit(bitPosition
+ i
);
400 clearBit(bitPosition
+ i
);
404 APInt
APInt::extractBits(unsigned numBits
, unsigned bitPosition
) const {
405 assert(numBits
> 0 && "Can't extract zero bits");
406 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
407 "Illegal bit extraction");
410 return APInt(numBits
, U
.VAL
>> bitPosition
);
412 unsigned loBit
= whichBit(bitPosition
);
413 unsigned loWord
= whichWord(bitPosition
);
414 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
416 // Single word result extracting bits from a single word source.
417 if (loWord
== hiWord
)
418 return APInt(numBits
, U
.pVal
[loWord
] >> loBit
);
420 // Extracting bits that start on a source word boundary can be done
421 // as a fast memory copy.
423 return APInt(numBits
, makeArrayRef(U
.pVal
+ loWord
, 1 + hiWord
- loWord
));
425 // General case - shift + copy source words directly into place.
426 APInt
Result(numBits
, 0);
427 unsigned NumSrcWords
= getNumWords();
428 unsigned NumDstWords
= Result
.getNumWords();
430 uint64_t *DestPtr
= Result
.isSingleWord() ? &Result
.U
.VAL
: Result
.U
.pVal
;
431 for (unsigned word
= 0; word
< NumDstWords
; ++word
) {
432 uint64_t w0
= U
.pVal
[loWord
+ word
];
434 (loWord
+ word
+ 1) < NumSrcWords
? U
.pVal
[loWord
+ word
+ 1] : 0;
435 DestPtr
[word
] = (w0
>> loBit
) | (w1
<< (APINT_BITS_PER_WORD
- loBit
));
438 return Result
.clearUnusedBits();
441 unsigned APInt::getBitsNeeded(StringRef str
, uint8_t radix
) {
442 assert(!str
.empty() && "Invalid string length");
443 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
445 "Radix should be 2, 8, 10, 16, or 36!");
447 size_t slen
= str
.size();
449 // Each computation below needs to know if it's negative.
450 StringRef::iterator p
= str
.begin();
451 unsigned isNegative
= *p
== '-';
452 if (*p
== '-' || *p
== '+') {
455 assert(slen
&& "String is only a sign, needs a value.");
458 // For radixes of power-of-two values, the bits required is accurately and
461 return slen
+ isNegative
;
463 return slen
* 3 + isNegative
;
465 return slen
* 4 + isNegative
;
469 // This is grossly inefficient but accurate. We could probably do something
470 // with a computation of roughly slen*64/20 and then adjust by the value of
471 // the first few digits. But, I'm not sure how accurate that could be.
473 // Compute a sufficient number of bits that is always large enough but might
474 // be too large. This avoids the assertion in the constructor. This
475 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476 // bits in that case.
478 = radix
== 10? (slen
== 1 ? 4 : slen
* 64/18)
479 : (slen
== 1 ? 7 : slen
* 16/3);
481 // Convert to the actual binary value.
482 APInt
tmp(sufficient
, StringRef(p
, slen
), radix
);
484 // Compute how many bits are required. If the log is infinite, assume we need
485 // just bit. If the log is exact and value is negative, then the value is
486 // MinSignedValue with (log + 1) bits.
487 unsigned log
= tmp
.logBase2();
488 if (log
== (unsigned)-1) {
489 return isNegative
+ 1;
490 } else if (isNegative
&& tmp
.isPowerOf2()) {
491 return isNegative
+ log
;
493 return isNegative
+ log
+ 1;
497 hash_code
llvm::hash_value(const APInt
&Arg
) {
498 if (Arg
.isSingleWord())
499 return hash_combine(Arg
.U
.VAL
);
501 return hash_combine_range(Arg
.U
.pVal
, Arg
.U
.pVal
+ Arg
.getNumWords());
504 bool APInt::isSplat(unsigned SplatSizeInBits
) const {
505 assert(getBitWidth() % SplatSizeInBits
== 0 &&
506 "SplatSizeInBits must divide width!");
507 // We can check that all parts of an integer are equal by making use of a
508 // little trick: rotate and check if it's still the same value.
509 return *this == rotl(SplatSizeInBits
);
512 /// This function returns the high "numBits" bits of this APInt.
513 APInt
APInt::getHiBits(unsigned numBits
) const {
514 return this->lshr(BitWidth
- numBits
);
517 /// This function returns the low "numBits" bits of this APInt.
518 APInt
APInt::getLoBits(unsigned numBits
) const {
519 APInt
Result(getLowBitsSet(BitWidth
, numBits
));
524 /// Return a value containing V broadcasted over NewLen bits.
525 APInt
APInt::getSplat(unsigned NewLen
, const APInt
&V
) {
526 assert(NewLen
>= V
.getBitWidth() && "Can't splat to smaller bit width!");
528 APInt Val
= V
.zextOrSelf(NewLen
);
529 for (unsigned I
= V
.getBitWidth(); I
< NewLen
; I
<<= 1)
535 unsigned APInt::countLeadingZerosSlowCase() const {
537 for (int i
= getNumWords()-1; i
>= 0; --i
) {
538 uint64_t V
= U
.pVal
[i
];
540 Count
+= APINT_BITS_PER_WORD
;
542 Count
+= llvm::countLeadingZeros(V
);
546 // Adjust for unused bits in the most significant word (they are zero).
547 unsigned Mod
= BitWidth
% APINT_BITS_PER_WORD
;
548 Count
-= Mod
> 0 ? APINT_BITS_PER_WORD
- Mod
: 0;
552 unsigned APInt::countLeadingOnesSlowCase() const {
553 unsigned highWordBits
= BitWidth
% APINT_BITS_PER_WORD
;
556 highWordBits
= APINT_BITS_PER_WORD
;
559 shift
= APINT_BITS_PER_WORD
- highWordBits
;
561 int i
= getNumWords() - 1;
562 unsigned Count
= llvm::countLeadingOnes(U
.pVal
[i
] << shift
);
563 if (Count
== highWordBits
) {
564 for (i
--; i
>= 0; --i
) {
565 if (U
.pVal
[i
] == WORDTYPE_MAX
)
566 Count
+= APINT_BITS_PER_WORD
;
568 Count
+= llvm::countLeadingOnes(U
.pVal
[i
]);
576 unsigned APInt::countTrailingZerosSlowCase() const {
579 for (; i
< getNumWords() && U
.pVal
[i
] == 0; ++i
)
580 Count
+= APINT_BITS_PER_WORD
;
581 if (i
< getNumWords())
582 Count
+= llvm::countTrailingZeros(U
.pVal
[i
]);
583 return std::min(Count
, BitWidth
);
586 unsigned APInt::countTrailingOnesSlowCase() const {
589 for (; i
< getNumWords() && U
.pVal
[i
] == WORDTYPE_MAX
; ++i
)
590 Count
+= APINT_BITS_PER_WORD
;
591 if (i
< getNumWords())
592 Count
+= llvm::countTrailingOnes(U
.pVal
[i
]);
593 assert(Count
<= BitWidth
);
597 unsigned APInt::countPopulationSlowCase() const {
599 for (unsigned i
= 0; i
< getNumWords(); ++i
)
600 Count
+= llvm::countPopulation(U
.pVal
[i
]);
604 bool APInt::intersectsSlowCase(const APInt
&RHS
) const {
605 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
606 if ((U
.pVal
[i
] & RHS
.U
.pVal
[i
]) != 0)
612 bool APInt::isSubsetOfSlowCase(const APInt
&RHS
) const {
613 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
614 if ((U
.pVal
[i
] & ~RHS
.U
.pVal
[i
]) != 0)
620 APInt
APInt::byteSwap() const {
621 assert(BitWidth
>= 16 && BitWidth
% 16 == 0 && "Cannot byteswap!");
623 return APInt(BitWidth
, ByteSwap_16(uint16_t(U
.VAL
)));
625 return APInt(BitWidth
, ByteSwap_32(unsigned(U
.VAL
)));
626 if (BitWidth
== 48) {
627 unsigned Tmp1
= unsigned(U
.VAL
>> 16);
628 Tmp1
= ByteSwap_32(Tmp1
);
629 uint16_t Tmp2
= uint16_t(U
.VAL
);
630 Tmp2
= ByteSwap_16(Tmp2
);
631 return APInt(BitWidth
, (uint64_t(Tmp2
) << 32) | Tmp1
);
634 return APInt(BitWidth
, ByteSwap_64(U
.VAL
));
636 APInt
Result(getNumWords() * APINT_BITS_PER_WORD
, 0);
637 for (unsigned I
= 0, N
= getNumWords(); I
!= N
; ++I
)
638 Result
.U
.pVal
[I
] = ByteSwap_64(U
.pVal
[N
- I
- 1]);
639 if (Result
.BitWidth
!= BitWidth
) {
640 Result
.lshrInPlace(Result
.BitWidth
- BitWidth
);
641 Result
.BitWidth
= BitWidth
;
646 APInt
APInt::reverseBits() const {
649 return APInt(BitWidth
, llvm::reverseBits
<uint64_t>(U
.VAL
));
651 return APInt(BitWidth
, llvm::reverseBits
<uint32_t>(U
.VAL
));
653 return APInt(BitWidth
, llvm::reverseBits
<uint16_t>(U
.VAL
));
655 return APInt(BitWidth
, llvm::reverseBits
<uint8_t>(U
.VAL
));
661 APInt
Reversed(BitWidth
, 0);
662 unsigned S
= BitWidth
;
664 for (; Val
!= 0; Val
.lshrInPlace(1)) {
674 APInt
llvm::APIntOps::GreatestCommonDivisor(APInt A
, APInt B
) {
675 // Fast-path a common case.
676 if (A
== B
) return A
;
678 // Corner cases: if either operand is zero, the other is the gcd.
682 // Count common powers of 2 and remove all other powers of 2.
685 unsigned Pow2_A
= A
.countTrailingZeros();
686 unsigned Pow2_B
= B
.countTrailingZeros();
687 if (Pow2_A
> Pow2_B
) {
688 A
.lshrInPlace(Pow2_A
- Pow2_B
);
690 } else if (Pow2_B
> Pow2_A
) {
691 B
.lshrInPlace(Pow2_B
- Pow2_A
);
698 // Both operands are odd multiples of 2^Pow_2:
700 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
702 // This is a modified version of Stein's algorithm, taking advantage of
703 // efficient countTrailingZeros().
707 A
.lshrInPlace(A
.countTrailingZeros() - Pow2
);
710 B
.lshrInPlace(B
.countTrailingZeros() - Pow2
);
717 APInt
llvm::APIntOps::RoundDoubleToAPInt(double Double
, unsigned width
) {
718 uint64_t I
= bit_cast
<uint64_t>(Double
);
720 // Get the sign bit from the highest order bit
721 bool isNeg
= I
>> 63;
723 // Get the 11-bit exponent and adjust for the 1023 bit bias
724 int64_t exp
= ((I
>> 52) & 0x7ff) - 1023;
726 // If the exponent is negative, the value is < 0 so just return 0.
728 return APInt(width
, 0u);
730 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731 uint64_t mantissa
= (I
& (~0ULL >> 12)) | 1ULL << 52;
733 // If the exponent doesn't shift all bits out of the mantissa
735 return isNeg
? -APInt(width
, mantissa
>> (52 - exp
)) :
736 APInt(width
, mantissa
>> (52 - exp
));
738 // If the client didn't provide enough bits for us to shift the mantissa into
739 // then the result is undefined, just return 0
740 if (width
<= exp
- 52)
741 return APInt(width
, 0);
743 // Otherwise, we have to shift the mantissa bits up to the right location
744 APInt
Tmp(width
, mantissa
);
745 Tmp
<<= (unsigned)exp
- 52;
746 return isNeg
? -Tmp
: Tmp
;
749 /// This function converts this APInt to a double.
750 /// The layout for double is as following (IEEE Standard 754):
751 /// --------------------------------------
752 /// | Sign Exponent Fraction Bias |
753 /// |-------------------------------------- |
754 /// | 1[63] 11[62-52] 52[51-00] 1023 |
755 /// --------------------------------------
756 double APInt::roundToDouble(bool isSigned
) const {
758 // Handle the simple case where the value is contained in one uint64_t.
759 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD
) {
762 int64_t sext
= SignExtend64(getWord(0), BitWidth
);
765 return double(getWord(0));
768 // Determine if the value is negative.
769 bool isNeg
= isSigned
? (*this)[BitWidth
-1] : false;
771 // Construct the absolute value if we're negative.
772 APInt
Tmp(isNeg
? -(*this) : (*this));
774 // Figure out how many bits we're using.
775 unsigned n
= Tmp
.getActiveBits();
777 // The exponent (without bias normalization) is just the number of bits
778 // we are using. Note that the sign bit is gone since we constructed the
782 // Return infinity for exponent overflow
784 if (!isSigned
|| !isNeg
)
785 return std::numeric_limits
<double>::infinity();
787 return -std::numeric_limits
<double>::infinity();
789 exp
+= 1023; // Increment for 1023 bias
791 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792 // extract the high 52 bits from the correct words in pVal.
794 unsigned hiWord
= whichWord(n
-1);
796 mantissa
= Tmp
.U
.pVal
[0];
798 mantissa
>>= n
- 52; // shift down, we want the top 52 bits.
800 assert(hiWord
> 0 && "huh?");
801 uint64_t hibits
= Tmp
.U
.pVal
[hiWord
] << (52 - n
% APINT_BITS_PER_WORD
);
802 uint64_t lobits
= Tmp
.U
.pVal
[hiWord
-1] >> (11 + n
% APINT_BITS_PER_WORD
);
803 mantissa
= hibits
| lobits
;
806 // The leading bit of mantissa is implicit, so get rid of it.
807 uint64_t sign
= isNeg
? (1ULL << (APINT_BITS_PER_WORD
- 1)) : 0;
808 uint64_t I
= sign
| (exp
<< 52) | mantissa
;
809 return bit_cast
<double>(I
);
812 // Truncate to new width.
813 APInt
APInt::trunc(unsigned width
) const {
814 assert(width
< BitWidth
&& "Invalid APInt Truncate request");
815 assert(width
&& "Can't truncate to 0 bits");
817 if (width
<= APINT_BITS_PER_WORD
)
818 return APInt(width
, getRawData()[0]);
820 APInt
Result(getMemory(getNumWords(width
)), width
);
824 for (i
= 0; i
!= width
/ APINT_BITS_PER_WORD
; i
++)
825 Result
.U
.pVal
[i
] = U
.pVal
[i
];
827 // Truncate and copy any partial word.
828 unsigned bits
= (0 - width
) % APINT_BITS_PER_WORD
;
830 Result
.U
.pVal
[i
] = U
.pVal
[i
] << bits
>> bits
;
835 // Sign extend to a new width.
836 APInt
APInt::sext(unsigned Width
) const {
837 assert(Width
> BitWidth
&& "Invalid APInt SignExtend request");
839 if (Width
<= APINT_BITS_PER_WORD
)
840 return APInt(Width
, SignExtend64(U
.VAL
, BitWidth
));
842 APInt
Result(getMemory(getNumWords(Width
)), Width
);
845 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
847 // Sign extend the last word since there may be unused bits in the input.
848 Result
.U
.pVal
[getNumWords() - 1] =
849 SignExtend64(Result
.U
.pVal
[getNumWords() - 1],
850 ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
852 // Fill with sign bits.
853 std::memset(Result
.U
.pVal
+ getNumWords(), isNegative() ? -1 : 0,
854 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
855 Result
.clearUnusedBits();
859 // Zero extend to a new width.
860 APInt
APInt::zext(unsigned width
) const {
861 assert(width
> BitWidth
&& "Invalid APInt ZeroExtend request");
863 if (width
<= APINT_BITS_PER_WORD
)
864 return APInt(width
, U
.VAL
);
866 APInt
Result(getMemory(getNumWords(width
)), width
);
869 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
871 // Zero remaining words.
872 std::memset(Result
.U
.pVal
+ getNumWords(), 0,
873 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
878 APInt
APInt::zextOrTrunc(unsigned width
) const {
879 if (BitWidth
< width
)
881 if (BitWidth
> width
)
886 APInt
APInt::sextOrTrunc(unsigned width
) const {
887 if (BitWidth
< width
)
889 if (BitWidth
> width
)
894 APInt
APInt::zextOrSelf(unsigned width
) const {
895 if (BitWidth
< width
)
900 APInt
APInt::sextOrSelf(unsigned width
) const {
901 if (BitWidth
< width
)
906 /// Arithmetic right-shift this APInt by shiftAmt.
907 /// Arithmetic right-shift function.
908 void APInt::ashrInPlace(const APInt
&shiftAmt
) {
909 ashrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
912 /// Arithmetic right-shift this APInt by shiftAmt.
913 /// Arithmetic right-shift function.
914 void APInt::ashrSlowCase(unsigned ShiftAmt
) {
915 // Don't bother performing a no-op shift.
919 // Save the original sign bit for later.
920 bool Negative
= isNegative();
922 // WordShift is the inter-part shift; BitShift is intra-part shift.
923 unsigned WordShift
= ShiftAmt
/ APINT_BITS_PER_WORD
;
924 unsigned BitShift
= ShiftAmt
% APINT_BITS_PER_WORD
;
926 unsigned WordsToMove
= getNumWords() - WordShift
;
927 if (WordsToMove
!= 0) {
928 // Sign extend the last word to fill in the unused bits.
929 U
.pVal
[getNumWords() - 1] = SignExtend64(
930 U
.pVal
[getNumWords() - 1], ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
932 // Fastpath for moving by whole words.
934 std::memmove(U
.pVal
, U
.pVal
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
936 // Move the words containing significant bits.
937 for (unsigned i
= 0; i
!= WordsToMove
- 1; ++i
)
938 U
.pVal
[i
] = (U
.pVal
[i
+ WordShift
] >> BitShift
) |
939 (U
.pVal
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
));
941 // Handle the last word which has no high bits to copy.
942 U
.pVal
[WordsToMove
- 1] = U
.pVal
[WordShift
+ WordsToMove
- 1] >> BitShift
;
943 // Sign extend one more time.
944 U
.pVal
[WordsToMove
- 1] =
945 SignExtend64(U
.pVal
[WordsToMove
- 1], APINT_BITS_PER_WORD
- BitShift
);
949 // Fill in the remainder based on the original sign.
950 std::memset(U
.pVal
+ WordsToMove
, Negative
? -1 : 0,
951 WordShift
* APINT_WORD_SIZE
);
955 /// Logical right-shift this APInt by shiftAmt.
956 /// Logical right-shift function.
957 void APInt::lshrInPlace(const APInt
&shiftAmt
) {
958 lshrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
961 /// Logical right-shift this APInt by shiftAmt.
962 /// Logical right-shift function.
963 void APInt::lshrSlowCase(unsigned ShiftAmt
) {
964 tcShiftRight(U
.pVal
, getNumWords(), ShiftAmt
);
967 /// Left-shift this APInt by shiftAmt.
968 /// Left-shift function.
969 APInt
&APInt::operator<<=(const APInt
&shiftAmt
) {
970 // It's undefined behavior in C to shift by BitWidth or greater.
971 *this <<= (unsigned)shiftAmt
.getLimitedValue(BitWidth
);
975 void APInt::shlSlowCase(unsigned ShiftAmt
) {
976 tcShiftLeft(U
.pVal
, getNumWords(), ShiftAmt
);
980 // Calculate the rotate amount modulo the bit width.
981 static unsigned rotateModulo(unsigned BitWidth
, const APInt
&rotateAmt
) {
982 unsigned rotBitWidth
= rotateAmt
.getBitWidth();
983 APInt rot
= rotateAmt
;
984 if (rotBitWidth
< BitWidth
) {
985 // Extend the rotate APInt, so that the urem doesn't divide by 0.
986 // e.g. APInt(1, 32) would give APInt(1, 0).
987 rot
= rotateAmt
.zext(BitWidth
);
989 rot
= rot
.urem(APInt(rot
.getBitWidth(), BitWidth
));
990 return rot
.getLimitedValue(BitWidth
);
993 APInt
APInt::rotl(const APInt
&rotateAmt
) const {
994 return rotl(rotateModulo(BitWidth
, rotateAmt
));
997 APInt
APInt::rotl(unsigned rotateAmt
) const {
998 rotateAmt
%= BitWidth
;
1001 return shl(rotateAmt
) | lshr(BitWidth
- rotateAmt
);
1004 APInt
APInt::rotr(const APInt
&rotateAmt
) const {
1005 return rotr(rotateModulo(BitWidth
, rotateAmt
));
1008 APInt
APInt::rotr(unsigned rotateAmt
) const {
1009 rotateAmt
%= BitWidth
;
1012 return lshr(rotateAmt
) | shl(BitWidth
- rotateAmt
);
1015 // Square Root - this method computes and returns the square root of "this".
1016 // Three mechanisms are used for computation. For small values (<= 5 bits),
1017 // a table lookup is done. This gets some performance for common cases. For
1018 // values using less than 52 bits, the value is converted to double and then
1019 // the libc sqrt function is called. The result is rounded and then converted
1020 // back to a uint64_t which is then used to construct the result. Finally,
1021 // the Babylonian method for computing square roots is used.
1022 APInt
APInt::sqrt() const {
1024 // Determine the magnitude of the value.
1025 unsigned magnitude
= getActiveBits();
1027 // Use a fast table for some small values. This also gets rid of some
1028 // rounding errors in libc sqrt for small values.
1029 if (magnitude
<= 5) {
1030 static const uint8_t results
[32] = {
1033 /* 3- 6 */ 2, 2, 2, 2,
1034 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1035 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1039 return APInt(BitWidth
, results
[ (isSingleWord() ? U
.VAL
: U
.pVal
[0]) ]);
1042 // If the magnitude of the value fits in less than 52 bits (the precision of
1043 // an IEEE double precision floating point value), then we can use the
1044 // libc sqrt function which will probably use a hardware sqrt computation.
1045 // This should be faster than the algorithm below.
1046 if (magnitude
< 52) {
1047 return APInt(BitWidth
,
1048 uint64_t(::round(::sqrt(double(isSingleWord() ? U
.VAL
1052 // Okay, all the short cuts are exhausted. We must compute it. The following
1053 // is a classical Babylonian method for computing the square root. This code
1054 // was adapted to APInt from a wikipedia article on such computations.
1055 // See http://www.wikipedia.org/ and go to the page named
1056 // Calculate_an_integer_square_root.
1057 unsigned nbits
= BitWidth
, i
= 4;
1058 APInt
testy(BitWidth
, 16);
1059 APInt
x_old(BitWidth
, 1);
1060 APInt
x_new(BitWidth
, 0);
1061 APInt
two(BitWidth
, 2);
1063 // Select a good starting value using binary logarithms.
1064 for (;; i
+= 2, testy
= testy
.shl(2))
1065 if (i
>= nbits
|| this->ule(testy
)) {
1066 x_old
= x_old
.shl(i
/ 2);
1070 // Use the Babylonian method to arrive at the integer square root:
1072 x_new
= (this->udiv(x_old
) + x_old
).udiv(two
);
1073 if (x_old
.ule(x_new
))
1078 // Make sure we return the closest approximation
1079 // NOTE: The rounding calculation below is correct. It will produce an
1080 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081 // determined to be a rounding issue with pari/gp as it begins to use a
1082 // floating point representation after 192 bits. There are no discrepancies
1083 // between this algorithm and pari/gp for bit widths < 192 bits.
1084 APInt
square(x_old
* x_old
);
1085 APInt
nextSquare((x_old
+ 1) * (x_old
+1));
1086 if (this->ult(square
))
1088 assert(this->ule(nextSquare
) && "Error in APInt::sqrt computation");
1089 APInt
midpoint((nextSquare
- square
).udiv(two
));
1090 APInt
offset(*this - square
);
1091 if (offset
.ult(midpoint
))
1096 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1097 /// iterative extended Euclidean algorithm is used to solve for this value,
1098 /// however we simplify it to speed up calculating only the inverse, and take
1099 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1100 /// (potentially large) APInts around.
1101 /// WARNING: a value of '0' may be returned,
1102 /// signifying that no multiplicative inverse exists!
1103 APInt
APInt::multiplicativeInverse(const APInt
& modulo
) const {
1104 assert(ult(modulo
) && "This APInt must be smaller than the modulo");
1106 // Using the properties listed at the following web page (accessed 06/21/08):
1107 // http://www.numbertheory.org/php/euclid.html
1108 // (especially the properties numbered 3, 4 and 9) it can be proved that
1109 // BitWidth bits suffice for all the computations in the algorithm implemented
1110 // below. More precisely, this number of bits suffice if the multiplicative
1111 // inverse exists, but may not suffice for the general extended Euclidean
1114 APInt r
[2] = { modulo
, *this };
1115 APInt t
[2] = { APInt(BitWidth
, 0), APInt(BitWidth
, 1) };
1116 APInt
q(BitWidth
, 0);
1119 for (i
= 0; r
[i
^1] != 0; i
^= 1) {
1120 // An overview of the math without the confusing bit-flipping:
1121 // q = r[i-2] / r[i-1]
1122 // r[i] = r[i-2] % r[i-1]
1123 // t[i] = t[i-2] - t[i-1] * q
1124 udivrem(r
[i
], r
[i
^1], q
, r
[i
]);
1128 // If this APInt and the modulo are not coprime, there is no multiplicative
1129 // inverse, so return 0. We check this by looking at the next-to-last
1130 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1133 return APInt(BitWidth
, 0);
1135 // The next-to-last t is the multiplicative inverse. However, we are
1136 // interested in a positive inverse. Calculate a positive one from a negative
1137 // one if necessary. A simple addition of the modulo suffices because
1138 // abs(t[i]) is known to be less than *this/2 (see the link above).
1139 if (t
[i
].isNegative())
1142 return std::move(t
[i
]);
1145 /// Calculate the magic numbers required to implement a signed integer division
1146 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1147 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1148 /// Warren, Jr., chapter 10.
1149 APInt::ms
APInt::magic() const {
1150 const APInt
& d
= *this;
1152 APInt ad
, anc
, delta
, q1
, r1
, q2
, r2
, t
;
1153 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1157 t
= signedMin
+ (d
.lshr(d
.getBitWidth() - 1));
1158 anc
= t
- 1 - t
.urem(ad
); // absolute value of nc
1159 p
= d
.getBitWidth() - 1; // initialize p
1160 q1
= signedMin
.udiv(anc
); // initialize q1 = 2p/abs(nc)
1161 r1
= signedMin
- q1
*anc
; // initialize r1 = rem(2p,abs(nc))
1162 q2
= signedMin
.udiv(ad
); // initialize q2 = 2p/abs(d)
1163 r2
= signedMin
- q2
*ad
; // initialize r2 = rem(2p,abs(d))
1166 q1
= q1
<<1; // update q1 = 2p/abs(nc)
1167 r1
= r1
<<1; // update r1 = rem(2p/abs(nc))
1168 if (r1
.uge(anc
)) { // must be unsigned comparison
1172 q2
= q2
<<1; // update q2 = 2p/abs(d)
1173 r2
= r2
<<1; // update r2 = rem(2p/abs(d))
1174 if (r2
.uge(ad
)) { // must be unsigned comparison
1179 } while (q1
.ult(delta
) || (q1
== delta
&& r1
== 0));
1182 if (d
.isNegative()) mag
.m
= -mag
.m
; // resulting magic number
1183 mag
.s
= p
- d
.getBitWidth(); // resulting shift
1187 /// Calculate the magic numbers required to implement an unsigned integer
1188 /// division by a constant as a sequence of multiplies, adds and shifts.
1189 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1190 /// S. Warren, Jr., chapter 10.
1191 /// LeadingZeros can be used to simplify the calculation if the upper bits
1192 /// of the divided value are known zero.
1193 APInt::mu
APInt::magicu(unsigned LeadingZeros
) const {
1194 const APInt
& d
= *this;
1196 APInt nc
, delta
, q1
, r1
, q2
, r2
;
1198 magu
.a
= 0; // initialize "add" indicator
1199 APInt allOnes
= APInt::getAllOnesValue(d
.getBitWidth()).lshr(LeadingZeros
);
1200 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1201 APInt signedMax
= APInt::getSignedMaxValue(d
.getBitWidth());
1203 nc
= allOnes
- (allOnes
- d
).urem(d
);
1204 p
= d
.getBitWidth() - 1; // initialize p
1205 q1
= signedMin
.udiv(nc
); // initialize q1 = 2p/nc
1206 r1
= signedMin
- q1
*nc
; // initialize r1 = rem(2p,nc)
1207 q2
= signedMax
.udiv(d
); // initialize q2 = (2p-1)/d
1208 r2
= signedMax
- q2
*d
; // initialize r2 = rem((2p-1),d)
1211 if (r1
.uge(nc
- r1
)) {
1212 q1
= q1
+ q1
+ 1; // update q1
1213 r1
= r1
+ r1
- nc
; // update r1
1216 q1
= q1
+q1
; // update q1
1217 r1
= r1
+r1
; // update r1
1219 if ((r2
+ 1).uge(d
- r2
)) {
1220 if (q2
.uge(signedMax
)) magu
.a
= 1;
1221 q2
= q2
+q2
+ 1; // update q2
1222 r2
= r2
+r2
+ 1 - d
; // update r2
1225 if (q2
.uge(signedMin
)) magu
.a
= 1;
1226 q2
= q2
+q2
; // update q2
1227 r2
= r2
+r2
+ 1; // update r2
1230 } while (p
< d
.getBitWidth()*2 &&
1231 (q1
.ult(delta
) || (q1
== delta
&& r1
== 0)));
1232 magu
.m
= q2
+ 1; // resulting magic number
1233 magu
.s
= p
- d
.getBitWidth(); // resulting shift
1237 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239 /// variables here have the same names as in the algorithm. Comments explain
1240 /// the algorithm and any deviation from it.
1241 static void KnuthDiv(uint32_t *u
, uint32_t *v
, uint32_t *q
, uint32_t* r
,
1242 unsigned m
, unsigned n
) {
1243 assert(u
&& "Must provide dividend");
1244 assert(v
&& "Must provide divisor");
1245 assert(q
&& "Must provide quotient");
1246 assert(u
!= v
&& u
!= q
&& v
!= q
&& "Must use different memory");
1247 assert(n
>1 && "n must be > 1");
1249 // b denotes the base of the number system. In our case b is 2^32.
1250 const uint64_t b
= uint64_t(1) << 32;
1252 // The DEBUG macros here tend to be spam in the debug output if you're not
1253 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1255 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1257 #define DEBUG_KNUTH(X) do {} while(false)
1260 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m
<< " n=" << n
<< '\n');
1261 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1263 DEBUG_KNUTH(dbgs() << " by");
1264 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1265 DEBUG_KNUTH(dbgs() << '\n');
1266 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267 // u and v by d. Note that we have taken Knuth's advice here to use a power
1268 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269 // 2 allows us to shift instead of multiply and it is easy to determine the
1270 // shift amount from the leading zeros. We are basically normalizing the u
1271 // and v so that its high bits are shifted to the top of v's range without
1272 // overflow. Note that this can require an extra word in u so that u must
1273 // be of length m+n+1.
1274 unsigned shift
= countLeadingZeros(v
[n
-1]);
1275 uint32_t v_carry
= 0;
1276 uint32_t u_carry
= 0;
1278 for (unsigned i
= 0; i
< m
+n
; ++i
) {
1279 uint32_t u_tmp
= u
[i
] >> (32 - shift
);
1280 u
[i
] = (u
[i
] << shift
) | u_carry
;
1283 for (unsigned i
= 0; i
< n
; ++i
) {
1284 uint32_t v_tmp
= v
[i
] >> (32 - shift
);
1285 v
[i
] = (v
[i
] << shift
) | v_carry
;
1291 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1292 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1293 DEBUG_KNUTH(dbgs() << " by");
1294 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1295 DEBUG_KNUTH(dbgs() << '\n');
1297 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1300 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j
<< '\n');
1301 // D3. [Calculate q'.].
1302 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306 // on v[n-2] determines at high speed most of the cases in which the trial
1307 // value qp is one too large, and it eliminates all cases where qp is two
1309 uint64_t dividend
= Make_64(u
[j
+n
], u
[j
+n
-1]);
1310 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend
<< '\n');
1311 uint64_t qp
= dividend
/ v
[n
-1];
1312 uint64_t rp
= dividend
% v
[n
-1];
1313 if (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]) {
1316 if (rp
< b
&& (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]))
1319 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp
<< ", rp == " << rp
<< '\n');
1321 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323 // consists of a simple multiplication by a one-place number, combined with
1325 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327 // true value plus b**(n+1), namely as the b's complement of
1328 // the true value, and a "borrow" to the left should be remembered.
1330 for (unsigned i
= 0; i
< n
; ++i
) {
1331 uint64_t p
= uint64_t(qp
) * uint64_t(v
[i
]);
1332 int64_t subres
= int64_t(u
[j
+i
]) - borrow
- Lo_32(p
);
1333 u
[j
+i
] = Lo_32(subres
);
1334 borrow
= Hi_32(p
) - Hi_32(subres
);
1335 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u
[j
+ i
]
1336 << ", borrow = " << borrow
<< '\n');
1338 bool isNeg
= u
[j
+n
] < borrow
;
1339 u
[j
+n
] -= Lo_32(borrow
);
1341 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1343 DEBUG_KNUTH(dbgs() << '\n');
1345 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346 // negative, go to step D6; otherwise go on to step D7.
1349 // D6. [Add back]. The probability that this step is necessary is very
1350 // small, on the order of only 2/b. Make sure that test data accounts for
1351 // this possibility. Decrease q[j] by 1
1353 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354 // A carry will occur to the left of u[j+n], and it should be ignored
1355 // since it cancels with the borrow that occurred in D4.
1357 for (unsigned i
= 0; i
< n
; i
++) {
1358 uint32_t limit
= std::min(u
[j
+i
],v
[i
]);
1359 u
[j
+i
] += v
[i
] + carry
;
1360 carry
= u
[j
+i
] < limit
|| (carry
&& u
[j
+i
] == limit
);
1364 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1366 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q
[j
] << '\n');
1368 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1371 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372 DEBUG_KNUTH(for (int i
= m
; i
>= 0; i
--) dbgs() << " " << q
[i
]);
1373 DEBUG_KNUTH(dbgs() << '\n');
1375 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377 // compute the remainder (urem uses this).
1379 // The value d is expressed by the "shift" value above since we avoided
1380 // multiplication by d by using a shift left. So, all we have to do is
1381 // shift right here.
1384 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385 for (int i
= n
-1; i
>= 0; i
--) {
1386 r
[i
] = (u
[i
] >> shift
) | carry
;
1387 carry
= u
[i
] << (32 - shift
);
1388 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1391 for (int i
= n
-1; i
>= 0; i
--) {
1393 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1396 DEBUG_KNUTH(dbgs() << '\n');
1398 DEBUG_KNUTH(dbgs() << '\n');
1401 void APInt::divide(const WordType
*LHS
, unsigned lhsWords
, const WordType
*RHS
,
1402 unsigned rhsWords
, WordType
*Quotient
, WordType
*Remainder
) {
1403 assert(lhsWords
>= rhsWords
&& "Fractional result");
1405 // First, compose the values into an array of 32-bit words instead of
1406 // 64-bit words. This is a necessity of both the "short division" algorithm
1407 // and the Knuth "classical algorithm" which requires there to be native
1408 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409 // can't use 64-bit operands here because we don't have native results of
1410 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411 // work on large-endian machines.
1412 unsigned n
= rhsWords
* 2;
1413 unsigned m
= (lhsWords
* 2) - n
;
1415 // Allocate space for the temporary values we need either on the stack, if
1416 // it will fit, or on the heap if it won't.
1417 uint32_t SPACE
[128];
1418 uint32_t *U
= nullptr;
1419 uint32_t *V
= nullptr;
1420 uint32_t *Q
= nullptr;
1421 uint32_t *R
= nullptr;
1422 if ((Remainder
?4:3)*n
+2*m
+1 <= 128) {
1425 Q
= &SPACE
[(m
+n
+1) + n
];
1427 R
= &SPACE
[(m
+n
+1) + n
+ (m
+n
)];
1429 U
= new uint32_t[m
+ n
+ 1];
1430 V
= new uint32_t[n
];
1431 Q
= new uint32_t[m
+n
];
1433 R
= new uint32_t[n
];
1436 // Initialize the dividend
1437 memset(U
, 0, (m
+n
+1)*sizeof(uint32_t));
1438 for (unsigned i
= 0; i
< lhsWords
; ++i
) {
1439 uint64_t tmp
= LHS
[i
];
1440 U
[i
* 2] = Lo_32(tmp
);
1441 U
[i
* 2 + 1] = Hi_32(tmp
);
1443 U
[m
+n
] = 0; // this extra word is for "spill" in the Knuth algorithm.
1445 // Initialize the divisor
1446 memset(V
, 0, (n
)*sizeof(uint32_t));
1447 for (unsigned i
= 0; i
< rhsWords
; ++i
) {
1448 uint64_t tmp
= RHS
[i
];
1449 V
[i
* 2] = Lo_32(tmp
);
1450 V
[i
* 2 + 1] = Hi_32(tmp
);
1453 // initialize the quotient and remainder
1454 memset(Q
, 0, (m
+n
) * sizeof(uint32_t));
1456 memset(R
, 0, n
* sizeof(uint32_t));
1458 // Now, adjust m and n for the Knuth division. n is the number of words in
1459 // the divisor. m is the number of words by which the dividend exceeds the
1460 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461 // contain any zero words or the Knuth algorithm fails.
1462 for (unsigned i
= n
; i
> 0 && V
[i
-1] == 0; i
--) {
1466 for (unsigned i
= m
+n
; i
> 0 && U
[i
-1] == 0; i
--)
1469 // If we're left with only a single word for the divisor, Knuth doesn't work
1470 // so we implement the short division algorithm here. This is much simpler
1471 // and faster because we are certain that we can divide a 64-bit quantity
1472 // by a 32-bit quantity at hardware speed and short division is simply a
1473 // series of such operations. This is just like doing short division but we
1474 // are using base 2^32 instead of base 10.
1475 assert(n
!= 0 && "Divide by zero?");
1477 uint32_t divisor
= V
[0];
1478 uint32_t remainder
= 0;
1479 for (int i
= m
; i
>= 0; i
--) {
1480 uint64_t partial_dividend
= Make_64(remainder
, U
[i
]);
1481 if (partial_dividend
== 0) {
1484 } else if (partial_dividend
< divisor
) {
1486 remainder
= Lo_32(partial_dividend
);
1487 } else if (partial_dividend
== divisor
) {
1491 Q
[i
] = Lo_32(partial_dividend
/ divisor
);
1492 remainder
= Lo_32(partial_dividend
- (Q
[i
] * divisor
));
1498 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1500 KnuthDiv(U
, V
, Q
, R
, m
, n
);
1503 // If the caller wants the quotient
1505 for (unsigned i
= 0; i
< lhsWords
; ++i
)
1506 Quotient
[i
] = Make_64(Q
[i
*2+1], Q
[i
*2]);
1509 // If the caller wants the remainder
1511 for (unsigned i
= 0; i
< rhsWords
; ++i
)
1512 Remainder
[i
] = Make_64(R
[i
*2+1], R
[i
*2]);
1515 // Clean up the memory we allocated.
1516 if (U
!= &SPACE
[0]) {
1524 APInt
APInt::udiv(const APInt
&RHS
) const {
1525 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1527 // First, deal with the easy case
1528 if (isSingleWord()) {
1529 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1530 return APInt(BitWidth
, U
.VAL
/ RHS
.U
.VAL
);
1533 // Get some facts about the LHS and RHS number of bits and words
1534 unsigned lhsWords
= getNumWords(getActiveBits());
1535 unsigned rhsBits
= RHS
.getActiveBits();
1536 unsigned rhsWords
= getNumWords(rhsBits
);
1537 assert(rhsWords
&& "Divided by zero???");
1539 // Deal with some degenerate cases
1542 return APInt(BitWidth
, 0);
1546 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1547 // X / Y ===> 0, iff X < Y
1548 return APInt(BitWidth
, 0);
1551 return APInt(BitWidth
, 1);
1552 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1553 // All high words are zero, just use native divide
1554 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
.U
.pVal
[0]);
1556 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557 APInt
Quotient(BitWidth
, 0); // to hold result.
1558 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
, nullptr);
1562 APInt
APInt::udiv(uint64_t RHS
) const {
1563 assert(RHS
!= 0 && "Divide by zero?");
1565 // First, deal with the easy case
1567 return APInt(BitWidth
, U
.VAL
/ RHS
);
1569 // Get some facts about the LHS words.
1570 unsigned lhsWords
= getNumWords(getActiveBits());
1572 // Deal with some degenerate cases
1575 return APInt(BitWidth
, 0);
1580 // X / Y ===> 0, iff X < Y
1581 return APInt(BitWidth
, 0);
1584 return APInt(BitWidth
, 1);
1585 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1586 // All high words are zero, just use native divide
1587 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
);
1589 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590 APInt
Quotient(BitWidth
, 0); // to hold result.
1591 divide(U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, nullptr);
1595 APInt
APInt::sdiv(const APInt
&RHS
) const {
1597 if (RHS
.isNegative())
1598 return (-(*this)).udiv(-RHS
);
1599 return -((-(*this)).udiv(RHS
));
1601 if (RHS
.isNegative())
1602 return -(this->udiv(-RHS
));
1603 return this->udiv(RHS
);
1606 APInt
APInt::sdiv(int64_t RHS
) const {
1609 return (-(*this)).udiv(-RHS
);
1610 return -((-(*this)).udiv(RHS
));
1613 return -(this->udiv(-RHS
));
1614 return this->udiv(RHS
);
1617 APInt
APInt::urem(const APInt
&RHS
) const {
1618 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1619 if (isSingleWord()) {
1620 assert(RHS
.U
.VAL
!= 0 && "Remainder by zero?");
1621 return APInt(BitWidth
, U
.VAL
% RHS
.U
.VAL
);
1624 // Get some facts about the LHS
1625 unsigned lhsWords
= getNumWords(getActiveBits());
1627 // Get some facts about the RHS
1628 unsigned rhsBits
= RHS
.getActiveBits();
1629 unsigned rhsWords
= getNumWords(rhsBits
);
1630 assert(rhsWords
&& "Performing remainder operation by zero ???");
1632 // Check the degenerate cases
1635 return APInt(BitWidth
, 0);
1638 return APInt(BitWidth
, 0);
1639 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1640 // X % Y ===> X, iff X < Y
1644 return APInt(BitWidth
, 0);
1646 // All high words are zero, just use native remainder
1647 return APInt(BitWidth
, U
.pVal
[0] % RHS
.U
.pVal
[0]);
1649 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650 APInt
Remainder(BitWidth
, 0);
1651 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, nullptr, Remainder
.U
.pVal
);
1655 uint64_t APInt::urem(uint64_t RHS
) const {
1656 assert(RHS
!= 0 && "Remainder by zero?");
1661 // Get some facts about the LHS
1662 unsigned lhsWords
= getNumWords(getActiveBits());
1664 // Check the degenerate cases
1672 // X % Y ===> X, iff X < Y
1673 return getZExtValue();
1678 // All high words are zero, just use native remainder
1679 return U
.pVal
[0] % RHS
;
1681 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1683 divide(U
.pVal
, lhsWords
, &RHS
, 1, nullptr, &Remainder
);
1687 APInt
APInt::srem(const APInt
&RHS
) const {
1689 if (RHS
.isNegative())
1690 return -((-(*this)).urem(-RHS
));
1691 return -((-(*this)).urem(RHS
));
1693 if (RHS
.isNegative())
1694 return this->urem(-RHS
);
1695 return this->urem(RHS
);
1698 int64_t APInt::srem(int64_t RHS
) const {
1701 return -((-(*this)).urem(-RHS
));
1702 return -((-(*this)).urem(RHS
));
1705 return this->urem(-RHS
);
1706 return this->urem(RHS
);
1709 void APInt::udivrem(const APInt
&LHS
, const APInt
&RHS
,
1710 APInt
&Quotient
, APInt
&Remainder
) {
1711 assert(LHS
.BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1712 unsigned BitWidth
= LHS
.BitWidth
;
1714 // First, deal with the easy case
1715 if (LHS
.isSingleWord()) {
1716 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1717 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
.U
.VAL
;
1718 uint64_t RemVal
= LHS
.U
.VAL
% RHS
.U
.VAL
;
1719 Quotient
= APInt(BitWidth
, QuotVal
);
1720 Remainder
= APInt(BitWidth
, RemVal
);
1724 // Get some size facts about the dividend and divisor
1725 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1726 unsigned rhsBits
= RHS
.getActiveBits();
1727 unsigned rhsWords
= getNumWords(rhsBits
);
1728 assert(rhsWords
&& "Performing divrem operation by zero ???");
1730 // Check the degenerate cases
1731 if (lhsWords
== 0) {
1732 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1733 Remainder
= APInt(BitWidth
, 0); // 0 % Y ===> 0
1738 Quotient
= LHS
; // X / 1 ===> X
1739 Remainder
= APInt(BitWidth
, 0); // X % 1 ===> 0
1742 if (lhsWords
< rhsWords
|| LHS
.ult(RHS
)) {
1743 Remainder
= LHS
; // X % Y ===> X, iff X < Y
1744 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1749 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1750 Remainder
= APInt(BitWidth
, 0); // X % X ===> 0;
1754 // Make sure there is enough space to hold the results.
1755 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756 // change the size. This is necessary if Quotient or Remainder is aliased
1758 Quotient
.reallocate(BitWidth
);
1759 Remainder
.reallocate(BitWidth
);
1761 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1762 // There is only one word to consider so use the native versions.
1763 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1764 uint64_t rhsValue
= RHS
.U
.pVal
[0];
1765 Quotient
= lhsValue
/ rhsValue
;
1766 Remainder
= lhsValue
% rhsValue
;
1770 // Okay, lets do it the long way
1771 divide(LHS
.U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
,
1773 // Clear the rest of the Quotient and Remainder.
1774 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1775 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1776 std::memset(Remainder
.U
.pVal
+ rhsWords
, 0,
1777 (getNumWords(BitWidth
) - rhsWords
) * APINT_WORD_SIZE
);
1780 void APInt::udivrem(const APInt
&LHS
, uint64_t RHS
, APInt
&Quotient
,
1781 uint64_t &Remainder
) {
1782 assert(RHS
!= 0 && "Divide by zero?");
1783 unsigned BitWidth
= LHS
.BitWidth
;
1785 // First, deal with the easy case
1786 if (LHS
.isSingleWord()) {
1787 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
;
1788 Remainder
= LHS
.U
.VAL
% RHS
;
1789 Quotient
= APInt(BitWidth
, QuotVal
);
1793 // Get some size facts about the dividend and divisor
1794 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1796 // Check the degenerate cases
1797 if (lhsWords
== 0) {
1798 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1799 Remainder
= 0; // 0 % Y ===> 0
1804 Quotient
= LHS
; // X / 1 ===> X
1805 Remainder
= 0; // X % 1 ===> 0
1810 Remainder
= LHS
.getZExtValue(); // X % Y ===> X, iff X < Y
1811 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1816 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1817 Remainder
= 0; // X % X ===> 0;
1821 // Make sure there is enough space to hold the results.
1822 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823 // change the size. This is necessary if Quotient is aliased with LHS.
1824 Quotient
.reallocate(BitWidth
);
1826 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1827 // There is only one word to consider so use the native versions.
1828 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1829 Quotient
= lhsValue
/ RHS
;
1830 Remainder
= lhsValue
% RHS
;
1834 // Okay, lets do it the long way
1835 divide(LHS
.U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, &Remainder
);
1836 // Clear the rest of the Quotient.
1837 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1838 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1841 void APInt::sdivrem(const APInt
&LHS
, const APInt
&RHS
,
1842 APInt
&Quotient
, APInt
&Remainder
) {
1843 if (LHS
.isNegative()) {
1844 if (RHS
.isNegative())
1845 APInt::udivrem(-LHS
, -RHS
, Quotient
, Remainder
);
1847 APInt::udivrem(-LHS
, RHS
, Quotient
, Remainder
);
1851 } else if (RHS
.isNegative()) {
1852 APInt::udivrem(LHS
, -RHS
, Quotient
, Remainder
);
1855 APInt::udivrem(LHS
, RHS
, Quotient
, Remainder
);
1859 void APInt::sdivrem(const APInt
&LHS
, int64_t RHS
,
1860 APInt
&Quotient
, int64_t &Remainder
) {
1861 uint64_t R
= Remainder
;
1862 if (LHS
.isNegative()) {
1864 APInt::udivrem(-LHS
, -RHS
, Quotient
, R
);
1866 APInt::udivrem(-LHS
, RHS
, Quotient
, R
);
1870 } else if (RHS
< 0) {
1871 APInt::udivrem(LHS
, -RHS
, Quotient
, R
);
1874 APInt::udivrem(LHS
, RHS
, Quotient
, R
);
1879 APInt
APInt::sadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1880 APInt Res
= *this+RHS
;
1881 Overflow
= isNonNegative() == RHS
.isNonNegative() &&
1882 Res
.isNonNegative() != isNonNegative();
1886 APInt
APInt::uadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1887 APInt Res
= *this+RHS
;
1888 Overflow
= Res
.ult(RHS
);
1892 APInt
APInt::ssub_ov(const APInt
&RHS
, bool &Overflow
) const {
1893 APInt Res
= *this - RHS
;
1894 Overflow
= isNonNegative() != RHS
.isNonNegative() &&
1895 Res
.isNonNegative() != isNonNegative();
1899 APInt
APInt::usub_ov(const APInt
&RHS
, bool &Overflow
) const {
1900 APInt Res
= *this-RHS
;
1901 Overflow
= Res
.ugt(*this);
1905 APInt
APInt::sdiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1906 // MININT/-1 --> overflow.
1907 Overflow
= isMinSignedValue() && RHS
.isAllOnesValue();
1911 APInt
APInt::smul_ov(const APInt
&RHS
, bool &Overflow
) const {
1912 APInt Res
= *this * RHS
;
1914 if (*this != 0 && RHS
!= 0)
1915 Overflow
= Res
.sdiv(RHS
) != *this || Res
.sdiv(*this) != RHS
;
1921 APInt
APInt::umul_ov(const APInt
&RHS
, bool &Overflow
) const {
1922 if (countLeadingZeros() + RHS
.countLeadingZeros() + 2 <= BitWidth
) {
1927 APInt Res
= lshr(1) * RHS
;
1928 Overflow
= Res
.isNegative();
1938 APInt
APInt::sshl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1939 Overflow
= ShAmt
.uge(getBitWidth());
1941 return APInt(BitWidth
, 0);
1943 if (isNonNegative()) // Don't allow sign change.
1944 Overflow
= ShAmt
.uge(countLeadingZeros());
1946 Overflow
= ShAmt
.uge(countLeadingOnes());
1948 return *this << ShAmt
;
1951 APInt
APInt::ushl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1952 Overflow
= ShAmt
.uge(getBitWidth());
1954 return APInt(BitWidth
, 0);
1956 Overflow
= ShAmt
.ugt(countLeadingZeros());
1958 return *this << ShAmt
;
1961 APInt
APInt::sadd_sat(const APInt
&RHS
) const {
1963 APInt Res
= sadd_ov(RHS
, Overflow
);
1967 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
1968 : APInt::getSignedMaxValue(BitWidth
);
1971 APInt
APInt::uadd_sat(const APInt
&RHS
) const {
1973 APInt Res
= uadd_ov(RHS
, Overflow
);
1977 return APInt::getMaxValue(BitWidth
);
1980 APInt
APInt::ssub_sat(const APInt
&RHS
) const {
1982 APInt Res
= ssub_ov(RHS
, Overflow
);
1986 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
1987 : APInt::getSignedMaxValue(BitWidth
);
1990 APInt
APInt::usub_sat(const APInt
&RHS
) const {
1992 APInt Res
= usub_ov(RHS
, Overflow
);
1996 return APInt(BitWidth
, 0);
2000 void APInt::fromString(unsigned numbits
, StringRef str
, uint8_t radix
) {
2001 // Check our assumptions here
2002 assert(!str
.empty() && "Invalid string length");
2003 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
2005 "Radix should be 2, 8, 10, 16, or 36!");
2007 StringRef::iterator p
= str
.begin();
2008 size_t slen
= str
.size();
2009 bool isNeg
= *p
== '-';
2010 if (*p
== '-' || *p
== '+') {
2013 assert(slen
&& "String is only a sign, needs a value.");
2015 assert((slen
<= numbits
|| radix
!= 2) && "Insufficient bit width");
2016 assert(((slen
-1)*3 <= numbits
|| radix
!= 8) && "Insufficient bit width");
2017 assert(((slen
-1)*4 <= numbits
|| radix
!= 16) && "Insufficient bit width");
2018 assert((((slen
-1)*64)/22 <= numbits
|| radix
!= 10) &&
2019 "Insufficient bit width");
2021 // Allocate memory if needed
2025 U
.pVal
= getClearedMemory(getNumWords());
2027 // Figure out if we can shift instead of multiply
2028 unsigned shift
= (radix
== 16 ? 4 : radix
== 8 ? 3 : radix
== 2 ? 1 : 0);
2030 // Enter digit traversal loop
2031 for (StringRef::iterator e
= str
.end(); p
!= e
; ++p
) {
2032 unsigned digit
= getDigit(*p
, radix
);
2033 assert(digit
< radix
&& "Invalid character in digit string");
2035 // Shift or multiply the value by the radix
2043 // Add in the digit we just interpreted
2046 // If its negative, put it in two's complement form
2051 void APInt::toString(SmallVectorImpl
<char> &Str
, unsigned Radix
,
2052 bool Signed
, bool formatAsCLiteral
) const {
2053 assert((Radix
== 10 || Radix
== 8 || Radix
== 16 || Radix
== 2 ||
2055 "Radix should be 2, 8, 10, 16, or 36!");
2057 const char *Prefix
= "";
2058 if (formatAsCLiteral
) {
2061 // Binary literals are a non-standard extension added in gcc 4.3:
2062 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2074 llvm_unreachable("Invalid radix!");
2078 // First, check for a zero value and just short circuit the logic below.
2081 Str
.push_back(*Prefix
);
2088 static const char Digits
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2090 if (isSingleWord()) {
2092 char *BufPtr
= std::end(Buffer
);
2098 int64_t I
= getSExtValue();
2108 Str
.push_back(*Prefix
);
2113 *--BufPtr
= Digits
[N
% Radix
];
2116 Str
.append(BufPtr
, std::end(Buffer
));
2122 if (Signed
&& isNegative()) {
2123 // They want to print the signed version and it is a negative value
2124 // Flip the bits and add one to turn it into the equivalent positive
2125 // value and put a '-' in the result.
2131 Str
.push_back(*Prefix
);
2135 // We insert the digits backward, then reverse them to get the right order.
2136 unsigned StartDig
= Str
.size();
2138 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139 // because the number of bits per digit (1, 3 and 4 respectively) divides
2140 // equally. We just shift until the value is zero.
2141 if (Radix
== 2 || Radix
== 8 || Radix
== 16) {
2142 // Just shift tmp right for each digit width until it becomes zero
2143 unsigned ShiftAmt
= (Radix
== 16 ? 4 : (Radix
== 8 ? 3 : 1));
2144 unsigned MaskAmt
= Radix
- 1;
2146 while (Tmp
.getBoolValue()) {
2147 unsigned Digit
= unsigned(Tmp
.getRawData()[0]) & MaskAmt
;
2148 Str
.push_back(Digits
[Digit
]);
2149 Tmp
.lshrInPlace(ShiftAmt
);
2152 while (Tmp
.getBoolValue()) {
2154 udivrem(Tmp
, Radix
, Tmp
, Digit
);
2155 assert(Digit
< Radix
&& "divide failed");
2156 Str
.push_back(Digits
[Digit
]);
2160 // Reverse the digits before returning.
2161 std::reverse(Str
.begin()+StartDig
, Str
.end());
2164 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2165 /// It is better to pass in a SmallVector/SmallString to the methods above.
2166 std::string
APInt::toString(unsigned Radix
= 10, bool Signed
= true) const {
2168 toString(S
, Radix
, Signed
, /* formatAsCLiteral = */false);
2172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173 LLVM_DUMP_METHOD
void APInt::dump() const {
2174 SmallString
<40> S
, U
;
2175 this->toStringUnsigned(U
);
2176 this->toStringSigned(S
);
2177 dbgs() << "APInt(" << BitWidth
<< "b, "
2178 << U
<< "u " << S
<< "s)\n";
2182 void APInt::print(raw_ostream
&OS
, bool isSigned
) const {
2184 this->toString(S
, 10, isSigned
, /* formatAsCLiteral = */false);
2188 // This implements a variety of operations on a representation of
2189 // arbitrary precision, two's-complement, bignum integer values.
2191 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2192 // and unrestricting assumption.
2193 static_assert(APInt::APINT_BITS_PER_WORD
% 2 == 0,
2194 "Part width must be divisible by 2!");
2196 /* Some handy functions local to this file. */
2198 /* Returns the integer part with the least significant BITS set.
2199 BITS cannot be zero. */
2200 static inline APInt::WordType
lowBitMask(unsigned bits
) {
2201 assert(bits
!= 0 && bits
<= APInt::APINT_BITS_PER_WORD
);
2203 return ~(APInt::WordType
) 0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
2206 /* Returns the value of the lower half of PART. */
2207 static inline APInt::WordType
lowHalf(APInt::WordType part
) {
2208 return part
& lowBitMask(APInt::APINT_BITS_PER_WORD
/ 2);
2211 /* Returns the value of the upper half of PART. */
2212 static inline APInt::WordType
highHalf(APInt::WordType part
) {
2213 return part
>> (APInt::APINT_BITS_PER_WORD
/ 2);
2216 /* Returns the bit number of the most significant set bit of a part.
2217 If the input number has no bits set -1U is returned. */
2218 static unsigned partMSB(APInt::WordType value
) {
2219 return findLastSet(value
, ZB_Max
);
2222 /* Returns the bit number of the least significant set bit of a
2223 part. If the input number has no bits set -1U is returned. */
2224 static unsigned partLSB(APInt::WordType value
) {
2225 return findFirstSet(value
, ZB_Max
);
2228 /* Sets the least significant part of a bignum to the input value, and
2229 zeroes out higher parts. */
2230 void APInt::tcSet(WordType
*dst
, WordType part
, unsigned parts
) {
2234 for (unsigned i
= 1; i
< parts
; i
++)
2238 /* Assign one bignum to another. */
2239 void APInt::tcAssign(WordType
*dst
, const WordType
*src
, unsigned parts
) {
2240 for (unsigned i
= 0; i
< parts
; i
++)
2244 /* Returns true if a bignum is zero, false otherwise. */
2245 bool APInt::tcIsZero(const WordType
*src
, unsigned parts
) {
2246 for (unsigned i
= 0; i
< parts
; i
++)
2253 /* Extract the given bit of a bignum; returns 0 or 1. */
2254 int APInt::tcExtractBit(const WordType
*parts
, unsigned bit
) {
2255 return (parts
[whichWord(bit
)] & maskBit(bit
)) != 0;
2258 /* Set the given bit of a bignum. */
2259 void APInt::tcSetBit(WordType
*parts
, unsigned bit
) {
2260 parts
[whichWord(bit
)] |= maskBit(bit
);
2263 /* Clears the given bit of a bignum. */
2264 void APInt::tcClearBit(WordType
*parts
, unsigned bit
) {
2265 parts
[whichWord(bit
)] &= ~maskBit(bit
);
2268 /* Returns the bit number of the least significant set bit of a
2269 number. If the input number has no bits set -1U is returned. */
2270 unsigned APInt::tcLSB(const WordType
*parts
, unsigned n
) {
2271 for (unsigned i
= 0; i
< n
; i
++) {
2272 if (parts
[i
] != 0) {
2273 unsigned lsb
= partLSB(parts
[i
]);
2275 return lsb
+ i
* APINT_BITS_PER_WORD
;
2282 /* Returns the bit number of the most significant set bit of a number.
2283 If the input number has no bits set -1U is returned. */
2284 unsigned APInt::tcMSB(const WordType
*parts
, unsigned n
) {
2288 if (parts
[n
] != 0) {
2289 unsigned msb
= partMSB(parts
[n
]);
2291 return msb
+ n
* APINT_BITS_PER_WORD
;
2298 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2299 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300 the least significant bit of DST. All high bits above srcBITS in
2301 DST are zero-filled. */
2303 APInt::tcExtract(WordType
*dst
, unsigned dstCount
, const WordType
*src
,
2304 unsigned srcBits
, unsigned srcLSB
) {
2305 unsigned dstParts
= (srcBits
+ APINT_BITS_PER_WORD
- 1) / APINT_BITS_PER_WORD
;
2306 assert(dstParts
<= dstCount
);
2308 unsigned firstSrcPart
= srcLSB
/ APINT_BITS_PER_WORD
;
2309 tcAssign (dst
, src
+ firstSrcPart
, dstParts
);
2311 unsigned shift
= srcLSB
% APINT_BITS_PER_WORD
;
2312 tcShiftRight (dst
, dstParts
, shift
);
2314 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315 in DST. If this is less that srcBits, append the rest, else
2316 clear the high bits. */
2317 unsigned n
= dstParts
* APINT_BITS_PER_WORD
- shift
;
2319 WordType mask
= lowBitMask (srcBits
- n
);
2320 dst
[dstParts
- 1] |= ((src
[firstSrcPart
+ dstParts
] & mask
)
2321 << n
% APINT_BITS_PER_WORD
);
2322 } else if (n
> srcBits
) {
2323 if (srcBits
% APINT_BITS_PER_WORD
)
2324 dst
[dstParts
- 1] &= lowBitMask (srcBits
% APINT_BITS_PER_WORD
);
2327 /* Clear high parts. */
2328 while (dstParts
< dstCount
)
2329 dst
[dstParts
++] = 0;
2332 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2333 APInt::WordType
APInt::tcAdd(WordType
*dst
, const WordType
*rhs
,
2334 WordType c
, unsigned parts
) {
2337 for (unsigned i
= 0; i
< parts
; i
++) {
2338 WordType l
= dst
[i
];
2340 dst
[i
] += rhs
[i
] + 1;
2351 /// This function adds a single "word" integer, src, to the multiple
2352 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2354 /// @returns the carry of the addition.
2355 APInt::WordType
APInt::tcAddPart(WordType
*dst
, WordType src
,
2357 for (unsigned i
= 0; i
< parts
; ++i
) {
2360 return 0; // No need to carry so exit early.
2361 src
= 1; // Carry one to next digit.
2367 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2368 APInt::WordType
APInt::tcSubtract(WordType
*dst
, const WordType
*rhs
,
2369 WordType c
, unsigned parts
) {
2372 for (unsigned i
= 0; i
< parts
; i
++) {
2373 WordType l
= dst
[i
];
2375 dst
[i
] -= rhs
[i
] + 1;
2386 /// This function subtracts a single "word" (64-bit word), src, from
2387 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388 /// no further borrowing is needed or it runs out of "words" in dst. The result
2389 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390 /// exhausted. In other words, if src > dst then this function returns 1,
2392 /// @returns the borrow out of the subtraction
2393 APInt::WordType
APInt::tcSubtractPart(WordType
*dst
, WordType src
,
2395 for (unsigned i
= 0; i
< parts
; ++i
) {
2396 WordType Dst
= dst
[i
];
2399 return 0; // No need to borrow so exit early.
2400 src
= 1; // We have to "borrow 1" from next "word"
2406 /* Negate a bignum in-place. */
2407 void APInt::tcNegate(WordType
*dst
, unsigned parts
) {
2408 tcComplement(dst
, parts
);
2409 tcIncrement(dst
, parts
);
2412 /* DST += SRC * MULTIPLIER + CARRY if add is true
2413 DST = SRC * MULTIPLIER + CARRY if add is false
2415 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2416 they must start at the same point, i.e. DST == SRC.
2418 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419 returned. Otherwise DST is filled with the least significant
2420 DSTPARTS parts of the result, and if all of the omitted higher
2421 parts were zero return zero, otherwise overflow occurred and
2423 int APInt::tcMultiplyPart(WordType
*dst
, const WordType
*src
,
2424 WordType multiplier
, WordType carry
,
2425 unsigned srcParts
, unsigned dstParts
,
2427 /* Otherwise our writes of DST kill our later reads of SRC. */
2428 assert(dst
<= src
|| dst
>= src
+ srcParts
);
2429 assert(dstParts
<= srcParts
+ 1);
2431 /* N loops; minimum of dstParts and srcParts. */
2432 unsigned n
= std::min(dstParts
, srcParts
);
2434 for (unsigned i
= 0; i
< n
; i
++) {
2435 WordType low
, mid
, high
, srcPart
;
2437 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2439 This cannot overflow, because
2441 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2443 which is less than n^2. */
2447 if (multiplier
== 0 || srcPart
== 0) {
2451 low
= lowHalf(srcPart
) * lowHalf(multiplier
);
2452 high
= highHalf(srcPart
) * highHalf(multiplier
);
2454 mid
= lowHalf(srcPart
) * highHalf(multiplier
);
2455 high
+= highHalf(mid
);
2456 mid
<<= APINT_BITS_PER_WORD
/ 2;
2457 if (low
+ mid
< low
)
2461 mid
= highHalf(srcPart
) * lowHalf(multiplier
);
2462 high
+= highHalf(mid
);
2463 mid
<<= APINT_BITS_PER_WORD
/ 2;
2464 if (low
+ mid
< low
)
2468 /* Now add carry. */
2469 if (low
+ carry
< low
)
2475 /* And now DST[i], and store the new low part there. */
2476 if (low
+ dst
[i
] < low
)
2485 if (srcParts
< dstParts
) {
2486 /* Full multiplication, there is no overflow. */
2487 assert(srcParts
+ 1 == dstParts
);
2488 dst
[srcParts
] = carry
;
2492 /* We overflowed if there is carry. */
2496 /* We would overflow if any significant unwritten parts would be
2497 non-zero. This is true if any remaining src parts are non-zero
2498 and the multiplier is non-zero. */
2500 for (unsigned i
= dstParts
; i
< srcParts
; i
++)
2504 /* We fitted in the narrow destination. */
2508 /* DST = LHS * RHS, where DST has the same width as the operands and
2509 is filled with the least significant parts of the result. Returns
2510 one if overflow occurred, otherwise zero. DST must be disjoint
2511 from both operands. */
2512 int APInt::tcMultiply(WordType
*dst
, const WordType
*lhs
,
2513 const WordType
*rhs
, unsigned parts
) {
2514 assert(dst
!= lhs
&& dst
!= rhs
);
2517 tcSet(dst
, 0, parts
);
2519 for (unsigned i
= 0; i
< parts
; i
++)
2520 overflow
|= tcMultiplyPart(&dst
[i
], lhs
, rhs
[i
], 0, parts
,
2526 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2527 /// operands. No overflow occurs. DST must be disjoint from both operands.
2528 void APInt::tcFullMultiply(WordType
*dst
, const WordType
*lhs
,
2529 const WordType
*rhs
, unsigned lhsParts
,
2530 unsigned rhsParts
) {
2531 /* Put the narrower number on the LHS for less loops below. */
2532 if (lhsParts
> rhsParts
)
2533 return tcFullMultiply (dst
, rhs
, lhs
, rhsParts
, lhsParts
);
2535 assert(dst
!= lhs
&& dst
!= rhs
);
2537 tcSet(dst
, 0, rhsParts
);
2539 for (unsigned i
= 0; i
< lhsParts
; i
++)
2540 tcMultiplyPart(&dst
[i
], rhs
, lhs
[i
], 0, rhsParts
, rhsParts
+ 1, true);
2543 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545 set REMAINDER to the remainder, return zero. i.e.
2547 OLD_LHS = RHS * LHS + REMAINDER
2549 SCRATCH is a bignum of the same size as the operands and result for
2550 use by the routine; its contents need not be initialized and are
2551 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2553 int APInt::tcDivide(WordType
*lhs
, const WordType
*rhs
,
2554 WordType
*remainder
, WordType
*srhs
,
2556 assert(lhs
!= remainder
&& lhs
!= srhs
&& remainder
!= srhs
);
2558 unsigned shiftCount
= tcMSB(rhs
, parts
) + 1;
2559 if (shiftCount
== 0)
2562 shiftCount
= parts
* APINT_BITS_PER_WORD
- shiftCount
;
2563 unsigned n
= shiftCount
/ APINT_BITS_PER_WORD
;
2564 WordType mask
= (WordType
) 1 << (shiftCount
% APINT_BITS_PER_WORD
);
2566 tcAssign(srhs
, rhs
, parts
);
2567 tcShiftLeft(srhs
, parts
, shiftCount
);
2568 tcAssign(remainder
, lhs
, parts
);
2569 tcSet(lhs
, 0, parts
);
2571 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2574 int compare
= tcCompare(remainder
, srhs
, parts
);
2576 tcSubtract(remainder
, srhs
, 0, parts
);
2580 if (shiftCount
== 0)
2583 tcShiftRight(srhs
, parts
, 1);
2584 if ((mask
>>= 1) == 0) {
2585 mask
= (WordType
) 1 << (APINT_BITS_PER_WORD
- 1);
2593 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594 /// no restrictions on Count.
2595 void APInt::tcShiftLeft(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2596 // Don't bother performing a no-op shift.
2600 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2602 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2604 // Fastpath for moving by whole words.
2605 if (BitShift
== 0) {
2606 std::memmove(Dst
+ WordShift
, Dst
, (Words
- WordShift
) * APINT_WORD_SIZE
);
2608 while (Words
-- > WordShift
) {
2609 Dst
[Words
] = Dst
[Words
- WordShift
] << BitShift
;
2610 if (Words
> WordShift
)
2612 Dst
[Words
- WordShift
- 1] >> (APINT_BITS_PER_WORD
- BitShift
);
2616 // Fill in the remainder with 0s.
2617 std::memset(Dst
, 0, WordShift
* APINT_WORD_SIZE
);
2620 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621 /// are no restrictions on Count.
2622 void APInt::tcShiftRight(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2623 // Don't bother performing a no-op shift.
2627 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2629 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2631 unsigned WordsToMove
= Words
- WordShift
;
2632 // Fastpath for moving by whole words.
2633 if (BitShift
== 0) {
2634 std::memmove(Dst
, Dst
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
2636 for (unsigned i
= 0; i
!= WordsToMove
; ++i
) {
2637 Dst
[i
] = Dst
[i
+ WordShift
] >> BitShift
;
2638 if (i
+ 1 != WordsToMove
)
2639 Dst
[i
] |= Dst
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
);
2643 // Fill in the remainder with 0s.
2644 std::memset(Dst
+ WordsToMove
, 0, WordShift
* APINT_WORD_SIZE
);
2647 /* Bitwise and of two bignums. */
2648 void APInt::tcAnd(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2649 for (unsigned i
= 0; i
< parts
; i
++)
2653 /* Bitwise inclusive or of two bignums. */
2654 void APInt::tcOr(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2655 for (unsigned i
= 0; i
< parts
; i
++)
2659 /* Bitwise exclusive or of two bignums. */
2660 void APInt::tcXor(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2661 for (unsigned i
= 0; i
< parts
; i
++)
2665 /* Complement a bignum in-place. */
2666 void APInt::tcComplement(WordType
*dst
, unsigned parts
) {
2667 for (unsigned i
= 0; i
< parts
; i
++)
2671 /* Comparison (unsigned) of two bignums. */
2672 int APInt::tcCompare(const WordType
*lhs
, const WordType
*rhs
,
2676 if (lhs
[parts
] != rhs
[parts
])
2677 return (lhs
[parts
] > rhs
[parts
]) ? 1 : -1;
2683 /* Set the least significant BITS bits of a bignum, clear the
2685 void APInt::tcSetLeastSignificantBits(WordType
*dst
, unsigned parts
,
2688 while (bits
> APINT_BITS_PER_WORD
) {
2689 dst
[i
++] = ~(WordType
) 0;
2690 bits
-= APINT_BITS_PER_WORD
;
2694 dst
[i
++] = ~(WordType
) 0 >> (APINT_BITS_PER_WORD
- bits
);
2700 APInt
llvm::APIntOps::RoundingUDiv(const APInt
&A
, const APInt
&B
,
2701 APInt::Rounding RM
) {
2702 // Currently udivrem always rounds down.
2704 case APInt::Rounding::DOWN
:
2705 case APInt::Rounding::TOWARD_ZERO
:
2707 case APInt::Rounding::UP
: {
2709 APInt::udivrem(A
, B
, Quo
, Rem
);
2715 llvm_unreachable("Unknown APInt::Rounding enum");
2718 APInt
llvm::APIntOps::RoundingSDiv(const APInt
&A
, const APInt
&B
,
2719 APInt::Rounding RM
) {
2721 case APInt::Rounding::DOWN
:
2722 case APInt::Rounding::UP
: {
2724 APInt::sdivrem(A
, B
, Quo
, Rem
);
2727 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728 // We want to check whether the non-integer part of the mathematical value
2729 // is negative or not. If the non-integer part is negative, we need to round
2730 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731 // already rounded down.
2732 if (RM
== APInt::Rounding::DOWN
) {
2733 if (Rem
.isNegative() != B
.isNegative())
2737 if (Rem
.isNegative() != B
.isNegative())
2741 // Currently sdiv rounds twards zero.
2742 case APInt::Rounding::TOWARD_ZERO
:
2745 llvm_unreachable("Unknown APInt::Rounding enum");
2749 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A
, APInt B
, APInt C
,
2750 unsigned RangeWidth
) {
2751 unsigned CoeffWidth
= A
.getBitWidth();
2752 assert(CoeffWidth
== B
.getBitWidth() && CoeffWidth
== C
.getBitWidth());
2753 assert(RangeWidth
<= CoeffWidth
&&
2754 "Value range width should be less than coefficient width");
2755 assert(RangeWidth
> 1 && "Value range bit width should be > 1");
2757 LLVM_DEBUG(dbgs() << __func__
<< ": solving " << A
<< "x^2 + " << B
2758 << "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2760 // Identify 0 as a (non)solution immediately.
2761 if (C
.sextOrTrunc(RangeWidth
).isNullValue() ) {
2762 LLVM_DEBUG(dbgs() << __func__
<< ": zero solution\n");
2763 return APInt(CoeffWidth
, 0);
2766 // The result of APInt arithmetic has the same bit width as the operands,
2767 // so it can actually lose high bits. A product of two n-bit integers needs
2768 // 2n-1 bits to represent the full value.
2769 // The operation done below (on quadratic coefficients) that can produce
2770 // the largest value is the evaluation of the equation during bisection,
2771 // which needs 3 times the bitwidth of the coefficient, so the total number
2772 // of required bits is 3n.
2774 // The purpose of this extension is to simulate the set Z of all integers,
2775 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776 // and negative numbers (not so much in a modulo arithmetic). The method
2777 // used to solve the equation is based on the standard formula for real
2778 // numbers, and uses the concepts of "positive" and "negative" with their
2781 A
= A
.sext(CoeffWidth
);
2782 B
= B
.sext(CoeffWidth
);
2783 C
= C
.sext(CoeffWidth
);
2785 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786 // the bit width has increased.
2787 if (A
.isNegative()) {
2793 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795 // and R = 2^BitWidth.
2796 // Since we're trying not only to find exact solutions, but also values
2797 // that "wrap around", such a set will always have a solution, i.e. an x
2798 // that satisfies at least one of the equations, or such that |q(x)|
2799 // exceeds kR, while |q(x-1)| for the same k does not.
2801 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802 // positive solution n (in the above sense), and also such that the n
2803 // will be the least among all solutions corresponding to k = 0, 1, ...
2804 // (more precisely, the least element in the set
2805 // { n(k) | k is such that a solution n(k) exists }).
2807 // Consider the parabola (over real numbers) that corresponds to the
2808 // quadratic equation. Since A > 0, the arms of the parabola will point
2809 // up. Picking different values of k will shift it up and down by R.
2811 // We want to shift the parabola in such a way as to reduce the problem
2812 // of solving q(x) = kR to solving shifted_q(x) = 0.
2813 // (The interesting solutions are the ceilings of the real number
2815 APInt R
= APInt::getOneBitSet(CoeffWidth
, RangeWidth
);
2820 auto RoundUp
= [] (const APInt
&V
, const APInt
&A
) -> APInt
{
2821 assert(A
.isStrictlyPositive());
2822 APInt T
= V
.abs().urem(A
);
2823 if (T
.isNullValue())
2825 return V
.isNegative() ? V
+T
: V
+(A
-T
);
2828 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829 // iff B is positive.
2830 if (B
.isNonNegative()) {
2831 // If B >= 0, the vertex it at a negative location (or at 0), so in
2832 // order to have a non-negative solution we need to pick k that makes
2833 // C-kR negative. To satisfy all the requirements for the solution
2834 // that we are looking for, it needs to be closest to 0 of all k.
2836 if (C
.isStrictlyPositive())
2838 // Pick the greater solution.
2841 // If B < 0, the vertex is at a positive location. For any solution
2842 // to exist, the discriminant must be non-negative. This means that
2843 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844 // lower bound on values of k: kR >= C - B^2/4A.
2845 APInt LowkR
= C
- SqrB
.udiv(2*TwoA
); // udiv because all values > 0.
2846 // Round LowkR up (towards +inf) to the nearest kR.
2847 LowkR
= RoundUp(LowkR
, R
);
2849 // If there exists k meeting the condition above, and such that
2850 // C-kR > 0, there will be two positive real number solutions of
2851 // q(x) = kR. Out of all such values of k, pick the one that makes
2852 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853 // In other words, find maximum k such that LowkR <= kR < C.
2855 // If LowkR < C, then such a k is guaranteed to exist because
2856 // LowkR itself is a multiple of R.
2857 C
-= -RoundUp(-C
, R
); // C = C - RoundDown(C, R)
2858 // Pick the smaller solution.
2861 // If C-kR < 0 for all potential k's, it means that one solution
2862 // will be negative, while the other will be positive. The positive
2863 // solution will shift towards 0 if the parabola is moved up.
2864 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865 // to 0, or in other words, out of all parabolas that have solutions,
2866 // pick the one that is the farthest "up").
2867 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2869 // Pick the greater solution.
2874 LLVM_DEBUG(dbgs() << __func__
<< ": updated coefficients " << A
<< "x^2 + "
2875 << B
<< "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2877 APInt D
= SqrB
- 4*A
*C
;
2878 assert(D
.isNonNegative() && "Negative discriminant");
2879 APInt SQ
= D
.sqrt();
2882 bool InexactSQ
= Q
!= D
;
2883 // The calculated SQ may actually be greater than the exact (non-integer)
2884 // value. If that's the case, decremement SQ to get a value that is lower.
2891 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892 // When using the quadratic formula directly, the calculated low root
2893 // may be greater than the exact one, since we would be subtracting SQ.
2894 // To make sure that the calculated root is not greater than the exact
2895 // one, subtract SQ+1 when calculating the low root (for inexact value
2898 APInt::sdivrem(-B
- (SQ
+InexactSQ
), TwoA
, X
, Rem
);
2900 APInt::sdivrem(-B
+ SQ
, TwoA
, X
, Rem
);
2902 // The updated coefficients should be such that the (exact) solution is
2903 // positive. Since APInt division rounds towards 0, the calculated one
2904 // can be 0, but cannot be negative.
2905 assert(X
.isNonNegative() && "Solution should be non-negative");
2907 if (!InexactSQ
&& Rem
.isNullValue()) {
2908 LLVM_DEBUG(dbgs() << __func__
<< ": solution (root): " << X
<< '\n');
2912 assert((SQ
*SQ
).sle(D
) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913 // The exact value of the square root of D should be between SQ and SQ+1.
2914 // This implies that the solution should be between that corresponding to
2915 // SQ (i.e. X) and that corresponding to SQ+1.
2917 // The calculated X cannot be greater than the exact (real) solution.
2918 // Actually it must be strictly less than the exact solution, while
2919 // X+1 will be greater than or equal to it.
2921 APInt VX
= (A
*X
+ B
)*X
+ C
;
2922 APInt VY
= VX
+ TwoA
*X
+ A
+ B
;
2923 bool SignChange
= VX
.isNegative() != VY
.isNegative() ||
2924 VX
.isNullValue() != VY
.isNullValue();
2925 // If the sign did not change between X and X+1, X is not a valid solution.
2926 // This could happen when the actual (exact) roots don't have an integer
2927 // between them, so they would both be contained between X and X+1.
2929 LLVM_DEBUG(dbgs() << __func__
<< ": no valid solution\n");
2934 LLVM_DEBUG(dbgs() << __func__
<< ": solution (wrap): " << X
<< '\n');
2938 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939 /// with the integer held in IntVal.
2940 void llvm::StoreIntToMemory(const APInt
&IntVal
, uint8_t *Dst
,
2941 unsigned StoreBytes
) {
2942 assert((IntVal
.getBitWidth()+7)/8 >= StoreBytes
&& "Integer too small!");
2943 const uint8_t *Src
= (const uint8_t *)IntVal
.getRawData();
2945 if (sys::IsLittleEndianHost
) {
2946 // Little-endian host - the source is ordered from LSB to MSB. Order the
2947 // destination from LSB to MSB: Do a straight copy.
2948 memcpy(Dst
, Src
, StoreBytes
);
2950 // Big-endian host - the source is an array of 64 bit words ordered from
2951 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
2952 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953 while (StoreBytes
> sizeof(uint64_t)) {
2954 StoreBytes
-= sizeof(uint64_t);
2955 // May not be aligned so use memcpy.
2956 memcpy(Dst
+ StoreBytes
, Src
, sizeof(uint64_t));
2957 Src
+= sizeof(uint64_t);
2960 memcpy(Dst
, Src
+ sizeof(uint64_t) - StoreBytes
, StoreBytes
);
2964 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966 void llvm::LoadIntFromMemory(APInt
&IntVal
, uint8_t *Src
, unsigned LoadBytes
) {
2967 assert((IntVal
.getBitWidth()+7)/8 >= LoadBytes
&& "Integer too small!");
2968 uint8_t *Dst
= reinterpret_cast<uint8_t *>(
2969 const_cast<uint64_t *>(IntVal
.getRawData()));
2971 if (sys::IsLittleEndianHost
)
2972 // Little-endian host - the destination must be ordered from LSB to MSB.
2973 // The source is ordered from LSB to MSB: Do a straight copy.
2974 memcpy(Dst
, Src
, LoadBytes
);
2976 // Big-endian - the destination is an array of 64 bit words ordered from
2977 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
2978 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2980 while (LoadBytes
> sizeof(uint64_t)) {
2981 LoadBytes
-= sizeof(uint64_t);
2982 // May not be aligned so use memcpy.
2983 memcpy(Dst
, Src
+ LoadBytes
, sizeof(uint64_t));
2984 Dst
+= sizeof(uint64_t);
2987 memcpy(Dst
+ sizeof(uint64_t) - LoadBytes
, Src
, LoadBytes
);