1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/bit.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
34 #define DEBUG_TYPE "apint"
36 /// A utility function for allocating memory, checking for allocation failures,
37 /// and ensuring the contents are zeroed.
38 inline static uint64_t* getClearedMemory(unsigned numWords
) {
39 uint64_t *result
= new uint64_t[numWords
];
40 memset(result
, 0, numWords
* sizeof(uint64_t));
44 /// A utility function for allocating memory and checking for allocation
45 /// failure. The content is not zeroed.
46 inline static uint64_t* getMemory(unsigned numWords
) {
47 return new uint64_t[numWords
];
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit
, uint8_t radix
) {
54 if (radix
== 16 || radix
== 36) {
78 void APInt::initSlowCase(uint64_t val
, bool isSigned
) {
79 U
.pVal
= getClearedMemory(getNumWords());
81 if (isSigned
&& int64_t(val
) < 0)
82 for (unsigned i
= 1; i
< getNumWords(); ++i
)
83 U
.pVal
[i
] = WORDTYPE_MAX
;
87 void APInt::initSlowCase(const APInt
& that
) {
88 U
.pVal
= getMemory(getNumWords());
89 memcpy(U
.pVal
, that
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
92 void APInt::initFromArray(ArrayRef
<uint64_t> bigVal
) {
93 assert(BitWidth
&& "Bitwidth too small");
94 assert(bigVal
.data() && "Null pointer detected!");
98 // Get memory, cleared to 0
99 U
.pVal
= getClearedMemory(getNumWords());
100 // Calculate the number of words to copy
101 unsigned words
= std::min
<unsigned>(bigVal
.size(), getNumWords());
102 // Copy the words from bigVal to pVal
103 memcpy(U
.pVal
, bigVal
.data(), words
* APINT_WORD_SIZE
);
105 // Make sure unused high bits are cleared
109 APInt::APInt(unsigned numBits
, ArrayRef
<uint64_t> bigVal
)
110 : BitWidth(numBits
) {
111 initFromArray(bigVal
);
114 APInt::APInt(unsigned numBits
, unsigned numWords
, const uint64_t bigVal
[])
115 : BitWidth(numBits
) {
116 initFromArray(makeArrayRef(bigVal
, numWords
));
119 APInt::APInt(unsigned numbits
, StringRef Str
, uint8_t radix
)
120 : BitWidth(numbits
) {
121 assert(BitWidth
&& "Bitwidth too small");
122 fromString(numbits
, Str
, radix
);
125 void APInt::reallocate(unsigned NewBitWidth
) {
126 // If the number of words is the same we can just change the width and stop.
127 if (getNumWords() == getNumWords(NewBitWidth
)) {
128 BitWidth
= NewBitWidth
;
132 // If we have an allocation, delete it.
137 BitWidth
= NewBitWidth
;
139 // If we are supposed to have an allocation, create it.
141 U
.pVal
= getMemory(getNumWords());
144 void APInt::AssignSlowCase(const APInt
& RHS
) {
145 // Don't do anything for X = X
149 // Adjust the bit width and handle allocations as necessary.
150 reallocate(RHS
.getBitWidth());
156 memcpy(U
.pVal
, RHS
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
159 /// This method 'profiles' an APInt for use with FoldingSet.
160 void APInt::Profile(FoldingSetNodeID
& ID
) const {
161 ID
.AddInteger(BitWidth
);
163 if (isSingleWord()) {
164 ID
.AddInteger(U
.VAL
);
168 unsigned NumWords
= getNumWords();
169 for (unsigned i
= 0; i
< NumWords
; ++i
)
170 ID
.AddInteger(U
.pVal
[i
]);
173 /// Prefix increment operator. Increments the APInt by one.
174 APInt
& APInt::operator++() {
178 tcIncrement(U
.pVal
, getNumWords());
179 return clearUnusedBits();
182 /// Prefix decrement operator. Decrements the APInt by one.
183 APInt
& APInt::operator--() {
187 tcDecrement(U
.pVal
, getNumWords());
188 return clearUnusedBits();
191 /// Adds the RHS APint to this APInt.
192 /// @returns this, after addition of RHS.
193 /// Addition assignment operator.
194 APInt
& APInt::operator+=(const APInt
& RHS
) {
195 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
199 tcAdd(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
200 return clearUnusedBits();
203 APInt
& APInt::operator+=(uint64_t RHS
) {
207 tcAddPart(U
.pVal
, RHS
, getNumWords());
208 return clearUnusedBits();
211 /// Subtracts the RHS APInt from this APInt
212 /// @returns this, after subtraction
213 /// Subtraction assignment operator.
214 APInt
& APInt::operator-=(const APInt
& RHS
) {
215 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
219 tcSubtract(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
220 return clearUnusedBits();
223 APInt
& APInt::operator-=(uint64_t RHS
) {
227 tcSubtractPart(U
.pVal
, RHS
, getNumWords());
228 return clearUnusedBits();
231 APInt
APInt::operator*(const APInt
& RHS
) const {
232 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
234 return APInt(BitWidth
, U
.VAL
* RHS
.U
.VAL
);
236 APInt
Result(getMemory(getNumWords()), getBitWidth());
238 tcMultiply(Result
.U
.pVal
, U
.pVal
, RHS
.U
.pVal
, getNumWords());
240 Result
.clearUnusedBits();
244 void APInt::AndAssignSlowCase(const APInt
& RHS
) {
245 tcAnd(U
.pVal
, RHS
.U
.pVal
, getNumWords());
248 void APInt::OrAssignSlowCase(const APInt
& RHS
) {
249 tcOr(U
.pVal
, RHS
.U
.pVal
, getNumWords());
252 void APInt::XorAssignSlowCase(const APInt
& RHS
) {
253 tcXor(U
.pVal
, RHS
.U
.pVal
, getNumWords());
256 APInt
& APInt::operator*=(const APInt
& RHS
) {
257 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
262 APInt
& APInt::operator*=(uint64_t RHS
) {
263 if (isSingleWord()) {
266 unsigned NumWords
= getNumWords();
267 tcMultiplyPart(U
.pVal
, U
.pVal
, RHS
, 0, NumWords
, NumWords
, false);
269 return clearUnusedBits();
272 bool APInt::EqualSlowCase(const APInt
& RHS
) const {
273 return std::equal(U
.pVal
, U
.pVal
+ getNumWords(), RHS
.U
.pVal
);
276 int APInt::compare(const APInt
& RHS
) const {
277 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
279 return U
.VAL
< RHS
.U
.VAL
? -1 : U
.VAL
> RHS
.U
.VAL
;
281 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
284 int APInt::compareSigned(const APInt
& RHS
) const {
285 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
286 if (isSingleWord()) {
287 int64_t lhsSext
= SignExtend64(U
.VAL
, BitWidth
);
288 int64_t rhsSext
= SignExtend64(RHS
.U
.VAL
, BitWidth
);
289 return lhsSext
< rhsSext
? -1 : lhsSext
> rhsSext
;
292 bool lhsNeg
= isNegative();
293 bool rhsNeg
= RHS
.isNegative();
295 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
296 if (lhsNeg
!= rhsNeg
)
297 return lhsNeg
? -1 : 1;
299 // Otherwise we can just use an unsigned comparison, because even negative
300 // numbers compare correctly this way if both have the same signed-ness.
301 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
304 void APInt::setBitsSlowCase(unsigned loBit
, unsigned hiBit
) {
305 unsigned loWord
= whichWord(loBit
);
306 unsigned hiWord
= whichWord(hiBit
);
308 // Create an initial mask for the low word with zeros below loBit.
309 uint64_t loMask
= WORDTYPE_MAX
<< whichBit(loBit
);
311 // If hiBit is not aligned, we need a high mask.
312 unsigned hiShiftAmt
= whichBit(hiBit
);
313 if (hiShiftAmt
!= 0) {
314 // Create a high mask with zeros above hiBit.
315 uint64_t hiMask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- hiShiftAmt
);
316 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
317 // set the bits in hiWord.
318 if (hiWord
== loWord
)
321 U
.pVal
[hiWord
] |= hiMask
;
323 // Apply the mask to the low word.
324 U
.pVal
[loWord
] |= loMask
;
326 // Fill any words between loWord and hiWord with all ones.
327 for (unsigned word
= loWord
+ 1; word
< hiWord
; ++word
)
328 U
.pVal
[word
] = WORDTYPE_MAX
;
331 /// Toggle every bit to its opposite value.
332 void APInt::flipAllBitsSlowCase() {
333 tcComplement(U
.pVal
, getNumWords());
337 /// Toggle a given bit to its opposite value whose position is given
338 /// as "bitPosition".
339 /// Toggles a given bit to its opposite value.
340 void APInt::flipBit(unsigned bitPosition
) {
341 assert(bitPosition
< BitWidth
&& "Out of the bit-width range!");
342 if ((*this)[bitPosition
]) clearBit(bitPosition
);
343 else setBit(bitPosition
);
346 void APInt::insertBits(const APInt
&subBits
, unsigned bitPosition
) {
347 unsigned subBitWidth
= subBits
.getBitWidth();
348 assert(0 < subBitWidth
&& (subBitWidth
+ bitPosition
) <= BitWidth
&&
349 "Illegal bit insertion");
351 // Insertion is a direct copy.
352 if (subBitWidth
== BitWidth
) {
357 // Single word result can be done as a direct bitmask.
358 if (isSingleWord()) {
359 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
360 U
.VAL
&= ~(mask
<< bitPosition
);
361 U
.VAL
|= (subBits
.U
.VAL
<< bitPosition
);
365 unsigned loBit
= whichBit(bitPosition
);
366 unsigned loWord
= whichWord(bitPosition
);
367 unsigned hi1Word
= whichWord(bitPosition
+ subBitWidth
- 1);
369 // Insertion within a single word can be done as a direct bitmask.
370 if (loWord
== hi1Word
) {
371 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
372 U
.pVal
[loWord
] &= ~(mask
<< loBit
);
373 U
.pVal
[loWord
] |= (subBits
.U
.VAL
<< loBit
);
377 // Insert on word boundaries.
379 // Direct copy whole words.
380 unsigned numWholeSubWords
= subBitWidth
/ APINT_BITS_PER_WORD
;
381 memcpy(U
.pVal
+ loWord
, subBits
.getRawData(),
382 numWholeSubWords
* APINT_WORD_SIZE
);
384 // Mask+insert remaining bits.
385 unsigned remainingBits
= subBitWidth
% APINT_BITS_PER_WORD
;
386 if (remainingBits
!= 0) {
387 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- remainingBits
);
388 U
.pVal
[hi1Word
] &= ~mask
;
389 U
.pVal
[hi1Word
] |= subBits
.getWord(subBitWidth
- 1);
394 // General case - set/clear individual bits in dst based on src.
395 // TODO - there is scope for optimization here, but at the moment this code
396 // path is barely used so prefer readability over performance.
397 for (unsigned i
= 0; i
!= subBitWidth
; ++i
) {
399 setBit(bitPosition
+ i
);
401 clearBit(bitPosition
+ i
);
405 APInt
APInt::extractBits(unsigned numBits
, unsigned bitPosition
) const {
406 assert(numBits
> 0 && "Can't extract zero bits");
407 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
408 "Illegal bit extraction");
411 return APInt(numBits
, U
.VAL
>> bitPosition
);
413 unsigned loBit
= whichBit(bitPosition
);
414 unsigned loWord
= whichWord(bitPosition
);
415 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
417 // Single word result extracting bits from a single word source.
418 if (loWord
== hiWord
)
419 return APInt(numBits
, U
.pVal
[loWord
] >> loBit
);
421 // Extracting bits that start on a source word boundary can be done
422 // as a fast memory copy.
424 return APInt(numBits
, makeArrayRef(U
.pVal
+ loWord
, 1 + hiWord
- loWord
));
426 // General case - shift + copy source words directly into place.
427 APInt
Result(numBits
, 0);
428 unsigned NumSrcWords
= getNumWords();
429 unsigned NumDstWords
= Result
.getNumWords();
431 uint64_t *DestPtr
= Result
.isSingleWord() ? &Result
.U
.VAL
: Result
.U
.pVal
;
432 for (unsigned word
= 0; word
< NumDstWords
; ++word
) {
433 uint64_t w0
= U
.pVal
[loWord
+ word
];
435 (loWord
+ word
+ 1) < NumSrcWords
? U
.pVal
[loWord
+ word
+ 1] : 0;
436 DestPtr
[word
] = (w0
>> loBit
) | (w1
<< (APINT_BITS_PER_WORD
- loBit
));
439 return Result
.clearUnusedBits();
442 unsigned APInt::getBitsNeeded(StringRef str
, uint8_t radix
) {
443 assert(!str
.empty() && "Invalid string length");
444 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
446 "Radix should be 2, 8, 10, 16, or 36!");
448 size_t slen
= str
.size();
450 // Each computation below needs to know if it's negative.
451 StringRef::iterator p
= str
.begin();
452 unsigned isNegative
= *p
== '-';
453 if (*p
== '-' || *p
== '+') {
456 assert(slen
&& "String is only a sign, needs a value.");
459 // For radixes of power-of-two values, the bits required is accurately and
462 return slen
+ isNegative
;
464 return slen
* 3 + isNegative
;
466 return slen
* 4 + isNegative
;
470 // This is grossly inefficient but accurate. We could probably do something
471 // with a computation of roughly slen*64/20 and then adjust by the value of
472 // the first few digits. But, I'm not sure how accurate that could be.
474 // Compute a sufficient number of bits that is always large enough but might
475 // be too large. This avoids the assertion in the constructor. This
476 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
477 // bits in that case.
479 = radix
== 10? (slen
== 1 ? 4 : slen
* 64/18)
480 : (slen
== 1 ? 7 : slen
* 16/3);
482 // Convert to the actual binary value.
483 APInt
tmp(sufficient
, StringRef(p
, slen
), radix
);
485 // Compute how many bits are required. If the log is infinite, assume we need
487 unsigned log
= tmp
.logBase2();
488 if (log
== (unsigned)-1) {
489 return isNegative
+ 1;
491 return isNegative
+ log
+ 1;
495 hash_code
llvm::hash_value(const APInt
&Arg
) {
496 if (Arg
.isSingleWord())
497 return hash_combine(Arg
.U
.VAL
);
499 return hash_combine_range(Arg
.U
.pVal
, Arg
.U
.pVal
+ Arg
.getNumWords());
502 bool APInt::isSplat(unsigned SplatSizeInBits
) const {
503 assert(getBitWidth() % SplatSizeInBits
== 0 &&
504 "SplatSizeInBits must divide width!");
505 // We can check that all parts of an integer are equal by making use of a
506 // little trick: rotate and check if it's still the same value.
507 return *this == rotl(SplatSizeInBits
);
510 /// This function returns the high "numBits" bits of this APInt.
511 APInt
APInt::getHiBits(unsigned numBits
) const {
512 return this->lshr(BitWidth
- numBits
);
515 /// This function returns the low "numBits" bits of this APInt.
516 APInt
APInt::getLoBits(unsigned numBits
) const {
517 APInt
Result(getLowBitsSet(BitWidth
, numBits
));
522 /// Return a value containing V broadcasted over NewLen bits.
523 APInt
APInt::getSplat(unsigned NewLen
, const APInt
&V
) {
524 assert(NewLen
>= V
.getBitWidth() && "Can't splat to smaller bit width!");
526 APInt Val
= V
.zextOrSelf(NewLen
);
527 for (unsigned I
= V
.getBitWidth(); I
< NewLen
; I
<<= 1)
533 unsigned APInt::countLeadingZerosSlowCase() const {
535 for (int i
= getNumWords()-1; i
>= 0; --i
) {
536 uint64_t V
= U
.pVal
[i
];
538 Count
+= APINT_BITS_PER_WORD
;
540 Count
+= llvm::countLeadingZeros(V
);
544 // Adjust for unused bits in the most significant word (they are zero).
545 unsigned Mod
= BitWidth
% APINT_BITS_PER_WORD
;
546 Count
-= Mod
> 0 ? APINT_BITS_PER_WORD
- Mod
: 0;
550 unsigned APInt::countLeadingOnesSlowCase() const {
551 unsigned highWordBits
= BitWidth
% APINT_BITS_PER_WORD
;
554 highWordBits
= APINT_BITS_PER_WORD
;
557 shift
= APINT_BITS_PER_WORD
- highWordBits
;
559 int i
= getNumWords() - 1;
560 unsigned Count
= llvm::countLeadingOnes(U
.pVal
[i
] << shift
);
561 if (Count
== highWordBits
) {
562 for (i
--; i
>= 0; --i
) {
563 if (U
.pVal
[i
] == WORDTYPE_MAX
)
564 Count
+= APINT_BITS_PER_WORD
;
566 Count
+= llvm::countLeadingOnes(U
.pVal
[i
]);
574 unsigned APInt::countTrailingZerosSlowCase() const {
577 for (; i
< getNumWords() && U
.pVal
[i
] == 0; ++i
)
578 Count
+= APINT_BITS_PER_WORD
;
579 if (i
< getNumWords())
580 Count
+= llvm::countTrailingZeros(U
.pVal
[i
]);
581 return std::min(Count
, BitWidth
);
584 unsigned APInt::countTrailingOnesSlowCase() const {
587 for (; i
< getNumWords() && U
.pVal
[i
] == WORDTYPE_MAX
; ++i
)
588 Count
+= APINT_BITS_PER_WORD
;
589 if (i
< getNumWords())
590 Count
+= llvm::countTrailingOnes(U
.pVal
[i
]);
591 assert(Count
<= BitWidth
);
595 unsigned APInt::countPopulationSlowCase() const {
597 for (unsigned i
= 0; i
< getNumWords(); ++i
)
598 Count
+= llvm::countPopulation(U
.pVal
[i
]);
602 bool APInt::intersectsSlowCase(const APInt
&RHS
) const {
603 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
604 if ((U
.pVal
[i
] & RHS
.U
.pVal
[i
]) != 0)
610 bool APInt::isSubsetOfSlowCase(const APInt
&RHS
) const {
611 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
612 if ((U
.pVal
[i
] & ~RHS
.U
.pVal
[i
]) != 0)
618 APInt
APInt::byteSwap() const {
619 assert(BitWidth
>= 16 && BitWidth
% 16 == 0 && "Cannot byteswap!");
621 return APInt(BitWidth
, ByteSwap_16(uint16_t(U
.VAL
)));
623 return APInt(BitWidth
, ByteSwap_32(unsigned(U
.VAL
)));
624 if (BitWidth
== 48) {
625 unsigned Tmp1
= unsigned(U
.VAL
>> 16);
626 Tmp1
= ByteSwap_32(Tmp1
);
627 uint16_t Tmp2
= uint16_t(U
.VAL
);
628 Tmp2
= ByteSwap_16(Tmp2
);
629 return APInt(BitWidth
, (uint64_t(Tmp2
) << 32) | Tmp1
);
632 return APInt(BitWidth
, ByteSwap_64(U
.VAL
));
634 APInt
Result(getNumWords() * APINT_BITS_PER_WORD
, 0);
635 for (unsigned I
= 0, N
= getNumWords(); I
!= N
; ++I
)
636 Result
.U
.pVal
[I
] = ByteSwap_64(U
.pVal
[N
- I
- 1]);
637 if (Result
.BitWidth
!= BitWidth
) {
638 Result
.lshrInPlace(Result
.BitWidth
- BitWidth
);
639 Result
.BitWidth
= BitWidth
;
644 APInt
APInt::reverseBits() const {
647 return APInt(BitWidth
, llvm::reverseBits
<uint64_t>(U
.VAL
));
649 return APInt(BitWidth
, llvm::reverseBits
<uint32_t>(U
.VAL
));
651 return APInt(BitWidth
, llvm::reverseBits
<uint16_t>(U
.VAL
));
653 return APInt(BitWidth
, llvm::reverseBits
<uint8_t>(U
.VAL
));
659 APInt
Reversed(BitWidth
, 0);
660 unsigned S
= BitWidth
;
662 for (; Val
!= 0; Val
.lshrInPlace(1)) {
672 APInt
llvm::APIntOps::GreatestCommonDivisor(APInt A
, APInt B
) {
673 // Fast-path a common case.
674 if (A
== B
) return A
;
676 // Corner cases: if either operand is zero, the other is the gcd.
680 // Count common powers of 2 and remove all other powers of 2.
683 unsigned Pow2_A
= A
.countTrailingZeros();
684 unsigned Pow2_B
= B
.countTrailingZeros();
685 if (Pow2_A
> Pow2_B
) {
686 A
.lshrInPlace(Pow2_A
- Pow2_B
);
688 } else if (Pow2_B
> Pow2_A
) {
689 B
.lshrInPlace(Pow2_B
- Pow2_A
);
696 // Both operands are odd multiples of 2^Pow_2:
698 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
700 // This is a modified version of Stein's algorithm, taking advantage of
701 // efficient countTrailingZeros().
705 A
.lshrInPlace(A
.countTrailingZeros() - Pow2
);
708 B
.lshrInPlace(B
.countTrailingZeros() - Pow2
);
715 APInt
llvm::APIntOps::RoundDoubleToAPInt(double Double
, unsigned width
) {
716 uint64_t I
= bit_cast
<uint64_t>(Double
);
718 // Get the sign bit from the highest order bit
719 bool isNeg
= I
>> 63;
721 // Get the 11-bit exponent and adjust for the 1023 bit bias
722 int64_t exp
= ((I
>> 52) & 0x7ff) - 1023;
724 // If the exponent is negative, the value is < 0 so just return 0.
726 return APInt(width
, 0u);
728 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
729 uint64_t mantissa
= (I
& (~0ULL >> 12)) | 1ULL << 52;
731 // If the exponent doesn't shift all bits out of the mantissa
733 return isNeg
? -APInt(width
, mantissa
>> (52 - exp
)) :
734 APInt(width
, mantissa
>> (52 - exp
));
736 // If the client didn't provide enough bits for us to shift the mantissa into
737 // then the result is undefined, just return 0
738 if (width
<= exp
- 52)
739 return APInt(width
, 0);
741 // Otherwise, we have to shift the mantissa bits up to the right location
742 APInt
Tmp(width
, mantissa
);
743 Tmp
<<= (unsigned)exp
- 52;
744 return isNeg
? -Tmp
: Tmp
;
747 /// This function converts this APInt to a double.
748 /// The layout for double is as following (IEEE Standard 754):
749 /// --------------------------------------
750 /// | Sign Exponent Fraction Bias |
751 /// |-------------------------------------- |
752 /// | 1[63] 11[62-52] 52[51-00] 1023 |
753 /// --------------------------------------
754 double APInt::roundToDouble(bool isSigned
) const {
756 // Handle the simple case where the value is contained in one uint64_t.
757 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
758 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD
) {
760 int64_t sext
= SignExtend64(getWord(0), BitWidth
);
763 return double(getWord(0));
766 // Determine if the value is negative.
767 bool isNeg
= isSigned
? (*this)[BitWidth
-1] : false;
769 // Construct the absolute value if we're negative.
770 APInt
Tmp(isNeg
? -(*this) : (*this));
772 // Figure out how many bits we're using.
773 unsigned n
= Tmp
.getActiveBits();
775 // The exponent (without bias normalization) is just the number of bits
776 // we are using. Note that the sign bit is gone since we constructed the
780 // Return infinity for exponent overflow
782 if (!isSigned
|| !isNeg
)
783 return std::numeric_limits
<double>::infinity();
785 return -std::numeric_limits
<double>::infinity();
787 exp
+= 1023; // Increment for 1023 bias
789 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
790 // extract the high 52 bits from the correct words in pVal.
792 unsigned hiWord
= whichWord(n
-1);
794 mantissa
= Tmp
.U
.pVal
[0];
796 mantissa
>>= n
- 52; // shift down, we want the top 52 bits.
798 assert(hiWord
> 0 && "huh?");
799 uint64_t hibits
= Tmp
.U
.pVal
[hiWord
] << (52 - n
% APINT_BITS_PER_WORD
);
800 uint64_t lobits
= Tmp
.U
.pVal
[hiWord
-1] >> (11 + n
% APINT_BITS_PER_WORD
);
801 mantissa
= hibits
| lobits
;
804 // The leading bit of mantissa is implicit, so get rid of it.
805 uint64_t sign
= isNeg
? (1ULL << (APINT_BITS_PER_WORD
- 1)) : 0;
806 uint64_t I
= sign
| (exp
<< 52) | mantissa
;
807 return bit_cast
<double>(I
);
810 // Truncate to new width.
811 APInt
APInt::trunc(unsigned width
) const {
812 assert(width
< BitWidth
&& "Invalid APInt Truncate request");
813 assert(width
&& "Can't truncate to 0 bits");
815 if (width
<= APINT_BITS_PER_WORD
)
816 return APInt(width
, getRawData()[0]);
818 APInt
Result(getMemory(getNumWords(width
)), width
);
822 for (i
= 0; i
!= width
/ APINT_BITS_PER_WORD
; i
++)
823 Result
.U
.pVal
[i
] = U
.pVal
[i
];
825 // Truncate and copy any partial word.
826 unsigned bits
= (0 - width
) % APINT_BITS_PER_WORD
;
828 Result
.U
.pVal
[i
] = U
.pVal
[i
] << bits
>> bits
;
833 // Sign extend to a new width.
834 APInt
APInt::sext(unsigned Width
) const {
835 assert(Width
> BitWidth
&& "Invalid APInt SignExtend request");
837 if (Width
<= APINT_BITS_PER_WORD
)
838 return APInt(Width
, SignExtend64(U
.VAL
, BitWidth
));
840 APInt
Result(getMemory(getNumWords(Width
)), Width
);
843 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
845 // Sign extend the last word since there may be unused bits in the input.
846 Result
.U
.pVal
[getNumWords() - 1] =
847 SignExtend64(Result
.U
.pVal
[getNumWords() - 1],
848 ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
850 // Fill with sign bits.
851 std::memset(Result
.U
.pVal
+ getNumWords(), isNegative() ? -1 : 0,
852 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
853 Result
.clearUnusedBits();
857 // Zero extend to a new width.
858 APInt
APInt::zext(unsigned width
) const {
859 assert(width
> BitWidth
&& "Invalid APInt ZeroExtend request");
861 if (width
<= APINT_BITS_PER_WORD
)
862 return APInt(width
, U
.VAL
);
864 APInt
Result(getMemory(getNumWords(width
)), width
);
867 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
869 // Zero remaining words.
870 std::memset(Result
.U
.pVal
+ getNumWords(), 0,
871 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
876 APInt
APInt::zextOrTrunc(unsigned width
) const {
877 if (BitWidth
< width
)
879 if (BitWidth
> width
)
884 APInt
APInt::sextOrTrunc(unsigned width
) const {
885 if (BitWidth
< width
)
887 if (BitWidth
> width
)
892 APInt
APInt::zextOrSelf(unsigned width
) const {
893 if (BitWidth
< width
)
898 APInt
APInt::sextOrSelf(unsigned width
) const {
899 if (BitWidth
< width
)
904 /// Arithmetic right-shift this APInt by shiftAmt.
905 /// Arithmetic right-shift function.
906 void APInt::ashrInPlace(const APInt
&shiftAmt
) {
907 ashrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
910 /// Arithmetic right-shift this APInt by shiftAmt.
911 /// Arithmetic right-shift function.
912 void APInt::ashrSlowCase(unsigned ShiftAmt
) {
913 // Don't bother performing a no-op shift.
917 // Save the original sign bit for later.
918 bool Negative
= isNegative();
920 // WordShift is the inter-part shift; BitShift is intra-part shift.
921 unsigned WordShift
= ShiftAmt
/ APINT_BITS_PER_WORD
;
922 unsigned BitShift
= ShiftAmt
% APINT_BITS_PER_WORD
;
924 unsigned WordsToMove
= getNumWords() - WordShift
;
925 if (WordsToMove
!= 0) {
926 // Sign extend the last word to fill in the unused bits.
927 U
.pVal
[getNumWords() - 1] = SignExtend64(
928 U
.pVal
[getNumWords() - 1], ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
930 // Fastpath for moving by whole words.
932 std::memmove(U
.pVal
, U
.pVal
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
934 // Move the words containing significant bits.
935 for (unsigned i
= 0; i
!= WordsToMove
- 1; ++i
)
936 U
.pVal
[i
] = (U
.pVal
[i
+ WordShift
] >> BitShift
) |
937 (U
.pVal
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
));
939 // Handle the last word which has no high bits to copy.
940 U
.pVal
[WordsToMove
- 1] = U
.pVal
[WordShift
+ WordsToMove
- 1] >> BitShift
;
941 // Sign extend one more time.
942 U
.pVal
[WordsToMove
- 1] =
943 SignExtend64(U
.pVal
[WordsToMove
- 1], APINT_BITS_PER_WORD
- BitShift
);
947 // Fill in the remainder based on the original sign.
948 std::memset(U
.pVal
+ WordsToMove
, Negative
? -1 : 0,
949 WordShift
* APINT_WORD_SIZE
);
953 /// Logical right-shift this APInt by shiftAmt.
954 /// Logical right-shift function.
955 void APInt::lshrInPlace(const APInt
&shiftAmt
) {
956 lshrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
959 /// Logical right-shift this APInt by shiftAmt.
960 /// Logical right-shift function.
961 void APInt::lshrSlowCase(unsigned ShiftAmt
) {
962 tcShiftRight(U
.pVal
, getNumWords(), ShiftAmt
);
965 /// Left-shift this APInt by shiftAmt.
966 /// Left-shift function.
967 APInt
&APInt::operator<<=(const APInt
&shiftAmt
) {
968 // It's undefined behavior in C to shift by BitWidth or greater.
969 *this <<= (unsigned)shiftAmt
.getLimitedValue(BitWidth
);
973 void APInt::shlSlowCase(unsigned ShiftAmt
) {
974 tcShiftLeft(U
.pVal
, getNumWords(), ShiftAmt
);
978 // Calculate the rotate amount modulo the bit width.
979 static unsigned rotateModulo(unsigned BitWidth
, const APInt
&rotateAmt
) {
980 unsigned rotBitWidth
= rotateAmt
.getBitWidth();
981 APInt rot
= rotateAmt
;
982 if (rotBitWidth
< BitWidth
) {
983 // Extend the rotate APInt, so that the urem doesn't divide by 0.
984 // e.g. APInt(1, 32) would give APInt(1, 0).
985 rot
= rotateAmt
.zext(BitWidth
);
987 rot
= rot
.urem(APInt(rot
.getBitWidth(), BitWidth
));
988 return rot
.getLimitedValue(BitWidth
);
991 APInt
APInt::rotl(const APInt
&rotateAmt
) const {
992 return rotl(rotateModulo(BitWidth
, rotateAmt
));
995 APInt
APInt::rotl(unsigned rotateAmt
) const {
996 rotateAmt
%= BitWidth
;
999 return shl(rotateAmt
) | lshr(BitWidth
- rotateAmt
);
1002 APInt
APInt::rotr(const APInt
&rotateAmt
) const {
1003 return rotr(rotateModulo(BitWidth
, rotateAmt
));
1006 APInt
APInt::rotr(unsigned rotateAmt
) const {
1007 rotateAmt
%= BitWidth
;
1010 return lshr(rotateAmt
) | shl(BitWidth
- rotateAmt
);
1013 // Square Root - this method computes and returns the square root of "this".
1014 // Three mechanisms are used for computation. For small values (<= 5 bits),
1015 // a table lookup is done. This gets some performance for common cases. For
1016 // values using less than 52 bits, the value is converted to double and then
1017 // the libc sqrt function is called. The result is rounded and then converted
1018 // back to a uint64_t which is then used to construct the result. Finally,
1019 // the Babylonian method for computing square roots is used.
1020 APInt
APInt::sqrt() const {
1022 // Determine the magnitude of the value.
1023 unsigned magnitude
= getActiveBits();
1025 // Use a fast table for some small values. This also gets rid of some
1026 // rounding errors in libc sqrt for small values.
1027 if (magnitude
<= 5) {
1028 static const uint8_t results
[32] = {
1031 /* 3- 6 */ 2, 2, 2, 2,
1032 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1033 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1034 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037 return APInt(BitWidth
, results
[ (isSingleWord() ? U
.VAL
: U
.pVal
[0]) ]);
1040 // If the magnitude of the value fits in less than 52 bits (the precision of
1041 // an IEEE double precision floating point value), then we can use the
1042 // libc sqrt function which will probably use a hardware sqrt computation.
1043 // This should be faster than the algorithm below.
1044 if (magnitude
< 52) {
1045 return APInt(BitWidth
,
1046 uint64_t(::round(::sqrt(double(isSingleWord() ? U
.VAL
1050 // Okay, all the short cuts are exhausted. We must compute it. The following
1051 // is a classical Babylonian method for computing the square root. This code
1052 // was adapted to APInt from a wikipedia article on such computations.
1053 // See http://www.wikipedia.org/ and go to the page named
1054 // Calculate_an_integer_square_root.
1055 unsigned nbits
= BitWidth
, i
= 4;
1056 APInt
testy(BitWidth
, 16);
1057 APInt
x_old(BitWidth
, 1);
1058 APInt
x_new(BitWidth
, 0);
1059 APInt
two(BitWidth
, 2);
1061 // Select a good starting value using binary logarithms.
1062 for (;; i
+= 2, testy
= testy
.shl(2))
1063 if (i
>= nbits
|| this->ule(testy
)) {
1064 x_old
= x_old
.shl(i
/ 2);
1068 // Use the Babylonian method to arrive at the integer square root:
1070 x_new
= (this->udiv(x_old
) + x_old
).udiv(two
);
1071 if (x_old
.ule(x_new
))
1076 // Make sure we return the closest approximation
1077 // NOTE: The rounding calculation below is correct. It will produce an
1078 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1079 // determined to be a rounding issue with pari/gp as it begins to use a
1080 // floating point representation after 192 bits. There are no discrepancies
1081 // between this algorithm and pari/gp for bit widths < 192 bits.
1082 APInt
square(x_old
* x_old
);
1083 APInt
nextSquare((x_old
+ 1) * (x_old
+1));
1084 if (this->ult(square
))
1086 assert(this->ule(nextSquare
) && "Error in APInt::sqrt computation");
1087 APInt
midpoint((nextSquare
- square
).udiv(two
));
1088 APInt
offset(*this - square
);
1089 if (offset
.ult(midpoint
))
1094 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1095 /// iterative extended Euclidean algorithm is used to solve for this value,
1096 /// however we simplify it to speed up calculating only the inverse, and take
1097 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1098 /// (potentially large) APInts around.
1099 APInt
APInt::multiplicativeInverse(const APInt
& modulo
) const {
1100 assert(ult(modulo
) && "This APInt must be smaller than the modulo");
1102 // Using the properties listed at the following web page (accessed 06/21/08):
1103 // http://www.numbertheory.org/php/euclid.html
1104 // (especially the properties numbered 3, 4 and 9) it can be proved that
1105 // BitWidth bits suffice for all the computations in the algorithm implemented
1106 // below. More precisely, this number of bits suffice if the multiplicative
1107 // inverse exists, but may not suffice for the general extended Euclidean
1110 APInt r
[2] = { modulo
, *this };
1111 APInt t
[2] = { APInt(BitWidth
, 0), APInt(BitWidth
, 1) };
1112 APInt
q(BitWidth
, 0);
1115 for (i
= 0; r
[i
^1] != 0; i
^= 1) {
1116 // An overview of the math without the confusing bit-flipping:
1117 // q = r[i-2] / r[i-1]
1118 // r[i] = r[i-2] % r[i-1]
1119 // t[i] = t[i-2] - t[i-1] * q
1120 udivrem(r
[i
], r
[i
^1], q
, r
[i
]);
1124 // If this APInt and the modulo are not coprime, there is no multiplicative
1125 // inverse, so return 0. We check this by looking at the next-to-last
1126 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1129 return APInt(BitWidth
, 0);
1131 // The next-to-last t is the multiplicative inverse. However, we are
1132 // interested in a positive inverse. Calculate a positive one from a negative
1133 // one if necessary. A simple addition of the modulo suffices because
1134 // abs(t[i]) is known to be less than *this/2 (see the link above).
1135 if (t
[i
].isNegative())
1138 return std::move(t
[i
]);
1141 /// Calculate the magic numbers required to implement a signed integer division
1142 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1143 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1144 /// Warren, Jr., chapter 10.
1145 APInt::ms
APInt::magic() const {
1146 const APInt
& d
= *this;
1148 APInt ad
, anc
, delta
, q1
, r1
, q2
, r2
, t
;
1149 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1153 t
= signedMin
+ (d
.lshr(d
.getBitWidth() - 1));
1154 anc
= t
- 1 - t
.urem(ad
); // absolute value of nc
1155 p
= d
.getBitWidth() - 1; // initialize p
1156 q1
= signedMin
.udiv(anc
); // initialize q1 = 2p/abs(nc)
1157 r1
= signedMin
- q1
*anc
; // initialize r1 = rem(2p,abs(nc))
1158 q2
= signedMin
.udiv(ad
); // initialize q2 = 2p/abs(d)
1159 r2
= signedMin
- q2
*ad
; // initialize r2 = rem(2p,abs(d))
1162 q1
= q1
<<1; // update q1 = 2p/abs(nc)
1163 r1
= r1
<<1; // update r1 = rem(2p/abs(nc))
1164 if (r1
.uge(anc
)) { // must be unsigned comparison
1168 q2
= q2
<<1; // update q2 = 2p/abs(d)
1169 r2
= r2
<<1; // update r2 = rem(2p/abs(d))
1170 if (r2
.uge(ad
)) { // must be unsigned comparison
1175 } while (q1
.ult(delta
) || (q1
== delta
&& r1
== 0));
1178 if (d
.isNegative()) mag
.m
= -mag
.m
; // resulting magic number
1179 mag
.s
= p
- d
.getBitWidth(); // resulting shift
1183 /// Calculate the magic numbers required to implement an unsigned integer
1184 /// division by a constant as a sequence of multiplies, adds and shifts.
1185 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1186 /// S. Warren, Jr., chapter 10.
1187 /// LeadingZeros can be used to simplify the calculation if the upper bits
1188 /// of the divided value are known zero.
1189 APInt::mu
APInt::magicu(unsigned LeadingZeros
) const {
1190 const APInt
& d
= *this;
1192 APInt nc
, delta
, q1
, r1
, q2
, r2
;
1194 magu
.a
= 0; // initialize "add" indicator
1195 APInt allOnes
= APInt::getAllOnesValue(d
.getBitWidth()).lshr(LeadingZeros
);
1196 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1197 APInt signedMax
= APInt::getSignedMaxValue(d
.getBitWidth());
1199 nc
= allOnes
- (allOnes
- d
).urem(d
);
1200 p
= d
.getBitWidth() - 1; // initialize p
1201 q1
= signedMin
.udiv(nc
); // initialize q1 = 2p/nc
1202 r1
= signedMin
- q1
*nc
; // initialize r1 = rem(2p,nc)
1203 q2
= signedMax
.udiv(d
); // initialize q2 = (2p-1)/d
1204 r2
= signedMax
- q2
*d
; // initialize r2 = rem((2p-1),d)
1207 if (r1
.uge(nc
- r1
)) {
1208 q1
= q1
+ q1
+ 1; // update q1
1209 r1
= r1
+ r1
- nc
; // update r1
1212 q1
= q1
+q1
; // update q1
1213 r1
= r1
+r1
; // update r1
1215 if ((r2
+ 1).uge(d
- r2
)) {
1216 if (q2
.uge(signedMax
)) magu
.a
= 1;
1217 q2
= q2
+q2
+ 1; // update q2
1218 r2
= r2
+r2
+ 1 - d
; // update r2
1221 if (q2
.uge(signedMin
)) magu
.a
= 1;
1222 q2
= q2
+q2
; // update q2
1223 r2
= r2
+r2
+ 1; // update r2
1226 } while (p
< d
.getBitWidth()*2 &&
1227 (q1
.ult(delta
) || (q1
== delta
&& r1
== 0)));
1228 magu
.m
= q2
+ 1; // resulting magic number
1229 magu
.s
= p
- d
.getBitWidth(); // resulting shift
1233 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1234 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1235 /// variables here have the same names as in the algorithm. Comments explain
1236 /// the algorithm and any deviation from it.
1237 static void KnuthDiv(uint32_t *u
, uint32_t *v
, uint32_t *q
, uint32_t* r
,
1238 unsigned m
, unsigned n
) {
1239 assert(u
&& "Must provide dividend");
1240 assert(v
&& "Must provide divisor");
1241 assert(q
&& "Must provide quotient");
1242 assert(u
!= v
&& u
!= q
&& v
!= q
&& "Must use different memory");
1243 assert(n
>1 && "n must be > 1");
1245 // b denotes the base of the number system. In our case b is 2^32.
1246 const uint64_t b
= uint64_t(1) << 32;
1248 // The DEBUG macros here tend to be spam in the debug output if you're not
1249 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1251 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1253 #define DEBUG_KNUTH(X) do {} while(false)
1256 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m
<< " n=" << n
<< '\n');
1257 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1258 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1259 DEBUG_KNUTH(dbgs() << " by");
1260 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1261 DEBUG_KNUTH(dbgs() << '\n');
1262 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1263 // u and v by d. Note that we have taken Knuth's advice here to use a power
1264 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1265 // 2 allows us to shift instead of multiply and it is easy to determine the
1266 // shift amount from the leading zeros. We are basically normalizing the u
1267 // and v so that its high bits are shifted to the top of v's range without
1268 // overflow. Note that this can require an extra word in u so that u must
1269 // be of length m+n+1.
1270 unsigned shift
= countLeadingZeros(v
[n
-1]);
1271 uint32_t v_carry
= 0;
1272 uint32_t u_carry
= 0;
1274 for (unsigned i
= 0; i
< m
+n
; ++i
) {
1275 uint32_t u_tmp
= u
[i
] >> (32 - shift
);
1276 u
[i
] = (u
[i
] << shift
) | u_carry
;
1279 for (unsigned i
= 0; i
< n
; ++i
) {
1280 uint32_t v_tmp
= v
[i
] >> (32 - shift
);
1281 v
[i
] = (v
[i
] << shift
) | v_carry
;
1287 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1288 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1289 DEBUG_KNUTH(dbgs() << " by");
1290 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1291 DEBUG_KNUTH(dbgs() << '\n');
1293 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1296 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j
<< '\n');
1297 // D3. [Calculate q'.].
1298 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1299 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1300 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1301 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1302 // on v[n-2] determines at high speed most of the cases in which the trial
1303 // value qp is one too large, and it eliminates all cases where qp is two
1305 uint64_t dividend
= Make_64(u
[j
+n
], u
[j
+n
-1]);
1306 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend
<< '\n');
1307 uint64_t qp
= dividend
/ v
[n
-1];
1308 uint64_t rp
= dividend
% v
[n
-1];
1309 if (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]) {
1312 if (rp
< b
&& (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]))
1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp
<< ", rp == " << rp
<< '\n');
1317 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1318 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1319 // consists of a simple multiplication by a one-place number, combined with
1321 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1322 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1323 // true value plus b**(n+1), namely as the b's complement of
1324 // the true value, and a "borrow" to the left should be remembered.
1326 for (unsigned i
= 0; i
< n
; ++i
) {
1327 uint64_t p
= uint64_t(qp
) * uint64_t(v
[i
]);
1328 int64_t subres
= int64_t(u
[j
+i
]) - borrow
- Lo_32(p
);
1329 u
[j
+i
] = Lo_32(subres
);
1330 borrow
= Hi_32(p
) - Hi_32(subres
);
1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u
[j
+ i
]
1332 << ", borrow = " << borrow
<< '\n');
1334 bool isNeg
= u
[j
+n
] < borrow
;
1335 u
[j
+n
] -= Lo_32(borrow
);
1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1338 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1339 DEBUG_KNUTH(dbgs() << '\n');
1341 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1342 // negative, go to step D6; otherwise go on to step D7.
1345 // D6. [Add back]. The probability that this step is necessary is very
1346 // small, on the order of only 2/b. Make sure that test data accounts for
1347 // this possibility. Decrease q[j] by 1
1349 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1350 // A carry will occur to the left of u[j+n], and it should be ignored
1351 // since it cancels with the borrow that occurred in D4.
1353 for (unsigned i
= 0; i
< n
; i
++) {
1354 uint32_t limit
= std::min(u
[j
+i
],v
[i
]);
1355 u
[j
+i
] += v
[i
] + carry
;
1356 carry
= u
[j
+i
] < limit
|| (carry
&& u
[j
+i
] == limit
);
1360 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1361 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1362 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q
[j
] << '\n');
1364 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1368 DEBUG_KNUTH(for (int i
= m
; i
>= 0; i
--) dbgs() << " " << q
[i
]);
1369 DEBUG_KNUTH(dbgs() << '\n');
1371 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1372 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1373 // compute the remainder (urem uses this).
1375 // The value d is expressed by the "shift" value above since we avoided
1376 // multiplication by d by using a shift left. So, all we have to do is
1377 // shift right here.
1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1381 for (int i
= n
-1; i
>= 0; i
--) {
1382 r
[i
] = (u
[i
] >> shift
) | carry
;
1383 carry
= u
[i
] << (32 - shift
);
1384 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1387 for (int i
= n
-1; i
>= 0; i
--) {
1389 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1392 DEBUG_KNUTH(dbgs() << '\n');
1394 DEBUG_KNUTH(dbgs() << '\n');
1397 void APInt::divide(const WordType
*LHS
, unsigned lhsWords
, const WordType
*RHS
,
1398 unsigned rhsWords
, WordType
*Quotient
, WordType
*Remainder
) {
1399 assert(lhsWords
>= rhsWords
&& "Fractional result");
1401 // First, compose the values into an array of 32-bit words instead of
1402 // 64-bit words. This is a necessity of both the "short division" algorithm
1403 // and the Knuth "classical algorithm" which requires there to be native
1404 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1405 // can't use 64-bit operands here because we don't have native results of
1406 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1407 // work on large-endian machines.
1408 unsigned n
= rhsWords
* 2;
1409 unsigned m
= (lhsWords
* 2) - n
;
1411 // Allocate space for the temporary values we need either on the stack, if
1412 // it will fit, or on the heap if it won't.
1413 uint32_t SPACE
[128];
1414 uint32_t *U
= nullptr;
1415 uint32_t *V
= nullptr;
1416 uint32_t *Q
= nullptr;
1417 uint32_t *R
= nullptr;
1418 if ((Remainder
?4:3)*n
+2*m
+1 <= 128) {
1421 Q
= &SPACE
[(m
+n
+1) + n
];
1423 R
= &SPACE
[(m
+n
+1) + n
+ (m
+n
)];
1425 U
= new uint32_t[m
+ n
+ 1];
1426 V
= new uint32_t[n
];
1427 Q
= new uint32_t[m
+n
];
1429 R
= new uint32_t[n
];
1432 // Initialize the dividend
1433 memset(U
, 0, (m
+n
+1)*sizeof(uint32_t));
1434 for (unsigned i
= 0; i
< lhsWords
; ++i
) {
1435 uint64_t tmp
= LHS
[i
];
1436 U
[i
* 2] = Lo_32(tmp
);
1437 U
[i
* 2 + 1] = Hi_32(tmp
);
1439 U
[m
+n
] = 0; // this extra word is for "spill" in the Knuth algorithm.
1441 // Initialize the divisor
1442 memset(V
, 0, (n
)*sizeof(uint32_t));
1443 for (unsigned i
= 0; i
< rhsWords
; ++i
) {
1444 uint64_t tmp
= RHS
[i
];
1445 V
[i
* 2] = Lo_32(tmp
);
1446 V
[i
* 2 + 1] = Hi_32(tmp
);
1449 // initialize the quotient and remainder
1450 memset(Q
, 0, (m
+n
) * sizeof(uint32_t));
1452 memset(R
, 0, n
* sizeof(uint32_t));
1454 // Now, adjust m and n for the Knuth division. n is the number of words in
1455 // the divisor. m is the number of words by which the dividend exceeds the
1456 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1457 // contain any zero words or the Knuth algorithm fails.
1458 for (unsigned i
= n
; i
> 0 && V
[i
-1] == 0; i
--) {
1462 for (unsigned i
= m
+n
; i
> 0 && U
[i
-1] == 0; i
--)
1465 // If we're left with only a single word for the divisor, Knuth doesn't work
1466 // so we implement the short division algorithm here. This is much simpler
1467 // and faster because we are certain that we can divide a 64-bit quantity
1468 // by a 32-bit quantity at hardware speed and short division is simply a
1469 // series of such operations. This is just like doing short division but we
1470 // are using base 2^32 instead of base 10.
1471 assert(n
!= 0 && "Divide by zero?");
1473 uint32_t divisor
= V
[0];
1474 uint32_t remainder
= 0;
1475 for (int i
= m
; i
>= 0; i
--) {
1476 uint64_t partial_dividend
= Make_64(remainder
, U
[i
]);
1477 if (partial_dividend
== 0) {
1480 } else if (partial_dividend
< divisor
) {
1482 remainder
= Lo_32(partial_dividend
);
1483 } else if (partial_dividend
== divisor
) {
1487 Q
[i
] = Lo_32(partial_dividend
/ divisor
);
1488 remainder
= Lo_32(partial_dividend
- (Q
[i
] * divisor
));
1494 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1496 KnuthDiv(U
, V
, Q
, R
, m
, n
);
1499 // If the caller wants the quotient
1501 for (unsigned i
= 0; i
< lhsWords
; ++i
)
1502 Quotient
[i
] = Make_64(Q
[i
*2+1], Q
[i
*2]);
1505 // If the caller wants the remainder
1507 for (unsigned i
= 0; i
< rhsWords
; ++i
)
1508 Remainder
[i
] = Make_64(R
[i
*2+1], R
[i
*2]);
1511 // Clean up the memory we allocated.
1512 if (U
!= &SPACE
[0]) {
1520 APInt
APInt::udiv(const APInt
&RHS
) const {
1521 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1523 // First, deal with the easy case
1524 if (isSingleWord()) {
1525 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1526 return APInt(BitWidth
, U
.VAL
/ RHS
.U
.VAL
);
1529 // Get some facts about the LHS and RHS number of bits and words
1530 unsigned lhsWords
= getNumWords(getActiveBits());
1531 unsigned rhsBits
= RHS
.getActiveBits();
1532 unsigned rhsWords
= getNumWords(rhsBits
);
1533 assert(rhsWords
&& "Divided by zero???");
1535 // Deal with some degenerate cases
1538 return APInt(BitWidth
, 0);
1542 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1543 // X / Y ===> 0, iff X < Y
1544 return APInt(BitWidth
, 0);
1547 return APInt(BitWidth
, 1);
1548 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1549 // All high words are zero, just use native divide
1550 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
.U
.pVal
[0]);
1552 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1553 APInt
Quotient(BitWidth
, 0); // to hold result.
1554 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
, nullptr);
1558 APInt
APInt::udiv(uint64_t RHS
) const {
1559 assert(RHS
!= 0 && "Divide by zero?");
1561 // First, deal with the easy case
1563 return APInt(BitWidth
, U
.VAL
/ RHS
);
1565 // Get some facts about the LHS words.
1566 unsigned lhsWords
= getNumWords(getActiveBits());
1568 // Deal with some degenerate cases
1571 return APInt(BitWidth
, 0);
1576 // X / Y ===> 0, iff X < Y
1577 return APInt(BitWidth
, 0);
1580 return APInt(BitWidth
, 1);
1581 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1582 // All high words are zero, just use native divide
1583 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
);
1585 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586 APInt
Quotient(BitWidth
, 0); // to hold result.
1587 divide(U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, nullptr);
1591 APInt
APInt::sdiv(const APInt
&RHS
) const {
1593 if (RHS
.isNegative())
1594 return (-(*this)).udiv(-RHS
);
1595 return -((-(*this)).udiv(RHS
));
1597 if (RHS
.isNegative())
1598 return -(this->udiv(-RHS
));
1599 return this->udiv(RHS
);
1602 APInt
APInt::sdiv(int64_t RHS
) const {
1605 return (-(*this)).udiv(-RHS
);
1606 return -((-(*this)).udiv(RHS
));
1609 return -(this->udiv(-RHS
));
1610 return this->udiv(RHS
);
1613 APInt
APInt::urem(const APInt
&RHS
) const {
1614 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1615 if (isSingleWord()) {
1616 assert(RHS
.U
.VAL
!= 0 && "Remainder by zero?");
1617 return APInt(BitWidth
, U
.VAL
% RHS
.U
.VAL
);
1620 // Get some facts about the LHS
1621 unsigned lhsWords
= getNumWords(getActiveBits());
1623 // Get some facts about the RHS
1624 unsigned rhsBits
= RHS
.getActiveBits();
1625 unsigned rhsWords
= getNumWords(rhsBits
);
1626 assert(rhsWords
&& "Performing remainder operation by zero ???");
1628 // Check the degenerate cases
1631 return APInt(BitWidth
, 0);
1634 return APInt(BitWidth
, 0);
1635 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1636 // X % Y ===> X, iff X < Y
1640 return APInt(BitWidth
, 0);
1642 // All high words are zero, just use native remainder
1643 return APInt(BitWidth
, U
.pVal
[0] % RHS
.U
.pVal
[0]);
1645 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1646 APInt
Remainder(BitWidth
, 0);
1647 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, nullptr, Remainder
.U
.pVal
);
1651 uint64_t APInt::urem(uint64_t RHS
) const {
1652 assert(RHS
!= 0 && "Remainder by zero?");
1657 // Get some facts about the LHS
1658 unsigned lhsWords
= getNumWords(getActiveBits());
1660 // Check the degenerate cases
1668 // X % Y ===> X, iff X < Y
1669 return getZExtValue();
1674 // All high words are zero, just use native remainder
1675 return U
.pVal
[0] % RHS
;
1677 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1679 divide(U
.pVal
, lhsWords
, &RHS
, 1, nullptr, &Remainder
);
1683 APInt
APInt::srem(const APInt
&RHS
) const {
1685 if (RHS
.isNegative())
1686 return -((-(*this)).urem(-RHS
));
1687 return -((-(*this)).urem(RHS
));
1689 if (RHS
.isNegative())
1690 return this->urem(-RHS
);
1691 return this->urem(RHS
);
1694 int64_t APInt::srem(int64_t RHS
) const {
1697 return -((-(*this)).urem(-RHS
));
1698 return -((-(*this)).urem(RHS
));
1701 return this->urem(-RHS
);
1702 return this->urem(RHS
);
1705 void APInt::udivrem(const APInt
&LHS
, const APInt
&RHS
,
1706 APInt
&Quotient
, APInt
&Remainder
) {
1707 assert(LHS
.BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1708 unsigned BitWidth
= LHS
.BitWidth
;
1710 // First, deal with the easy case
1711 if (LHS
.isSingleWord()) {
1712 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1713 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
.U
.VAL
;
1714 uint64_t RemVal
= LHS
.U
.VAL
% RHS
.U
.VAL
;
1715 Quotient
= APInt(BitWidth
, QuotVal
);
1716 Remainder
= APInt(BitWidth
, RemVal
);
1720 // Get some size facts about the dividend and divisor
1721 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1722 unsigned rhsBits
= RHS
.getActiveBits();
1723 unsigned rhsWords
= getNumWords(rhsBits
);
1724 assert(rhsWords
&& "Performing divrem operation by zero ???");
1726 // Check the degenerate cases
1727 if (lhsWords
== 0) {
1728 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1729 Remainder
= APInt(BitWidth
, 0); // 0 % Y ===> 0
1734 Quotient
= LHS
; // X / 1 ===> X
1735 Remainder
= APInt(BitWidth
, 0); // X % 1 ===> 0
1738 if (lhsWords
< rhsWords
|| LHS
.ult(RHS
)) {
1739 Remainder
= LHS
; // X % Y ===> X, iff X < Y
1740 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1745 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1746 Remainder
= APInt(BitWidth
, 0); // X % X ===> 0;
1750 // Make sure there is enough space to hold the results.
1751 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1752 // change the size. This is necessary if Quotient or Remainder is aliased
1754 Quotient
.reallocate(BitWidth
);
1755 Remainder
.reallocate(BitWidth
);
1757 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1758 // There is only one word to consider so use the native versions.
1759 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1760 uint64_t rhsValue
= RHS
.U
.pVal
[0];
1761 Quotient
= lhsValue
/ rhsValue
;
1762 Remainder
= lhsValue
% rhsValue
;
1766 // Okay, lets do it the long way
1767 divide(LHS
.U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
,
1769 // Clear the rest of the Quotient and Remainder.
1770 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1771 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1772 std::memset(Remainder
.U
.pVal
+ rhsWords
, 0,
1773 (getNumWords(BitWidth
) - rhsWords
) * APINT_WORD_SIZE
);
1776 void APInt::udivrem(const APInt
&LHS
, uint64_t RHS
, APInt
&Quotient
,
1777 uint64_t &Remainder
) {
1778 assert(RHS
!= 0 && "Divide by zero?");
1779 unsigned BitWidth
= LHS
.BitWidth
;
1781 // First, deal with the easy case
1782 if (LHS
.isSingleWord()) {
1783 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
;
1784 Remainder
= LHS
.U
.VAL
% RHS
;
1785 Quotient
= APInt(BitWidth
, QuotVal
);
1789 // Get some size facts about the dividend and divisor
1790 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1792 // Check the degenerate cases
1793 if (lhsWords
== 0) {
1794 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1795 Remainder
= 0; // 0 % Y ===> 0
1800 Quotient
= LHS
; // X / 1 ===> X
1801 Remainder
= 0; // X % 1 ===> 0
1806 Remainder
= LHS
.getZExtValue(); // X % Y ===> X, iff X < Y
1807 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1812 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1813 Remainder
= 0; // X % X ===> 0;
1817 // Make sure there is enough space to hold the results.
1818 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1819 // change the size. This is necessary if Quotient is aliased with LHS.
1820 Quotient
.reallocate(BitWidth
);
1822 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1823 // There is only one word to consider so use the native versions.
1824 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1825 Quotient
= lhsValue
/ RHS
;
1826 Remainder
= lhsValue
% RHS
;
1830 // Okay, lets do it the long way
1831 divide(LHS
.U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, &Remainder
);
1832 // Clear the rest of the Quotient.
1833 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1834 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1837 void APInt::sdivrem(const APInt
&LHS
, const APInt
&RHS
,
1838 APInt
&Quotient
, APInt
&Remainder
) {
1839 if (LHS
.isNegative()) {
1840 if (RHS
.isNegative())
1841 APInt::udivrem(-LHS
, -RHS
, Quotient
, Remainder
);
1843 APInt::udivrem(-LHS
, RHS
, Quotient
, Remainder
);
1847 } else if (RHS
.isNegative()) {
1848 APInt::udivrem(LHS
, -RHS
, Quotient
, Remainder
);
1851 APInt::udivrem(LHS
, RHS
, Quotient
, Remainder
);
1855 void APInt::sdivrem(const APInt
&LHS
, int64_t RHS
,
1856 APInt
&Quotient
, int64_t &Remainder
) {
1857 uint64_t R
= Remainder
;
1858 if (LHS
.isNegative()) {
1860 APInt::udivrem(-LHS
, -RHS
, Quotient
, R
);
1862 APInt::udivrem(-LHS
, RHS
, Quotient
, R
);
1866 } else if (RHS
< 0) {
1867 APInt::udivrem(LHS
, -RHS
, Quotient
, R
);
1870 APInt::udivrem(LHS
, RHS
, Quotient
, R
);
1875 APInt
APInt::sadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1876 APInt Res
= *this+RHS
;
1877 Overflow
= isNonNegative() == RHS
.isNonNegative() &&
1878 Res
.isNonNegative() != isNonNegative();
1882 APInt
APInt::uadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1883 APInt Res
= *this+RHS
;
1884 Overflow
= Res
.ult(RHS
);
1888 APInt
APInt::ssub_ov(const APInt
&RHS
, bool &Overflow
) const {
1889 APInt Res
= *this - RHS
;
1890 Overflow
= isNonNegative() != RHS
.isNonNegative() &&
1891 Res
.isNonNegative() != isNonNegative();
1895 APInt
APInt::usub_ov(const APInt
&RHS
, bool &Overflow
) const {
1896 APInt Res
= *this-RHS
;
1897 Overflow
= Res
.ugt(*this);
1901 APInt
APInt::sdiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1902 // MININT/-1 --> overflow.
1903 Overflow
= isMinSignedValue() && RHS
.isAllOnesValue();
1907 APInt
APInt::smul_ov(const APInt
&RHS
, bool &Overflow
) const {
1908 APInt Res
= *this * RHS
;
1910 if (*this != 0 && RHS
!= 0)
1911 Overflow
= Res
.sdiv(RHS
) != *this || Res
.sdiv(*this) != RHS
;
1917 APInt
APInt::umul_ov(const APInt
&RHS
, bool &Overflow
) const {
1918 APInt Res
= *this * RHS
;
1920 if (*this != 0 && RHS
!= 0)
1921 Overflow
= Res
.udiv(RHS
) != *this || Res
.udiv(*this) != RHS
;
1927 APInt
APInt::sshl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1928 Overflow
= ShAmt
.uge(getBitWidth());
1930 return APInt(BitWidth
, 0);
1932 if (isNonNegative()) // Don't allow sign change.
1933 Overflow
= ShAmt
.uge(countLeadingZeros());
1935 Overflow
= ShAmt
.uge(countLeadingOnes());
1937 return *this << ShAmt
;
1940 APInt
APInt::ushl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1941 Overflow
= ShAmt
.uge(getBitWidth());
1943 return APInt(BitWidth
, 0);
1945 Overflow
= ShAmt
.ugt(countLeadingZeros());
1947 return *this << ShAmt
;
1953 void APInt::fromString(unsigned numbits
, StringRef str
, uint8_t radix
) {
1954 // Check our assumptions here
1955 assert(!str
.empty() && "Invalid string length");
1956 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
1958 "Radix should be 2, 8, 10, 16, or 36!");
1960 StringRef::iterator p
= str
.begin();
1961 size_t slen
= str
.size();
1962 bool isNeg
= *p
== '-';
1963 if (*p
== '-' || *p
== '+') {
1966 assert(slen
&& "String is only a sign, needs a value.");
1968 assert((slen
<= numbits
|| radix
!= 2) && "Insufficient bit width");
1969 assert(((slen
-1)*3 <= numbits
|| radix
!= 8) && "Insufficient bit width");
1970 assert(((slen
-1)*4 <= numbits
|| radix
!= 16) && "Insufficient bit width");
1971 assert((((slen
-1)*64)/22 <= numbits
|| radix
!= 10) &&
1972 "Insufficient bit width");
1974 // Allocate memory if needed
1978 U
.pVal
= getClearedMemory(getNumWords());
1980 // Figure out if we can shift instead of multiply
1981 unsigned shift
= (radix
== 16 ? 4 : radix
== 8 ? 3 : radix
== 2 ? 1 : 0);
1983 // Enter digit traversal loop
1984 for (StringRef::iterator e
= str
.end(); p
!= e
; ++p
) {
1985 unsigned digit
= getDigit(*p
, radix
);
1986 assert(digit
< radix
&& "Invalid character in digit string");
1988 // Shift or multiply the value by the radix
1996 // Add in the digit we just interpreted
1999 // If its negative, put it in two's complement form
2004 void APInt::toString(SmallVectorImpl
<char> &Str
, unsigned Radix
,
2005 bool Signed
, bool formatAsCLiteral
) const {
2006 assert((Radix
== 10 || Radix
== 8 || Radix
== 16 || Radix
== 2 ||
2008 "Radix should be 2, 8, 10, 16, or 36!");
2010 const char *Prefix
= "";
2011 if (formatAsCLiteral
) {
2014 // Binary literals are a non-standard extension added in gcc 4.3:
2015 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2027 llvm_unreachable("Invalid radix!");
2031 // First, check for a zero value and just short circuit the logic below.
2034 Str
.push_back(*Prefix
);
2041 static const char Digits
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2043 if (isSingleWord()) {
2045 char *BufPtr
= std::end(Buffer
);
2051 int64_t I
= getSExtValue();
2061 Str
.push_back(*Prefix
);
2066 *--BufPtr
= Digits
[N
% Radix
];
2069 Str
.append(BufPtr
, std::end(Buffer
));
2075 if (Signed
&& isNegative()) {
2076 // They want to print the signed version and it is a negative value
2077 // Flip the bits and add one to turn it into the equivalent positive
2078 // value and put a '-' in the result.
2084 Str
.push_back(*Prefix
);
2088 // We insert the digits backward, then reverse them to get the right order.
2089 unsigned StartDig
= Str
.size();
2091 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2092 // because the number of bits per digit (1, 3 and 4 respectively) divides
2093 // equally. We just shift until the value is zero.
2094 if (Radix
== 2 || Radix
== 8 || Radix
== 16) {
2095 // Just shift tmp right for each digit width until it becomes zero
2096 unsigned ShiftAmt
= (Radix
== 16 ? 4 : (Radix
== 8 ? 3 : 1));
2097 unsigned MaskAmt
= Radix
- 1;
2099 while (Tmp
.getBoolValue()) {
2100 unsigned Digit
= unsigned(Tmp
.getRawData()[0]) & MaskAmt
;
2101 Str
.push_back(Digits
[Digit
]);
2102 Tmp
.lshrInPlace(ShiftAmt
);
2105 while (Tmp
.getBoolValue()) {
2107 udivrem(Tmp
, Radix
, Tmp
, Digit
);
2108 assert(Digit
< Radix
&& "divide failed");
2109 Str
.push_back(Digits
[Digit
]);
2113 // Reverse the digits before returning.
2114 std::reverse(Str
.begin()+StartDig
, Str
.end());
2117 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2118 /// It is better to pass in a SmallVector/SmallString to the methods above.
2119 std::string
APInt::toString(unsigned Radix
= 10, bool Signed
= true) const {
2121 toString(S
, Radix
, Signed
, /* formatAsCLiteral = */false);
2125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2126 LLVM_DUMP_METHOD
void APInt::dump() const {
2127 SmallString
<40> S
, U
;
2128 this->toStringUnsigned(U
);
2129 this->toStringSigned(S
);
2130 dbgs() << "APInt(" << BitWidth
<< "b, "
2131 << U
<< "u " << S
<< "s)\n";
2135 void APInt::print(raw_ostream
&OS
, bool isSigned
) const {
2137 this->toString(S
, 10, isSigned
, /* formatAsCLiteral = */false);
2141 // This implements a variety of operations on a representation of
2142 // arbitrary precision, two's-complement, bignum integer values.
2144 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2145 // and unrestricting assumption.
2146 static_assert(APInt::APINT_BITS_PER_WORD
% 2 == 0,
2147 "Part width must be divisible by 2!");
2149 /* Some handy functions local to this file. */
2151 /* Returns the integer part with the least significant BITS set.
2152 BITS cannot be zero. */
2153 static inline APInt::WordType
lowBitMask(unsigned bits
) {
2154 assert(bits
!= 0 && bits
<= APInt::APINT_BITS_PER_WORD
);
2156 return ~(APInt::WordType
) 0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
2159 /* Returns the value of the lower half of PART. */
2160 static inline APInt::WordType
lowHalf(APInt::WordType part
) {
2161 return part
& lowBitMask(APInt::APINT_BITS_PER_WORD
/ 2);
2164 /* Returns the value of the upper half of PART. */
2165 static inline APInt::WordType
highHalf(APInt::WordType part
) {
2166 return part
>> (APInt::APINT_BITS_PER_WORD
/ 2);
2169 /* Returns the bit number of the most significant set bit of a part.
2170 If the input number has no bits set -1U is returned. */
2171 static unsigned partMSB(APInt::WordType value
) {
2172 return findLastSet(value
, ZB_Max
);
2175 /* Returns the bit number of the least significant set bit of a
2176 part. If the input number has no bits set -1U is returned. */
2177 static unsigned partLSB(APInt::WordType value
) {
2178 return findFirstSet(value
, ZB_Max
);
2181 /* Sets the least significant part of a bignum to the input value, and
2182 zeroes out higher parts. */
2183 void APInt::tcSet(WordType
*dst
, WordType part
, unsigned parts
) {
2187 for (unsigned i
= 1; i
< parts
; i
++)
2191 /* Assign one bignum to another. */
2192 void APInt::tcAssign(WordType
*dst
, const WordType
*src
, unsigned parts
) {
2193 for (unsigned i
= 0; i
< parts
; i
++)
2197 /* Returns true if a bignum is zero, false otherwise. */
2198 bool APInt::tcIsZero(const WordType
*src
, unsigned parts
) {
2199 for (unsigned i
= 0; i
< parts
; i
++)
2206 /* Extract the given bit of a bignum; returns 0 or 1. */
2207 int APInt::tcExtractBit(const WordType
*parts
, unsigned bit
) {
2208 return (parts
[whichWord(bit
)] & maskBit(bit
)) != 0;
2211 /* Set the given bit of a bignum. */
2212 void APInt::tcSetBit(WordType
*parts
, unsigned bit
) {
2213 parts
[whichWord(bit
)] |= maskBit(bit
);
2216 /* Clears the given bit of a bignum. */
2217 void APInt::tcClearBit(WordType
*parts
, unsigned bit
) {
2218 parts
[whichWord(bit
)] &= ~maskBit(bit
);
2221 /* Returns the bit number of the least significant set bit of a
2222 number. If the input number has no bits set -1U is returned. */
2223 unsigned APInt::tcLSB(const WordType
*parts
, unsigned n
) {
2224 for (unsigned i
= 0; i
< n
; i
++) {
2225 if (parts
[i
] != 0) {
2226 unsigned lsb
= partLSB(parts
[i
]);
2228 return lsb
+ i
* APINT_BITS_PER_WORD
;
2235 /* Returns the bit number of the most significant set bit of a number.
2236 If the input number has no bits set -1U is returned. */
2237 unsigned APInt::tcMSB(const WordType
*parts
, unsigned n
) {
2241 if (parts
[n
] != 0) {
2242 unsigned msb
= partMSB(parts
[n
]);
2244 return msb
+ n
* APINT_BITS_PER_WORD
;
2251 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2252 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2253 the least significant bit of DST. All high bits above srcBITS in
2254 DST are zero-filled. */
2256 APInt::tcExtract(WordType
*dst
, unsigned dstCount
, const WordType
*src
,
2257 unsigned srcBits
, unsigned srcLSB
) {
2258 unsigned dstParts
= (srcBits
+ APINT_BITS_PER_WORD
- 1) / APINT_BITS_PER_WORD
;
2259 assert(dstParts
<= dstCount
);
2261 unsigned firstSrcPart
= srcLSB
/ APINT_BITS_PER_WORD
;
2262 tcAssign (dst
, src
+ firstSrcPart
, dstParts
);
2264 unsigned shift
= srcLSB
% APINT_BITS_PER_WORD
;
2265 tcShiftRight (dst
, dstParts
, shift
);
2267 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2268 in DST. If this is less that srcBits, append the rest, else
2269 clear the high bits. */
2270 unsigned n
= dstParts
* APINT_BITS_PER_WORD
- shift
;
2272 WordType mask
= lowBitMask (srcBits
- n
);
2273 dst
[dstParts
- 1] |= ((src
[firstSrcPart
+ dstParts
] & mask
)
2274 << n
% APINT_BITS_PER_WORD
);
2275 } else if (n
> srcBits
) {
2276 if (srcBits
% APINT_BITS_PER_WORD
)
2277 dst
[dstParts
- 1] &= lowBitMask (srcBits
% APINT_BITS_PER_WORD
);
2280 /* Clear high parts. */
2281 while (dstParts
< dstCount
)
2282 dst
[dstParts
++] = 0;
2285 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2286 APInt::WordType
APInt::tcAdd(WordType
*dst
, const WordType
*rhs
,
2287 WordType c
, unsigned parts
) {
2290 for (unsigned i
= 0; i
< parts
; i
++) {
2291 WordType l
= dst
[i
];
2293 dst
[i
] += rhs
[i
] + 1;
2304 /// This function adds a single "word" integer, src, to the multiple
2305 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2306 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2307 /// @returns the carry of the addition.
2308 APInt::WordType
APInt::tcAddPart(WordType
*dst
, WordType src
,
2310 for (unsigned i
= 0; i
< parts
; ++i
) {
2313 return 0; // No need to carry so exit early.
2314 src
= 1; // Carry one to next digit.
2320 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2321 APInt::WordType
APInt::tcSubtract(WordType
*dst
, const WordType
*rhs
,
2322 WordType c
, unsigned parts
) {
2325 for (unsigned i
= 0; i
< parts
; i
++) {
2326 WordType l
= dst
[i
];
2328 dst
[i
] -= rhs
[i
] + 1;
2339 /// This function subtracts a single "word" (64-bit word), src, from
2340 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2341 /// no further borrowing is needed or it runs out of "words" in dst. The result
2342 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2343 /// exhausted. In other words, if src > dst then this function returns 1,
2345 /// @returns the borrow out of the subtraction
2346 APInt::WordType
APInt::tcSubtractPart(WordType
*dst
, WordType src
,
2348 for (unsigned i
= 0; i
< parts
; ++i
) {
2349 WordType Dst
= dst
[i
];
2352 return 0; // No need to borrow so exit early.
2353 src
= 1; // We have to "borrow 1" from next "word"
2359 /* Negate a bignum in-place. */
2360 void APInt::tcNegate(WordType
*dst
, unsigned parts
) {
2361 tcComplement(dst
, parts
);
2362 tcIncrement(dst
, parts
);
2365 /* DST += SRC * MULTIPLIER + CARRY if add is true
2366 DST = SRC * MULTIPLIER + CARRY if add is false
2368 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2369 they must start at the same point, i.e. DST == SRC.
2371 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2372 returned. Otherwise DST is filled with the least significant
2373 DSTPARTS parts of the result, and if all of the omitted higher
2374 parts were zero return zero, otherwise overflow occurred and
2376 int APInt::tcMultiplyPart(WordType
*dst
, const WordType
*src
,
2377 WordType multiplier
, WordType carry
,
2378 unsigned srcParts
, unsigned dstParts
,
2380 /* Otherwise our writes of DST kill our later reads of SRC. */
2381 assert(dst
<= src
|| dst
>= src
+ srcParts
);
2382 assert(dstParts
<= srcParts
+ 1);
2384 /* N loops; minimum of dstParts and srcParts. */
2385 unsigned n
= std::min(dstParts
, srcParts
);
2387 for (unsigned i
= 0; i
< n
; i
++) {
2388 WordType low
, mid
, high
, srcPart
;
2390 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2392 This cannot overflow, because
2394 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2396 which is less than n^2. */
2400 if (multiplier
== 0 || srcPart
== 0) {
2404 low
= lowHalf(srcPart
) * lowHalf(multiplier
);
2405 high
= highHalf(srcPart
) * highHalf(multiplier
);
2407 mid
= lowHalf(srcPart
) * highHalf(multiplier
);
2408 high
+= highHalf(mid
);
2409 mid
<<= APINT_BITS_PER_WORD
/ 2;
2410 if (low
+ mid
< low
)
2414 mid
= highHalf(srcPart
) * lowHalf(multiplier
);
2415 high
+= highHalf(mid
);
2416 mid
<<= APINT_BITS_PER_WORD
/ 2;
2417 if (low
+ mid
< low
)
2421 /* Now add carry. */
2422 if (low
+ carry
< low
)
2428 /* And now DST[i], and store the new low part there. */
2429 if (low
+ dst
[i
] < low
)
2438 if (srcParts
< dstParts
) {
2439 /* Full multiplication, there is no overflow. */
2440 assert(srcParts
+ 1 == dstParts
);
2441 dst
[srcParts
] = carry
;
2445 /* We overflowed if there is carry. */
2449 /* We would overflow if any significant unwritten parts would be
2450 non-zero. This is true if any remaining src parts are non-zero
2451 and the multiplier is non-zero. */
2453 for (unsigned i
= dstParts
; i
< srcParts
; i
++)
2457 /* We fitted in the narrow destination. */
2461 /* DST = LHS * RHS, where DST has the same width as the operands and
2462 is filled with the least significant parts of the result. Returns
2463 one if overflow occurred, otherwise zero. DST must be disjoint
2464 from both operands. */
2465 int APInt::tcMultiply(WordType
*dst
, const WordType
*lhs
,
2466 const WordType
*rhs
, unsigned parts
) {
2467 assert(dst
!= lhs
&& dst
!= rhs
);
2470 tcSet(dst
, 0, parts
);
2472 for (unsigned i
= 0; i
< parts
; i
++)
2473 overflow
|= tcMultiplyPart(&dst
[i
], lhs
, rhs
[i
], 0, parts
,
2479 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2480 /// operands. No overflow occurs. DST must be disjoint from both operands.
2481 void APInt::tcFullMultiply(WordType
*dst
, const WordType
*lhs
,
2482 const WordType
*rhs
, unsigned lhsParts
,
2483 unsigned rhsParts
) {
2484 /* Put the narrower number on the LHS for less loops below. */
2485 if (lhsParts
> rhsParts
)
2486 return tcFullMultiply (dst
, rhs
, lhs
, rhsParts
, lhsParts
);
2488 assert(dst
!= lhs
&& dst
!= rhs
);
2490 tcSet(dst
, 0, rhsParts
);
2492 for (unsigned i
= 0; i
< lhsParts
; i
++)
2493 tcMultiplyPart(&dst
[i
], rhs
, lhs
[i
], 0, rhsParts
, rhsParts
+ 1, true);
2496 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2497 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2498 set REMAINDER to the remainder, return zero. i.e.
2500 OLD_LHS = RHS * LHS + REMAINDER
2502 SCRATCH is a bignum of the same size as the operands and result for
2503 use by the routine; its contents need not be initialized and are
2504 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2506 int APInt::tcDivide(WordType
*lhs
, const WordType
*rhs
,
2507 WordType
*remainder
, WordType
*srhs
,
2509 assert(lhs
!= remainder
&& lhs
!= srhs
&& remainder
!= srhs
);
2511 unsigned shiftCount
= tcMSB(rhs
, parts
) + 1;
2512 if (shiftCount
== 0)
2515 shiftCount
= parts
* APINT_BITS_PER_WORD
- shiftCount
;
2516 unsigned n
= shiftCount
/ APINT_BITS_PER_WORD
;
2517 WordType mask
= (WordType
) 1 << (shiftCount
% APINT_BITS_PER_WORD
);
2519 tcAssign(srhs
, rhs
, parts
);
2520 tcShiftLeft(srhs
, parts
, shiftCount
);
2521 tcAssign(remainder
, lhs
, parts
);
2522 tcSet(lhs
, 0, parts
);
2524 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2527 int compare
= tcCompare(remainder
, srhs
, parts
);
2529 tcSubtract(remainder
, srhs
, 0, parts
);
2533 if (shiftCount
== 0)
2536 tcShiftRight(srhs
, parts
, 1);
2537 if ((mask
>>= 1) == 0) {
2538 mask
= (WordType
) 1 << (APINT_BITS_PER_WORD
- 1);
2546 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2547 /// no restrictions on Count.
2548 void APInt::tcShiftLeft(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2549 // Don't bother performing a no-op shift.
2553 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2554 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2555 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2557 // Fastpath for moving by whole words.
2558 if (BitShift
== 0) {
2559 std::memmove(Dst
+ WordShift
, Dst
, (Words
- WordShift
) * APINT_WORD_SIZE
);
2561 while (Words
-- > WordShift
) {
2562 Dst
[Words
] = Dst
[Words
- WordShift
] << BitShift
;
2563 if (Words
> WordShift
)
2565 Dst
[Words
- WordShift
- 1] >> (APINT_BITS_PER_WORD
- BitShift
);
2569 // Fill in the remainder with 0s.
2570 std::memset(Dst
, 0, WordShift
* APINT_WORD_SIZE
);
2573 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2574 /// are no restrictions on Count.
2575 void APInt::tcShiftRight(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2576 // Don't bother performing a no-op shift.
2580 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2581 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2582 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2584 unsigned WordsToMove
= Words
- WordShift
;
2585 // Fastpath for moving by whole words.
2586 if (BitShift
== 0) {
2587 std::memmove(Dst
, Dst
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
2589 for (unsigned i
= 0; i
!= WordsToMove
; ++i
) {
2590 Dst
[i
] = Dst
[i
+ WordShift
] >> BitShift
;
2591 if (i
+ 1 != WordsToMove
)
2592 Dst
[i
] |= Dst
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
);
2596 // Fill in the remainder with 0s.
2597 std::memset(Dst
+ WordsToMove
, 0, WordShift
* APINT_WORD_SIZE
);
2600 /* Bitwise and of two bignums. */
2601 void APInt::tcAnd(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2602 for (unsigned i
= 0; i
< parts
; i
++)
2606 /* Bitwise inclusive or of two bignums. */
2607 void APInt::tcOr(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2608 for (unsigned i
= 0; i
< parts
; i
++)
2612 /* Bitwise exclusive or of two bignums. */
2613 void APInt::tcXor(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2614 for (unsigned i
= 0; i
< parts
; i
++)
2618 /* Complement a bignum in-place. */
2619 void APInt::tcComplement(WordType
*dst
, unsigned parts
) {
2620 for (unsigned i
= 0; i
< parts
; i
++)
2624 /* Comparison (unsigned) of two bignums. */
2625 int APInt::tcCompare(const WordType
*lhs
, const WordType
*rhs
,
2629 if (lhs
[parts
] != rhs
[parts
])
2630 return (lhs
[parts
] > rhs
[parts
]) ? 1 : -1;
2636 /* Set the least significant BITS bits of a bignum, clear the
2638 void APInt::tcSetLeastSignificantBits(WordType
*dst
, unsigned parts
,
2641 while (bits
> APINT_BITS_PER_WORD
) {
2642 dst
[i
++] = ~(WordType
) 0;
2643 bits
-= APINT_BITS_PER_WORD
;
2647 dst
[i
++] = ~(WordType
) 0 >> (APINT_BITS_PER_WORD
- bits
);
2653 APInt
llvm::APIntOps::RoundingUDiv(const APInt
&A
, const APInt
&B
,
2654 APInt::Rounding RM
) {
2655 // Currently udivrem always rounds down.
2657 case APInt::Rounding::DOWN
:
2658 case APInt::Rounding::TOWARD_ZERO
:
2660 case APInt::Rounding::UP
: {
2662 APInt::udivrem(A
, B
, Quo
, Rem
);
2668 llvm_unreachable("Unknown APInt::Rounding enum");
2671 APInt
llvm::APIntOps::RoundingSDiv(const APInt
&A
, const APInt
&B
,
2672 APInt::Rounding RM
) {
2674 case APInt::Rounding::DOWN
:
2675 case APInt::Rounding::UP
: {
2677 APInt::sdivrem(A
, B
, Quo
, Rem
);
2680 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2681 // We want to check whether the non-integer part of the mathematical value
2682 // is negative or not. If the non-integer part is negative, we need to round
2683 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2684 // already rounded down.
2685 if (RM
== APInt::Rounding::DOWN
) {
2686 if (Rem
.isNegative() != B
.isNegative())
2690 if (Rem
.isNegative() != B
.isNegative())
2694 // Currently sdiv rounds twards zero.
2695 case APInt::Rounding::TOWARD_ZERO
:
2698 llvm_unreachable("Unknown APInt::Rounding enum");
2702 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A
, APInt B
, APInt C
,
2703 unsigned RangeWidth
) {
2704 unsigned CoeffWidth
= A
.getBitWidth();
2705 assert(CoeffWidth
== B
.getBitWidth() && CoeffWidth
== C
.getBitWidth());
2706 assert(RangeWidth
<= CoeffWidth
&&
2707 "Value range width should be less than coefficient width");
2708 assert(RangeWidth
> 1 && "Value range bit width should be > 1");
2710 LLVM_DEBUG(dbgs() << __func__
<< ": solving " << A
<< "x^2 + " << B
2711 << "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2713 // Identify 0 as a (non)solution immediately.
2714 if (C
.sextOrTrunc(RangeWidth
).isNullValue() ) {
2715 LLVM_DEBUG(dbgs() << __func__
<< ": zero solution\n");
2716 return APInt(CoeffWidth
, 0);
2719 // The result of APInt arithmetic has the same bit width as the operands,
2720 // so it can actually lose high bits. A product of two n-bit integers needs
2721 // 2n-1 bits to represent the full value.
2722 // The operation done below (on quadratic coefficients) that can produce
2723 // the largest value is the evaluation of the equation during bisection,
2724 // which needs 3 times the bitwidth of the coefficient, so the total number
2725 // of required bits is 3n.
2727 // The purpose of this extension is to simulate the set Z of all integers,
2728 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2729 // and negative numbers (not so much in a modulo arithmetic). The method
2730 // used to solve the equation is based on the standard formula for real
2731 // numbers, and uses the concepts of "positive" and "negative" with their
2734 A
= A
.sext(CoeffWidth
);
2735 B
= B
.sext(CoeffWidth
);
2736 C
= C
.sext(CoeffWidth
);
2738 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2739 // the bit width has increased.
2740 if (A
.isNegative()) {
2746 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2747 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2748 // and R = 2^BitWidth.
2749 // Since we're trying not only to find exact solutions, but also values
2750 // that "wrap around", such a set will always have a solution, i.e. an x
2751 // that satisfies at least one of the equations, or such that |q(x)|
2752 // exceeds kR, while |q(x-1)| for the same k does not.
2754 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2755 // positive solution n (in the above sense), and also such that the n
2756 // will be the least among all solutions corresponding to k = 0, 1, ...
2757 // (more precisely, the least element in the set
2758 // { n(k) | k is such that a solution n(k) exists }).
2760 // Consider the parabola (over real numbers) that corresponds to the
2761 // quadratic equation. Since A > 0, the arms of the parabola will point
2762 // up. Picking different values of k will shift it up and down by R.
2764 // We want to shift the parabola in such a way as to reduce the problem
2765 // of solving q(x) = kR to solving shifted_q(x) = 0.
2766 // (The interesting solutions are the ceilings of the real number
2768 APInt R
= APInt::getOneBitSet(CoeffWidth
, RangeWidth
);
2773 auto RoundUp
= [] (const APInt
&V
, const APInt
&A
) -> APInt
{
2774 assert(A
.isStrictlyPositive());
2775 APInt T
= V
.abs().urem(A
);
2776 if (T
.isNullValue())
2778 return V
.isNegative() ? V
+T
: V
+(A
-T
);
2781 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2782 // iff B is positive.
2783 if (B
.isNonNegative()) {
2784 // If B >= 0, the vertex it at a negative location (or at 0), so in
2785 // order to have a non-negative solution we need to pick k that makes
2786 // C-kR negative. To satisfy all the requirements for the solution
2787 // that we are looking for, it needs to be closest to 0 of all k.
2789 if (C
.isStrictlyPositive())
2791 // Pick the greater solution.
2794 // If B < 0, the vertex is at a positive location. For any solution
2795 // to exist, the discriminant must be non-negative. This means that
2796 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2797 // lower bound on values of k: kR >= C - B^2/4A.
2798 APInt LowkR
= C
- SqrB
.udiv(2*TwoA
); // udiv because all values > 0.
2799 // Round LowkR up (towards +inf) to the nearest kR.
2800 LowkR
= RoundUp(LowkR
, R
);
2802 // If there exists k meeting the condition above, and such that
2803 // C-kR > 0, there will be two positive real number solutions of
2804 // q(x) = kR. Out of all such values of k, pick the one that makes
2805 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2806 // In other words, find maximum k such that LowkR <= kR < C.
2808 // If LowkR < C, then such a k is guaranteed to exist because
2809 // LowkR itself is a multiple of R.
2810 C
-= -RoundUp(-C
, R
); // C = C - RoundDown(C, R)
2811 // Pick the smaller solution.
2814 // If C-kR < 0 for all potential k's, it means that one solution
2815 // will be negative, while the other will be positive. The positive
2816 // solution will shift towards 0 if the parabola is moved up.
2817 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2818 // to 0, or in other words, out of all parabolas that have solutions,
2819 // pick the one that is the farthest "up").
2820 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2822 // Pick the greater solution.
2827 LLVM_DEBUG(dbgs() << __func__
<< ": updated coefficients " << A
<< "x^2 + "
2828 << B
<< "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2830 APInt D
= SqrB
- 4*A
*C
;
2831 assert(D
.isNonNegative() && "Negative discriminant");
2832 APInt SQ
= D
.sqrt();
2835 bool InexactSQ
= Q
!= D
;
2836 // The calculated SQ may actually be greater than the exact (non-integer)
2837 // value. If that's the case, decremement SQ to get a value that is lower.
2844 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2845 // When using the quadratic formula directly, the calculated low root
2846 // may be greater than the exact one, since we would be subtracting SQ.
2847 // To make sure that the calculated root is not greater than the exact
2848 // one, subtract SQ+1 when calculating the low root (for inexact value
2851 APInt::sdivrem(-B
- (SQ
+InexactSQ
), TwoA
, X
, Rem
);
2853 APInt::sdivrem(-B
+ SQ
, TwoA
, X
, Rem
);
2855 // The updated coefficients should be such that the (exact) solution is
2856 // positive. Since APInt division rounds towards 0, the calculated one
2857 // can be 0, but cannot be negative.
2858 assert(X
.isNonNegative() && "Solution should be non-negative");
2860 if (!InexactSQ
&& Rem
.isNullValue()) {
2861 LLVM_DEBUG(dbgs() << __func__
<< ": solution (root): " << X
<< '\n');
2865 assert((SQ
*SQ
).sle(D
) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2866 // The exact value of the square root of D should be between SQ and SQ+1.
2867 // This implies that the solution should be between that corresponding to
2868 // SQ (i.e. X) and that corresponding to SQ+1.
2870 // The calculated X cannot be greater than the exact (real) solution.
2871 // Actually it must be strictly less than the exact solution, while
2872 // X+1 will be greater than or equal to it.
2874 APInt VX
= (A
*X
+ B
)*X
+ C
;
2875 APInt VY
= VX
+ TwoA
*X
+ A
+ B
;
2876 bool SignChange
= VX
.isNegative() != VY
.isNegative() ||
2877 VX
.isNullValue() != VY
.isNullValue();
2878 // If the sign did not change between X and X+1, X is not a valid solution.
2879 // This could happen when the actual (exact) roots don't have an integer
2880 // between them, so they would both be contained between X and X+1.
2882 LLVM_DEBUG(dbgs() << __func__
<< ": no valid solution\n");
2887 LLVM_DEBUG(dbgs() << __func__
<< ": solution (wrap): " << X
<< '\n');