1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
33 #define DEBUG_TYPE "apint"
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords
) {
38 uint64_t *result
= new uint64_t[numWords
];
39 memset(result
, 0, numWords
* sizeof(uint64_t));
43 /// A utility function for allocating memory and checking for allocation
44 /// failure. The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords
) {
46 return new uint64_t[numWords
];
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit
, uint8_t radix
) {
53 if (radix
== 16 || radix
== 36) {
77 void APInt::initSlowCase(uint64_t val
, bool isSigned
) {
78 U
.pVal
= getClearedMemory(getNumWords());
80 if (isSigned
&& int64_t(val
) < 0)
81 for (unsigned i
= 1; i
< getNumWords(); ++i
)
82 U
.pVal
[i
] = WORDTYPE_MAX
;
86 void APInt::initSlowCase(const APInt
& that
) {
87 U
.pVal
= getMemory(getNumWords());
88 memcpy(U
.pVal
, that
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
91 void APInt::initFromArray(ArrayRef
<uint64_t> bigVal
) {
92 assert(BitWidth
&& "Bitwidth too small");
93 assert(bigVal
.data() && "Null pointer detected!");
97 // Get memory, cleared to 0
98 U
.pVal
= getClearedMemory(getNumWords());
99 // Calculate the number of words to copy
100 unsigned words
= std::min
<unsigned>(bigVal
.size(), getNumWords());
101 // Copy the words from bigVal to pVal
102 memcpy(U
.pVal
, bigVal
.data(), words
* APINT_WORD_SIZE
);
104 // Make sure unused high bits are cleared
108 APInt::APInt(unsigned numBits
, ArrayRef
<uint64_t> bigVal
)
109 : BitWidth(numBits
) {
110 initFromArray(bigVal
);
113 APInt::APInt(unsigned numBits
, unsigned numWords
, const uint64_t bigVal
[])
114 : BitWidth(numBits
) {
115 initFromArray(makeArrayRef(bigVal
, numWords
));
118 APInt::APInt(unsigned numbits
, StringRef Str
, uint8_t radix
)
119 : BitWidth(numbits
) {
120 assert(BitWidth
&& "Bitwidth too small");
121 fromString(numbits
, Str
, radix
);
124 void APInt::reallocate(unsigned NewBitWidth
) {
125 // If the number of words is the same we can just change the width and stop.
126 if (getNumWords() == getNumWords(NewBitWidth
)) {
127 BitWidth
= NewBitWidth
;
131 // If we have an allocation, delete it.
136 BitWidth
= NewBitWidth
;
138 // If we are supposed to have an allocation, create it.
140 U
.pVal
= getMemory(getNumWords());
143 void APInt::AssignSlowCase(const APInt
& RHS
) {
144 // Don't do anything for X = X
148 // Adjust the bit width and handle allocations as necessary.
149 reallocate(RHS
.getBitWidth());
155 memcpy(U
.pVal
, RHS
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID
& ID
) const {
160 ID
.AddInteger(BitWidth
);
162 if (isSingleWord()) {
163 ID
.AddInteger(U
.VAL
);
167 unsigned NumWords
= getNumWords();
168 for (unsigned i
= 0; i
< NumWords
; ++i
)
169 ID
.AddInteger(U
.pVal
[i
]);
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt
& APInt::operator++() {
177 tcIncrement(U
.pVal
, getNumWords());
178 return clearUnusedBits();
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt
& APInt::operator--() {
186 tcDecrement(U
.pVal
, getNumWords());
187 return clearUnusedBits();
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt
& APInt::operator+=(const APInt
& RHS
) {
194 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
198 tcAdd(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
199 return clearUnusedBits();
202 APInt
& APInt::operator+=(uint64_t RHS
) {
206 tcAddPart(U
.pVal
, RHS
, getNumWords());
207 return clearUnusedBits();
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt
& APInt::operator-=(const APInt
& RHS
) {
214 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
218 tcSubtract(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
219 return clearUnusedBits();
222 APInt
& APInt::operator-=(uint64_t RHS
) {
226 tcSubtractPart(U
.pVal
, RHS
, getNumWords());
227 return clearUnusedBits();
230 APInt
APInt::operator*(const APInt
& RHS
) const {
231 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
233 return APInt(BitWidth
, U
.VAL
* RHS
.U
.VAL
);
235 APInt
Result(getMemory(getNumWords()), getBitWidth());
237 tcMultiply(Result
.U
.pVal
, U
.pVal
, RHS
.U
.pVal
, getNumWords());
239 Result
.clearUnusedBits();
243 void APInt::AndAssignSlowCase(const APInt
& RHS
) {
244 tcAnd(U
.pVal
, RHS
.U
.pVal
, getNumWords());
247 void APInt::OrAssignSlowCase(const APInt
& RHS
) {
248 tcOr(U
.pVal
, RHS
.U
.pVal
, getNumWords());
251 void APInt::XorAssignSlowCase(const APInt
& RHS
) {
252 tcXor(U
.pVal
, RHS
.U
.pVal
, getNumWords());
255 APInt
& APInt::operator*=(const APInt
& RHS
) {
256 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
261 APInt
& APInt::operator*=(uint64_t RHS
) {
262 if (isSingleWord()) {
265 unsigned NumWords
= getNumWords();
266 tcMultiplyPart(U
.pVal
, U
.pVal
, RHS
, 0, NumWords
, NumWords
, false);
268 return clearUnusedBits();
271 bool APInt::EqualSlowCase(const APInt
& RHS
) const {
272 return std::equal(U
.pVal
, U
.pVal
+ getNumWords(), RHS
.U
.pVal
);
275 int APInt::compare(const APInt
& RHS
) const {
276 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
278 return U
.VAL
< RHS
.U
.VAL
? -1 : U
.VAL
> RHS
.U
.VAL
;
280 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
283 int APInt::compareSigned(const APInt
& RHS
) const {
284 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
285 if (isSingleWord()) {
286 int64_t lhsSext
= SignExtend64(U
.VAL
, BitWidth
);
287 int64_t rhsSext
= SignExtend64(RHS
.U
.VAL
, BitWidth
);
288 return lhsSext
< rhsSext
? -1 : lhsSext
> rhsSext
;
291 bool lhsNeg
= isNegative();
292 bool rhsNeg
= RHS
.isNegative();
294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295 if (lhsNeg
!= rhsNeg
)
296 return lhsNeg
? -1 : 1;
298 // Otherwise we can just use an unsigned comparison, because even negative
299 // numbers compare correctly this way if both have the same signed-ness.
300 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
303 void APInt::setBitsSlowCase(unsigned loBit
, unsigned hiBit
) {
304 unsigned loWord
= whichWord(loBit
);
305 unsigned hiWord
= whichWord(hiBit
);
307 // Create an initial mask for the low word with zeros below loBit.
308 uint64_t loMask
= WORDTYPE_MAX
<< whichBit(loBit
);
310 // If hiBit is not aligned, we need a high mask.
311 unsigned hiShiftAmt
= whichBit(hiBit
);
312 if (hiShiftAmt
!= 0) {
313 // Create a high mask with zeros above hiBit.
314 uint64_t hiMask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- hiShiftAmt
);
315 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316 // set the bits in hiWord.
317 if (hiWord
== loWord
)
320 U
.pVal
[hiWord
] |= hiMask
;
322 // Apply the mask to the low word.
323 U
.pVal
[loWord
] |= loMask
;
325 // Fill any words between loWord and hiWord with all ones.
326 for (unsigned word
= loWord
+ 1; word
< hiWord
; ++word
)
327 U
.pVal
[word
] = WORDTYPE_MAX
;
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332 tcComplement(U
.pVal
, getNumWords());
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition
) {
340 assert(bitPosition
< BitWidth
&& "Out of the bit-width range!");
341 if ((*this)[bitPosition
]) clearBit(bitPosition
);
342 else setBit(bitPosition
);
345 void APInt::insertBits(const APInt
&subBits
, unsigned bitPosition
) {
346 unsigned subBitWidth
= subBits
.getBitWidth();
347 assert(0 < subBitWidth
&& (subBitWidth
+ bitPosition
) <= BitWidth
&&
348 "Illegal bit insertion");
350 // Insertion is a direct copy.
351 if (subBitWidth
== BitWidth
) {
356 // Single word result can be done as a direct bitmask.
357 if (isSingleWord()) {
358 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
359 U
.VAL
&= ~(mask
<< bitPosition
);
360 U
.VAL
|= (subBits
.U
.VAL
<< bitPosition
);
364 unsigned loBit
= whichBit(bitPosition
);
365 unsigned loWord
= whichWord(bitPosition
);
366 unsigned hi1Word
= whichWord(bitPosition
+ subBitWidth
- 1);
368 // Insertion within a single word can be done as a direct bitmask.
369 if (loWord
== hi1Word
) {
370 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
371 U
.pVal
[loWord
] &= ~(mask
<< loBit
);
372 U
.pVal
[loWord
] |= (subBits
.U
.VAL
<< loBit
);
376 // Insert on word boundaries.
378 // Direct copy whole words.
379 unsigned numWholeSubWords
= subBitWidth
/ APINT_BITS_PER_WORD
;
380 memcpy(U
.pVal
+ loWord
, subBits
.getRawData(),
381 numWholeSubWords
* APINT_WORD_SIZE
);
383 // Mask+insert remaining bits.
384 unsigned remainingBits
= subBitWidth
% APINT_BITS_PER_WORD
;
385 if (remainingBits
!= 0) {
386 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- remainingBits
);
387 U
.pVal
[hi1Word
] &= ~mask
;
388 U
.pVal
[hi1Word
] |= subBits
.getWord(subBitWidth
- 1);
393 // General case - set/clear individual bits in dst based on src.
394 // TODO - there is scope for optimization here, but at the moment this code
395 // path is barely used so prefer readability over performance.
396 for (unsigned i
= 0; i
!= subBitWidth
; ++i
) {
398 setBit(bitPosition
+ i
);
400 clearBit(bitPosition
+ i
);
404 void APInt::insertBits(uint64_t subBits
, unsigned bitPosition
, unsigned numBits
) {
405 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
407 if (isSingleWord()) {
408 U
.VAL
&= ~(maskBits
<< bitPosition
);
409 U
.VAL
|= subBits
<< bitPosition
;
413 unsigned loBit
= whichBit(bitPosition
);
414 unsigned loWord
= whichWord(bitPosition
);
415 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
416 if (loWord
== hiWord
) {
417 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
418 U
.pVal
[loWord
] |= subBits
<< loBit
;
422 static_assert(8 * sizeof(WordType
) <= 64, "This code assumes only two words affected");
423 unsigned wordBits
= 8 * sizeof(WordType
);
424 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
425 U
.pVal
[loWord
] |= subBits
<< loBit
;
427 U
.pVal
[hiWord
] &= ~(maskBits
>> (wordBits
- loBit
));
428 U
.pVal
[hiWord
] |= subBits
>> (wordBits
- loBit
);
431 APInt
APInt::extractBits(unsigned numBits
, unsigned bitPosition
) const {
432 assert(numBits
> 0 && "Can't extract zero bits");
433 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
434 "Illegal bit extraction");
437 return APInt(numBits
, U
.VAL
>> bitPosition
);
439 unsigned loBit
= whichBit(bitPosition
);
440 unsigned loWord
= whichWord(bitPosition
);
441 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
443 // Single word result extracting bits from a single word source.
444 if (loWord
== hiWord
)
445 return APInt(numBits
, U
.pVal
[loWord
] >> loBit
);
447 // Extracting bits that start on a source word boundary can be done
448 // as a fast memory copy.
450 return APInt(numBits
, makeArrayRef(U
.pVal
+ loWord
, 1 + hiWord
- loWord
));
452 // General case - shift + copy source words directly into place.
453 APInt
Result(numBits
, 0);
454 unsigned NumSrcWords
= getNumWords();
455 unsigned NumDstWords
= Result
.getNumWords();
457 uint64_t *DestPtr
= Result
.isSingleWord() ? &Result
.U
.VAL
: Result
.U
.pVal
;
458 for (unsigned word
= 0; word
< NumDstWords
; ++word
) {
459 uint64_t w0
= U
.pVal
[loWord
+ word
];
461 (loWord
+ word
+ 1) < NumSrcWords
? U
.pVal
[loWord
+ word
+ 1] : 0;
462 DestPtr
[word
] = (w0
>> loBit
) | (w1
<< (APINT_BITS_PER_WORD
- loBit
));
465 return Result
.clearUnusedBits();
468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits
,
469 unsigned bitPosition
) const {
470 assert(numBits
> 0 && "Can't extract zero bits");
471 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
472 "Illegal bit extraction");
473 assert(numBits
<= 64 && "Illegal bit extraction");
475 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
477 return (U
.VAL
>> bitPosition
) & maskBits
;
479 unsigned loBit
= whichBit(bitPosition
);
480 unsigned loWord
= whichWord(bitPosition
);
481 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
482 if (loWord
== hiWord
)
483 return (U
.pVal
[loWord
] >> loBit
) & maskBits
;
485 static_assert(8 * sizeof(WordType
) <= 64, "This code assumes only two words affected");
486 unsigned wordBits
= 8 * sizeof(WordType
);
487 uint64_t retBits
= U
.pVal
[loWord
] >> loBit
;
488 retBits
|= U
.pVal
[hiWord
] << (wordBits
- loBit
);
493 unsigned APInt::getBitsNeeded(StringRef str
, uint8_t radix
) {
494 assert(!str
.empty() && "Invalid string length");
495 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
497 "Radix should be 2, 8, 10, 16, or 36!");
499 size_t slen
= str
.size();
501 // Each computation below needs to know if it's negative.
502 StringRef::iterator p
= str
.begin();
503 unsigned isNegative
= *p
== '-';
504 if (*p
== '-' || *p
== '+') {
507 assert(slen
&& "String is only a sign, needs a value.");
510 // For radixes of power-of-two values, the bits required is accurately and
513 return slen
+ isNegative
;
515 return slen
* 3 + isNegative
;
517 return slen
* 4 + isNegative
;
521 // This is grossly inefficient but accurate. We could probably do something
522 // with a computation of roughly slen*64/20 and then adjust by the value of
523 // the first few digits. But, I'm not sure how accurate that could be.
525 // Compute a sufficient number of bits that is always large enough but might
526 // be too large. This avoids the assertion in the constructor. This
527 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528 // bits in that case.
530 = radix
== 10? (slen
== 1 ? 4 : slen
* 64/18)
531 : (slen
== 1 ? 7 : slen
* 16/3);
533 // Convert to the actual binary value.
534 APInt
tmp(sufficient
, StringRef(p
, slen
), radix
);
536 // Compute how many bits are required. If the log is infinite, assume we need
537 // just bit. If the log is exact and value is negative, then the value is
538 // MinSignedValue with (log + 1) bits.
539 unsigned log
= tmp
.logBase2();
540 if (log
== (unsigned)-1) {
541 return isNegative
+ 1;
542 } else if (isNegative
&& tmp
.isPowerOf2()) {
543 return isNegative
+ log
;
545 return isNegative
+ log
+ 1;
549 hash_code
llvm::hash_value(const APInt
&Arg
) {
550 if (Arg
.isSingleWord())
551 return hash_combine(Arg
.U
.VAL
);
553 return hash_combine_range(Arg
.U
.pVal
, Arg
.U
.pVal
+ Arg
.getNumWords());
556 bool APInt::isSplat(unsigned SplatSizeInBits
) const {
557 assert(getBitWidth() % SplatSizeInBits
== 0 &&
558 "SplatSizeInBits must divide width!");
559 // We can check that all parts of an integer are equal by making use of a
560 // little trick: rotate and check if it's still the same value.
561 return *this == rotl(SplatSizeInBits
);
564 /// This function returns the high "numBits" bits of this APInt.
565 APInt
APInt::getHiBits(unsigned numBits
) const {
566 return this->lshr(BitWidth
- numBits
);
569 /// This function returns the low "numBits" bits of this APInt.
570 APInt
APInt::getLoBits(unsigned numBits
) const {
571 APInt
Result(getLowBitsSet(BitWidth
, numBits
));
576 /// Return a value containing V broadcasted over NewLen bits.
577 APInt
APInt::getSplat(unsigned NewLen
, const APInt
&V
) {
578 assert(NewLen
>= V
.getBitWidth() && "Can't splat to smaller bit width!");
580 APInt Val
= V
.zextOrSelf(NewLen
);
581 for (unsigned I
= V
.getBitWidth(); I
< NewLen
; I
<<= 1)
587 unsigned APInt::countLeadingZerosSlowCase() const {
589 for (int i
= getNumWords()-1; i
>= 0; --i
) {
590 uint64_t V
= U
.pVal
[i
];
592 Count
+= APINT_BITS_PER_WORD
;
594 Count
+= llvm::countLeadingZeros(V
);
598 // Adjust for unused bits in the most significant word (they are zero).
599 unsigned Mod
= BitWidth
% APINT_BITS_PER_WORD
;
600 Count
-= Mod
> 0 ? APINT_BITS_PER_WORD
- Mod
: 0;
604 unsigned APInt::countLeadingOnesSlowCase() const {
605 unsigned highWordBits
= BitWidth
% APINT_BITS_PER_WORD
;
608 highWordBits
= APINT_BITS_PER_WORD
;
611 shift
= APINT_BITS_PER_WORD
- highWordBits
;
613 int i
= getNumWords() - 1;
614 unsigned Count
= llvm::countLeadingOnes(U
.pVal
[i
] << shift
);
615 if (Count
== highWordBits
) {
616 for (i
--; i
>= 0; --i
) {
617 if (U
.pVal
[i
] == WORDTYPE_MAX
)
618 Count
+= APINT_BITS_PER_WORD
;
620 Count
+= llvm::countLeadingOnes(U
.pVal
[i
]);
628 unsigned APInt::countTrailingZerosSlowCase() const {
631 for (; i
< getNumWords() && U
.pVal
[i
] == 0; ++i
)
632 Count
+= APINT_BITS_PER_WORD
;
633 if (i
< getNumWords())
634 Count
+= llvm::countTrailingZeros(U
.pVal
[i
]);
635 return std::min(Count
, BitWidth
);
638 unsigned APInt::countTrailingOnesSlowCase() const {
641 for (; i
< getNumWords() && U
.pVal
[i
] == WORDTYPE_MAX
; ++i
)
642 Count
+= APINT_BITS_PER_WORD
;
643 if (i
< getNumWords())
644 Count
+= llvm::countTrailingOnes(U
.pVal
[i
]);
645 assert(Count
<= BitWidth
);
649 unsigned APInt::countPopulationSlowCase() const {
651 for (unsigned i
= 0; i
< getNumWords(); ++i
)
652 Count
+= llvm::countPopulation(U
.pVal
[i
]);
656 bool APInt::intersectsSlowCase(const APInt
&RHS
) const {
657 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
658 if ((U
.pVal
[i
] & RHS
.U
.pVal
[i
]) != 0)
664 bool APInt::isSubsetOfSlowCase(const APInt
&RHS
) const {
665 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
666 if ((U
.pVal
[i
] & ~RHS
.U
.pVal
[i
]) != 0)
672 APInt
APInt::byteSwap() const {
673 assert(BitWidth
>= 16 && BitWidth
% 16 == 0 && "Cannot byteswap!");
675 return APInt(BitWidth
, ByteSwap_16(uint16_t(U
.VAL
)));
677 return APInt(BitWidth
, ByteSwap_32(unsigned(U
.VAL
)));
678 if (BitWidth
== 48) {
679 unsigned Tmp1
= unsigned(U
.VAL
>> 16);
680 Tmp1
= ByteSwap_32(Tmp1
);
681 uint16_t Tmp2
= uint16_t(U
.VAL
);
682 Tmp2
= ByteSwap_16(Tmp2
);
683 return APInt(BitWidth
, (uint64_t(Tmp2
) << 32) | Tmp1
);
686 return APInt(BitWidth
, ByteSwap_64(U
.VAL
));
688 APInt
Result(getNumWords() * APINT_BITS_PER_WORD
, 0);
689 for (unsigned I
= 0, N
= getNumWords(); I
!= N
; ++I
)
690 Result
.U
.pVal
[I
] = ByteSwap_64(U
.pVal
[N
- I
- 1]);
691 if (Result
.BitWidth
!= BitWidth
) {
692 Result
.lshrInPlace(Result
.BitWidth
- BitWidth
);
693 Result
.BitWidth
= BitWidth
;
698 APInt
APInt::reverseBits() const {
701 return APInt(BitWidth
, llvm::reverseBits
<uint64_t>(U
.VAL
));
703 return APInt(BitWidth
, llvm::reverseBits
<uint32_t>(U
.VAL
));
705 return APInt(BitWidth
, llvm::reverseBits
<uint16_t>(U
.VAL
));
707 return APInt(BitWidth
, llvm::reverseBits
<uint8_t>(U
.VAL
));
713 APInt
Reversed(BitWidth
, 0);
714 unsigned S
= BitWidth
;
716 for (; Val
!= 0; Val
.lshrInPlace(1)) {
726 APInt
llvm::APIntOps::GreatestCommonDivisor(APInt A
, APInt B
) {
727 // Fast-path a common case.
728 if (A
== B
) return A
;
730 // Corner cases: if either operand is zero, the other is the gcd.
734 // Count common powers of 2 and remove all other powers of 2.
737 unsigned Pow2_A
= A
.countTrailingZeros();
738 unsigned Pow2_B
= B
.countTrailingZeros();
739 if (Pow2_A
> Pow2_B
) {
740 A
.lshrInPlace(Pow2_A
- Pow2_B
);
742 } else if (Pow2_B
> Pow2_A
) {
743 B
.lshrInPlace(Pow2_B
- Pow2_A
);
750 // Both operands are odd multiples of 2^Pow_2:
752 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
754 // This is a modified version of Stein's algorithm, taking advantage of
755 // efficient countTrailingZeros().
759 A
.lshrInPlace(A
.countTrailingZeros() - Pow2
);
762 B
.lshrInPlace(B
.countTrailingZeros() - Pow2
);
769 APInt
llvm::APIntOps::RoundDoubleToAPInt(double Double
, unsigned width
) {
770 uint64_t I
= bit_cast
<uint64_t>(Double
);
772 // Get the sign bit from the highest order bit
773 bool isNeg
= I
>> 63;
775 // Get the 11-bit exponent and adjust for the 1023 bit bias
776 int64_t exp
= ((I
>> 52) & 0x7ff) - 1023;
778 // If the exponent is negative, the value is < 0 so just return 0.
780 return APInt(width
, 0u);
782 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
783 uint64_t mantissa
= (I
& (~0ULL >> 12)) | 1ULL << 52;
785 // If the exponent doesn't shift all bits out of the mantissa
787 return isNeg
? -APInt(width
, mantissa
>> (52 - exp
)) :
788 APInt(width
, mantissa
>> (52 - exp
));
790 // If the client didn't provide enough bits for us to shift the mantissa into
791 // then the result is undefined, just return 0
792 if (width
<= exp
- 52)
793 return APInt(width
, 0);
795 // Otherwise, we have to shift the mantissa bits up to the right location
796 APInt
Tmp(width
, mantissa
);
797 Tmp
<<= (unsigned)exp
- 52;
798 return isNeg
? -Tmp
: Tmp
;
801 /// This function converts this APInt to a double.
802 /// The layout for double is as following (IEEE Standard 754):
803 /// --------------------------------------
804 /// | Sign Exponent Fraction Bias |
805 /// |-------------------------------------- |
806 /// | 1[63] 11[62-52] 52[51-00] 1023 |
807 /// --------------------------------------
808 double APInt::roundToDouble(bool isSigned
) const {
810 // Handle the simple case where the value is contained in one uint64_t.
811 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
812 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD
) {
814 int64_t sext
= SignExtend64(getWord(0), BitWidth
);
817 return double(getWord(0));
820 // Determine if the value is negative.
821 bool isNeg
= isSigned
? (*this)[BitWidth
-1] : false;
823 // Construct the absolute value if we're negative.
824 APInt
Tmp(isNeg
? -(*this) : (*this));
826 // Figure out how many bits we're using.
827 unsigned n
= Tmp
.getActiveBits();
829 // The exponent (without bias normalization) is just the number of bits
830 // we are using. Note that the sign bit is gone since we constructed the
834 // Return infinity for exponent overflow
836 if (!isSigned
|| !isNeg
)
837 return std::numeric_limits
<double>::infinity();
839 return -std::numeric_limits
<double>::infinity();
841 exp
+= 1023; // Increment for 1023 bias
843 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
844 // extract the high 52 bits from the correct words in pVal.
846 unsigned hiWord
= whichWord(n
-1);
848 mantissa
= Tmp
.U
.pVal
[0];
850 mantissa
>>= n
- 52; // shift down, we want the top 52 bits.
852 assert(hiWord
> 0 && "huh?");
853 uint64_t hibits
= Tmp
.U
.pVal
[hiWord
] << (52 - n
% APINT_BITS_PER_WORD
);
854 uint64_t lobits
= Tmp
.U
.pVal
[hiWord
-1] >> (11 + n
% APINT_BITS_PER_WORD
);
855 mantissa
= hibits
| lobits
;
858 // The leading bit of mantissa is implicit, so get rid of it.
859 uint64_t sign
= isNeg
? (1ULL << (APINT_BITS_PER_WORD
- 1)) : 0;
860 uint64_t I
= sign
| (exp
<< 52) | mantissa
;
861 return bit_cast
<double>(I
);
864 // Truncate to new width.
865 APInt
APInt::trunc(unsigned width
) const {
866 assert(width
< BitWidth
&& "Invalid APInt Truncate request");
867 assert(width
&& "Can't truncate to 0 bits");
869 if (width
<= APINT_BITS_PER_WORD
)
870 return APInt(width
, getRawData()[0]);
872 APInt
Result(getMemory(getNumWords(width
)), width
);
876 for (i
= 0; i
!= width
/ APINT_BITS_PER_WORD
; i
++)
877 Result
.U
.pVal
[i
] = U
.pVal
[i
];
879 // Truncate and copy any partial word.
880 unsigned bits
= (0 - width
) % APINT_BITS_PER_WORD
;
882 Result
.U
.pVal
[i
] = U
.pVal
[i
] << bits
>> bits
;
887 // Sign extend to a new width.
888 APInt
APInt::sext(unsigned Width
) const {
889 assert(Width
> BitWidth
&& "Invalid APInt SignExtend request");
891 if (Width
<= APINT_BITS_PER_WORD
)
892 return APInt(Width
, SignExtend64(U
.VAL
, BitWidth
));
894 APInt
Result(getMemory(getNumWords(Width
)), Width
);
897 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
899 // Sign extend the last word since there may be unused bits in the input.
900 Result
.U
.pVal
[getNumWords() - 1] =
901 SignExtend64(Result
.U
.pVal
[getNumWords() - 1],
902 ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
904 // Fill with sign bits.
905 std::memset(Result
.U
.pVal
+ getNumWords(), isNegative() ? -1 : 0,
906 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
907 Result
.clearUnusedBits();
911 // Zero extend to a new width.
912 APInt
APInt::zext(unsigned width
) const {
913 assert(width
> BitWidth
&& "Invalid APInt ZeroExtend request");
915 if (width
<= APINT_BITS_PER_WORD
)
916 return APInt(width
, U
.VAL
);
918 APInt
Result(getMemory(getNumWords(width
)), width
);
921 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
923 // Zero remaining words.
924 std::memset(Result
.U
.pVal
+ getNumWords(), 0,
925 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
930 APInt
APInt::zextOrTrunc(unsigned width
) const {
931 if (BitWidth
< width
)
933 if (BitWidth
> width
)
938 APInt
APInt::sextOrTrunc(unsigned width
) const {
939 if (BitWidth
< width
)
941 if (BitWidth
> width
)
946 APInt
APInt::zextOrSelf(unsigned width
) const {
947 if (BitWidth
< width
)
952 APInt
APInt::sextOrSelf(unsigned width
) const {
953 if (BitWidth
< width
)
958 /// Arithmetic right-shift this APInt by shiftAmt.
959 /// Arithmetic right-shift function.
960 void APInt::ashrInPlace(const APInt
&shiftAmt
) {
961 ashrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
964 /// Arithmetic right-shift this APInt by shiftAmt.
965 /// Arithmetic right-shift function.
966 void APInt::ashrSlowCase(unsigned ShiftAmt
) {
967 // Don't bother performing a no-op shift.
971 // Save the original sign bit for later.
972 bool Negative
= isNegative();
974 // WordShift is the inter-part shift; BitShift is intra-part shift.
975 unsigned WordShift
= ShiftAmt
/ APINT_BITS_PER_WORD
;
976 unsigned BitShift
= ShiftAmt
% APINT_BITS_PER_WORD
;
978 unsigned WordsToMove
= getNumWords() - WordShift
;
979 if (WordsToMove
!= 0) {
980 // Sign extend the last word to fill in the unused bits.
981 U
.pVal
[getNumWords() - 1] = SignExtend64(
982 U
.pVal
[getNumWords() - 1], ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
984 // Fastpath for moving by whole words.
986 std::memmove(U
.pVal
, U
.pVal
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
988 // Move the words containing significant bits.
989 for (unsigned i
= 0; i
!= WordsToMove
- 1; ++i
)
990 U
.pVal
[i
] = (U
.pVal
[i
+ WordShift
] >> BitShift
) |
991 (U
.pVal
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
));
993 // Handle the last word which has no high bits to copy.
994 U
.pVal
[WordsToMove
- 1] = U
.pVal
[WordShift
+ WordsToMove
- 1] >> BitShift
;
995 // Sign extend one more time.
996 U
.pVal
[WordsToMove
- 1] =
997 SignExtend64(U
.pVal
[WordsToMove
- 1], APINT_BITS_PER_WORD
- BitShift
);
1001 // Fill in the remainder based on the original sign.
1002 std::memset(U
.pVal
+ WordsToMove
, Negative
? -1 : 0,
1003 WordShift
* APINT_WORD_SIZE
);
1007 /// Logical right-shift this APInt by shiftAmt.
1008 /// Logical right-shift function.
1009 void APInt::lshrInPlace(const APInt
&shiftAmt
) {
1010 lshrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
1013 /// Logical right-shift this APInt by shiftAmt.
1014 /// Logical right-shift function.
1015 void APInt::lshrSlowCase(unsigned ShiftAmt
) {
1016 tcShiftRight(U
.pVal
, getNumWords(), ShiftAmt
);
1019 /// Left-shift this APInt by shiftAmt.
1020 /// Left-shift function.
1021 APInt
&APInt::operator<<=(const APInt
&shiftAmt
) {
1022 // It's undefined behavior in C to shift by BitWidth or greater.
1023 *this <<= (unsigned)shiftAmt
.getLimitedValue(BitWidth
);
1027 void APInt::shlSlowCase(unsigned ShiftAmt
) {
1028 tcShiftLeft(U
.pVal
, getNumWords(), ShiftAmt
);
1032 // Calculate the rotate amount modulo the bit width.
1033 static unsigned rotateModulo(unsigned BitWidth
, const APInt
&rotateAmt
) {
1034 unsigned rotBitWidth
= rotateAmt
.getBitWidth();
1035 APInt rot
= rotateAmt
;
1036 if (rotBitWidth
< BitWidth
) {
1037 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1038 // e.g. APInt(1, 32) would give APInt(1, 0).
1039 rot
= rotateAmt
.zext(BitWidth
);
1041 rot
= rot
.urem(APInt(rot
.getBitWidth(), BitWidth
));
1042 return rot
.getLimitedValue(BitWidth
);
1045 APInt
APInt::rotl(const APInt
&rotateAmt
) const {
1046 return rotl(rotateModulo(BitWidth
, rotateAmt
));
1049 APInt
APInt::rotl(unsigned rotateAmt
) const {
1050 rotateAmt
%= BitWidth
;
1053 return shl(rotateAmt
) | lshr(BitWidth
- rotateAmt
);
1056 APInt
APInt::rotr(const APInt
&rotateAmt
) const {
1057 return rotr(rotateModulo(BitWidth
, rotateAmt
));
1060 APInt
APInt::rotr(unsigned rotateAmt
) const {
1061 rotateAmt
%= BitWidth
;
1064 return lshr(rotateAmt
) | shl(BitWidth
- rotateAmt
);
1067 // Square Root - this method computes and returns the square root of "this".
1068 // Three mechanisms are used for computation. For small values (<= 5 bits),
1069 // a table lookup is done. This gets some performance for common cases. For
1070 // values using less than 52 bits, the value is converted to double and then
1071 // the libc sqrt function is called. The result is rounded and then converted
1072 // back to a uint64_t which is then used to construct the result. Finally,
1073 // the Babylonian method for computing square roots is used.
1074 APInt
APInt::sqrt() const {
1076 // Determine the magnitude of the value.
1077 unsigned magnitude
= getActiveBits();
1079 // Use a fast table for some small values. This also gets rid of some
1080 // rounding errors in libc sqrt for small values.
1081 if (magnitude
<= 5) {
1082 static const uint8_t results
[32] = {
1085 /* 3- 6 */ 2, 2, 2, 2,
1086 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1087 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1088 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1091 return APInt(BitWidth
, results
[ (isSingleWord() ? U
.VAL
: U
.pVal
[0]) ]);
1094 // If the magnitude of the value fits in less than 52 bits (the precision of
1095 // an IEEE double precision floating point value), then we can use the
1096 // libc sqrt function which will probably use a hardware sqrt computation.
1097 // This should be faster than the algorithm below.
1098 if (magnitude
< 52) {
1099 return APInt(BitWidth
,
1100 uint64_t(::round(::sqrt(double(isSingleWord() ? U
.VAL
1104 // Okay, all the short cuts are exhausted. We must compute it. The following
1105 // is a classical Babylonian method for computing the square root. This code
1106 // was adapted to APInt from a wikipedia article on such computations.
1107 // See http://www.wikipedia.org/ and go to the page named
1108 // Calculate_an_integer_square_root.
1109 unsigned nbits
= BitWidth
, i
= 4;
1110 APInt
testy(BitWidth
, 16);
1111 APInt
x_old(BitWidth
, 1);
1112 APInt
x_new(BitWidth
, 0);
1113 APInt
two(BitWidth
, 2);
1115 // Select a good starting value using binary logarithms.
1116 for (;; i
+= 2, testy
= testy
.shl(2))
1117 if (i
>= nbits
|| this->ule(testy
)) {
1118 x_old
= x_old
.shl(i
/ 2);
1122 // Use the Babylonian method to arrive at the integer square root:
1124 x_new
= (this->udiv(x_old
) + x_old
).udiv(two
);
1125 if (x_old
.ule(x_new
))
1130 // Make sure we return the closest approximation
1131 // NOTE: The rounding calculation below is correct. It will produce an
1132 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1133 // determined to be a rounding issue with pari/gp as it begins to use a
1134 // floating point representation after 192 bits. There are no discrepancies
1135 // between this algorithm and pari/gp for bit widths < 192 bits.
1136 APInt
square(x_old
* x_old
);
1137 APInt
nextSquare((x_old
+ 1) * (x_old
+1));
1138 if (this->ult(square
))
1140 assert(this->ule(nextSquare
) && "Error in APInt::sqrt computation");
1141 APInt
midpoint((nextSquare
- square
).udiv(two
));
1142 APInt
offset(*this - square
);
1143 if (offset
.ult(midpoint
))
1148 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1149 /// iterative extended Euclidean algorithm is used to solve for this value,
1150 /// however we simplify it to speed up calculating only the inverse, and take
1151 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1152 /// (potentially large) APInts around.
1153 /// WARNING: a value of '0' may be returned,
1154 /// signifying that no multiplicative inverse exists!
1155 APInt
APInt::multiplicativeInverse(const APInt
& modulo
) const {
1156 assert(ult(modulo
) && "This APInt must be smaller than the modulo");
1158 // Using the properties listed at the following web page (accessed 06/21/08):
1159 // http://www.numbertheory.org/php/euclid.html
1160 // (especially the properties numbered 3, 4 and 9) it can be proved that
1161 // BitWidth bits suffice for all the computations in the algorithm implemented
1162 // below. More precisely, this number of bits suffice if the multiplicative
1163 // inverse exists, but may not suffice for the general extended Euclidean
1166 APInt r
[2] = { modulo
, *this };
1167 APInt t
[2] = { APInt(BitWidth
, 0), APInt(BitWidth
, 1) };
1168 APInt
q(BitWidth
, 0);
1171 for (i
= 0; r
[i
^1] != 0; i
^= 1) {
1172 // An overview of the math without the confusing bit-flipping:
1173 // q = r[i-2] / r[i-1]
1174 // r[i] = r[i-2] % r[i-1]
1175 // t[i] = t[i-2] - t[i-1] * q
1176 udivrem(r
[i
], r
[i
^1], q
, r
[i
]);
1180 // If this APInt and the modulo are not coprime, there is no multiplicative
1181 // inverse, so return 0. We check this by looking at the next-to-last
1182 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1185 return APInt(BitWidth
, 0);
1187 // The next-to-last t is the multiplicative inverse. However, we are
1188 // interested in a positive inverse. Calculate a positive one from a negative
1189 // one if necessary. A simple addition of the modulo suffices because
1190 // abs(t[i]) is known to be less than *this/2 (see the link above).
1191 if (t
[i
].isNegative())
1194 return std::move(t
[i
]);
1197 /// Calculate the magic numbers required to implement a signed integer division
1198 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1199 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1200 /// Warren, Jr., chapter 10.
1201 APInt::ms
APInt::magic() const {
1202 const APInt
& d
= *this;
1204 APInt ad
, anc
, delta
, q1
, r1
, q2
, r2
, t
;
1205 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1209 t
= signedMin
+ (d
.lshr(d
.getBitWidth() - 1));
1210 anc
= t
- 1 - t
.urem(ad
); // absolute value of nc
1211 p
= d
.getBitWidth() - 1; // initialize p
1212 q1
= signedMin
.udiv(anc
); // initialize q1 = 2p/abs(nc)
1213 r1
= signedMin
- q1
*anc
; // initialize r1 = rem(2p,abs(nc))
1214 q2
= signedMin
.udiv(ad
); // initialize q2 = 2p/abs(d)
1215 r2
= signedMin
- q2
*ad
; // initialize r2 = rem(2p,abs(d))
1218 q1
= q1
<<1; // update q1 = 2p/abs(nc)
1219 r1
= r1
<<1; // update r1 = rem(2p/abs(nc))
1220 if (r1
.uge(anc
)) { // must be unsigned comparison
1224 q2
= q2
<<1; // update q2 = 2p/abs(d)
1225 r2
= r2
<<1; // update r2 = rem(2p/abs(d))
1226 if (r2
.uge(ad
)) { // must be unsigned comparison
1231 } while (q1
.ult(delta
) || (q1
== delta
&& r1
== 0));
1234 if (d
.isNegative()) mag
.m
= -mag
.m
; // resulting magic number
1235 mag
.s
= p
- d
.getBitWidth(); // resulting shift
1239 /// Calculate the magic numbers required to implement an unsigned integer
1240 /// division by a constant as a sequence of multiplies, adds and shifts.
1241 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1242 /// S. Warren, Jr., chapter 10.
1243 /// LeadingZeros can be used to simplify the calculation if the upper bits
1244 /// of the divided value are known zero.
1245 APInt::mu
APInt::magicu(unsigned LeadingZeros
) const {
1246 const APInt
& d
= *this;
1248 APInt nc
, delta
, q1
, r1
, q2
, r2
;
1250 magu
.a
= 0; // initialize "add" indicator
1251 APInt allOnes
= APInt::getAllOnesValue(d
.getBitWidth()).lshr(LeadingZeros
);
1252 APInt signedMin
= APInt::getSignedMinValue(d
.getBitWidth());
1253 APInt signedMax
= APInt::getSignedMaxValue(d
.getBitWidth());
1255 nc
= allOnes
- (allOnes
- d
).urem(d
);
1256 p
= d
.getBitWidth() - 1; // initialize p
1257 q1
= signedMin
.udiv(nc
); // initialize q1 = 2p/nc
1258 r1
= signedMin
- q1
*nc
; // initialize r1 = rem(2p,nc)
1259 q2
= signedMax
.udiv(d
); // initialize q2 = (2p-1)/d
1260 r2
= signedMax
- q2
*d
; // initialize r2 = rem((2p-1),d)
1263 if (r1
.uge(nc
- r1
)) {
1264 q1
= q1
+ q1
+ 1; // update q1
1265 r1
= r1
+ r1
- nc
; // update r1
1268 q1
= q1
+q1
; // update q1
1269 r1
= r1
+r1
; // update r1
1271 if ((r2
+ 1).uge(d
- r2
)) {
1272 if (q2
.uge(signedMax
)) magu
.a
= 1;
1273 q2
= q2
+q2
+ 1; // update q2
1274 r2
= r2
+r2
+ 1 - d
; // update r2
1277 if (q2
.uge(signedMin
)) magu
.a
= 1;
1278 q2
= q2
+q2
; // update q2
1279 r2
= r2
+r2
+ 1; // update r2
1282 } while (p
< d
.getBitWidth()*2 &&
1283 (q1
.ult(delta
) || (q1
== delta
&& r1
== 0)));
1284 magu
.m
= q2
+ 1; // resulting magic number
1285 magu
.s
= p
- d
.getBitWidth(); // resulting shift
1289 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1290 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1291 /// variables here have the same names as in the algorithm. Comments explain
1292 /// the algorithm and any deviation from it.
1293 static void KnuthDiv(uint32_t *u
, uint32_t *v
, uint32_t *q
, uint32_t* r
,
1294 unsigned m
, unsigned n
) {
1295 assert(u
&& "Must provide dividend");
1296 assert(v
&& "Must provide divisor");
1297 assert(q
&& "Must provide quotient");
1298 assert(u
!= v
&& u
!= q
&& v
!= q
&& "Must use different memory");
1299 assert(n
>1 && "n must be > 1");
1301 // b denotes the base of the number system. In our case b is 2^32.
1302 const uint64_t b
= uint64_t(1) << 32;
1304 // The DEBUG macros here tend to be spam in the debug output if you're not
1305 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1307 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1309 #define DEBUG_KNUTH(X) do {} while(false)
1312 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m
<< " n=" << n
<< '\n');
1313 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1314 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1315 DEBUG_KNUTH(dbgs() << " by");
1316 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1317 DEBUG_KNUTH(dbgs() << '\n');
1318 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1319 // u and v by d. Note that we have taken Knuth's advice here to use a power
1320 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1321 // 2 allows us to shift instead of multiply and it is easy to determine the
1322 // shift amount from the leading zeros. We are basically normalizing the u
1323 // and v so that its high bits are shifted to the top of v's range without
1324 // overflow. Note that this can require an extra word in u so that u must
1325 // be of length m+n+1.
1326 unsigned shift
= countLeadingZeros(v
[n
-1]);
1327 uint32_t v_carry
= 0;
1328 uint32_t u_carry
= 0;
1330 for (unsigned i
= 0; i
< m
+n
; ++i
) {
1331 uint32_t u_tmp
= u
[i
] >> (32 - shift
);
1332 u
[i
] = (u
[i
] << shift
) | u_carry
;
1335 for (unsigned i
= 0; i
< n
; ++i
) {
1336 uint32_t v_tmp
= v
[i
] >> (32 - shift
);
1337 v
[i
] = (v
[i
] << shift
) | v_carry
;
1343 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1344 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1345 DEBUG_KNUTH(dbgs() << " by");
1346 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1347 DEBUG_KNUTH(dbgs() << '\n');
1349 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1352 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j
<< '\n');
1353 // D3. [Calculate q'.].
1354 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1355 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1356 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1357 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1358 // on v[n-2] determines at high speed most of the cases in which the trial
1359 // value qp is one too large, and it eliminates all cases where qp is two
1361 uint64_t dividend
= Make_64(u
[j
+n
], u
[j
+n
-1]);
1362 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend
<< '\n');
1363 uint64_t qp
= dividend
/ v
[n
-1];
1364 uint64_t rp
= dividend
% v
[n
-1];
1365 if (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]) {
1368 if (rp
< b
&& (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]))
1371 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp
<< ", rp == " << rp
<< '\n');
1373 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1374 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1375 // consists of a simple multiplication by a one-place number, combined with
1377 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1378 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1379 // true value plus b**(n+1), namely as the b's complement of
1380 // the true value, and a "borrow" to the left should be remembered.
1382 for (unsigned i
= 0; i
< n
; ++i
) {
1383 uint64_t p
= uint64_t(qp
) * uint64_t(v
[i
]);
1384 int64_t subres
= int64_t(u
[j
+i
]) - borrow
- Lo_32(p
);
1385 u
[j
+i
] = Lo_32(subres
);
1386 borrow
= Hi_32(p
) - Hi_32(subres
);
1387 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u
[j
+ i
]
1388 << ", borrow = " << borrow
<< '\n');
1390 bool isNeg
= u
[j
+n
] < borrow
;
1391 u
[j
+n
] -= Lo_32(borrow
);
1393 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1394 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1395 DEBUG_KNUTH(dbgs() << '\n');
1397 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1398 // negative, go to step D6; otherwise go on to step D7.
1401 // D6. [Add back]. The probability that this step is necessary is very
1402 // small, on the order of only 2/b. Make sure that test data accounts for
1403 // this possibility. Decrease q[j] by 1
1405 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1406 // A carry will occur to the left of u[j+n], and it should be ignored
1407 // since it cancels with the borrow that occurred in D4.
1409 for (unsigned i
= 0; i
< n
; i
++) {
1410 uint32_t limit
= std::min(u
[j
+i
],v
[i
]);
1411 u
[j
+i
] += v
[i
] + carry
;
1412 carry
= u
[j
+i
] < limit
|| (carry
&& u
[j
+i
] == limit
);
1416 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1417 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1418 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q
[j
] << '\n');
1420 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1423 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1424 DEBUG_KNUTH(for (int i
= m
; i
>= 0; i
--) dbgs() << " " << q
[i
]);
1425 DEBUG_KNUTH(dbgs() << '\n');
1427 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1428 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1429 // compute the remainder (urem uses this).
1431 // The value d is expressed by the "shift" value above since we avoided
1432 // multiplication by d by using a shift left. So, all we have to do is
1433 // shift right here.
1436 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1437 for (int i
= n
-1; i
>= 0; i
--) {
1438 r
[i
] = (u
[i
] >> shift
) | carry
;
1439 carry
= u
[i
] << (32 - shift
);
1440 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1443 for (int i
= n
-1; i
>= 0; i
--) {
1445 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1448 DEBUG_KNUTH(dbgs() << '\n');
1450 DEBUG_KNUTH(dbgs() << '\n');
1453 void APInt::divide(const WordType
*LHS
, unsigned lhsWords
, const WordType
*RHS
,
1454 unsigned rhsWords
, WordType
*Quotient
, WordType
*Remainder
) {
1455 assert(lhsWords
>= rhsWords
&& "Fractional result");
1457 // First, compose the values into an array of 32-bit words instead of
1458 // 64-bit words. This is a necessity of both the "short division" algorithm
1459 // and the Knuth "classical algorithm" which requires there to be native
1460 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1461 // can't use 64-bit operands here because we don't have native results of
1462 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1463 // work on large-endian machines.
1464 unsigned n
= rhsWords
* 2;
1465 unsigned m
= (lhsWords
* 2) - n
;
1467 // Allocate space for the temporary values we need either on the stack, if
1468 // it will fit, or on the heap if it won't.
1469 uint32_t SPACE
[128];
1470 uint32_t *U
= nullptr;
1471 uint32_t *V
= nullptr;
1472 uint32_t *Q
= nullptr;
1473 uint32_t *R
= nullptr;
1474 if ((Remainder
?4:3)*n
+2*m
+1 <= 128) {
1477 Q
= &SPACE
[(m
+n
+1) + n
];
1479 R
= &SPACE
[(m
+n
+1) + n
+ (m
+n
)];
1481 U
= new uint32_t[m
+ n
+ 1];
1482 V
= new uint32_t[n
];
1483 Q
= new uint32_t[m
+n
];
1485 R
= new uint32_t[n
];
1488 // Initialize the dividend
1489 memset(U
, 0, (m
+n
+1)*sizeof(uint32_t));
1490 for (unsigned i
= 0; i
< lhsWords
; ++i
) {
1491 uint64_t tmp
= LHS
[i
];
1492 U
[i
* 2] = Lo_32(tmp
);
1493 U
[i
* 2 + 1] = Hi_32(tmp
);
1495 U
[m
+n
] = 0; // this extra word is for "spill" in the Knuth algorithm.
1497 // Initialize the divisor
1498 memset(V
, 0, (n
)*sizeof(uint32_t));
1499 for (unsigned i
= 0; i
< rhsWords
; ++i
) {
1500 uint64_t tmp
= RHS
[i
];
1501 V
[i
* 2] = Lo_32(tmp
);
1502 V
[i
* 2 + 1] = Hi_32(tmp
);
1505 // initialize the quotient and remainder
1506 memset(Q
, 0, (m
+n
) * sizeof(uint32_t));
1508 memset(R
, 0, n
* sizeof(uint32_t));
1510 // Now, adjust m and n for the Knuth division. n is the number of words in
1511 // the divisor. m is the number of words by which the dividend exceeds the
1512 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1513 // contain any zero words or the Knuth algorithm fails.
1514 for (unsigned i
= n
; i
> 0 && V
[i
-1] == 0; i
--) {
1518 for (unsigned i
= m
+n
; i
> 0 && U
[i
-1] == 0; i
--)
1521 // If we're left with only a single word for the divisor, Knuth doesn't work
1522 // so we implement the short division algorithm here. This is much simpler
1523 // and faster because we are certain that we can divide a 64-bit quantity
1524 // by a 32-bit quantity at hardware speed and short division is simply a
1525 // series of such operations. This is just like doing short division but we
1526 // are using base 2^32 instead of base 10.
1527 assert(n
!= 0 && "Divide by zero?");
1529 uint32_t divisor
= V
[0];
1530 uint32_t remainder
= 0;
1531 for (int i
= m
; i
>= 0; i
--) {
1532 uint64_t partial_dividend
= Make_64(remainder
, U
[i
]);
1533 if (partial_dividend
== 0) {
1536 } else if (partial_dividend
< divisor
) {
1538 remainder
= Lo_32(partial_dividend
);
1539 } else if (partial_dividend
== divisor
) {
1543 Q
[i
] = Lo_32(partial_dividend
/ divisor
);
1544 remainder
= Lo_32(partial_dividend
- (Q
[i
] * divisor
));
1550 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1552 KnuthDiv(U
, V
, Q
, R
, m
, n
);
1555 // If the caller wants the quotient
1557 for (unsigned i
= 0; i
< lhsWords
; ++i
)
1558 Quotient
[i
] = Make_64(Q
[i
*2+1], Q
[i
*2]);
1561 // If the caller wants the remainder
1563 for (unsigned i
= 0; i
< rhsWords
; ++i
)
1564 Remainder
[i
] = Make_64(R
[i
*2+1], R
[i
*2]);
1567 // Clean up the memory we allocated.
1568 if (U
!= &SPACE
[0]) {
1576 APInt
APInt::udiv(const APInt
&RHS
) const {
1577 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1579 // First, deal with the easy case
1580 if (isSingleWord()) {
1581 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1582 return APInt(BitWidth
, U
.VAL
/ RHS
.U
.VAL
);
1585 // Get some facts about the LHS and RHS number of bits and words
1586 unsigned lhsWords
= getNumWords(getActiveBits());
1587 unsigned rhsBits
= RHS
.getActiveBits();
1588 unsigned rhsWords
= getNumWords(rhsBits
);
1589 assert(rhsWords
&& "Divided by zero???");
1591 // Deal with some degenerate cases
1594 return APInt(BitWidth
, 0);
1598 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1599 // X / Y ===> 0, iff X < Y
1600 return APInt(BitWidth
, 0);
1603 return APInt(BitWidth
, 1);
1604 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1605 // All high words are zero, just use native divide
1606 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
.U
.pVal
[0]);
1608 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1609 APInt
Quotient(BitWidth
, 0); // to hold result.
1610 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
, nullptr);
1614 APInt
APInt::udiv(uint64_t RHS
) const {
1615 assert(RHS
!= 0 && "Divide by zero?");
1617 // First, deal with the easy case
1619 return APInt(BitWidth
, U
.VAL
/ RHS
);
1621 // Get some facts about the LHS words.
1622 unsigned lhsWords
= getNumWords(getActiveBits());
1624 // Deal with some degenerate cases
1627 return APInt(BitWidth
, 0);
1632 // X / Y ===> 0, iff X < Y
1633 return APInt(BitWidth
, 0);
1636 return APInt(BitWidth
, 1);
1637 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1638 // All high words are zero, just use native divide
1639 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
);
1641 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1642 APInt
Quotient(BitWidth
, 0); // to hold result.
1643 divide(U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, nullptr);
1647 APInt
APInt::sdiv(const APInt
&RHS
) const {
1649 if (RHS
.isNegative())
1650 return (-(*this)).udiv(-RHS
);
1651 return -((-(*this)).udiv(RHS
));
1653 if (RHS
.isNegative())
1654 return -(this->udiv(-RHS
));
1655 return this->udiv(RHS
);
1658 APInt
APInt::sdiv(int64_t RHS
) const {
1661 return (-(*this)).udiv(-RHS
);
1662 return -((-(*this)).udiv(RHS
));
1665 return -(this->udiv(-RHS
));
1666 return this->udiv(RHS
);
1669 APInt
APInt::urem(const APInt
&RHS
) const {
1670 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1671 if (isSingleWord()) {
1672 assert(RHS
.U
.VAL
!= 0 && "Remainder by zero?");
1673 return APInt(BitWidth
, U
.VAL
% RHS
.U
.VAL
);
1676 // Get some facts about the LHS
1677 unsigned lhsWords
= getNumWords(getActiveBits());
1679 // Get some facts about the RHS
1680 unsigned rhsBits
= RHS
.getActiveBits();
1681 unsigned rhsWords
= getNumWords(rhsBits
);
1682 assert(rhsWords
&& "Performing remainder operation by zero ???");
1684 // Check the degenerate cases
1687 return APInt(BitWidth
, 0);
1690 return APInt(BitWidth
, 0);
1691 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1692 // X % Y ===> X, iff X < Y
1696 return APInt(BitWidth
, 0);
1698 // All high words are zero, just use native remainder
1699 return APInt(BitWidth
, U
.pVal
[0] % RHS
.U
.pVal
[0]);
1701 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1702 APInt
Remainder(BitWidth
, 0);
1703 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, nullptr, Remainder
.U
.pVal
);
1707 uint64_t APInt::urem(uint64_t RHS
) const {
1708 assert(RHS
!= 0 && "Remainder by zero?");
1713 // Get some facts about the LHS
1714 unsigned lhsWords
= getNumWords(getActiveBits());
1716 // Check the degenerate cases
1724 // X % Y ===> X, iff X < Y
1725 return getZExtValue();
1730 // All high words are zero, just use native remainder
1731 return U
.pVal
[0] % RHS
;
1733 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1735 divide(U
.pVal
, lhsWords
, &RHS
, 1, nullptr, &Remainder
);
1739 APInt
APInt::srem(const APInt
&RHS
) const {
1741 if (RHS
.isNegative())
1742 return -((-(*this)).urem(-RHS
));
1743 return -((-(*this)).urem(RHS
));
1745 if (RHS
.isNegative())
1746 return this->urem(-RHS
);
1747 return this->urem(RHS
);
1750 int64_t APInt::srem(int64_t RHS
) const {
1753 return -((-(*this)).urem(-RHS
));
1754 return -((-(*this)).urem(RHS
));
1757 return this->urem(-RHS
);
1758 return this->urem(RHS
);
1761 void APInt::udivrem(const APInt
&LHS
, const APInt
&RHS
,
1762 APInt
&Quotient
, APInt
&Remainder
) {
1763 assert(LHS
.BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1764 unsigned BitWidth
= LHS
.BitWidth
;
1766 // First, deal with the easy case
1767 if (LHS
.isSingleWord()) {
1768 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1769 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
.U
.VAL
;
1770 uint64_t RemVal
= LHS
.U
.VAL
% RHS
.U
.VAL
;
1771 Quotient
= APInt(BitWidth
, QuotVal
);
1772 Remainder
= APInt(BitWidth
, RemVal
);
1776 // Get some size facts about the dividend and divisor
1777 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1778 unsigned rhsBits
= RHS
.getActiveBits();
1779 unsigned rhsWords
= getNumWords(rhsBits
);
1780 assert(rhsWords
&& "Performing divrem operation by zero ???");
1782 // Check the degenerate cases
1783 if (lhsWords
== 0) {
1784 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1785 Remainder
= APInt(BitWidth
, 0); // 0 % Y ===> 0
1790 Quotient
= LHS
; // X / 1 ===> X
1791 Remainder
= APInt(BitWidth
, 0); // X % 1 ===> 0
1794 if (lhsWords
< rhsWords
|| LHS
.ult(RHS
)) {
1795 Remainder
= LHS
; // X % Y ===> X, iff X < Y
1796 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1801 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1802 Remainder
= APInt(BitWidth
, 0); // X % X ===> 0;
1806 // Make sure there is enough space to hold the results.
1807 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1808 // change the size. This is necessary if Quotient or Remainder is aliased
1810 Quotient
.reallocate(BitWidth
);
1811 Remainder
.reallocate(BitWidth
);
1813 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1814 // There is only one word to consider so use the native versions.
1815 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1816 uint64_t rhsValue
= RHS
.U
.pVal
[0];
1817 Quotient
= lhsValue
/ rhsValue
;
1818 Remainder
= lhsValue
% rhsValue
;
1822 // Okay, lets do it the long way
1823 divide(LHS
.U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
,
1825 // Clear the rest of the Quotient and Remainder.
1826 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1827 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1828 std::memset(Remainder
.U
.pVal
+ rhsWords
, 0,
1829 (getNumWords(BitWidth
) - rhsWords
) * APINT_WORD_SIZE
);
1832 void APInt::udivrem(const APInt
&LHS
, uint64_t RHS
, APInt
&Quotient
,
1833 uint64_t &Remainder
) {
1834 assert(RHS
!= 0 && "Divide by zero?");
1835 unsigned BitWidth
= LHS
.BitWidth
;
1837 // First, deal with the easy case
1838 if (LHS
.isSingleWord()) {
1839 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
;
1840 Remainder
= LHS
.U
.VAL
% RHS
;
1841 Quotient
= APInt(BitWidth
, QuotVal
);
1845 // Get some size facts about the dividend and divisor
1846 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1848 // Check the degenerate cases
1849 if (lhsWords
== 0) {
1850 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1851 Remainder
= 0; // 0 % Y ===> 0
1856 Quotient
= LHS
; // X / 1 ===> X
1857 Remainder
= 0; // X % 1 ===> 0
1862 Remainder
= LHS
.getZExtValue(); // X % Y ===> X, iff X < Y
1863 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1868 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1869 Remainder
= 0; // X % X ===> 0;
1873 // Make sure there is enough space to hold the results.
1874 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1875 // change the size. This is necessary if Quotient is aliased with LHS.
1876 Quotient
.reallocate(BitWidth
);
1878 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1879 // There is only one word to consider so use the native versions.
1880 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1881 Quotient
= lhsValue
/ RHS
;
1882 Remainder
= lhsValue
% RHS
;
1886 // Okay, lets do it the long way
1887 divide(LHS
.U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, &Remainder
);
1888 // Clear the rest of the Quotient.
1889 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1890 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1893 void APInt::sdivrem(const APInt
&LHS
, const APInt
&RHS
,
1894 APInt
&Quotient
, APInt
&Remainder
) {
1895 if (LHS
.isNegative()) {
1896 if (RHS
.isNegative())
1897 APInt::udivrem(-LHS
, -RHS
, Quotient
, Remainder
);
1899 APInt::udivrem(-LHS
, RHS
, Quotient
, Remainder
);
1903 } else if (RHS
.isNegative()) {
1904 APInt::udivrem(LHS
, -RHS
, Quotient
, Remainder
);
1907 APInt::udivrem(LHS
, RHS
, Quotient
, Remainder
);
1911 void APInt::sdivrem(const APInt
&LHS
, int64_t RHS
,
1912 APInt
&Quotient
, int64_t &Remainder
) {
1913 uint64_t R
= Remainder
;
1914 if (LHS
.isNegative()) {
1916 APInt::udivrem(-LHS
, -RHS
, Quotient
, R
);
1918 APInt::udivrem(-LHS
, RHS
, Quotient
, R
);
1922 } else if (RHS
< 0) {
1923 APInt::udivrem(LHS
, -RHS
, Quotient
, R
);
1926 APInt::udivrem(LHS
, RHS
, Quotient
, R
);
1931 APInt
APInt::sadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1932 APInt Res
= *this+RHS
;
1933 Overflow
= isNonNegative() == RHS
.isNonNegative() &&
1934 Res
.isNonNegative() != isNonNegative();
1938 APInt
APInt::uadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1939 APInt Res
= *this+RHS
;
1940 Overflow
= Res
.ult(RHS
);
1944 APInt
APInt::ssub_ov(const APInt
&RHS
, bool &Overflow
) const {
1945 APInt Res
= *this - RHS
;
1946 Overflow
= isNonNegative() != RHS
.isNonNegative() &&
1947 Res
.isNonNegative() != isNonNegative();
1951 APInt
APInt::usub_ov(const APInt
&RHS
, bool &Overflow
) const {
1952 APInt Res
= *this-RHS
;
1953 Overflow
= Res
.ugt(*this);
1957 APInt
APInt::sdiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1958 // MININT/-1 --> overflow.
1959 Overflow
= isMinSignedValue() && RHS
.isAllOnesValue();
1963 APInt
APInt::smul_ov(const APInt
&RHS
, bool &Overflow
) const {
1964 APInt Res
= *this * RHS
;
1966 if (*this != 0 && RHS
!= 0)
1967 Overflow
= Res
.sdiv(RHS
) != *this || Res
.sdiv(*this) != RHS
;
1973 APInt
APInt::umul_ov(const APInt
&RHS
, bool &Overflow
) const {
1974 if (countLeadingZeros() + RHS
.countLeadingZeros() + 2 <= BitWidth
) {
1979 APInt Res
= lshr(1) * RHS
;
1980 Overflow
= Res
.isNegative();
1990 APInt
APInt::sshl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1991 Overflow
= ShAmt
.uge(getBitWidth());
1993 return APInt(BitWidth
, 0);
1995 if (isNonNegative()) // Don't allow sign change.
1996 Overflow
= ShAmt
.uge(countLeadingZeros());
1998 Overflow
= ShAmt
.uge(countLeadingOnes());
2000 return *this << ShAmt
;
2003 APInt
APInt::ushl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
2004 Overflow
= ShAmt
.uge(getBitWidth());
2006 return APInt(BitWidth
, 0);
2008 Overflow
= ShAmt
.ugt(countLeadingZeros());
2010 return *this << ShAmt
;
2013 APInt
APInt::sadd_sat(const APInt
&RHS
) const {
2015 APInt Res
= sadd_ov(RHS
, Overflow
);
2019 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2020 : APInt::getSignedMaxValue(BitWidth
);
2023 APInt
APInt::uadd_sat(const APInt
&RHS
) const {
2025 APInt Res
= uadd_ov(RHS
, Overflow
);
2029 return APInt::getMaxValue(BitWidth
);
2032 APInt
APInt::ssub_sat(const APInt
&RHS
) const {
2034 APInt Res
= ssub_ov(RHS
, Overflow
);
2038 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2039 : APInt::getSignedMaxValue(BitWidth
);
2042 APInt
APInt::usub_sat(const APInt
&RHS
) const {
2044 APInt Res
= usub_ov(RHS
, Overflow
);
2048 return APInt(BitWidth
, 0);
2052 void APInt::fromString(unsigned numbits
, StringRef str
, uint8_t radix
) {
2053 // Check our assumptions here
2054 assert(!str
.empty() && "Invalid string length");
2055 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
2057 "Radix should be 2, 8, 10, 16, or 36!");
2059 StringRef::iterator p
= str
.begin();
2060 size_t slen
= str
.size();
2061 bool isNeg
= *p
== '-';
2062 if (*p
== '-' || *p
== '+') {
2065 assert(slen
&& "String is only a sign, needs a value.");
2067 assert((slen
<= numbits
|| radix
!= 2) && "Insufficient bit width");
2068 assert(((slen
-1)*3 <= numbits
|| radix
!= 8) && "Insufficient bit width");
2069 assert(((slen
-1)*4 <= numbits
|| radix
!= 16) && "Insufficient bit width");
2070 assert((((slen
-1)*64)/22 <= numbits
|| radix
!= 10) &&
2071 "Insufficient bit width");
2073 // Allocate memory if needed
2077 U
.pVal
= getClearedMemory(getNumWords());
2079 // Figure out if we can shift instead of multiply
2080 unsigned shift
= (radix
== 16 ? 4 : radix
== 8 ? 3 : radix
== 2 ? 1 : 0);
2082 // Enter digit traversal loop
2083 for (StringRef::iterator e
= str
.end(); p
!= e
; ++p
) {
2084 unsigned digit
= getDigit(*p
, radix
);
2085 assert(digit
< radix
&& "Invalid character in digit string");
2087 // Shift or multiply the value by the radix
2095 // Add in the digit we just interpreted
2098 // If its negative, put it in two's complement form
2103 void APInt::toString(SmallVectorImpl
<char> &Str
, unsigned Radix
,
2104 bool Signed
, bool formatAsCLiteral
) const {
2105 assert((Radix
== 10 || Radix
== 8 || Radix
== 16 || Radix
== 2 ||
2107 "Radix should be 2, 8, 10, 16, or 36!");
2109 const char *Prefix
= "";
2110 if (formatAsCLiteral
) {
2113 // Binary literals are a non-standard extension added in gcc 4.3:
2114 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2126 llvm_unreachable("Invalid radix!");
2130 // First, check for a zero value and just short circuit the logic below.
2133 Str
.push_back(*Prefix
);
2140 static const char Digits
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2142 if (isSingleWord()) {
2144 char *BufPtr
= std::end(Buffer
);
2150 int64_t I
= getSExtValue();
2160 Str
.push_back(*Prefix
);
2165 *--BufPtr
= Digits
[N
% Radix
];
2168 Str
.append(BufPtr
, std::end(Buffer
));
2174 if (Signed
&& isNegative()) {
2175 // They want to print the signed version and it is a negative value
2176 // Flip the bits and add one to turn it into the equivalent positive
2177 // value and put a '-' in the result.
2183 Str
.push_back(*Prefix
);
2187 // We insert the digits backward, then reverse them to get the right order.
2188 unsigned StartDig
= Str
.size();
2190 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2191 // because the number of bits per digit (1, 3 and 4 respectively) divides
2192 // equally. We just shift until the value is zero.
2193 if (Radix
== 2 || Radix
== 8 || Radix
== 16) {
2194 // Just shift tmp right for each digit width until it becomes zero
2195 unsigned ShiftAmt
= (Radix
== 16 ? 4 : (Radix
== 8 ? 3 : 1));
2196 unsigned MaskAmt
= Radix
- 1;
2198 while (Tmp
.getBoolValue()) {
2199 unsigned Digit
= unsigned(Tmp
.getRawData()[0]) & MaskAmt
;
2200 Str
.push_back(Digits
[Digit
]);
2201 Tmp
.lshrInPlace(ShiftAmt
);
2204 while (Tmp
.getBoolValue()) {
2206 udivrem(Tmp
, Radix
, Tmp
, Digit
);
2207 assert(Digit
< Radix
&& "divide failed");
2208 Str
.push_back(Digits
[Digit
]);
2212 // Reverse the digits before returning.
2213 std::reverse(Str
.begin()+StartDig
, Str
.end());
2216 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2217 /// It is better to pass in a SmallVector/SmallString to the methods above.
2218 std::string
APInt::toString(unsigned Radix
= 10, bool Signed
= true) const {
2220 toString(S
, Radix
, Signed
, /* formatAsCLiteral = */false);
2224 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2225 LLVM_DUMP_METHOD
void APInt::dump() const {
2226 SmallString
<40> S
, U
;
2227 this->toStringUnsigned(U
);
2228 this->toStringSigned(S
);
2229 dbgs() << "APInt(" << BitWidth
<< "b, "
2230 << U
<< "u " << S
<< "s)\n";
2234 void APInt::print(raw_ostream
&OS
, bool isSigned
) const {
2236 this->toString(S
, 10, isSigned
, /* formatAsCLiteral = */false);
2240 // This implements a variety of operations on a representation of
2241 // arbitrary precision, two's-complement, bignum integer values.
2243 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2244 // and unrestricting assumption.
2245 static_assert(APInt::APINT_BITS_PER_WORD
% 2 == 0,
2246 "Part width must be divisible by 2!");
2248 /* Some handy functions local to this file. */
2250 /* Returns the integer part with the least significant BITS set.
2251 BITS cannot be zero. */
2252 static inline APInt::WordType
lowBitMask(unsigned bits
) {
2253 assert(bits
!= 0 && bits
<= APInt::APINT_BITS_PER_WORD
);
2255 return ~(APInt::WordType
) 0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
2258 /* Returns the value of the lower half of PART. */
2259 static inline APInt::WordType
lowHalf(APInt::WordType part
) {
2260 return part
& lowBitMask(APInt::APINT_BITS_PER_WORD
/ 2);
2263 /* Returns the value of the upper half of PART. */
2264 static inline APInt::WordType
highHalf(APInt::WordType part
) {
2265 return part
>> (APInt::APINT_BITS_PER_WORD
/ 2);
2268 /* Returns the bit number of the most significant set bit of a part.
2269 If the input number has no bits set -1U is returned. */
2270 static unsigned partMSB(APInt::WordType value
) {
2271 return findLastSet(value
, ZB_Max
);
2274 /* Returns the bit number of the least significant set bit of a
2275 part. If the input number has no bits set -1U is returned. */
2276 static unsigned partLSB(APInt::WordType value
) {
2277 return findFirstSet(value
, ZB_Max
);
2280 /* Sets the least significant part of a bignum to the input value, and
2281 zeroes out higher parts. */
2282 void APInt::tcSet(WordType
*dst
, WordType part
, unsigned parts
) {
2286 for (unsigned i
= 1; i
< parts
; i
++)
2290 /* Assign one bignum to another. */
2291 void APInt::tcAssign(WordType
*dst
, const WordType
*src
, unsigned parts
) {
2292 for (unsigned i
= 0; i
< parts
; i
++)
2296 /* Returns true if a bignum is zero, false otherwise. */
2297 bool APInt::tcIsZero(const WordType
*src
, unsigned parts
) {
2298 for (unsigned i
= 0; i
< parts
; i
++)
2305 /* Extract the given bit of a bignum; returns 0 or 1. */
2306 int APInt::tcExtractBit(const WordType
*parts
, unsigned bit
) {
2307 return (parts
[whichWord(bit
)] & maskBit(bit
)) != 0;
2310 /* Set the given bit of a bignum. */
2311 void APInt::tcSetBit(WordType
*parts
, unsigned bit
) {
2312 parts
[whichWord(bit
)] |= maskBit(bit
);
2315 /* Clears the given bit of a bignum. */
2316 void APInt::tcClearBit(WordType
*parts
, unsigned bit
) {
2317 parts
[whichWord(bit
)] &= ~maskBit(bit
);
2320 /* Returns the bit number of the least significant set bit of a
2321 number. If the input number has no bits set -1U is returned. */
2322 unsigned APInt::tcLSB(const WordType
*parts
, unsigned n
) {
2323 for (unsigned i
= 0; i
< n
; i
++) {
2324 if (parts
[i
] != 0) {
2325 unsigned lsb
= partLSB(parts
[i
]);
2327 return lsb
+ i
* APINT_BITS_PER_WORD
;
2334 /* Returns the bit number of the most significant set bit of a number.
2335 If the input number has no bits set -1U is returned. */
2336 unsigned APInt::tcMSB(const WordType
*parts
, unsigned n
) {
2340 if (parts
[n
] != 0) {
2341 unsigned msb
= partMSB(parts
[n
]);
2343 return msb
+ n
* APINT_BITS_PER_WORD
;
2350 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2351 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2352 the least significant bit of DST. All high bits above srcBITS in
2353 DST are zero-filled. */
2355 APInt::tcExtract(WordType
*dst
, unsigned dstCount
, const WordType
*src
,
2356 unsigned srcBits
, unsigned srcLSB
) {
2357 unsigned dstParts
= (srcBits
+ APINT_BITS_PER_WORD
- 1) / APINT_BITS_PER_WORD
;
2358 assert(dstParts
<= dstCount
);
2360 unsigned firstSrcPart
= srcLSB
/ APINT_BITS_PER_WORD
;
2361 tcAssign (dst
, src
+ firstSrcPart
, dstParts
);
2363 unsigned shift
= srcLSB
% APINT_BITS_PER_WORD
;
2364 tcShiftRight (dst
, dstParts
, shift
);
2366 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2367 in DST. If this is less that srcBits, append the rest, else
2368 clear the high bits. */
2369 unsigned n
= dstParts
* APINT_BITS_PER_WORD
- shift
;
2371 WordType mask
= lowBitMask (srcBits
- n
);
2372 dst
[dstParts
- 1] |= ((src
[firstSrcPart
+ dstParts
] & mask
)
2373 << n
% APINT_BITS_PER_WORD
);
2374 } else if (n
> srcBits
) {
2375 if (srcBits
% APINT_BITS_PER_WORD
)
2376 dst
[dstParts
- 1] &= lowBitMask (srcBits
% APINT_BITS_PER_WORD
);
2379 /* Clear high parts. */
2380 while (dstParts
< dstCount
)
2381 dst
[dstParts
++] = 0;
2384 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2385 APInt::WordType
APInt::tcAdd(WordType
*dst
, const WordType
*rhs
,
2386 WordType c
, unsigned parts
) {
2389 for (unsigned i
= 0; i
< parts
; i
++) {
2390 WordType l
= dst
[i
];
2392 dst
[i
] += rhs
[i
] + 1;
2403 /// This function adds a single "word" integer, src, to the multiple
2404 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2405 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2406 /// @returns the carry of the addition.
2407 APInt::WordType
APInt::tcAddPart(WordType
*dst
, WordType src
,
2409 for (unsigned i
= 0; i
< parts
; ++i
) {
2412 return 0; // No need to carry so exit early.
2413 src
= 1; // Carry one to next digit.
2419 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2420 APInt::WordType
APInt::tcSubtract(WordType
*dst
, const WordType
*rhs
,
2421 WordType c
, unsigned parts
) {
2424 for (unsigned i
= 0; i
< parts
; i
++) {
2425 WordType l
= dst
[i
];
2427 dst
[i
] -= rhs
[i
] + 1;
2438 /// This function subtracts a single "word" (64-bit word), src, from
2439 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2440 /// no further borrowing is needed or it runs out of "words" in dst. The result
2441 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2442 /// exhausted. In other words, if src > dst then this function returns 1,
2444 /// @returns the borrow out of the subtraction
2445 APInt::WordType
APInt::tcSubtractPart(WordType
*dst
, WordType src
,
2447 for (unsigned i
= 0; i
< parts
; ++i
) {
2448 WordType Dst
= dst
[i
];
2451 return 0; // No need to borrow so exit early.
2452 src
= 1; // We have to "borrow 1" from next "word"
2458 /* Negate a bignum in-place. */
2459 void APInt::tcNegate(WordType
*dst
, unsigned parts
) {
2460 tcComplement(dst
, parts
);
2461 tcIncrement(dst
, parts
);
2464 /* DST += SRC * MULTIPLIER + CARRY if add is true
2465 DST = SRC * MULTIPLIER + CARRY if add is false
2467 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2468 they must start at the same point, i.e. DST == SRC.
2470 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2471 returned. Otherwise DST is filled with the least significant
2472 DSTPARTS parts of the result, and if all of the omitted higher
2473 parts were zero return zero, otherwise overflow occurred and
2475 int APInt::tcMultiplyPart(WordType
*dst
, const WordType
*src
,
2476 WordType multiplier
, WordType carry
,
2477 unsigned srcParts
, unsigned dstParts
,
2479 /* Otherwise our writes of DST kill our later reads of SRC. */
2480 assert(dst
<= src
|| dst
>= src
+ srcParts
);
2481 assert(dstParts
<= srcParts
+ 1);
2483 /* N loops; minimum of dstParts and srcParts. */
2484 unsigned n
= std::min(dstParts
, srcParts
);
2486 for (unsigned i
= 0; i
< n
; i
++) {
2487 WordType low
, mid
, high
, srcPart
;
2489 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2491 This cannot overflow, because
2493 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2495 which is less than n^2. */
2499 if (multiplier
== 0 || srcPart
== 0) {
2503 low
= lowHalf(srcPart
) * lowHalf(multiplier
);
2504 high
= highHalf(srcPart
) * highHalf(multiplier
);
2506 mid
= lowHalf(srcPart
) * highHalf(multiplier
);
2507 high
+= highHalf(mid
);
2508 mid
<<= APINT_BITS_PER_WORD
/ 2;
2509 if (low
+ mid
< low
)
2513 mid
= highHalf(srcPart
) * lowHalf(multiplier
);
2514 high
+= highHalf(mid
);
2515 mid
<<= APINT_BITS_PER_WORD
/ 2;
2516 if (low
+ mid
< low
)
2520 /* Now add carry. */
2521 if (low
+ carry
< low
)
2527 /* And now DST[i], and store the new low part there. */
2528 if (low
+ dst
[i
] < low
)
2537 if (srcParts
< dstParts
) {
2538 /* Full multiplication, there is no overflow. */
2539 assert(srcParts
+ 1 == dstParts
);
2540 dst
[srcParts
] = carry
;
2544 /* We overflowed if there is carry. */
2548 /* We would overflow if any significant unwritten parts would be
2549 non-zero. This is true if any remaining src parts are non-zero
2550 and the multiplier is non-zero. */
2552 for (unsigned i
= dstParts
; i
< srcParts
; i
++)
2556 /* We fitted in the narrow destination. */
2560 /* DST = LHS * RHS, where DST has the same width as the operands and
2561 is filled with the least significant parts of the result. Returns
2562 one if overflow occurred, otherwise zero. DST must be disjoint
2563 from both operands. */
2564 int APInt::tcMultiply(WordType
*dst
, const WordType
*lhs
,
2565 const WordType
*rhs
, unsigned parts
) {
2566 assert(dst
!= lhs
&& dst
!= rhs
);
2569 tcSet(dst
, 0, parts
);
2571 for (unsigned i
= 0; i
< parts
; i
++)
2572 overflow
|= tcMultiplyPart(&dst
[i
], lhs
, rhs
[i
], 0, parts
,
2578 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2579 /// operands. No overflow occurs. DST must be disjoint from both operands.
2580 void APInt::tcFullMultiply(WordType
*dst
, const WordType
*lhs
,
2581 const WordType
*rhs
, unsigned lhsParts
,
2582 unsigned rhsParts
) {
2583 /* Put the narrower number on the LHS for less loops below. */
2584 if (lhsParts
> rhsParts
)
2585 return tcFullMultiply (dst
, rhs
, lhs
, rhsParts
, lhsParts
);
2587 assert(dst
!= lhs
&& dst
!= rhs
);
2589 tcSet(dst
, 0, rhsParts
);
2591 for (unsigned i
= 0; i
< lhsParts
; i
++)
2592 tcMultiplyPart(&dst
[i
], rhs
, lhs
[i
], 0, rhsParts
, rhsParts
+ 1, true);
2595 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2596 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2597 set REMAINDER to the remainder, return zero. i.e.
2599 OLD_LHS = RHS * LHS + REMAINDER
2601 SCRATCH is a bignum of the same size as the operands and result for
2602 use by the routine; its contents need not be initialized and are
2603 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2605 int APInt::tcDivide(WordType
*lhs
, const WordType
*rhs
,
2606 WordType
*remainder
, WordType
*srhs
,
2608 assert(lhs
!= remainder
&& lhs
!= srhs
&& remainder
!= srhs
);
2610 unsigned shiftCount
= tcMSB(rhs
, parts
) + 1;
2611 if (shiftCount
== 0)
2614 shiftCount
= parts
* APINT_BITS_PER_WORD
- shiftCount
;
2615 unsigned n
= shiftCount
/ APINT_BITS_PER_WORD
;
2616 WordType mask
= (WordType
) 1 << (shiftCount
% APINT_BITS_PER_WORD
);
2618 tcAssign(srhs
, rhs
, parts
);
2619 tcShiftLeft(srhs
, parts
, shiftCount
);
2620 tcAssign(remainder
, lhs
, parts
);
2621 tcSet(lhs
, 0, parts
);
2623 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2626 int compare
= tcCompare(remainder
, srhs
, parts
);
2628 tcSubtract(remainder
, srhs
, 0, parts
);
2632 if (shiftCount
== 0)
2635 tcShiftRight(srhs
, parts
, 1);
2636 if ((mask
>>= 1) == 0) {
2637 mask
= (WordType
) 1 << (APINT_BITS_PER_WORD
- 1);
2645 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2646 /// no restrictions on Count.
2647 void APInt::tcShiftLeft(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2648 // Don't bother performing a no-op shift.
2652 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2653 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2654 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2656 // Fastpath for moving by whole words.
2657 if (BitShift
== 0) {
2658 std::memmove(Dst
+ WordShift
, Dst
, (Words
- WordShift
) * APINT_WORD_SIZE
);
2660 while (Words
-- > WordShift
) {
2661 Dst
[Words
] = Dst
[Words
- WordShift
] << BitShift
;
2662 if (Words
> WordShift
)
2664 Dst
[Words
- WordShift
- 1] >> (APINT_BITS_PER_WORD
- BitShift
);
2668 // Fill in the remainder with 0s.
2669 std::memset(Dst
, 0, WordShift
* APINT_WORD_SIZE
);
2672 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2673 /// are no restrictions on Count.
2674 void APInt::tcShiftRight(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2675 // Don't bother performing a no-op shift.
2679 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2680 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2681 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2683 unsigned WordsToMove
= Words
- WordShift
;
2684 // Fastpath for moving by whole words.
2685 if (BitShift
== 0) {
2686 std::memmove(Dst
, Dst
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
2688 for (unsigned i
= 0; i
!= WordsToMove
; ++i
) {
2689 Dst
[i
] = Dst
[i
+ WordShift
] >> BitShift
;
2690 if (i
+ 1 != WordsToMove
)
2691 Dst
[i
] |= Dst
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
);
2695 // Fill in the remainder with 0s.
2696 std::memset(Dst
+ WordsToMove
, 0, WordShift
* APINT_WORD_SIZE
);
2699 /* Bitwise and of two bignums. */
2700 void APInt::tcAnd(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2701 for (unsigned i
= 0; i
< parts
; i
++)
2705 /* Bitwise inclusive or of two bignums. */
2706 void APInt::tcOr(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2707 for (unsigned i
= 0; i
< parts
; i
++)
2711 /* Bitwise exclusive or of two bignums. */
2712 void APInt::tcXor(WordType
*dst
, const WordType
*rhs
, unsigned parts
) {
2713 for (unsigned i
= 0; i
< parts
; i
++)
2717 /* Complement a bignum in-place. */
2718 void APInt::tcComplement(WordType
*dst
, unsigned parts
) {
2719 for (unsigned i
= 0; i
< parts
; i
++)
2723 /* Comparison (unsigned) of two bignums. */
2724 int APInt::tcCompare(const WordType
*lhs
, const WordType
*rhs
,
2728 if (lhs
[parts
] != rhs
[parts
])
2729 return (lhs
[parts
] > rhs
[parts
]) ? 1 : -1;
2735 /* Set the least significant BITS bits of a bignum, clear the
2737 void APInt::tcSetLeastSignificantBits(WordType
*dst
, unsigned parts
,
2740 while (bits
> APINT_BITS_PER_WORD
) {
2741 dst
[i
++] = ~(WordType
) 0;
2742 bits
-= APINT_BITS_PER_WORD
;
2746 dst
[i
++] = ~(WordType
) 0 >> (APINT_BITS_PER_WORD
- bits
);
2752 APInt
llvm::APIntOps::RoundingUDiv(const APInt
&A
, const APInt
&B
,
2753 APInt::Rounding RM
) {
2754 // Currently udivrem always rounds down.
2756 case APInt::Rounding::DOWN
:
2757 case APInt::Rounding::TOWARD_ZERO
:
2759 case APInt::Rounding::UP
: {
2761 APInt::udivrem(A
, B
, Quo
, Rem
);
2767 llvm_unreachable("Unknown APInt::Rounding enum");
2770 APInt
llvm::APIntOps::RoundingSDiv(const APInt
&A
, const APInt
&B
,
2771 APInt::Rounding RM
) {
2773 case APInt::Rounding::DOWN
:
2774 case APInt::Rounding::UP
: {
2776 APInt::sdivrem(A
, B
, Quo
, Rem
);
2779 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2780 // We want to check whether the non-integer part of the mathematical value
2781 // is negative or not. If the non-integer part is negative, we need to round
2782 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2783 // already rounded down.
2784 if (RM
== APInt::Rounding::DOWN
) {
2785 if (Rem
.isNegative() != B
.isNegative())
2789 if (Rem
.isNegative() != B
.isNegative())
2793 // Currently sdiv rounds twards zero.
2794 case APInt::Rounding::TOWARD_ZERO
:
2797 llvm_unreachable("Unknown APInt::Rounding enum");
2801 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A
, APInt B
, APInt C
,
2802 unsigned RangeWidth
) {
2803 unsigned CoeffWidth
= A
.getBitWidth();
2804 assert(CoeffWidth
== B
.getBitWidth() && CoeffWidth
== C
.getBitWidth());
2805 assert(RangeWidth
<= CoeffWidth
&&
2806 "Value range width should be less than coefficient width");
2807 assert(RangeWidth
> 1 && "Value range bit width should be > 1");
2809 LLVM_DEBUG(dbgs() << __func__
<< ": solving " << A
<< "x^2 + " << B
2810 << "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2812 // Identify 0 as a (non)solution immediately.
2813 if (C
.sextOrTrunc(RangeWidth
).isNullValue() ) {
2814 LLVM_DEBUG(dbgs() << __func__
<< ": zero solution\n");
2815 return APInt(CoeffWidth
, 0);
2818 // The result of APInt arithmetic has the same bit width as the operands,
2819 // so it can actually lose high bits. A product of two n-bit integers needs
2820 // 2n-1 bits to represent the full value.
2821 // The operation done below (on quadratic coefficients) that can produce
2822 // the largest value is the evaluation of the equation during bisection,
2823 // which needs 3 times the bitwidth of the coefficient, so the total number
2824 // of required bits is 3n.
2826 // The purpose of this extension is to simulate the set Z of all integers,
2827 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2828 // and negative numbers (not so much in a modulo arithmetic). The method
2829 // used to solve the equation is based on the standard formula for real
2830 // numbers, and uses the concepts of "positive" and "negative" with their
2833 A
= A
.sext(CoeffWidth
);
2834 B
= B
.sext(CoeffWidth
);
2835 C
= C
.sext(CoeffWidth
);
2837 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2838 // the bit width has increased.
2839 if (A
.isNegative()) {
2845 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2846 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2847 // and R = 2^BitWidth.
2848 // Since we're trying not only to find exact solutions, but also values
2849 // that "wrap around", such a set will always have a solution, i.e. an x
2850 // that satisfies at least one of the equations, or such that |q(x)|
2851 // exceeds kR, while |q(x-1)| for the same k does not.
2853 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2854 // positive solution n (in the above sense), and also such that the n
2855 // will be the least among all solutions corresponding to k = 0, 1, ...
2856 // (more precisely, the least element in the set
2857 // { n(k) | k is such that a solution n(k) exists }).
2859 // Consider the parabola (over real numbers) that corresponds to the
2860 // quadratic equation. Since A > 0, the arms of the parabola will point
2861 // up. Picking different values of k will shift it up and down by R.
2863 // We want to shift the parabola in such a way as to reduce the problem
2864 // of solving q(x) = kR to solving shifted_q(x) = 0.
2865 // (The interesting solutions are the ceilings of the real number
2867 APInt R
= APInt::getOneBitSet(CoeffWidth
, RangeWidth
);
2872 auto RoundUp
= [] (const APInt
&V
, const APInt
&A
) -> APInt
{
2873 assert(A
.isStrictlyPositive());
2874 APInt T
= V
.abs().urem(A
);
2875 if (T
.isNullValue())
2877 return V
.isNegative() ? V
+T
: V
+(A
-T
);
2880 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2881 // iff B is positive.
2882 if (B
.isNonNegative()) {
2883 // If B >= 0, the vertex it at a negative location (or at 0), so in
2884 // order to have a non-negative solution we need to pick k that makes
2885 // C-kR negative. To satisfy all the requirements for the solution
2886 // that we are looking for, it needs to be closest to 0 of all k.
2888 if (C
.isStrictlyPositive())
2890 // Pick the greater solution.
2893 // If B < 0, the vertex is at a positive location. For any solution
2894 // to exist, the discriminant must be non-negative. This means that
2895 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2896 // lower bound on values of k: kR >= C - B^2/4A.
2897 APInt LowkR
= C
- SqrB
.udiv(2*TwoA
); // udiv because all values > 0.
2898 // Round LowkR up (towards +inf) to the nearest kR.
2899 LowkR
= RoundUp(LowkR
, R
);
2901 // If there exists k meeting the condition above, and such that
2902 // C-kR > 0, there will be two positive real number solutions of
2903 // q(x) = kR. Out of all such values of k, pick the one that makes
2904 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2905 // In other words, find maximum k such that LowkR <= kR < C.
2907 // If LowkR < C, then such a k is guaranteed to exist because
2908 // LowkR itself is a multiple of R.
2909 C
-= -RoundUp(-C
, R
); // C = C - RoundDown(C, R)
2910 // Pick the smaller solution.
2913 // If C-kR < 0 for all potential k's, it means that one solution
2914 // will be negative, while the other will be positive. The positive
2915 // solution will shift towards 0 if the parabola is moved up.
2916 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2917 // to 0, or in other words, out of all parabolas that have solutions,
2918 // pick the one that is the farthest "up").
2919 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2921 // Pick the greater solution.
2926 LLVM_DEBUG(dbgs() << __func__
<< ": updated coefficients " << A
<< "x^2 + "
2927 << B
<< "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2929 APInt D
= SqrB
- 4*A
*C
;
2930 assert(D
.isNonNegative() && "Negative discriminant");
2931 APInt SQ
= D
.sqrt();
2934 bool InexactSQ
= Q
!= D
;
2935 // The calculated SQ may actually be greater than the exact (non-integer)
2936 // value. If that's the case, decremement SQ to get a value that is lower.
2943 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2944 // When using the quadratic formula directly, the calculated low root
2945 // may be greater than the exact one, since we would be subtracting SQ.
2946 // To make sure that the calculated root is not greater than the exact
2947 // one, subtract SQ+1 when calculating the low root (for inexact value
2950 APInt::sdivrem(-B
- (SQ
+InexactSQ
), TwoA
, X
, Rem
);
2952 APInt::sdivrem(-B
+ SQ
, TwoA
, X
, Rem
);
2954 // The updated coefficients should be such that the (exact) solution is
2955 // positive. Since APInt division rounds towards 0, the calculated one
2956 // can be 0, but cannot be negative.
2957 assert(X
.isNonNegative() && "Solution should be non-negative");
2959 if (!InexactSQ
&& Rem
.isNullValue()) {
2960 LLVM_DEBUG(dbgs() << __func__
<< ": solution (root): " << X
<< '\n');
2964 assert((SQ
*SQ
).sle(D
) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2965 // The exact value of the square root of D should be between SQ and SQ+1.
2966 // This implies that the solution should be between that corresponding to
2967 // SQ (i.e. X) and that corresponding to SQ+1.
2969 // The calculated X cannot be greater than the exact (real) solution.
2970 // Actually it must be strictly less than the exact solution, while
2971 // X+1 will be greater than or equal to it.
2973 APInt VX
= (A
*X
+ B
)*X
+ C
;
2974 APInt VY
= VX
+ TwoA
*X
+ A
+ B
;
2975 bool SignChange
= VX
.isNegative() != VY
.isNegative() ||
2976 VX
.isNullValue() != VY
.isNullValue();
2977 // If the sign did not change between X and X+1, X is not a valid solution.
2978 // This could happen when the actual (exact) roots don't have an integer
2979 // between them, so they would both be contained between X and X+1.
2981 LLVM_DEBUG(dbgs() << __func__
<< ": no valid solution\n");
2986 LLVM_DEBUG(dbgs() << __func__
<< ": solution (wrap): " << X
<< '\n');
2990 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2991 /// with the integer held in IntVal.
2992 void llvm::StoreIntToMemory(const APInt
&IntVal
, uint8_t *Dst
,
2993 unsigned StoreBytes
) {
2994 assert((IntVal
.getBitWidth()+7)/8 >= StoreBytes
&& "Integer too small!");
2995 const uint8_t *Src
= (const uint8_t *)IntVal
.getRawData();
2997 if (sys::IsLittleEndianHost
) {
2998 // Little-endian host - the source is ordered from LSB to MSB. Order the
2999 // destination from LSB to MSB: Do a straight copy.
3000 memcpy(Dst
, Src
, StoreBytes
);
3002 // Big-endian host - the source is an array of 64 bit words ordered from
3003 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3004 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3005 while (StoreBytes
> sizeof(uint64_t)) {
3006 StoreBytes
-= sizeof(uint64_t);
3007 // May not be aligned so use memcpy.
3008 memcpy(Dst
+ StoreBytes
, Src
, sizeof(uint64_t));
3009 Src
+= sizeof(uint64_t);
3012 memcpy(Dst
, Src
+ sizeof(uint64_t) - StoreBytes
, StoreBytes
);
3016 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3017 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3018 void llvm::LoadIntFromMemory(APInt
&IntVal
, uint8_t *Src
, unsigned LoadBytes
) {
3019 assert((IntVal
.getBitWidth()+7)/8 >= LoadBytes
&& "Integer too small!");
3020 uint8_t *Dst
= reinterpret_cast<uint8_t *>(
3021 const_cast<uint64_t *>(IntVal
.getRawData()));
3023 if (sys::IsLittleEndianHost
)
3024 // Little-endian host - the destination must be ordered from LSB to MSB.
3025 // The source is ordered from LSB to MSB: Do a straight copy.
3026 memcpy(Dst
, Src
, LoadBytes
);
3028 // Big-endian - the destination is an array of 64 bit words ordered from
3029 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3030 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3032 while (LoadBytes
> sizeof(uint64_t)) {
3033 LoadBytes
-= sizeof(uint64_t);
3034 // May not be aligned so use memcpy.
3035 memcpy(Dst
, Src
+ LoadBytes
, sizeof(uint64_t));
3036 Dst
+= sizeof(uint64_t);
3039 memcpy(Dst
+ sizeof(uint64_t) - LoadBytes
, Src
, LoadBytes
);