1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/bit.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Alignment.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
32 #define DEBUG_TYPE "apint"
34 /// A utility function for allocating memory, checking for allocation failures,
35 /// and ensuring the contents are zeroed.
36 inline static uint64_t* getClearedMemory(unsigned numWords
) {
37 return new uint64_t[numWords
]();
40 /// A utility function for allocating memory and checking for allocation
41 /// failure. The content is not zeroed.
42 inline static uint64_t* getMemory(unsigned numWords
) {
43 return new uint64_t[numWords
];
46 /// A utility function that converts a character to a digit.
47 inline static unsigned getDigit(char cdigit
, uint8_t radix
) {
50 if (radix
== 16 || radix
== 36) {
74 void APInt::initSlowCase(uint64_t val
, bool isSigned
) {
75 if (isSigned
&& int64_t(val
) < 0) {
76 U
.pVal
= getMemory(getNumWords());
78 memset(&U
.pVal
[1], 0xFF, APINT_WORD_SIZE
* (getNumWords() - 1));
81 U
.pVal
= getClearedMemory(getNumWords());
86 void APInt::initSlowCase(const APInt
& that
) {
87 U
.pVal
= getMemory(getNumWords());
88 memcpy(U
.pVal
, that
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
91 void APInt::initFromArray(ArrayRef
<uint64_t> bigVal
) {
92 assert(bigVal
.data() && "Null pointer detected!");
96 // Get memory, cleared to 0
97 U
.pVal
= getClearedMemory(getNumWords());
98 // Calculate the number of words to copy
99 unsigned words
= std::min
<unsigned>(bigVal
.size(), getNumWords());
100 // Copy the words from bigVal to pVal
101 memcpy(U
.pVal
, bigVal
.data(), words
* APINT_WORD_SIZE
);
103 // Make sure unused high bits are cleared
107 APInt::APInt(unsigned numBits
, ArrayRef
<uint64_t> bigVal
) : BitWidth(numBits
) {
108 initFromArray(bigVal
);
111 APInt::APInt(unsigned numBits
, unsigned numWords
, const uint64_t bigVal
[])
112 : BitWidth(numBits
) {
113 initFromArray(ArrayRef(bigVal
, numWords
));
116 APInt::APInt(unsigned numbits
, StringRef Str
, uint8_t radix
)
117 : BitWidth(numbits
) {
118 fromString(numbits
, Str
, radix
);
121 void APInt::reallocate(unsigned NewBitWidth
) {
122 // If the number of words is the same we can just change the width and stop.
123 if (getNumWords() == getNumWords(NewBitWidth
)) {
124 BitWidth
= NewBitWidth
;
128 // If we have an allocation, delete it.
133 BitWidth
= NewBitWidth
;
135 // If we are supposed to have an allocation, create it.
137 U
.pVal
= getMemory(getNumWords());
140 void APInt::assignSlowCase(const APInt
&RHS
) {
141 // Don't do anything for X = X
145 // Adjust the bit width and handle allocations as necessary.
146 reallocate(RHS
.getBitWidth());
152 memcpy(U
.pVal
, RHS
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
155 /// This method 'profiles' an APInt for use with FoldingSet.
156 void APInt::Profile(FoldingSetNodeID
& ID
) const {
157 ID
.AddInteger(BitWidth
);
159 if (isSingleWord()) {
160 ID
.AddInteger(U
.VAL
);
164 unsigned NumWords
= getNumWords();
165 for (unsigned i
= 0; i
< NumWords
; ++i
)
166 ID
.AddInteger(U
.pVal
[i
]);
169 bool APInt::isAligned(Align A
) const {
172 const unsigned TrailingZeroes
= countr_zero();
173 const unsigned MinimumTrailingZeroes
= Log2(A
);
174 return TrailingZeroes
>= MinimumTrailingZeroes
;
177 /// Prefix increment operator. Increments the APInt by one.
178 APInt
& APInt::operator++() {
182 tcIncrement(U
.pVal
, getNumWords());
183 return clearUnusedBits();
186 /// Prefix decrement operator. Decrements the APInt by one.
187 APInt
& APInt::operator--() {
191 tcDecrement(U
.pVal
, getNumWords());
192 return clearUnusedBits();
195 /// Adds the RHS APInt to this APInt.
196 /// @returns this, after addition of RHS.
197 /// Addition assignment operator.
198 APInt
& APInt::operator+=(const APInt
& RHS
) {
199 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
203 tcAdd(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
204 return clearUnusedBits();
207 APInt
& APInt::operator+=(uint64_t RHS
) {
211 tcAddPart(U
.pVal
, RHS
, getNumWords());
212 return clearUnusedBits();
215 /// Subtracts the RHS APInt from this APInt
216 /// @returns this, after subtraction
217 /// Subtraction assignment operator.
218 APInt
& APInt::operator-=(const APInt
& RHS
) {
219 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
223 tcSubtract(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
224 return clearUnusedBits();
227 APInt
& APInt::operator-=(uint64_t RHS
) {
231 tcSubtractPart(U
.pVal
, RHS
, getNumWords());
232 return clearUnusedBits();
235 APInt
APInt::operator*(const APInt
& RHS
) const {
236 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
238 return APInt(BitWidth
, U
.VAL
* RHS
.U
.VAL
, /*isSigned=*/false,
239 /*implicitTrunc=*/true);
241 APInt
Result(getMemory(getNumWords()), getBitWidth());
242 tcMultiply(Result
.U
.pVal
, U
.pVal
, RHS
.U
.pVal
, getNumWords());
243 Result
.clearUnusedBits();
247 void APInt::andAssignSlowCase(const APInt
&RHS
) {
248 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
249 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
253 void APInt::orAssignSlowCase(const APInt
&RHS
) {
254 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
255 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
259 void APInt::xorAssignSlowCase(const APInt
&RHS
) {
260 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
261 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
265 APInt
&APInt::operator*=(const APInt
&RHS
) {
270 APInt
& APInt::operator*=(uint64_t RHS
) {
271 if (isSingleWord()) {
274 unsigned NumWords
= getNumWords();
275 tcMultiplyPart(U
.pVal
, U
.pVal
, RHS
, 0, NumWords
, NumWords
, false);
277 return clearUnusedBits();
280 bool APInt::equalSlowCase(const APInt
&RHS
) const {
281 return std::equal(U
.pVal
, U
.pVal
+ getNumWords(), RHS
.U
.pVal
);
284 int APInt::compare(const APInt
& RHS
) const {
285 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
287 return U
.VAL
< RHS
.U
.VAL
? -1 : U
.VAL
> RHS
.U
.VAL
;
289 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
292 int APInt::compareSigned(const APInt
& RHS
) const {
293 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
294 if (isSingleWord()) {
295 int64_t lhsSext
= SignExtend64(U
.VAL
, BitWidth
);
296 int64_t rhsSext
= SignExtend64(RHS
.U
.VAL
, BitWidth
);
297 return lhsSext
< rhsSext
? -1 : lhsSext
> rhsSext
;
300 bool lhsNeg
= isNegative();
301 bool rhsNeg
= RHS
.isNegative();
303 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
304 if (lhsNeg
!= rhsNeg
)
305 return lhsNeg
? -1 : 1;
307 // Otherwise we can just use an unsigned comparison, because even negative
308 // numbers compare correctly this way if both have the same signed-ness.
309 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
312 void APInt::setBitsSlowCase(unsigned loBit
, unsigned hiBit
) {
313 unsigned loWord
= whichWord(loBit
);
314 unsigned hiWord
= whichWord(hiBit
);
316 // Create an initial mask for the low word with zeros below loBit.
317 uint64_t loMask
= WORDTYPE_MAX
<< whichBit(loBit
);
319 // If hiBit is not aligned, we need a high mask.
320 unsigned hiShiftAmt
= whichBit(hiBit
);
321 if (hiShiftAmt
!= 0) {
322 // Create a high mask with zeros above hiBit.
323 uint64_t hiMask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- hiShiftAmt
);
324 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
325 // set the bits in hiWord.
326 if (hiWord
== loWord
)
329 U
.pVal
[hiWord
] |= hiMask
;
331 // Apply the mask to the low word.
332 U
.pVal
[loWord
] |= loMask
;
334 // Fill any words between loWord and hiWord with all ones.
335 for (unsigned word
= loWord
+ 1; word
< hiWord
; ++word
)
336 U
.pVal
[word
] = WORDTYPE_MAX
;
339 // Complement a bignum in-place.
340 static void tcComplement(APInt::WordType
*dst
, unsigned parts
) {
341 for (unsigned i
= 0; i
< parts
; i
++)
345 /// Toggle every bit to its opposite value.
346 void APInt::flipAllBitsSlowCase() {
347 tcComplement(U
.pVal
, getNumWords());
351 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
353 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
354 /// In the slow case, we know the result is large.
355 APInt
APInt::concatSlowCase(const APInt
&NewLSB
) const {
356 unsigned NewWidth
= getBitWidth() + NewLSB
.getBitWidth();
357 APInt Result
= NewLSB
.zext(NewWidth
);
358 Result
.insertBits(*this, NewLSB
.getBitWidth());
362 /// Toggle a given bit to its opposite value whose position is given
363 /// as "bitPosition".
364 /// Toggles a given bit to its opposite value.
365 void APInt::flipBit(unsigned bitPosition
) {
366 assert(bitPosition
< BitWidth
&& "Out of the bit-width range!");
367 setBitVal(bitPosition
, !(*this)[bitPosition
]);
370 void APInt::insertBits(const APInt
&subBits
, unsigned bitPosition
) {
371 unsigned subBitWidth
= subBits
.getBitWidth();
372 assert((subBitWidth
+ bitPosition
) <= BitWidth
&& "Illegal bit insertion");
374 // inserting no bits is a noop.
375 if (subBitWidth
== 0)
378 // Insertion is a direct copy.
379 if (subBitWidth
== BitWidth
) {
384 // Single word result can be done as a direct bitmask.
385 if (isSingleWord()) {
386 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
387 U
.VAL
&= ~(mask
<< bitPosition
);
388 U
.VAL
|= (subBits
.U
.VAL
<< bitPosition
);
392 unsigned loBit
= whichBit(bitPosition
);
393 unsigned loWord
= whichWord(bitPosition
);
394 unsigned hi1Word
= whichWord(bitPosition
+ subBitWidth
- 1);
396 // Insertion within a single word can be done as a direct bitmask.
397 if (loWord
== hi1Word
) {
398 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
399 U
.pVal
[loWord
] &= ~(mask
<< loBit
);
400 U
.pVal
[loWord
] |= (subBits
.U
.VAL
<< loBit
);
404 // Insert on word boundaries.
406 // Direct copy whole words.
407 unsigned numWholeSubWords
= subBitWidth
/ APINT_BITS_PER_WORD
;
408 memcpy(U
.pVal
+ loWord
, subBits
.getRawData(),
409 numWholeSubWords
* APINT_WORD_SIZE
);
411 // Mask+insert remaining bits.
412 unsigned remainingBits
= subBitWidth
% APINT_BITS_PER_WORD
;
413 if (remainingBits
!= 0) {
414 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- remainingBits
);
415 U
.pVal
[hi1Word
] &= ~mask
;
416 U
.pVal
[hi1Word
] |= subBits
.getWord(subBitWidth
- 1);
421 // General case - set/clear individual bits in dst based on src.
422 // TODO - there is scope for optimization here, but at the moment this code
423 // path is barely used so prefer readability over performance.
424 for (unsigned i
= 0; i
!= subBitWidth
; ++i
)
425 setBitVal(bitPosition
+ i
, subBits
[i
]);
428 void APInt::insertBits(uint64_t subBits
, unsigned bitPosition
, unsigned numBits
) {
429 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
431 if (isSingleWord()) {
432 U
.VAL
&= ~(maskBits
<< bitPosition
);
433 U
.VAL
|= subBits
<< bitPosition
;
437 unsigned loBit
= whichBit(bitPosition
);
438 unsigned loWord
= whichWord(bitPosition
);
439 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
440 if (loWord
== hiWord
) {
441 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
442 U
.pVal
[loWord
] |= subBits
<< loBit
;
446 static_assert(8 * sizeof(WordType
) <= 64, "This code assumes only two words affected");
447 unsigned wordBits
= 8 * sizeof(WordType
);
448 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
449 U
.pVal
[loWord
] |= subBits
<< loBit
;
451 U
.pVal
[hiWord
] &= ~(maskBits
>> (wordBits
- loBit
));
452 U
.pVal
[hiWord
] |= subBits
>> (wordBits
- loBit
);
455 APInt
APInt::extractBits(unsigned numBits
, unsigned bitPosition
) const {
456 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
457 "Illegal bit extraction");
460 return APInt(numBits
, U
.VAL
>> bitPosition
, /*isSigned=*/false,
461 /*implicitTrunc=*/true);
463 unsigned loBit
= whichBit(bitPosition
);
464 unsigned loWord
= whichWord(bitPosition
);
465 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
467 // Single word result extracting bits from a single word source.
468 if (loWord
== hiWord
)
469 return APInt(numBits
, U
.pVal
[loWord
] >> loBit
, /*isSigned=*/false,
470 /*implicitTrunc=*/true);
472 // Extracting bits that start on a source word boundary can be done
473 // as a fast memory copy.
475 return APInt(numBits
, ArrayRef(U
.pVal
+ loWord
, 1 + hiWord
- loWord
));
477 // General case - shift + copy source words directly into place.
478 APInt
Result(numBits
, 0);
479 unsigned NumSrcWords
= getNumWords();
480 unsigned NumDstWords
= Result
.getNumWords();
482 uint64_t *DestPtr
= Result
.isSingleWord() ? &Result
.U
.VAL
: Result
.U
.pVal
;
483 for (unsigned word
= 0; word
< NumDstWords
; ++word
) {
484 uint64_t w0
= U
.pVal
[loWord
+ word
];
486 (loWord
+ word
+ 1) < NumSrcWords
? U
.pVal
[loWord
+ word
+ 1] : 0;
487 DestPtr
[word
] = (w0
>> loBit
) | (w1
<< (APINT_BITS_PER_WORD
- loBit
));
490 return Result
.clearUnusedBits();
493 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits
,
494 unsigned bitPosition
) const {
495 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
496 "Illegal bit extraction");
497 assert(numBits
<= 64 && "Illegal bit extraction");
499 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
501 return (U
.VAL
>> bitPosition
) & maskBits
;
503 static_assert(APINT_BITS_PER_WORD
>= 64,
504 "This code assumes only two words affected");
505 unsigned loBit
= whichBit(bitPosition
);
506 unsigned loWord
= whichWord(bitPosition
);
507 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
508 if (loWord
== hiWord
)
509 return (U
.pVal
[loWord
] >> loBit
) & maskBits
;
511 uint64_t retBits
= U
.pVal
[loWord
] >> loBit
;
512 retBits
|= U
.pVal
[hiWord
] << (APINT_BITS_PER_WORD
- loBit
);
517 unsigned APInt::getSufficientBitsNeeded(StringRef Str
, uint8_t Radix
) {
518 assert(!Str
.empty() && "Invalid string length");
519 size_t StrLen
= Str
.size();
521 // Each computation below needs to know if it's negative.
522 unsigned IsNegative
= false;
523 if (Str
[0] == '-' || Str
[0] == '+') {
524 IsNegative
= Str
[0] == '-';
526 assert(StrLen
&& "String is only a sign, needs a value.");
529 // For radixes of power-of-two values, the bits required is accurately and
532 return StrLen
+ IsNegative
;
534 return StrLen
* 3 + IsNegative
;
536 return StrLen
* 4 + IsNegative
;
538 // Compute a sufficient number of bits that is always large enough but might
539 // be too large. This avoids the assertion in the constructor. This
540 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
541 // bits in that case.
543 return (StrLen
== 1 ? 4 : StrLen
* 64 / 18) + IsNegative
;
546 return (StrLen
== 1 ? 7 : StrLen
* 16 / 3) + IsNegative
;
549 unsigned APInt::getBitsNeeded(StringRef str
, uint8_t radix
) {
550 // Compute a sufficient number of bits that is always large enough but might
552 unsigned sufficient
= getSufficientBitsNeeded(str
, radix
);
554 // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
555 // return the value directly. For bases 10 and 36, we need to do extra work.
556 if (radix
== 2 || radix
== 8 || radix
== 16)
559 // This is grossly inefficient but accurate. We could probably do something
560 // with a computation of roughly slen*64/20 and then adjust by the value of
561 // the first few digits. But, I'm not sure how accurate that could be.
562 size_t slen
= str
.size();
564 // Each computation below needs to know if it's negative.
565 StringRef::iterator p
= str
.begin();
566 unsigned isNegative
= *p
== '-';
567 if (*p
== '-' || *p
== '+') {
570 assert(slen
&& "String is only a sign, needs a value.");
574 // Convert to the actual binary value.
575 APInt
tmp(sufficient
, StringRef(p
, slen
), radix
);
577 // Compute how many bits are required. If the log is infinite, assume we need
578 // just bit. If the log is exact and value is negative, then the value is
579 // MinSignedValue with (log + 1) bits.
580 unsigned log
= tmp
.logBase2();
581 if (log
== (unsigned)-1) {
582 return isNegative
+ 1;
583 } else if (isNegative
&& tmp
.isPowerOf2()) {
584 return isNegative
+ log
;
586 return isNegative
+ log
+ 1;
590 hash_code
llvm::hash_value(const APInt
&Arg
) {
591 if (Arg
.isSingleWord())
592 return hash_combine(Arg
.BitWidth
, Arg
.U
.VAL
);
596 hash_combine_range(Arg
.U
.pVal
, Arg
.U
.pVal
+ Arg
.getNumWords()));
599 unsigned DenseMapInfo
<APInt
, void>::getHashValue(const APInt
&Key
) {
600 return static_cast<unsigned>(hash_value(Key
));
603 bool APInt::isSplat(unsigned SplatSizeInBits
) const {
604 assert(getBitWidth() % SplatSizeInBits
== 0 &&
605 "SplatSizeInBits must divide width!");
606 // We can check that all parts of an integer are equal by making use of a
607 // little trick: rotate and check if it's still the same value.
608 return *this == rotl(SplatSizeInBits
);
611 /// This function returns the high "numBits" bits of this APInt.
612 APInt
APInt::getHiBits(unsigned numBits
) const {
613 return this->lshr(BitWidth
- numBits
);
616 /// This function returns the low "numBits" bits of this APInt.
617 APInt
APInt::getLoBits(unsigned numBits
) const {
618 APInt
Result(getLowBitsSet(BitWidth
, numBits
));
623 /// Return a value containing V broadcasted over NewLen bits.
624 APInt
APInt::getSplat(unsigned NewLen
, const APInt
&V
) {
625 assert(NewLen
>= V
.getBitWidth() && "Can't splat to smaller bit width!");
627 APInt Val
= V
.zext(NewLen
);
628 for (unsigned I
= V
.getBitWidth(); I
< NewLen
; I
<<= 1)
634 unsigned APInt::countLeadingZerosSlowCase() const {
636 for (int i
= getNumWords()-1; i
>= 0; --i
) {
637 uint64_t V
= U
.pVal
[i
];
639 Count
+= APINT_BITS_PER_WORD
;
641 Count
+= llvm::countl_zero(V
);
645 // Adjust for unused bits in the most significant word (they are zero).
646 unsigned Mod
= BitWidth
% APINT_BITS_PER_WORD
;
647 Count
-= Mod
> 0 ? APINT_BITS_PER_WORD
- Mod
: 0;
651 unsigned APInt::countLeadingOnesSlowCase() const {
652 unsigned highWordBits
= BitWidth
% APINT_BITS_PER_WORD
;
655 highWordBits
= APINT_BITS_PER_WORD
;
658 shift
= APINT_BITS_PER_WORD
- highWordBits
;
660 int i
= getNumWords() - 1;
661 unsigned Count
= llvm::countl_one(U
.pVal
[i
] << shift
);
662 if (Count
== highWordBits
) {
663 for (i
--; i
>= 0; --i
) {
664 if (U
.pVal
[i
] == WORDTYPE_MAX
)
665 Count
+= APINT_BITS_PER_WORD
;
667 Count
+= llvm::countl_one(U
.pVal
[i
]);
675 unsigned APInt::countTrailingZerosSlowCase() const {
678 for (; i
< getNumWords() && U
.pVal
[i
] == 0; ++i
)
679 Count
+= APINT_BITS_PER_WORD
;
680 if (i
< getNumWords())
681 Count
+= llvm::countr_zero(U
.pVal
[i
]);
682 return std::min(Count
, BitWidth
);
685 unsigned APInt::countTrailingOnesSlowCase() const {
688 for (; i
< getNumWords() && U
.pVal
[i
] == WORDTYPE_MAX
; ++i
)
689 Count
+= APINT_BITS_PER_WORD
;
690 if (i
< getNumWords())
691 Count
+= llvm::countr_one(U
.pVal
[i
]);
692 assert(Count
<= BitWidth
);
696 unsigned APInt::countPopulationSlowCase() const {
698 for (unsigned i
= 0; i
< getNumWords(); ++i
)
699 Count
+= llvm::popcount(U
.pVal
[i
]);
703 bool APInt::intersectsSlowCase(const APInt
&RHS
) const {
704 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
705 if ((U
.pVal
[i
] & RHS
.U
.pVal
[i
]) != 0)
711 bool APInt::isSubsetOfSlowCase(const APInt
&RHS
) const {
712 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
713 if ((U
.pVal
[i
] & ~RHS
.U
.pVal
[i
]) != 0)
719 APInt
APInt::byteSwap() const {
720 assert(BitWidth
>= 16 && BitWidth
% 8 == 0 && "Cannot byteswap!");
722 return APInt(BitWidth
, llvm::byteswap
<uint16_t>(U
.VAL
));
724 return APInt(BitWidth
, llvm::byteswap
<uint32_t>(U
.VAL
));
725 if (BitWidth
<= 64) {
726 uint64_t Tmp1
= llvm::byteswap
<uint64_t>(U
.VAL
);
727 Tmp1
>>= (64 - BitWidth
);
728 return APInt(BitWidth
, Tmp1
);
731 APInt
Result(getNumWords() * APINT_BITS_PER_WORD
, 0);
732 for (unsigned I
= 0, N
= getNumWords(); I
!= N
; ++I
)
733 Result
.U
.pVal
[I
] = llvm::byteswap
<uint64_t>(U
.pVal
[N
- I
- 1]);
734 if (Result
.BitWidth
!= BitWidth
) {
735 Result
.lshrInPlace(Result
.BitWidth
- BitWidth
);
736 Result
.BitWidth
= BitWidth
;
741 APInt
APInt::reverseBits() const {
744 return APInt(BitWidth
, llvm::reverseBits
<uint64_t>(U
.VAL
));
746 return APInt(BitWidth
, llvm::reverseBits
<uint32_t>(U
.VAL
));
748 return APInt(BitWidth
, llvm::reverseBits
<uint16_t>(U
.VAL
));
750 return APInt(BitWidth
, llvm::reverseBits
<uint8_t>(U
.VAL
));
758 APInt
Reversed(BitWidth
, 0);
759 unsigned S
= BitWidth
;
761 for (; Val
!= 0; Val
.lshrInPlace(1)) {
771 APInt
llvm::APIntOps::GreatestCommonDivisor(APInt A
, APInt B
) {
772 // Fast-path a common case.
773 if (A
== B
) return A
;
775 // Corner cases: if either operand is zero, the other is the gcd.
779 // Count common powers of 2 and remove all other powers of 2.
782 unsigned Pow2_A
= A
.countr_zero();
783 unsigned Pow2_B
= B
.countr_zero();
784 if (Pow2_A
> Pow2_B
) {
785 A
.lshrInPlace(Pow2_A
- Pow2_B
);
787 } else if (Pow2_B
> Pow2_A
) {
788 B
.lshrInPlace(Pow2_B
- Pow2_A
);
795 // Both operands are odd multiples of 2^Pow_2:
797 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
799 // This is a modified version of Stein's algorithm, taking advantage of
800 // efficient countTrailingZeros().
804 A
.lshrInPlace(A
.countr_zero() - Pow2
);
807 B
.lshrInPlace(B
.countr_zero() - Pow2
);
814 APInt
llvm::APIntOps::RoundDoubleToAPInt(double Double
, unsigned width
) {
815 uint64_t I
= bit_cast
<uint64_t>(Double
);
817 // Get the sign bit from the highest order bit
818 bool isNeg
= I
>> 63;
820 // Get the 11-bit exponent and adjust for the 1023 bit bias
821 int64_t exp
= ((I
>> 52) & 0x7ff) - 1023;
823 // If the exponent is negative, the value is < 0 so just return 0.
825 return APInt(width
, 0u);
827 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
828 uint64_t mantissa
= (I
& (~0ULL >> 12)) | 1ULL << 52;
830 // If the exponent doesn't shift all bits out of the mantissa
832 return isNeg
? -APInt(width
, mantissa
>> (52 - exp
)) :
833 APInt(width
, mantissa
>> (52 - exp
));
835 // If the client didn't provide enough bits for us to shift the mantissa into
836 // then the result is undefined, just return 0
837 if (width
<= exp
- 52)
838 return APInt(width
, 0);
840 // Otherwise, we have to shift the mantissa bits up to the right location
841 APInt
Tmp(width
, mantissa
);
842 Tmp
<<= (unsigned)exp
- 52;
843 return isNeg
? -Tmp
: Tmp
;
846 /// This function converts this APInt to a double.
847 /// The layout for double is as following (IEEE Standard 754):
848 /// --------------------------------------
849 /// | Sign Exponent Fraction Bias |
850 /// |-------------------------------------- |
851 /// | 1[63] 11[62-52] 52[51-00] 1023 |
852 /// --------------------------------------
853 double APInt::roundToDouble(bool isSigned
) const {
855 // Handle the simple case where the value is contained in one uint64_t.
856 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
857 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD
) {
859 int64_t sext
= SignExtend64(getWord(0), BitWidth
);
862 return double(getWord(0));
865 // Determine if the value is negative.
866 bool isNeg
= isSigned
? (*this)[BitWidth
-1] : false;
868 // Construct the absolute value if we're negative.
869 APInt
Tmp(isNeg
? -(*this) : (*this));
871 // Figure out how many bits we're using.
872 unsigned n
= Tmp
.getActiveBits();
874 // The exponent (without bias normalization) is just the number of bits
875 // we are using. Note that the sign bit is gone since we constructed the
879 // Return infinity for exponent overflow
881 if (!isSigned
|| !isNeg
)
882 return std::numeric_limits
<double>::infinity();
884 return -std::numeric_limits
<double>::infinity();
886 exp
+= 1023; // Increment for 1023 bias
888 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
889 // extract the high 52 bits from the correct words in pVal.
891 unsigned hiWord
= whichWord(n
-1);
893 mantissa
= Tmp
.U
.pVal
[0];
895 mantissa
>>= n
- 52; // shift down, we want the top 52 bits.
897 assert(hiWord
> 0 && "huh?");
898 uint64_t hibits
= Tmp
.U
.pVal
[hiWord
] << (52 - n
% APINT_BITS_PER_WORD
);
899 uint64_t lobits
= Tmp
.U
.pVal
[hiWord
-1] >> (11 + n
% APINT_BITS_PER_WORD
);
900 mantissa
= hibits
| lobits
;
903 // The leading bit of mantissa is implicit, so get rid of it.
904 uint64_t sign
= isNeg
? (1ULL << (APINT_BITS_PER_WORD
- 1)) : 0;
905 uint64_t I
= sign
| (exp
<< 52) | mantissa
;
906 return bit_cast
<double>(I
);
909 // Truncate to new width.
910 APInt
APInt::trunc(unsigned width
) const {
911 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
913 if (width
<= APINT_BITS_PER_WORD
)
914 return APInt(width
, getRawData()[0], /*isSigned=*/false,
915 /*implicitTrunc=*/true);
917 if (width
== BitWidth
)
920 APInt
Result(getMemory(getNumWords(width
)), width
);
924 for (i
= 0; i
!= width
/ APINT_BITS_PER_WORD
; i
++)
925 Result
.U
.pVal
[i
] = U
.pVal
[i
];
927 // Truncate and copy any partial word.
928 unsigned bits
= (0 - width
) % APINT_BITS_PER_WORD
;
930 Result
.U
.pVal
[i
] = U
.pVal
[i
] << bits
>> bits
;
935 // Truncate to new width with unsigned saturation.
936 APInt
APInt::truncUSat(unsigned width
) const {
937 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
939 // Can we just losslessly truncate it?
942 // If not, then just return the new limit.
943 return APInt::getMaxValue(width
);
946 // Truncate to new width with signed saturation.
947 APInt
APInt::truncSSat(unsigned width
) const {
948 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
950 // Can we just losslessly truncate it?
951 if (isSignedIntN(width
))
953 // If not, then just return the new limits.
954 return isNegative() ? APInt::getSignedMinValue(width
)
955 : APInt::getSignedMaxValue(width
);
958 // Sign extend to a new width.
959 APInt
APInt::sext(unsigned Width
) const {
960 assert(Width
>= BitWidth
&& "Invalid APInt SignExtend request");
962 if (Width
<= APINT_BITS_PER_WORD
)
963 return APInt(Width
, SignExtend64(U
.VAL
, BitWidth
), /*isSigned=*/true);
965 if (Width
== BitWidth
)
968 APInt
Result(getMemory(getNumWords(Width
)), Width
);
971 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
973 // Sign extend the last word since there may be unused bits in the input.
974 Result
.U
.pVal
[getNumWords() - 1] =
975 SignExtend64(Result
.U
.pVal
[getNumWords() - 1],
976 ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
978 // Fill with sign bits.
979 std::memset(Result
.U
.pVal
+ getNumWords(), isNegative() ? -1 : 0,
980 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
981 Result
.clearUnusedBits();
985 // Zero extend to a new width.
986 APInt
APInt::zext(unsigned width
) const {
987 assert(width
>= BitWidth
&& "Invalid APInt ZeroExtend request");
989 if (width
<= APINT_BITS_PER_WORD
)
990 return APInt(width
, U
.VAL
);
992 if (width
== BitWidth
)
995 APInt
Result(getMemory(getNumWords(width
)), width
);
998 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
1000 // Zero remaining words.
1001 std::memset(Result
.U
.pVal
+ getNumWords(), 0,
1002 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
1007 APInt
APInt::zextOrTrunc(unsigned width
) const {
1008 if (BitWidth
< width
)
1010 if (BitWidth
> width
)
1011 return trunc(width
);
1015 APInt
APInt::sextOrTrunc(unsigned width
) const {
1016 if (BitWidth
< width
)
1018 if (BitWidth
> width
)
1019 return trunc(width
);
1023 /// Arithmetic right-shift this APInt by shiftAmt.
1024 /// Arithmetic right-shift function.
1025 void APInt::ashrInPlace(const APInt
&shiftAmt
) {
1026 ashrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
1029 /// Arithmetic right-shift this APInt by shiftAmt.
1030 /// Arithmetic right-shift function.
1031 void APInt::ashrSlowCase(unsigned ShiftAmt
) {
1032 // Don't bother performing a no-op shift.
1036 // Save the original sign bit for later.
1037 bool Negative
= isNegative();
1039 // WordShift is the inter-part shift; BitShift is intra-part shift.
1040 unsigned WordShift
= ShiftAmt
/ APINT_BITS_PER_WORD
;
1041 unsigned BitShift
= ShiftAmt
% APINT_BITS_PER_WORD
;
1043 unsigned WordsToMove
= getNumWords() - WordShift
;
1044 if (WordsToMove
!= 0) {
1045 // Sign extend the last word to fill in the unused bits.
1046 U
.pVal
[getNumWords() - 1] = SignExtend64(
1047 U
.pVal
[getNumWords() - 1], ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
1049 // Fastpath for moving by whole words.
1050 if (BitShift
== 0) {
1051 std::memmove(U
.pVal
, U
.pVal
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
1053 // Move the words containing significant bits.
1054 for (unsigned i
= 0; i
!= WordsToMove
- 1; ++i
)
1055 U
.pVal
[i
] = (U
.pVal
[i
+ WordShift
] >> BitShift
) |
1056 (U
.pVal
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
));
1058 // Handle the last word which has no high bits to copy. Use an arithmetic
1059 // shift to preserve the sign bit.
1060 U
.pVal
[WordsToMove
- 1] =
1061 (int64_t)U
.pVal
[WordShift
+ WordsToMove
- 1] >> BitShift
;
1065 // Fill in the remainder based on the original sign.
1066 std::memset(U
.pVal
+ WordsToMove
, Negative
? -1 : 0,
1067 WordShift
* APINT_WORD_SIZE
);
1071 /// Logical right-shift this APInt by shiftAmt.
1072 /// Logical right-shift function.
1073 void APInt::lshrInPlace(const APInt
&shiftAmt
) {
1074 lshrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
1077 /// Logical right-shift this APInt by shiftAmt.
1078 /// Logical right-shift function.
1079 void APInt::lshrSlowCase(unsigned ShiftAmt
) {
1080 tcShiftRight(U
.pVal
, getNumWords(), ShiftAmt
);
1083 /// Left-shift this APInt by shiftAmt.
1084 /// Left-shift function.
1085 APInt
&APInt::operator<<=(const APInt
&shiftAmt
) {
1086 // It's undefined behavior in C to shift by BitWidth or greater.
1087 *this <<= (unsigned)shiftAmt
.getLimitedValue(BitWidth
);
1091 void APInt::shlSlowCase(unsigned ShiftAmt
) {
1092 tcShiftLeft(U
.pVal
, getNumWords(), ShiftAmt
);
1096 // Calculate the rotate amount modulo the bit width.
1097 static unsigned rotateModulo(unsigned BitWidth
, const APInt
&rotateAmt
) {
1098 if (LLVM_UNLIKELY(BitWidth
== 0))
1100 unsigned rotBitWidth
= rotateAmt
.getBitWidth();
1101 APInt rot
= rotateAmt
;
1102 if (rotBitWidth
< BitWidth
) {
1103 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1104 // e.g. APInt(1, 32) would give APInt(1, 0).
1105 rot
= rotateAmt
.zext(BitWidth
);
1107 rot
= rot
.urem(APInt(rot
.getBitWidth(), BitWidth
));
1108 return rot
.getLimitedValue(BitWidth
);
1111 APInt
APInt::rotl(const APInt
&rotateAmt
) const {
1112 return rotl(rotateModulo(BitWidth
, rotateAmt
));
1115 APInt
APInt::rotl(unsigned rotateAmt
) const {
1116 if (LLVM_UNLIKELY(BitWidth
== 0))
1118 rotateAmt
%= BitWidth
;
1121 return shl(rotateAmt
) | lshr(BitWidth
- rotateAmt
);
1124 APInt
APInt::rotr(const APInt
&rotateAmt
) const {
1125 return rotr(rotateModulo(BitWidth
, rotateAmt
));
1128 APInt
APInt::rotr(unsigned rotateAmt
) const {
1131 rotateAmt
%= BitWidth
;
1134 return lshr(rotateAmt
) | shl(BitWidth
- rotateAmt
);
1137 /// \returns the nearest log base 2 of this APInt. Ties round up.
1139 /// NOTE: When we have a BitWidth of 1, we define:
1141 /// log2(0) = UINT32_MAX
1144 /// to get around any mathematical concerns resulting from
1145 /// referencing 2 in a space where 2 does no exist.
1146 unsigned APInt::nearestLogBase2() const {
1147 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1148 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1153 // Handle the zero case.
1157 // The non-zero case is handled by computing:
1159 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1161 // where x[i] is referring to the value of the ith bit of x.
1162 unsigned lg
= logBase2();
1163 return lg
+ unsigned((*this)[lg
- 1]);
1166 // Square Root - this method computes and returns the square root of "this".
1167 // Three mechanisms are used for computation. For small values (<= 5 bits),
1168 // a table lookup is done. This gets some performance for common cases. For
1169 // values using less than 52 bits, the value is converted to double and then
1170 // the libc sqrt function is called. The result is rounded and then converted
1171 // back to a uint64_t which is then used to construct the result. Finally,
1172 // the Babylonian method for computing square roots is used.
1173 APInt
APInt::sqrt() const {
1175 // Determine the magnitude of the value.
1176 unsigned magnitude
= getActiveBits();
1178 // Use a fast table for some small values. This also gets rid of some
1179 // rounding errors in libc sqrt for small values.
1180 if (magnitude
<= 5) {
1181 static const uint8_t results
[32] = {
1184 /* 3- 6 */ 2, 2, 2, 2,
1185 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1186 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1187 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1190 return APInt(BitWidth
, results
[ (isSingleWord() ? U
.VAL
: U
.pVal
[0]) ]);
1193 // If the magnitude of the value fits in less than 52 bits (the precision of
1194 // an IEEE double precision floating point value), then we can use the
1195 // libc sqrt function which will probably use a hardware sqrt computation.
1196 // This should be faster than the algorithm below.
1197 if (magnitude
< 52) {
1198 return APInt(BitWidth
,
1199 uint64_t(::round(::sqrt(double(isSingleWord() ? U
.VAL
1203 // Okay, all the short cuts are exhausted. We must compute it. The following
1204 // is a classical Babylonian method for computing the square root. This code
1205 // was adapted to APInt from a wikipedia article on such computations.
1206 // See http://www.wikipedia.org/ and go to the page named
1207 // Calculate_an_integer_square_root.
1208 unsigned nbits
= BitWidth
, i
= 4;
1209 APInt
testy(BitWidth
, 16);
1210 APInt
x_old(BitWidth
, 1);
1211 APInt
x_new(BitWidth
, 0);
1212 APInt
two(BitWidth
, 2);
1214 // Select a good starting value using binary logarithms.
1215 for (;; i
+= 2, testy
= testy
.shl(2))
1216 if (i
>= nbits
|| this->ule(testy
)) {
1217 x_old
= x_old
.shl(i
/ 2);
1221 // Use the Babylonian method to arrive at the integer square root:
1223 x_new
= (this->udiv(x_old
) + x_old
).udiv(two
);
1224 if (x_old
.ule(x_new
))
1229 // Make sure we return the closest approximation
1230 // NOTE: The rounding calculation below is correct. It will produce an
1231 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1232 // determined to be a rounding issue with pari/gp as it begins to use a
1233 // floating point representation after 192 bits. There are no discrepancies
1234 // between this algorithm and pari/gp for bit widths < 192 bits.
1235 APInt
square(x_old
* x_old
);
1236 APInt
nextSquare((x_old
+ 1) * (x_old
+1));
1237 if (this->ult(square
))
1239 assert(this->ule(nextSquare
) && "Error in APInt::sqrt computation");
1240 APInt
midpoint((nextSquare
- square
).udiv(two
));
1241 APInt
offset(*this - square
);
1242 if (offset
.ult(midpoint
))
1247 /// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth.
1248 APInt
APInt::multiplicativeInverse() const {
1249 assert((*this)[0] &&
1250 "multiplicative inverse is only defined for odd numbers!");
1252 // Use Newton's method.
1253 APInt Factor
= *this;
1255 while (!(T
= *this * Factor
).isOne())
1256 Factor
*= 2 - std::move(T
);
1260 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1261 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1262 /// variables here have the same names as in the algorithm. Comments explain
1263 /// the algorithm and any deviation from it.
1264 static void KnuthDiv(uint32_t *u
, uint32_t *v
, uint32_t *q
, uint32_t* r
,
1265 unsigned m
, unsigned n
) {
1266 assert(u
&& "Must provide dividend");
1267 assert(v
&& "Must provide divisor");
1268 assert(q
&& "Must provide quotient");
1269 assert(u
!= v
&& u
!= q
&& v
!= q
&& "Must use different memory");
1270 assert(n
>1 && "n must be > 1");
1272 // b denotes the base of the number system. In our case b is 2^32.
1273 const uint64_t b
= uint64_t(1) << 32;
1275 // The DEBUG macros here tend to be spam in the debug output if you're not
1276 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1278 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1280 #define DEBUG_KNUTH(X) do {} while(false)
1283 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m
<< " n=" << n
<< '\n');
1284 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1285 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1286 DEBUG_KNUTH(dbgs() << " by");
1287 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1288 DEBUG_KNUTH(dbgs() << '\n');
1289 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1290 // u and v by d. Note that we have taken Knuth's advice here to use a power
1291 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1292 // 2 allows us to shift instead of multiply and it is easy to determine the
1293 // shift amount from the leading zeros. We are basically normalizing the u
1294 // and v so that its high bits are shifted to the top of v's range without
1295 // overflow. Note that this can require an extra word in u so that u must
1296 // be of length m+n+1.
1297 unsigned shift
= llvm::countl_zero(v
[n
- 1]);
1298 uint32_t v_carry
= 0;
1299 uint32_t u_carry
= 0;
1301 for (unsigned i
= 0; i
< m
+n
; ++i
) {
1302 uint32_t u_tmp
= u
[i
] >> (32 - shift
);
1303 u
[i
] = (u
[i
] << shift
) | u_carry
;
1306 for (unsigned i
= 0; i
< n
; ++i
) {
1307 uint32_t v_tmp
= v
[i
] >> (32 - shift
);
1308 v
[i
] = (v
[i
] << shift
) | v_carry
;
1314 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1315 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1316 DEBUG_KNUTH(dbgs() << " by");
1317 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1318 DEBUG_KNUTH(dbgs() << '\n');
1320 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1323 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j
<< '\n');
1324 // D3. [Calculate q'.].
1325 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1326 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1327 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1328 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1329 // on v[n-2] determines at high speed most of the cases in which the trial
1330 // value qp is one too large, and it eliminates all cases where qp is two
1332 uint64_t dividend
= Make_64(u
[j
+n
], u
[j
+n
-1]);
1333 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend
<< '\n');
1334 uint64_t qp
= dividend
/ v
[n
-1];
1335 uint64_t rp
= dividend
% v
[n
-1];
1336 if (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]) {
1339 if (rp
< b
&& (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]))
1342 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp
<< ", rp == " << rp
<< '\n');
1344 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1345 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1346 // consists of a simple multiplication by a one-place number, combined with
1348 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1349 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1350 // true value plus b**(n+1), namely as the b's complement of
1351 // the true value, and a "borrow" to the left should be remembered.
1353 for (unsigned i
= 0; i
< n
; ++i
) {
1354 uint64_t p
= uint64_t(qp
) * uint64_t(v
[i
]);
1355 int64_t subres
= int64_t(u
[j
+i
]) - borrow
- Lo_32(p
);
1356 u
[j
+i
] = Lo_32(subres
);
1357 borrow
= Hi_32(p
) - Hi_32(subres
);
1358 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u
[j
+ i
]
1359 << ", borrow = " << borrow
<< '\n');
1361 bool isNeg
= u
[j
+n
] < borrow
;
1362 u
[j
+n
] -= Lo_32(borrow
);
1364 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1365 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1366 DEBUG_KNUTH(dbgs() << '\n');
1368 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1369 // negative, go to step D6; otherwise go on to step D7.
1372 // D6. [Add back]. The probability that this step is necessary is very
1373 // small, on the order of only 2/b. Make sure that test data accounts for
1374 // this possibility. Decrease q[j] by 1
1376 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1377 // A carry will occur to the left of u[j+n], and it should be ignored
1378 // since it cancels with the borrow that occurred in D4.
1380 for (unsigned i
= 0; i
< n
; i
++) {
1381 uint32_t limit
= std::min(u
[j
+i
],v
[i
]);
1382 u
[j
+i
] += v
[i
] + carry
;
1383 carry
= u
[j
+i
] < limit
|| (carry
&& u
[j
+i
] == limit
);
1387 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1388 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1389 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q
[j
] << '\n');
1391 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1394 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1395 DEBUG_KNUTH(for (int i
= m
; i
>= 0; i
--) dbgs() << " " << q
[i
]);
1396 DEBUG_KNUTH(dbgs() << '\n');
1398 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1399 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1400 // compute the remainder (urem uses this).
1402 // The value d is expressed by the "shift" value above since we avoided
1403 // multiplication by d by using a shift left. So, all we have to do is
1404 // shift right here.
1407 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1408 for (int i
= n
-1; i
>= 0; i
--) {
1409 r
[i
] = (u
[i
] >> shift
) | carry
;
1410 carry
= u
[i
] << (32 - shift
);
1411 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1414 for (int i
= n
-1; i
>= 0; i
--) {
1416 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1419 DEBUG_KNUTH(dbgs() << '\n');
1421 DEBUG_KNUTH(dbgs() << '\n');
1424 void APInt::divide(const WordType
*LHS
, unsigned lhsWords
, const WordType
*RHS
,
1425 unsigned rhsWords
, WordType
*Quotient
, WordType
*Remainder
) {
1426 assert(lhsWords
>= rhsWords
&& "Fractional result");
1428 // First, compose the values into an array of 32-bit words instead of
1429 // 64-bit words. This is a necessity of both the "short division" algorithm
1430 // and the Knuth "classical algorithm" which requires there to be native
1431 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1432 // can't use 64-bit operands here because we don't have native results of
1433 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1434 // work on large-endian machines.
1435 unsigned n
= rhsWords
* 2;
1436 unsigned m
= (lhsWords
* 2) - n
;
1438 // Allocate space for the temporary values we need either on the stack, if
1439 // it will fit, or on the heap if it won't.
1440 uint32_t SPACE
[128];
1441 uint32_t *U
= nullptr;
1442 uint32_t *V
= nullptr;
1443 uint32_t *Q
= nullptr;
1444 uint32_t *R
= nullptr;
1445 if ((Remainder
?4:3)*n
+2*m
+1 <= 128) {
1448 Q
= &SPACE
[(m
+n
+1) + n
];
1450 R
= &SPACE
[(m
+n
+1) + n
+ (m
+n
)];
1452 U
= new uint32_t[m
+ n
+ 1];
1453 V
= new uint32_t[n
];
1454 Q
= new uint32_t[m
+n
];
1456 R
= new uint32_t[n
];
1459 // Initialize the dividend
1460 memset(U
, 0, (m
+n
+1)*sizeof(uint32_t));
1461 for (unsigned i
= 0; i
< lhsWords
; ++i
) {
1462 uint64_t tmp
= LHS
[i
];
1463 U
[i
* 2] = Lo_32(tmp
);
1464 U
[i
* 2 + 1] = Hi_32(tmp
);
1466 U
[m
+n
] = 0; // this extra word is for "spill" in the Knuth algorithm.
1468 // Initialize the divisor
1469 memset(V
, 0, (n
)*sizeof(uint32_t));
1470 for (unsigned i
= 0; i
< rhsWords
; ++i
) {
1471 uint64_t tmp
= RHS
[i
];
1472 V
[i
* 2] = Lo_32(tmp
);
1473 V
[i
* 2 + 1] = Hi_32(tmp
);
1476 // initialize the quotient and remainder
1477 memset(Q
, 0, (m
+n
) * sizeof(uint32_t));
1479 memset(R
, 0, n
* sizeof(uint32_t));
1481 // Now, adjust m and n for the Knuth division. n is the number of words in
1482 // the divisor. m is the number of words by which the dividend exceeds the
1483 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1484 // contain any zero words or the Knuth algorithm fails.
1485 for (unsigned i
= n
; i
> 0 && V
[i
-1] == 0; i
--) {
1489 for (unsigned i
= m
+n
; i
> 0 && U
[i
-1] == 0; i
--)
1492 // If we're left with only a single word for the divisor, Knuth doesn't work
1493 // so we implement the short division algorithm here. This is much simpler
1494 // and faster because we are certain that we can divide a 64-bit quantity
1495 // by a 32-bit quantity at hardware speed and short division is simply a
1496 // series of such operations. This is just like doing short division but we
1497 // are using base 2^32 instead of base 10.
1498 assert(n
!= 0 && "Divide by zero?");
1500 uint32_t divisor
= V
[0];
1501 uint32_t remainder
= 0;
1502 for (int i
= m
; i
>= 0; i
--) {
1503 uint64_t partial_dividend
= Make_64(remainder
, U
[i
]);
1504 if (partial_dividend
== 0) {
1507 } else if (partial_dividend
< divisor
) {
1509 remainder
= Lo_32(partial_dividend
);
1510 } else if (partial_dividend
== divisor
) {
1514 Q
[i
] = Lo_32(partial_dividend
/ divisor
);
1515 remainder
= Lo_32(partial_dividend
- (Q
[i
] * divisor
));
1521 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1523 KnuthDiv(U
, V
, Q
, R
, m
, n
);
1526 // If the caller wants the quotient
1528 for (unsigned i
= 0; i
< lhsWords
; ++i
)
1529 Quotient
[i
] = Make_64(Q
[i
*2+1], Q
[i
*2]);
1532 // If the caller wants the remainder
1534 for (unsigned i
= 0; i
< rhsWords
; ++i
)
1535 Remainder
[i
] = Make_64(R
[i
*2+1], R
[i
*2]);
1538 // Clean up the memory we allocated.
1539 if (U
!= &SPACE
[0]) {
1547 APInt
APInt::udiv(const APInt
&RHS
) const {
1548 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1550 // First, deal with the easy case
1551 if (isSingleWord()) {
1552 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1553 return APInt(BitWidth
, U
.VAL
/ RHS
.U
.VAL
);
1556 // Get some facts about the LHS and RHS number of bits and words
1557 unsigned lhsWords
= getNumWords(getActiveBits());
1558 unsigned rhsBits
= RHS
.getActiveBits();
1559 unsigned rhsWords
= getNumWords(rhsBits
);
1560 assert(rhsWords
&& "Divided by zero???");
1562 // Deal with some degenerate cases
1565 return APInt(BitWidth
, 0);
1569 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1570 // X / Y ===> 0, iff X < Y
1571 return APInt(BitWidth
, 0);
1574 return APInt(BitWidth
, 1);
1575 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1576 // All high words are zero, just use native divide
1577 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
.U
.pVal
[0]);
1579 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1580 APInt
Quotient(BitWidth
, 0); // to hold result.
1581 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
, nullptr);
1585 APInt
APInt::udiv(uint64_t RHS
) const {
1586 assert(RHS
!= 0 && "Divide by zero?");
1588 // First, deal with the easy case
1590 return APInt(BitWidth
, U
.VAL
/ RHS
);
1592 // Get some facts about the LHS words.
1593 unsigned lhsWords
= getNumWords(getActiveBits());
1595 // Deal with some degenerate cases
1598 return APInt(BitWidth
, 0);
1603 // X / Y ===> 0, iff X < Y
1604 return APInt(BitWidth
, 0);
1607 return APInt(BitWidth
, 1);
1608 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1609 // All high words are zero, just use native divide
1610 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
);
1612 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1613 APInt
Quotient(BitWidth
, 0); // to hold result.
1614 divide(U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, nullptr);
1618 APInt
APInt::sdiv(const APInt
&RHS
) const {
1620 if (RHS
.isNegative())
1621 return (-(*this)).udiv(-RHS
);
1622 return -((-(*this)).udiv(RHS
));
1624 if (RHS
.isNegative())
1625 return -(this->udiv(-RHS
));
1626 return this->udiv(RHS
);
1629 APInt
APInt::sdiv(int64_t RHS
) const {
1632 return (-(*this)).udiv(-RHS
);
1633 return -((-(*this)).udiv(RHS
));
1636 return -(this->udiv(-RHS
));
1637 return this->udiv(RHS
);
1640 APInt
APInt::urem(const APInt
&RHS
) const {
1641 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1642 if (isSingleWord()) {
1643 assert(RHS
.U
.VAL
!= 0 && "Remainder by zero?");
1644 return APInt(BitWidth
, U
.VAL
% RHS
.U
.VAL
);
1647 // Get some facts about the LHS
1648 unsigned lhsWords
= getNumWords(getActiveBits());
1650 // Get some facts about the RHS
1651 unsigned rhsBits
= RHS
.getActiveBits();
1652 unsigned rhsWords
= getNumWords(rhsBits
);
1653 assert(rhsWords
&& "Performing remainder operation by zero ???");
1655 // Check the degenerate cases
1658 return APInt(BitWidth
, 0);
1661 return APInt(BitWidth
, 0);
1662 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1663 // X % Y ===> X, iff X < Y
1667 return APInt(BitWidth
, 0);
1669 // All high words are zero, just use native remainder
1670 return APInt(BitWidth
, U
.pVal
[0] % RHS
.U
.pVal
[0]);
1672 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1673 APInt
Remainder(BitWidth
, 0);
1674 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, nullptr, Remainder
.U
.pVal
);
1678 uint64_t APInt::urem(uint64_t RHS
) const {
1679 assert(RHS
!= 0 && "Remainder by zero?");
1684 // Get some facts about the LHS
1685 unsigned lhsWords
= getNumWords(getActiveBits());
1687 // Check the degenerate cases
1695 // X % Y ===> X, iff X < Y
1696 return getZExtValue();
1701 // All high words are zero, just use native remainder
1702 return U
.pVal
[0] % RHS
;
1704 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1706 divide(U
.pVal
, lhsWords
, &RHS
, 1, nullptr, &Remainder
);
1710 APInt
APInt::srem(const APInt
&RHS
) const {
1712 if (RHS
.isNegative())
1713 return -((-(*this)).urem(-RHS
));
1714 return -((-(*this)).urem(RHS
));
1716 if (RHS
.isNegative())
1717 return this->urem(-RHS
);
1718 return this->urem(RHS
);
1721 int64_t APInt::srem(int64_t RHS
) const {
1724 return -((-(*this)).urem(-RHS
));
1725 return -((-(*this)).urem(RHS
));
1728 return this->urem(-RHS
);
1729 return this->urem(RHS
);
1732 void APInt::udivrem(const APInt
&LHS
, const APInt
&RHS
,
1733 APInt
&Quotient
, APInt
&Remainder
) {
1734 assert(LHS
.BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1735 unsigned BitWidth
= LHS
.BitWidth
;
1737 // First, deal with the easy case
1738 if (LHS
.isSingleWord()) {
1739 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1740 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
.U
.VAL
;
1741 uint64_t RemVal
= LHS
.U
.VAL
% RHS
.U
.VAL
;
1742 Quotient
= APInt(BitWidth
, QuotVal
);
1743 Remainder
= APInt(BitWidth
, RemVal
);
1747 // Get some size facts about the dividend and divisor
1748 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1749 unsigned rhsBits
= RHS
.getActiveBits();
1750 unsigned rhsWords
= getNumWords(rhsBits
);
1751 assert(rhsWords
&& "Performing divrem operation by zero ???");
1753 // Check the degenerate cases
1754 if (lhsWords
== 0) {
1755 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1756 Remainder
= APInt(BitWidth
, 0); // 0 % Y ===> 0
1761 Quotient
= LHS
; // X / 1 ===> X
1762 Remainder
= APInt(BitWidth
, 0); // X % 1 ===> 0
1765 if (lhsWords
< rhsWords
|| LHS
.ult(RHS
)) {
1766 Remainder
= LHS
; // X % Y ===> X, iff X < Y
1767 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1772 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1773 Remainder
= APInt(BitWidth
, 0); // X % X ===> 0;
1777 // Make sure there is enough space to hold the results.
1778 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1779 // change the size. This is necessary if Quotient or Remainder is aliased
1781 Quotient
.reallocate(BitWidth
);
1782 Remainder
.reallocate(BitWidth
);
1784 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1785 // There is only one word to consider so use the native versions.
1786 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1787 uint64_t rhsValue
= RHS
.U
.pVal
[0];
1788 Quotient
= lhsValue
/ rhsValue
;
1789 Remainder
= lhsValue
% rhsValue
;
1793 // Okay, lets do it the long way
1794 divide(LHS
.U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
,
1796 // Clear the rest of the Quotient and Remainder.
1797 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1798 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1799 std::memset(Remainder
.U
.pVal
+ rhsWords
, 0,
1800 (getNumWords(BitWidth
) - rhsWords
) * APINT_WORD_SIZE
);
1803 void APInt::udivrem(const APInt
&LHS
, uint64_t RHS
, APInt
&Quotient
,
1804 uint64_t &Remainder
) {
1805 assert(RHS
!= 0 && "Divide by zero?");
1806 unsigned BitWidth
= LHS
.BitWidth
;
1808 // First, deal with the easy case
1809 if (LHS
.isSingleWord()) {
1810 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
;
1811 Remainder
= LHS
.U
.VAL
% RHS
;
1812 Quotient
= APInt(BitWidth
, QuotVal
);
1816 // Get some size facts about the dividend and divisor
1817 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1819 // Check the degenerate cases
1820 if (lhsWords
== 0) {
1821 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1822 Remainder
= 0; // 0 % Y ===> 0
1827 Quotient
= LHS
; // X / 1 ===> X
1828 Remainder
= 0; // X % 1 ===> 0
1833 Remainder
= LHS
.getZExtValue(); // X % Y ===> X, iff X < Y
1834 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1839 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1840 Remainder
= 0; // X % X ===> 0;
1844 // Make sure there is enough space to hold the results.
1845 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1846 // change the size. This is necessary if Quotient is aliased with LHS.
1847 Quotient
.reallocate(BitWidth
);
1849 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1850 // There is only one word to consider so use the native versions.
1851 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1852 Quotient
= lhsValue
/ RHS
;
1853 Remainder
= lhsValue
% RHS
;
1857 // Okay, lets do it the long way
1858 divide(LHS
.U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, &Remainder
);
1859 // Clear the rest of the Quotient.
1860 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1861 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1864 void APInt::sdivrem(const APInt
&LHS
, const APInt
&RHS
,
1865 APInt
&Quotient
, APInt
&Remainder
) {
1866 if (LHS
.isNegative()) {
1867 if (RHS
.isNegative())
1868 APInt::udivrem(-LHS
, -RHS
, Quotient
, Remainder
);
1870 APInt::udivrem(-LHS
, RHS
, Quotient
, Remainder
);
1874 } else if (RHS
.isNegative()) {
1875 APInt::udivrem(LHS
, -RHS
, Quotient
, Remainder
);
1878 APInt::udivrem(LHS
, RHS
, Quotient
, Remainder
);
1882 void APInt::sdivrem(const APInt
&LHS
, int64_t RHS
,
1883 APInt
&Quotient
, int64_t &Remainder
) {
1884 uint64_t R
= Remainder
;
1885 if (LHS
.isNegative()) {
1887 APInt::udivrem(-LHS
, -RHS
, Quotient
, R
);
1889 APInt::udivrem(-LHS
, RHS
, Quotient
, R
);
1893 } else if (RHS
< 0) {
1894 APInt::udivrem(LHS
, -RHS
, Quotient
, R
);
1897 APInt::udivrem(LHS
, RHS
, Quotient
, R
);
1902 APInt
APInt::sadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1903 APInt Res
= *this+RHS
;
1904 Overflow
= isNonNegative() == RHS
.isNonNegative() &&
1905 Res
.isNonNegative() != isNonNegative();
1909 APInt
APInt::uadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1910 APInt Res
= *this+RHS
;
1911 Overflow
= Res
.ult(RHS
);
1915 APInt
APInt::ssub_ov(const APInt
&RHS
, bool &Overflow
) const {
1916 APInt Res
= *this - RHS
;
1917 Overflow
= isNonNegative() != RHS
.isNonNegative() &&
1918 Res
.isNonNegative() != isNonNegative();
1922 APInt
APInt::usub_ov(const APInt
&RHS
, bool &Overflow
) const {
1923 APInt Res
= *this-RHS
;
1924 Overflow
= Res
.ugt(*this);
1928 APInt
APInt::sdiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1929 // MININT/-1 --> overflow.
1930 Overflow
= isMinSignedValue() && RHS
.isAllOnes();
1934 APInt
APInt::smul_ov(const APInt
&RHS
, bool &Overflow
) const {
1935 APInt Res
= *this * RHS
;
1938 Overflow
= Res
.sdiv(RHS
) != *this ||
1939 (isMinSignedValue() && RHS
.isAllOnes());
1945 APInt
APInt::umul_ov(const APInt
&RHS
, bool &Overflow
) const {
1946 if (countl_zero() + RHS
.countl_zero() + 2 <= BitWidth
) {
1951 APInt Res
= lshr(1) * RHS
;
1952 Overflow
= Res
.isNegative();
1962 APInt
APInt::sshl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1963 return sshl_ov(ShAmt
.getLimitedValue(getBitWidth()), Overflow
);
1966 APInt
APInt::sshl_ov(unsigned ShAmt
, bool &Overflow
) const {
1967 Overflow
= ShAmt
>= getBitWidth();
1969 return APInt(BitWidth
, 0);
1971 if (isNonNegative()) // Don't allow sign change.
1972 Overflow
= ShAmt
>= countl_zero();
1974 Overflow
= ShAmt
>= countl_one();
1976 return *this << ShAmt
;
1979 APInt
APInt::ushl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1980 return ushl_ov(ShAmt
.getLimitedValue(getBitWidth()), Overflow
);
1983 APInt
APInt::ushl_ov(unsigned ShAmt
, bool &Overflow
) const {
1984 Overflow
= ShAmt
>= getBitWidth();
1986 return APInt(BitWidth
, 0);
1988 Overflow
= ShAmt
> countl_zero();
1990 return *this << ShAmt
;
1993 APInt
APInt::sfloordiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1994 APInt quotient
= sdiv_ov(RHS
, Overflow
);
1995 if ((quotient
* RHS
!= *this) && (isNegative() != RHS
.isNegative()))
1996 return quotient
- 1;
2000 APInt
APInt::sadd_sat(const APInt
&RHS
) const {
2002 APInt Res
= sadd_ov(RHS
, Overflow
);
2006 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2007 : APInt::getSignedMaxValue(BitWidth
);
2010 APInt
APInt::uadd_sat(const APInt
&RHS
) const {
2012 APInt Res
= uadd_ov(RHS
, Overflow
);
2016 return APInt::getMaxValue(BitWidth
);
2019 APInt
APInt::ssub_sat(const APInt
&RHS
) const {
2021 APInt Res
= ssub_ov(RHS
, Overflow
);
2025 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2026 : APInt::getSignedMaxValue(BitWidth
);
2029 APInt
APInt::usub_sat(const APInt
&RHS
) const {
2031 APInt Res
= usub_ov(RHS
, Overflow
);
2035 return APInt(BitWidth
, 0);
2038 APInt
APInt::smul_sat(const APInt
&RHS
) const {
2040 APInt Res
= smul_ov(RHS
, Overflow
);
2044 // The result is negative if one and only one of inputs is negative.
2045 bool ResIsNegative
= isNegative() ^ RHS
.isNegative();
2047 return ResIsNegative
? APInt::getSignedMinValue(BitWidth
)
2048 : APInt::getSignedMaxValue(BitWidth
);
2051 APInt
APInt::umul_sat(const APInt
&RHS
) const {
2053 APInt Res
= umul_ov(RHS
, Overflow
);
2057 return APInt::getMaxValue(BitWidth
);
2060 APInt
APInt::sshl_sat(const APInt
&RHS
) const {
2061 return sshl_sat(RHS
.getLimitedValue(getBitWidth()));
2064 APInt
APInt::sshl_sat(unsigned RHS
) const {
2066 APInt Res
= sshl_ov(RHS
, Overflow
);
2070 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2071 : APInt::getSignedMaxValue(BitWidth
);
2074 APInt
APInt::ushl_sat(const APInt
&RHS
) const {
2075 return ushl_sat(RHS
.getLimitedValue(getBitWidth()));
2078 APInt
APInt::ushl_sat(unsigned RHS
) const {
2080 APInt Res
= ushl_ov(RHS
, Overflow
);
2084 return APInt::getMaxValue(BitWidth
);
2087 void APInt::fromString(unsigned numbits
, StringRef str
, uint8_t radix
) {
2088 // Check our assumptions here
2089 assert(!str
.empty() && "Invalid string length");
2090 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
2092 "Radix should be 2, 8, 10, 16, or 36!");
2094 StringRef::iterator p
= str
.begin();
2095 size_t slen
= str
.size();
2096 bool isNeg
= *p
== '-';
2097 if (*p
== '-' || *p
== '+') {
2100 assert(slen
&& "String is only a sign, needs a value.");
2102 assert((slen
<= numbits
|| radix
!= 2) && "Insufficient bit width");
2103 assert(((slen
-1)*3 <= numbits
|| radix
!= 8) && "Insufficient bit width");
2104 assert(((slen
-1)*4 <= numbits
|| radix
!= 16) && "Insufficient bit width");
2105 assert((((slen
-1)*64)/22 <= numbits
|| radix
!= 10) &&
2106 "Insufficient bit width");
2108 // Allocate memory if needed
2112 U
.pVal
= getClearedMemory(getNumWords());
2114 // Figure out if we can shift instead of multiply
2115 unsigned shift
= (radix
== 16 ? 4 : radix
== 8 ? 3 : radix
== 2 ? 1 : 0);
2117 // Enter digit traversal loop
2118 for (StringRef::iterator e
= str
.end(); p
!= e
; ++p
) {
2119 unsigned digit
= getDigit(*p
, radix
);
2120 assert(digit
< radix
&& "Invalid character in digit string");
2122 // Shift or multiply the value by the radix
2130 // Add in the digit we just interpreted
2133 // If its negative, put it in two's complement form
2138 void APInt::toString(SmallVectorImpl
<char> &Str
, unsigned Radix
, bool Signed
,
2139 bool formatAsCLiteral
, bool UpperCase
,
2140 bool InsertSeparators
) const {
2141 assert((Radix
== 10 || Radix
== 8 || Radix
== 16 || Radix
== 2 ||
2143 "Radix should be 2, 8, 10, 16, or 36!");
2145 const char *Prefix
= "";
2146 if (formatAsCLiteral
) {
2149 // Binary literals are a non-standard extension added in gcc 4.3:
2150 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2162 llvm_unreachable("Invalid radix!");
2166 // Number of digits in a group between separators.
2167 unsigned Grouping
= (Radix
== 8 || Radix
== 10) ? 3 : 4;
2169 // First, check for a zero value and just short circuit the logic below.
2172 Str
.push_back(*Prefix
);
2179 static const char BothDigits
[] = "0123456789abcdefghijklmnopqrstuvwxyz"
2180 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2181 const char *Digits
= BothDigits
+ (UpperCase
? 36 : 0);
2183 if (isSingleWord()) {
2185 char *BufPtr
= std::end(Buffer
);
2191 int64_t I
= getSExtValue();
2201 Str
.push_back(*Prefix
);
2207 if (InsertSeparators
&& Pos
% Grouping
== 0 && Pos
> 0)
2209 *--BufPtr
= Digits
[N
% Radix
];
2213 Str
.append(BufPtr
, std::end(Buffer
));
2219 if (Signed
&& isNegative()) {
2220 // They want to print the signed version and it is a negative value
2221 // Flip the bits and add one to turn it into the equivalent positive
2222 // value and put a '-' in the result.
2228 Str
.push_back(*Prefix
);
2232 // We insert the digits backward, then reverse them to get the right order.
2233 unsigned StartDig
= Str
.size();
2235 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2236 // because the number of bits per digit (1, 3 and 4 respectively) divides
2237 // equally. We just shift until the value is zero.
2238 if (Radix
== 2 || Radix
== 8 || Radix
== 16) {
2239 // Just shift tmp right for each digit width until it becomes zero
2240 unsigned ShiftAmt
= (Radix
== 16 ? 4 : (Radix
== 8 ? 3 : 1));
2241 unsigned MaskAmt
= Radix
- 1;
2244 while (Tmp
.getBoolValue()) {
2245 unsigned Digit
= unsigned(Tmp
.getRawData()[0]) & MaskAmt
;
2246 if (InsertSeparators
&& Pos
% Grouping
== 0 && Pos
> 0)
2247 Str
.push_back('\'');
2249 Str
.push_back(Digits
[Digit
]);
2250 Tmp
.lshrInPlace(ShiftAmt
);
2255 while (Tmp
.getBoolValue()) {
2257 udivrem(Tmp
, Radix
, Tmp
, Digit
);
2258 assert(Digit
< Radix
&& "divide failed");
2259 if (InsertSeparators
&& Pos
% Grouping
== 0 && Pos
> 0)
2260 Str
.push_back('\'');
2262 Str
.push_back(Digits
[Digit
]);
2267 // Reverse the digits before returning.
2268 std::reverse(Str
.begin()+StartDig
, Str
.end());
2271 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2272 LLVM_DUMP_METHOD
void APInt::dump() const {
2273 SmallString
<40> S
, U
;
2274 this->toStringUnsigned(U
);
2275 this->toStringSigned(S
);
2276 dbgs() << "APInt(" << BitWidth
<< "b, "
2277 << U
<< "u " << S
<< "s)\n";
2281 void APInt::print(raw_ostream
&OS
, bool isSigned
) const {
2283 this->toString(S
, 10, isSigned
, /* formatAsCLiteral = */false);
2287 // This implements a variety of operations on a representation of
2288 // arbitrary precision, two's-complement, bignum integer values.
2290 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2291 // and unrestricting assumption.
2292 static_assert(APInt::APINT_BITS_PER_WORD
% 2 == 0,
2293 "Part width must be divisible by 2!");
2295 // Returns the integer part with the least significant BITS set.
2296 // BITS cannot be zero.
2297 static inline APInt::WordType
lowBitMask(unsigned bits
) {
2298 assert(bits
!= 0 && bits
<= APInt::APINT_BITS_PER_WORD
);
2299 return ~(APInt::WordType
) 0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
2302 /// Returns the value of the lower half of PART.
2303 static inline APInt::WordType
lowHalf(APInt::WordType part
) {
2304 return part
& lowBitMask(APInt::APINT_BITS_PER_WORD
/ 2);
2307 /// Returns the value of the upper half of PART.
2308 static inline APInt::WordType
highHalf(APInt::WordType part
) {
2309 return part
>> (APInt::APINT_BITS_PER_WORD
/ 2);
2312 /// Sets the least significant part of a bignum to the input value, and zeroes
2313 /// out higher parts.
2314 void APInt::tcSet(WordType
*dst
, WordType part
, unsigned parts
) {
2317 for (unsigned i
= 1; i
< parts
; i
++)
2321 /// Assign one bignum to another.
2322 void APInt::tcAssign(WordType
*dst
, const WordType
*src
, unsigned parts
) {
2323 for (unsigned i
= 0; i
< parts
; i
++)
2327 /// Returns true if a bignum is zero, false otherwise.
2328 bool APInt::tcIsZero(const WordType
*src
, unsigned parts
) {
2329 for (unsigned i
= 0; i
< parts
; i
++)
2336 /// Extract the given bit of a bignum; returns 0 or 1.
2337 int APInt::tcExtractBit(const WordType
*parts
, unsigned bit
) {
2338 return (parts
[whichWord(bit
)] & maskBit(bit
)) != 0;
2341 /// Set the given bit of a bignum.
2342 void APInt::tcSetBit(WordType
*parts
, unsigned bit
) {
2343 parts
[whichWord(bit
)] |= maskBit(bit
);
2346 /// Clears the given bit of a bignum.
2347 void APInt::tcClearBit(WordType
*parts
, unsigned bit
) {
2348 parts
[whichWord(bit
)] &= ~maskBit(bit
);
2351 /// Returns the bit number of the least significant set bit of a number. If the
2352 /// input number has no bits set UINT_MAX is returned.
2353 unsigned APInt::tcLSB(const WordType
*parts
, unsigned n
) {
2354 for (unsigned i
= 0; i
< n
; i
++) {
2355 if (parts
[i
] != 0) {
2356 unsigned lsb
= llvm::countr_zero(parts
[i
]);
2357 return lsb
+ i
* APINT_BITS_PER_WORD
;
2364 /// Returns the bit number of the most significant set bit of a number.
2365 /// If the input number has no bits set UINT_MAX is returned.
2366 unsigned APInt::tcMSB(const WordType
*parts
, unsigned n
) {
2370 if (parts
[n
] != 0) {
2371 static_assert(sizeof(parts
[n
]) <= sizeof(uint64_t));
2372 unsigned msb
= llvm::Log2_64(parts
[n
]);
2374 return msb
+ n
* APINT_BITS_PER_WORD
;
2381 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2382 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2383 /// significant bit of DST. All high bits above srcBITS in DST are zero-filled.
2386 APInt::tcExtract(WordType
*dst
, unsigned dstCount
, const WordType
*src
,
2387 unsigned srcBits
, unsigned srcLSB
) {
2388 unsigned dstParts
= (srcBits
+ APINT_BITS_PER_WORD
- 1) / APINT_BITS_PER_WORD
;
2389 assert(dstParts
<= dstCount
);
2391 unsigned firstSrcPart
= srcLSB
/ APINT_BITS_PER_WORD
;
2392 tcAssign(dst
, src
+ firstSrcPart
, dstParts
);
2394 unsigned shift
= srcLSB
% APINT_BITS_PER_WORD
;
2395 tcShiftRight(dst
, dstParts
, shift
);
2397 // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2398 // in DST. If this is less that srcBits, append the rest, else
2399 // clear the high bits.
2400 unsigned n
= dstParts
* APINT_BITS_PER_WORD
- shift
;
2402 WordType mask
= lowBitMask (srcBits
- n
);
2403 dst
[dstParts
- 1] |= ((src
[firstSrcPart
+ dstParts
] & mask
)
2404 << n
% APINT_BITS_PER_WORD
);
2405 } else if (n
> srcBits
) {
2406 if (srcBits
% APINT_BITS_PER_WORD
)
2407 dst
[dstParts
- 1] &= lowBitMask (srcBits
% APINT_BITS_PER_WORD
);
2410 // Clear high parts.
2411 while (dstParts
< dstCount
)
2412 dst
[dstParts
++] = 0;
2415 //// DST += RHS + C where C is zero or one. Returns the carry flag.
2416 APInt::WordType
APInt::tcAdd(WordType
*dst
, const WordType
*rhs
,
2417 WordType c
, unsigned parts
) {
2420 for (unsigned i
= 0; i
< parts
; i
++) {
2421 WordType l
= dst
[i
];
2423 dst
[i
] += rhs
[i
] + 1;
2434 /// This function adds a single "word" integer, src, to the multiple
2435 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2436 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2437 /// @returns the carry of the addition.
2438 APInt::WordType
APInt::tcAddPart(WordType
*dst
, WordType src
,
2440 for (unsigned i
= 0; i
< parts
; ++i
) {
2443 return 0; // No need to carry so exit early.
2444 src
= 1; // Carry one to next digit.
2450 /// DST -= RHS + C where C is zero or one. Returns the carry flag.
2451 APInt::WordType
APInt::tcSubtract(WordType
*dst
, const WordType
*rhs
,
2452 WordType c
, unsigned parts
) {
2455 for (unsigned i
= 0; i
< parts
; i
++) {
2456 WordType l
= dst
[i
];
2458 dst
[i
] -= rhs
[i
] + 1;
2469 /// This function subtracts a single "word" (64-bit word), src, from
2470 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2471 /// no further borrowing is needed or it runs out of "words" in dst. The result
2472 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2473 /// exhausted. In other words, if src > dst then this function returns 1,
2475 /// @returns the borrow out of the subtraction
2476 APInt::WordType
APInt::tcSubtractPart(WordType
*dst
, WordType src
,
2478 for (unsigned i
= 0; i
< parts
; ++i
) {
2479 WordType Dst
= dst
[i
];
2482 return 0; // No need to borrow so exit early.
2483 src
= 1; // We have to "borrow 1" from next "word"
2489 /// Negate a bignum in-place.
2490 void APInt::tcNegate(WordType
*dst
, unsigned parts
) {
2491 tcComplement(dst
, parts
);
2492 tcIncrement(dst
, parts
);
2495 /// DST += SRC * MULTIPLIER + CARRY if add is true
2496 /// DST = SRC * MULTIPLIER + CARRY if add is false
2497 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2498 /// they must start at the same point, i.e. DST == SRC.
2499 /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2500 /// returned. Otherwise DST is filled with the least significant
2501 /// DSTPARTS parts of the result, and if all of the omitted higher
2502 /// parts were zero return zero, otherwise overflow occurred and
2504 int APInt::tcMultiplyPart(WordType
*dst
, const WordType
*src
,
2505 WordType multiplier
, WordType carry
,
2506 unsigned srcParts
, unsigned dstParts
,
2508 // Otherwise our writes of DST kill our later reads of SRC.
2509 assert(dst
<= src
|| dst
>= src
+ srcParts
);
2510 assert(dstParts
<= srcParts
+ 1);
2512 // N loops; minimum of dstParts and srcParts.
2513 unsigned n
= std::min(dstParts
, srcParts
);
2515 for (unsigned i
= 0; i
< n
; i
++) {
2516 // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2517 // This cannot overflow, because:
2518 // (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2519 // which is less than n^2.
2520 WordType srcPart
= src
[i
];
2521 WordType low
, mid
, high
;
2522 if (multiplier
== 0 || srcPart
== 0) {
2526 low
= lowHalf(srcPart
) * lowHalf(multiplier
);
2527 high
= highHalf(srcPart
) * highHalf(multiplier
);
2529 mid
= lowHalf(srcPart
) * highHalf(multiplier
);
2530 high
+= highHalf(mid
);
2531 mid
<<= APINT_BITS_PER_WORD
/ 2;
2532 if (low
+ mid
< low
)
2536 mid
= highHalf(srcPart
) * lowHalf(multiplier
);
2537 high
+= highHalf(mid
);
2538 mid
<<= APINT_BITS_PER_WORD
/ 2;
2539 if (low
+ mid
< low
)
2544 if (low
+ carry
< low
)
2550 // And now DST[i], and store the new low part there.
2551 if (low
+ dst
[i
] < low
)
2560 if (srcParts
< dstParts
) {
2561 // Full multiplication, there is no overflow.
2562 assert(srcParts
+ 1 == dstParts
);
2563 dst
[srcParts
] = carry
;
2567 // We overflowed if there is carry.
2571 // We would overflow if any significant unwritten parts would be
2572 // non-zero. This is true if any remaining src parts are non-zero
2573 // and the multiplier is non-zero.
2575 for (unsigned i
= dstParts
; i
< srcParts
; i
++)
2579 // We fitted in the narrow destination.
2583 /// DST = LHS * RHS, where DST has the same width as the operands and
2584 /// is filled with the least significant parts of the result. Returns
2585 /// one if overflow occurred, otherwise zero. DST must be disjoint
2586 /// from both operands.
2587 int APInt::tcMultiply(WordType
*dst
, const WordType
*lhs
,
2588 const WordType
*rhs
, unsigned parts
) {
2589 assert(dst
!= lhs
&& dst
!= rhs
);
2593 for (unsigned i
= 0; i
< parts
; i
++) {
2594 // Don't accumulate on the first iteration so we don't need to initalize
2597 tcMultiplyPart(&dst
[i
], lhs
, rhs
[i
], 0, parts
, parts
- i
, i
!= 0);
2603 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2604 /// operands. No overflow occurs. DST must be disjoint from both operands.
2605 void APInt::tcFullMultiply(WordType
*dst
, const WordType
*lhs
,
2606 const WordType
*rhs
, unsigned lhsParts
,
2607 unsigned rhsParts
) {
2608 // Put the narrower number on the LHS for less loops below.
2609 if (lhsParts
> rhsParts
)
2610 return tcFullMultiply (dst
, rhs
, lhs
, rhsParts
, lhsParts
);
2612 assert(dst
!= lhs
&& dst
!= rhs
);
2614 for (unsigned i
= 0; i
< lhsParts
; i
++) {
2615 // Don't accumulate on the first iteration so we don't need to initalize
2617 tcMultiplyPart(&dst
[i
], rhs
, lhs
[i
], 0, rhsParts
, rhsParts
+ 1, i
!= 0);
2621 // If RHS is zero LHS and REMAINDER are left unchanged, return one.
2622 // Otherwise set LHS to LHS / RHS with the fractional part discarded,
2623 // set REMAINDER to the remainder, return zero. i.e.
2625 // OLD_LHS = RHS * LHS + REMAINDER
2627 // SCRATCH is a bignum of the same size as the operands and result for
2628 // use by the routine; its contents need not be initialized and are
2629 // destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2630 int APInt::tcDivide(WordType
*lhs
, const WordType
*rhs
,
2631 WordType
*remainder
, WordType
*srhs
,
2633 assert(lhs
!= remainder
&& lhs
!= srhs
&& remainder
!= srhs
);
2635 unsigned shiftCount
= tcMSB(rhs
, parts
) + 1;
2636 if (shiftCount
== 0)
2639 shiftCount
= parts
* APINT_BITS_PER_WORD
- shiftCount
;
2640 unsigned n
= shiftCount
/ APINT_BITS_PER_WORD
;
2641 WordType mask
= (WordType
) 1 << (shiftCount
% APINT_BITS_PER_WORD
);
2643 tcAssign(srhs
, rhs
, parts
);
2644 tcShiftLeft(srhs
, parts
, shiftCount
);
2645 tcAssign(remainder
, lhs
, parts
);
2646 tcSet(lhs
, 0, parts
);
2648 // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2651 int compare
= tcCompare(remainder
, srhs
, parts
);
2653 tcSubtract(remainder
, srhs
, 0, parts
);
2657 if (shiftCount
== 0)
2660 tcShiftRight(srhs
, parts
, 1);
2661 if ((mask
>>= 1) == 0) {
2662 mask
= (WordType
) 1 << (APINT_BITS_PER_WORD
- 1);
2670 /// Shift a bignum left Count bits in-place. Shifted in bits are zero. There are
2671 /// no restrictions on Count.
2672 void APInt::tcShiftLeft(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2673 // Don't bother performing a no-op shift.
2677 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2678 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2679 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2681 // Fastpath for moving by whole words.
2682 if (BitShift
== 0) {
2683 std::memmove(Dst
+ WordShift
, Dst
, (Words
- WordShift
) * APINT_WORD_SIZE
);
2685 while (Words
-- > WordShift
) {
2686 Dst
[Words
] = Dst
[Words
- WordShift
] << BitShift
;
2687 if (Words
> WordShift
)
2689 Dst
[Words
- WordShift
- 1] >> (APINT_BITS_PER_WORD
- BitShift
);
2693 // Fill in the remainder with 0s.
2694 std::memset(Dst
, 0, WordShift
* APINT_WORD_SIZE
);
2697 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2698 /// are no restrictions on Count.
2699 void APInt::tcShiftRight(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2700 // Don't bother performing a no-op shift.
2704 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2705 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2706 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2708 unsigned WordsToMove
= Words
- WordShift
;
2709 // Fastpath for moving by whole words.
2710 if (BitShift
== 0) {
2711 std::memmove(Dst
, Dst
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
2713 for (unsigned i
= 0; i
!= WordsToMove
; ++i
) {
2714 Dst
[i
] = Dst
[i
+ WordShift
] >> BitShift
;
2715 if (i
+ 1 != WordsToMove
)
2716 Dst
[i
] |= Dst
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
);
2720 // Fill in the remainder with 0s.
2721 std::memset(Dst
+ WordsToMove
, 0, WordShift
* APINT_WORD_SIZE
);
2724 // Comparison (unsigned) of two bignums.
2725 int APInt::tcCompare(const WordType
*lhs
, const WordType
*rhs
,
2729 if (lhs
[parts
] != rhs
[parts
])
2730 return (lhs
[parts
] > rhs
[parts
]) ? 1 : -1;
2736 APInt
llvm::APIntOps::RoundingUDiv(const APInt
&A
, const APInt
&B
,
2737 APInt::Rounding RM
) {
2738 // Currently udivrem always rounds down.
2740 case APInt::Rounding::DOWN
:
2741 case APInt::Rounding::TOWARD_ZERO
:
2743 case APInt::Rounding::UP
: {
2745 APInt::udivrem(A
, B
, Quo
, Rem
);
2751 llvm_unreachable("Unknown APInt::Rounding enum");
2754 APInt
llvm::APIntOps::RoundingSDiv(const APInt
&A
, const APInt
&B
,
2755 APInt::Rounding RM
) {
2757 case APInt::Rounding::DOWN
:
2758 case APInt::Rounding::UP
: {
2760 APInt::sdivrem(A
, B
, Quo
, Rem
);
2763 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2764 // We want to check whether the non-integer part of the mathematical value
2765 // is negative or not. If the non-integer part is negative, we need to round
2766 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2767 // already rounded down.
2768 if (RM
== APInt::Rounding::DOWN
) {
2769 if (Rem
.isNegative() != B
.isNegative())
2773 if (Rem
.isNegative() != B
.isNegative())
2777 // Currently sdiv rounds towards zero.
2778 case APInt::Rounding::TOWARD_ZERO
:
2781 llvm_unreachable("Unknown APInt::Rounding enum");
2784 std::optional
<APInt
>
2785 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A
, APInt B
, APInt C
,
2786 unsigned RangeWidth
) {
2787 unsigned CoeffWidth
= A
.getBitWidth();
2788 assert(CoeffWidth
== B
.getBitWidth() && CoeffWidth
== C
.getBitWidth());
2789 assert(RangeWidth
<= CoeffWidth
&&
2790 "Value range width should be less than coefficient width");
2791 assert(RangeWidth
> 1 && "Value range bit width should be > 1");
2793 LLVM_DEBUG(dbgs() << __func__
<< ": solving " << A
<< "x^2 + " << B
2794 << "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2796 // Identify 0 as a (non)solution immediately.
2797 if (C
.sextOrTrunc(RangeWidth
).isZero()) {
2798 LLVM_DEBUG(dbgs() << __func__
<< ": zero solution\n");
2799 return APInt(CoeffWidth
, 0);
2802 // The result of APInt arithmetic has the same bit width as the operands,
2803 // so it can actually lose high bits. A product of two n-bit integers needs
2804 // 2n-1 bits to represent the full value.
2805 // The operation done below (on quadratic coefficients) that can produce
2806 // the largest value is the evaluation of the equation during bisection,
2807 // which needs 3 times the bitwidth of the coefficient, so the total number
2808 // of required bits is 3n.
2810 // The purpose of this extension is to simulate the set Z of all integers,
2811 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2812 // and negative numbers (not so much in a modulo arithmetic). The method
2813 // used to solve the equation is based on the standard formula for real
2814 // numbers, and uses the concepts of "positive" and "negative" with their
2817 A
= A
.sext(CoeffWidth
);
2818 B
= B
.sext(CoeffWidth
);
2819 C
= C
.sext(CoeffWidth
);
2821 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2822 // the bit width has increased.
2823 if (A
.isNegative()) {
2829 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2830 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2831 // and R = 2^BitWidth.
2832 // Since we're trying not only to find exact solutions, but also values
2833 // that "wrap around", such a set will always have a solution, i.e. an x
2834 // that satisfies at least one of the equations, or such that |q(x)|
2835 // exceeds kR, while |q(x-1)| for the same k does not.
2837 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2838 // positive solution n (in the above sense), and also such that the n
2839 // will be the least among all solutions corresponding to k = 0, 1, ...
2840 // (more precisely, the least element in the set
2841 // { n(k) | k is such that a solution n(k) exists }).
2843 // Consider the parabola (over real numbers) that corresponds to the
2844 // quadratic equation. Since A > 0, the arms of the parabola will point
2845 // up. Picking different values of k will shift it up and down by R.
2847 // We want to shift the parabola in such a way as to reduce the problem
2848 // of solving q(x) = kR to solving shifted_q(x) = 0.
2849 // (The interesting solutions are the ceilings of the real number
2851 APInt R
= APInt::getOneBitSet(CoeffWidth
, RangeWidth
);
2856 auto RoundUp
= [] (const APInt
&V
, const APInt
&A
) -> APInt
{
2857 assert(A
.isStrictlyPositive());
2858 APInt T
= V
.abs().urem(A
);
2861 return V
.isNegative() ? V
+T
: V
+(A
-T
);
2864 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2865 // iff B is positive.
2866 if (B
.isNonNegative()) {
2867 // If B >= 0, the vertex it at a negative location (or at 0), so in
2868 // order to have a non-negative solution we need to pick k that makes
2869 // C-kR negative. To satisfy all the requirements for the solution
2870 // that we are looking for, it needs to be closest to 0 of all k.
2872 if (C
.isStrictlyPositive())
2874 // Pick the greater solution.
2877 // If B < 0, the vertex is at a positive location. For any solution
2878 // to exist, the discriminant must be non-negative. This means that
2879 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2880 // lower bound on values of k: kR >= C - B^2/4A.
2881 APInt LowkR
= C
- SqrB
.udiv(2*TwoA
); // udiv because all values > 0.
2882 // Round LowkR up (towards +inf) to the nearest kR.
2883 LowkR
= RoundUp(LowkR
, R
);
2885 // If there exists k meeting the condition above, and such that
2886 // C-kR > 0, there will be two positive real number solutions of
2887 // q(x) = kR. Out of all such values of k, pick the one that makes
2888 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2889 // In other words, find maximum k such that LowkR <= kR < C.
2891 // If LowkR < C, then such a k is guaranteed to exist because
2892 // LowkR itself is a multiple of R.
2893 C
-= -RoundUp(-C
, R
); // C = C - RoundDown(C, R)
2894 // Pick the smaller solution.
2897 // If C-kR < 0 for all potential k's, it means that one solution
2898 // will be negative, while the other will be positive. The positive
2899 // solution will shift towards 0 if the parabola is moved up.
2900 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2901 // to 0, or in other words, out of all parabolas that have solutions,
2902 // pick the one that is the farthest "up").
2903 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2905 // Pick the greater solution.
2910 LLVM_DEBUG(dbgs() << __func__
<< ": updated coefficients " << A
<< "x^2 + "
2911 << B
<< "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2913 APInt D
= SqrB
- 4*A
*C
;
2914 assert(D
.isNonNegative() && "Negative discriminant");
2915 APInt SQ
= D
.sqrt();
2918 bool InexactSQ
= Q
!= D
;
2919 // The calculated SQ may actually be greater than the exact (non-integer)
2920 // value. If that's the case, decrement SQ to get a value that is lower.
2927 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2928 // When using the quadratic formula directly, the calculated low root
2929 // may be greater than the exact one, since we would be subtracting SQ.
2930 // To make sure that the calculated root is not greater than the exact
2931 // one, subtract SQ+1 when calculating the low root (for inexact value
2934 APInt::sdivrem(-B
- (SQ
+InexactSQ
), TwoA
, X
, Rem
);
2936 APInt::sdivrem(-B
+ SQ
, TwoA
, X
, Rem
);
2938 // The updated coefficients should be such that the (exact) solution is
2939 // positive. Since APInt division rounds towards 0, the calculated one
2940 // can be 0, but cannot be negative.
2941 assert(X
.isNonNegative() && "Solution should be non-negative");
2943 if (!InexactSQ
&& Rem
.isZero()) {
2944 LLVM_DEBUG(dbgs() << __func__
<< ": solution (root): " << X
<< '\n');
2948 assert((SQ
*SQ
).sle(D
) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2949 // The exact value of the square root of D should be between SQ and SQ+1.
2950 // This implies that the solution should be between that corresponding to
2951 // SQ (i.e. X) and that corresponding to SQ+1.
2953 // The calculated X cannot be greater than the exact (real) solution.
2954 // Actually it must be strictly less than the exact solution, while
2955 // X+1 will be greater than or equal to it.
2957 APInt VX
= (A
*X
+ B
)*X
+ C
;
2958 APInt VY
= VX
+ TwoA
*X
+ A
+ B
;
2960 VX
.isNegative() != VY
.isNegative() || VX
.isZero() != VY
.isZero();
2961 // If the sign did not change between X and X+1, X is not a valid solution.
2962 // This could happen when the actual (exact) roots don't have an integer
2963 // between them, so they would both be contained between X and X+1.
2965 LLVM_DEBUG(dbgs() << __func__
<< ": no valid solution\n");
2966 return std::nullopt
;
2970 LLVM_DEBUG(dbgs() << __func__
<< ": solution (wrap): " << X
<< '\n');
2974 std::optional
<unsigned>
2975 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt
&A
, const APInt
&B
) {
2976 assert(A
.getBitWidth() == B
.getBitWidth() && "Must have the same bitwidth");
2978 return std::nullopt
;
2979 return A
.getBitWidth() - ((A
^ B
).countl_zero() + 1);
2982 APInt
llvm::APIntOps::ScaleBitMask(const APInt
&A
, unsigned NewBitWidth
,
2983 bool MatchAllBits
) {
2984 unsigned OldBitWidth
= A
.getBitWidth();
2985 assert((((OldBitWidth
% NewBitWidth
) == 0) ||
2986 ((NewBitWidth
% OldBitWidth
) == 0)) &&
2987 "One size should be a multiple of the other one. "
2988 "Can't do fractional scaling.");
2990 // Check for matching bitwidths.
2991 if (OldBitWidth
== NewBitWidth
)
2994 APInt NewA
= APInt::getZero(NewBitWidth
);
2996 // Check for null input.
3000 if (NewBitWidth
> OldBitWidth
) {
3002 unsigned Scale
= NewBitWidth
/ OldBitWidth
;
3003 for (unsigned i
= 0; i
!= OldBitWidth
; ++i
)
3005 NewA
.setBits(i
* Scale
, (i
+ 1) * Scale
);
3007 unsigned Scale
= OldBitWidth
/ NewBitWidth
;
3008 for (unsigned i
= 0; i
!= NewBitWidth
; ++i
) {
3010 if (A
.extractBits(Scale
, i
* Scale
).isAllOnes())
3013 if (!A
.extractBits(Scale
, i
* Scale
).isZero())
3022 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3023 /// with the integer held in IntVal.
3024 void llvm::StoreIntToMemory(const APInt
&IntVal
, uint8_t *Dst
,
3025 unsigned StoreBytes
) {
3026 assert((IntVal
.getBitWidth()+7)/8 >= StoreBytes
&& "Integer too small!");
3027 const uint8_t *Src
= (const uint8_t *)IntVal
.getRawData();
3029 if (sys::IsLittleEndianHost
) {
3030 // Little-endian host - the source is ordered from LSB to MSB. Order the
3031 // destination from LSB to MSB: Do a straight copy.
3032 memcpy(Dst
, Src
, StoreBytes
);
3034 // Big-endian host - the source is an array of 64 bit words ordered from
3035 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3036 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3037 while (StoreBytes
> sizeof(uint64_t)) {
3038 StoreBytes
-= sizeof(uint64_t);
3039 // May not be aligned so use memcpy.
3040 memcpy(Dst
+ StoreBytes
, Src
, sizeof(uint64_t));
3041 Src
+= sizeof(uint64_t);
3044 memcpy(Dst
, Src
+ sizeof(uint64_t) - StoreBytes
, StoreBytes
);
3048 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3049 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3050 void llvm::LoadIntFromMemory(APInt
&IntVal
, const uint8_t *Src
,
3051 unsigned LoadBytes
) {
3052 assert((IntVal
.getBitWidth()+7)/8 >= LoadBytes
&& "Integer too small!");
3053 uint8_t *Dst
= reinterpret_cast<uint8_t *>(
3054 const_cast<uint64_t *>(IntVal
.getRawData()));
3056 if (sys::IsLittleEndianHost
)
3057 // Little-endian host - the destination must be ordered from LSB to MSB.
3058 // The source is ordered from LSB to MSB: Do a straight copy.
3059 memcpy(Dst
, Src
, LoadBytes
);
3061 // Big-endian - the destination is an array of 64 bit words ordered from
3062 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3063 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3065 while (LoadBytes
> sizeof(uint64_t)) {
3066 LoadBytes
-= sizeof(uint64_t);
3067 // May not be aligned so use memcpy.
3068 memcpy(Dst
, Src
+ LoadBytes
, sizeof(uint64_t));
3069 Dst
+= sizeof(uint64_t);
3072 memcpy(Dst
+ sizeof(uint64_t) - LoadBytes
, Src
, LoadBytes
);
3076 APInt
APIntOps::avgFloorS(const APInt
&C1
, const APInt
&C2
) {
3077 // Return floor((C1 + C2) / 2)
3078 return (C1
& C2
) + (C1
^ C2
).ashr(1);
3081 APInt
APIntOps::avgFloorU(const APInt
&C1
, const APInt
&C2
) {
3082 // Return floor((C1 + C2) / 2)
3083 return (C1
& C2
) + (C1
^ C2
).lshr(1);
3086 APInt
APIntOps::avgCeilS(const APInt
&C1
, const APInt
&C2
) {
3087 // Return ceil((C1 + C2) / 2)
3088 return (C1
| C2
) - (C1
^ C2
).ashr(1);
3091 APInt
APIntOps::avgCeilU(const APInt
&C1
, const APInt
&C2
) {
3092 // Return ceil((C1 + C2) / 2)
3093 return (C1
| C2
) - (C1
^ C2
).lshr(1);
3096 APInt
APIntOps::mulhs(const APInt
&C1
, const APInt
&C2
) {
3097 assert(C1
.getBitWidth() == C2
.getBitWidth() && "Unequal bitwidths");
3098 unsigned FullWidth
= C1
.getBitWidth() * 2;
3099 APInt C1Ext
= C1
.sext(FullWidth
);
3100 APInt C2Ext
= C2
.sext(FullWidth
);
3101 return (C1Ext
* C2Ext
).extractBits(C1
.getBitWidth(), C1
.getBitWidth());
3104 APInt
APIntOps::mulhu(const APInt
&C1
, const APInt
&C2
) {
3105 assert(C1
.getBitWidth() == C2
.getBitWidth() && "Unequal bitwidths");
3106 unsigned FullWidth
= C1
.getBitWidth() * 2;
3107 APInt C1Ext
= C1
.zext(FullWidth
);
3108 APInt C2Ext
= C2
.zext(FullWidth
);
3109 return (C1Ext
* C2Ext
).extractBits(C1
.getBitWidth(), C1
.getBitWidth());
3112 APInt
APIntOps::pow(const APInt
&X
, int64_t N
) {
3113 assert(N
>= 0 && "negative exponents not supported.");
3114 APInt Acc
= APInt(X
.getBitWidth(), 1);
3118 int64_t RemainingExponent
= N
;
3119 while (RemainingExponent
> 0) {
3120 while (RemainingExponent
% 2 == 0) {
3122 RemainingExponent
/= 2;
3124 --RemainingExponent
;