1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/bit.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Alignment.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
32 #define DEBUG_TYPE "apint"
34 /// A utility function for allocating memory, checking for allocation failures,
35 /// and ensuring the contents are zeroed.
36 inline static uint64_t* getClearedMemory(unsigned numWords
) {
37 uint64_t *result
= new uint64_t[numWords
];
38 memset(result
, 0, numWords
* sizeof(uint64_t));
42 /// A utility function for allocating memory and checking for allocation
43 /// failure. The content is not zeroed.
44 inline static uint64_t* getMemory(unsigned numWords
) {
45 return new uint64_t[numWords
];
48 /// A utility function that converts a character to a digit.
49 inline static unsigned getDigit(char cdigit
, uint8_t radix
) {
52 if (radix
== 16 || radix
== 36) {
76 void APInt::initSlowCase(uint64_t val
, bool isSigned
) {
77 U
.pVal
= getClearedMemory(getNumWords());
79 if (isSigned
&& int64_t(val
) < 0)
80 for (unsigned i
= 1; i
< getNumWords(); ++i
)
81 U
.pVal
[i
] = WORDTYPE_MAX
;
85 void APInt::initSlowCase(const APInt
& that
) {
86 U
.pVal
= getMemory(getNumWords());
87 memcpy(U
.pVal
, that
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
90 void APInt::initFromArray(ArrayRef
<uint64_t> bigVal
) {
91 assert(bigVal
.data() && "Null pointer detected!");
95 // Get memory, cleared to 0
96 U
.pVal
= getClearedMemory(getNumWords());
97 // Calculate the number of words to copy
98 unsigned words
= std::min
<unsigned>(bigVal
.size(), getNumWords());
99 // Copy the words from bigVal to pVal
100 memcpy(U
.pVal
, bigVal
.data(), words
* APINT_WORD_SIZE
);
102 // Make sure unused high bits are cleared
106 APInt::APInt(unsigned numBits
, ArrayRef
<uint64_t> bigVal
) : BitWidth(numBits
) {
107 initFromArray(bigVal
);
110 APInt::APInt(unsigned numBits
, unsigned numWords
, const uint64_t bigVal
[])
111 : BitWidth(numBits
) {
112 initFromArray(ArrayRef(bigVal
, numWords
));
115 APInt::APInt(unsigned numbits
, StringRef Str
, uint8_t radix
)
116 : BitWidth(numbits
) {
117 fromString(numbits
, Str
, radix
);
120 void APInt::reallocate(unsigned NewBitWidth
) {
121 // If the number of words is the same we can just change the width and stop.
122 if (getNumWords() == getNumWords(NewBitWidth
)) {
123 BitWidth
= NewBitWidth
;
127 // If we have an allocation, delete it.
132 BitWidth
= NewBitWidth
;
134 // If we are supposed to have an allocation, create it.
136 U
.pVal
= getMemory(getNumWords());
139 void APInt::assignSlowCase(const APInt
&RHS
) {
140 // Don't do anything for X = X
144 // Adjust the bit width and handle allocations as necessary.
145 reallocate(RHS
.getBitWidth());
151 memcpy(U
.pVal
, RHS
.U
.pVal
, getNumWords() * APINT_WORD_SIZE
);
154 /// This method 'profiles' an APInt for use with FoldingSet.
155 void APInt::Profile(FoldingSetNodeID
& ID
) const {
156 ID
.AddInteger(BitWidth
);
158 if (isSingleWord()) {
159 ID
.AddInteger(U
.VAL
);
163 unsigned NumWords
= getNumWords();
164 for (unsigned i
= 0; i
< NumWords
; ++i
)
165 ID
.AddInteger(U
.pVal
[i
]);
168 bool APInt::isAligned(Align A
) const {
171 const unsigned TrailingZeroes
= countr_zero();
172 const unsigned MinimumTrailingZeroes
= Log2(A
);
173 return TrailingZeroes
>= MinimumTrailingZeroes
;
176 /// Prefix increment operator. Increments the APInt by one.
177 APInt
& APInt::operator++() {
181 tcIncrement(U
.pVal
, getNumWords());
182 return clearUnusedBits();
185 /// Prefix decrement operator. Decrements the APInt by one.
186 APInt
& APInt::operator--() {
190 tcDecrement(U
.pVal
, getNumWords());
191 return clearUnusedBits();
194 /// Adds the RHS APInt to this APInt.
195 /// @returns this, after addition of RHS.
196 /// Addition assignment operator.
197 APInt
& APInt::operator+=(const APInt
& RHS
) {
198 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
202 tcAdd(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
203 return clearUnusedBits();
206 APInt
& APInt::operator+=(uint64_t RHS
) {
210 tcAddPart(U
.pVal
, RHS
, getNumWords());
211 return clearUnusedBits();
214 /// Subtracts the RHS APInt from this APInt
215 /// @returns this, after subtraction
216 /// Subtraction assignment operator.
217 APInt
& APInt::operator-=(const APInt
& RHS
) {
218 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
222 tcSubtract(U
.pVal
, RHS
.U
.pVal
, 0, getNumWords());
223 return clearUnusedBits();
226 APInt
& APInt::operator-=(uint64_t RHS
) {
230 tcSubtractPart(U
.pVal
, RHS
, getNumWords());
231 return clearUnusedBits();
234 APInt
APInt::operator*(const APInt
& RHS
) const {
235 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
237 return APInt(BitWidth
, U
.VAL
* RHS
.U
.VAL
);
239 APInt
Result(getMemory(getNumWords()), getBitWidth());
240 tcMultiply(Result
.U
.pVal
, U
.pVal
, RHS
.U
.pVal
, getNumWords());
241 Result
.clearUnusedBits();
245 void APInt::andAssignSlowCase(const APInt
&RHS
) {
246 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
247 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
251 void APInt::orAssignSlowCase(const APInt
&RHS
) {
252 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
253 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
257 void APInt::xorAssignSlowCase(const APInt
&RHS
) {
258 WordType
*dst
= U
.pVal
, *rhs
= RHS
.U
.pVal
;
259 for (size_t i
= 0, e
= getNumWords(); i
!= e
; ++i
)
263 APInt
&APInt::operator*=(const APInt
&RHS
) {
268 APInt
& APInt::operator*=(uint64_t RHS
) {
269 if (isSingleWord()) {
272 unsigned NumWords
= getNumWords();
273 tcMultiplyPart(U
.pVal
, U
.pVal
, RHS
, 0, NumWords
, NumWords
, false);
275 return clearUnusedBits();
278 bool APInt::equalSlowCase(const APInt
&RHS
) const {
279 return std::equal(U
.pVal
, U
.pVal
+ getNumWords(), RHS
.U
.pVal
);
282 int APInt::compare(const APInt
& RHS
) const {
283 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
285 return U
.VAL
< RHS
.U
.VAL
? -1 : U
.VAL
> RHS
.U
.VAL
;
287 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
290 int APInt::compareSigned(const APInt
& RHS
) const {
291 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be same for comparison");
292 if (isSingleWord()) {
293 int64_t lhsSext
= SignExtend64(U
.VAL
, BitWidth
);
294 int64_t rhsSext
= SignExtend64(RHS
.U
.VAL
, BitWidth
);
295 return lhsSext
< rhsSext
? -1 : lhsSext
> rhsSext
;
298 bool lhsNeg
= isNegative();
299 bool rhsNeg
= RHS
.isNegative();
301 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
302 if (lhsNeg
!= rhsNeg
)
303 return lhsNeg
? -1 : 1;
305 // Otherwise we can just use an unsigned comparison, because even negative
306 // numbers compare correctly this way if both have the same signed-ness.
307 return tcCompare(U
.pVal
, RHS
.U
.pVal
, getNumWords());
310 void APInt::setBitsSlowCase(unsigned loBit
, unsigned hiBit
) {
311 unsigned loWord
= whichWord(loBit
);
312 unsigned hiWord
= whichWord(hiBit
);
314 // Create an initial mask for the low word with zeros below loBit.
315 uint64_t loMask
= WORDTYPE_MAX
<< whichBit(loBit
);
317 // If hiBit is not aligned, we need a high mask.
318 unsigned hiShiftAmt
= whichBit(hiBit
);
319 if (hiShiftAmt
!= 0) {
320 // Create a high mask with zeros above hiBit.
321 uint64_t hiMask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- hiShiftAmt
);
322 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
323 // set the bits in hiWord.
324 if (hiWord
== loWord
)
327 U
.pVal
[hiWord
] |= hiMask
;
329 // Apply the mask to the low word.
330 U
.pVal
[loWord
] |= loMask
;
332 // Fill any words between loWord and hiWord with all ones.
333 for (unsigned word
= loWord
+ 1; word
< hiWord
; ++word
)
334 U
.pVal
[word
] = WORDTYPE_MAX
;
337 // Complement a bignum in-place.
338 static void tcComplement(APInt::WordType
*dst
, unsigned parts
) {
339 for (unsigned i
= 0; i
< parts
; i
++)
343 /// Toggle every bit to its opposite value.
344 void APInt::flipAllBitsSlowCase() {
345 tcComplement(U
.pVal
, getNumWords());
349 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
351 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
352 /// In the slow case, we know the result is large.
353 APInt
APInt::concatSlowCase(const APInt
&NewLSB
) const {
354 unsigned NewWidth
= getBitWidth() + NewLSB
.getBitWidth();
355 APInt Result
= NewLSB
.zext(NewWidth
);
356 Result
.insertBits(*this, NewLSB
.getBitWidth());
360 /// Toggle a given bit to its opposite value whose position is given
361 /// as "bitPosition".
362 /// Toggles a given bit to its opposite value.
363 void APInt::flipBit(unsigned bitPosition
) {
364 assert(bitPosition
< BitWidth
&& "Out of the bit-width range!");
365 setBitVal(bitPosition
, !(*this)[bitPosition
]);
368 void APInt::insertBits(const APInt
&subBits
, unsigned bitPosition
) {
369 unsigned subBitWidth
= subBits
.getBitWidth();
370 assert((subBitWidth
+ bitPosition
) <= BitWidth
&& "Illegal bit insertion");
372 // inserting no bits is a noop.
373 if (subBitWidth
== 0)
376 // Insertion is a direct copy.
377 if (subBitWidth
== BitWidth
) {
382 // Single word result can be done as a direct bitmask.
383 if (isSingleWord()) {
384 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
385 U
.VAL
&= ~(mask
<< bitPosition
);
386 U
.VAL
|= (subBits
.U
.VAL
<< bitPosition
);
390 unsigned loBit
= whichBit(bitPosition
);
391 unsigned loWord
= whichWord(bitPosition
);
392 unsigned hi1Word
= whichWord(bitPosition
+ subBitWidth
- 1);
394 // Insertion within a single word can be done as a direct bitmask.
395 if (loWord
== hi1Word
) {
396 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- subBitWidth
);
397 U
.pVal
[loWord
] &= ~(mask
<< loBit
);
398 U
.pVal
[loWord
] |= (subBits
.U
.VAL
<< loBit
);
402 // Insert on word boundaries.
404 // Direct copy whole words.
405 unsigned numWholeSubWords
= subBitWidth
/ APINT_BITS_PER_WORD
;
406 memcpy(U
.pVal
+ loWord
, subBits
.getRawData(),
407 numWholeSubWords
* APINT_WORD_SIZE
);
409 // Mask+insert remaining bits.
410 unsigned remainingBits
= subBitWidth
% APINT_BITS_PER_WORD
;
411 if (remainingBits
!= 0) {
412 uint64_t mask
= WORDTYPE_MAX
>> (APINT_BITS_PER_WORD
- remainingBits
);
413 U
.pVal
[hi1Word
] &= ~mask
;
414 U
.pVal
[hi1Word
] |= subBits
.getWord(subBitWidth
- 1);
419 // General case - set/clear individual bits in dst based on src.
420 // TODO - there is scope for optimization here, but at the moment this code
421 // path is barely used so prefer readability over performance.
422 for (unsigned i
= 0; i
!= subBitWidth
; ++i
)
423 setBitVal(bitPosition
+ i
, subBits
[i
]);
426 void APInt::insertBits(uint64_t subBits
, unsigned bitPosition
, unsigned numBits
) {
427 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
429 if (isSingleWord()) {
430 U
.VAL
&= ~(maskBits
<< bitPosition
);
431 U
.VAL
|= subBits
<< bitPosition
;
435 unsigned loBit
= whichBit(bitPosition
);
436 unsigned loWord
= whichWord(bitPosition
);
437 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
438 if (loWord
== hiWord
) {
439 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
440 U
.pVal
[loWord
] |= subBits
<< loBit
;
444 static_assert(8 * sizeof(WordType
) <= 64, "This code assumes only two words affected");
445 unsigned wordBits
= 8 * sizeof(WordType
);
446 U
.pVal
[loWord
] &= ~(maskBits
<< loBit
);
447 U
.pVal
[loWord
] |= subBits
<< loBit
;
449 U
.pVal
[hiWord
] &= ~(maskBits
>> (wordBits
- loBit
));
450 U
.pVal
[hiWord
] |= subBits
>> (wordBits
- loBit
);
453 APInt
APInt::extractBits(unsigned numBits
, unsigned bitPosition
) const {
454 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
455 "Illegal bit extraction");
458 return APInt(numBits
, U
.VAL
>> bitPosition
);
460 unsigned loBit
= whichBit(bitPosition
);
461 unsigned loWord
= whichWord(bitPosition
);
462 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
464 // Single word result extracting bits from a single word source.
465 if (loWord
== hiWord
)
466 return APInt(numBits
, U
.pVal
[loWord
] >> loBit
);
468 // Extracting bits that start on a source word boundary can be done
469 // as a fast memory copy.
471 return APInt(numBits
, ArrayRef(U
.pVal
+ loWord
, 1 + hiWord
- loWord
));
473 // General case - shift + copy source words directly into place.
474 APInt
Result(numBits
, 0);
475 unsigned NumSrcWords
= getNumWords();
476 unsigned NumDstWords
= Result
.getNumWords();
478 uint64_t *DestPtr
= Result
.isSingleWord() ? &Result
.U
.VAL
: Result
.U
.pVal
;
479 for (unsigned word
= 0; word
< NumDstWords
; ++word
) {
480 uint64_t w0
= U
.pVal
[loWord
+ word
];
482 (loWord
+ word
+ 1) < NumSrcWords
? U
.pVal
[loWord
+ word
+ 1] : 0;
483 DestPtr
[word
] = (w0
>> loBit
) | (w1
<< (APINT_BITS_PER_WORD
- loBit
));
486 return Result
.clearUnusedBits();
489 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits
,
490 unsigned bitPosition
) const {
491 assert(bitPosition
< BitWidth
&& (numBits
+ bitPosition
) <= BitWidth
&&
492 "Illegal bit extraction");
493 assert(numBits
<= 64 && "Illegal bit extraction");
495 uint64_t maskBits
= maskTrailingOnes
<uint64_t>(numBits
);
497 return (U
.VAL
>> bitPosition
) & maskBits
;
499 unsigned loBit
= whichBit(bitPosition
);
500 unsigned loWord
= whichWord(bitPosition
);
501 unsigned hiWord
= whichWord(bitPosition
+ numBits
- 1);
502 if (loWord
== hiWord
)
503 return (U
.pVal
[loWord
] >> loBit
) & maskBits
;
505 static_assert(8 * sizeof(WordType
) <= 64, "This code assumes only two words affected");
506 unsigned wordBits
= 8 * sizeof(WordType
);
507 uint64_t retBits
= U
.pVal
[loWord
] >> loBit
;
508 retBits
|= U
.pVal
[hiWord
] << (wordBits
- loBit
);
513 unsigned APInt::getSufficientBitsNeeded(StringRef Str
, uint8_t Radix
) {
514 assert(!Str
.empty() && "Invalid string length");
515 size_t StrLen
= Str
.size();
517 // Each computation below needs to know if it's negative.
518 unsigned IsNegative
= false;
519 if (Str
[0] == '-' || Str
[0] == '+') {
520 IsNegative
= Str
[0] == '-';
522 assert(StrLen
&& "String is only a sign, needs a value.");
525 // For radixes of power-of-two values, the bits required is accurately and
528 return StrLen
+ IsNegative
;
530 return StrLen
* 3 + IsNegative
;
532 return StrLen
* 4 + IsNegative
;
534 // Compute a sufficient number of bits that is always large enough but might
535 // be too large. This avoids the assertion in the constructor. This
536 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
537 // bits in that case.
539 return (StrLen
== 1 ? 4 : StrLen
* 64 / 18) + IsNegative
;
542 return (StrLen
== 1 ? 7 : StrLen
* 16 / 3) + IsNegative
;
545 unsigned APInt::getBitsNeeded(StringRef str
, uint8_t radix
) {
546 // Compute a sufficient number of bits that is always large enough but might
548 unsigned sufficient
= getSufficientBitsNeeded(str
, radix
);
550 // For bases 2, 8, and 16, the sufficient number of bits is exact and we can
551 // return the value directly. For bases 10 and 36, we need to do extra work.
552 if (radix
== 2 || radix
== 8 || radix
== 16)
555 // This is grossly inefficient but accurate. We could probably do something
556 // with a computation of roughly slen*64/20 and then adjust by the value of
557 // the first few digits. But, I'm not sure how accurate that could be.
558 size_t slen
= str
.size();
560 // Each computation below needs to know if it's negative.
561 StringRef::iterator p
= str
.begin();
562 unsigned isNegative
= *p
== '-';
563 if (*p
== '-' || *p
== '+') {
566 assert(slen
&& "String is only a sign, needs a value.");
570 // Convert to the actual binary value.
571 APInt
tmp(sufficient
, StringRef(p
, slen
), radix
);
573 // Compute how many bits are required. If the log is infinite, assume we need
574 // just bit. If the log is exact and value is negative, then the value is
575 // MinSignedValue with (log + 1) bits.
576 unsigned log
= tmp
.logBase2();
577 if (log
== (unsigned)-1) {
578 return isNegative
+ 1;
579 } else if (isNegative
&& tmp
.isPowerOf2()) {
580 return isNegative
+ log
;
582 return isNegative
+ log
+ 1;
586 hash_code
llvm::hash_value(const APInt
&Arg
) {
587 if (Arg
.isSingleWord())
588 return hash_combine(Arg
.BitWidth
, Arg
.U
.VAL
);
592 hash_combine_range(Arg
.U
.pVal
, Arg
.U
.pVal
+ Arg
.getNumWords()));
595 unsigned DenseMapInfo
<APInt
, void>::getHashValue(const APInt
&Key
) {
596 return static_cast<unsigned>(hash_value(Key
));
599 bool APInt::isSplat(unsigned SplatSizeInBits
) const {
600 assert(getBitWidth() % SplatSizeInBits
== 0 &&
601 "SplatSizeInBits must divide width!");
602 // We can check that all parts of an integer are equal by making use of a
603 // little trick: rotate and check if it's still the same value.
604 return *this == rotl(SplatSizeInBits
);
607 /// This function returns the high "numBits" bits of this APInt.
608 APInt
APInt::getHiBits(unsigned numBits
) const {
609 return this->lshr(BitWidth
- numBits
);
612 /// This function returns the low "numBits" bits of this APInt.
613 APInt
APInt::getLoBits(unsigned numBits
) const {
614 APInt
Result(getLowBitsSet(BitWidth
, numBits
));
619 /// Return a value containing V broadcasted over NewLen bits.
620 APInt
APInt::getSplat(unsigned NewLen
, const APInt
&V
) {
621 assert(NewLen
>= V
.getBitWidth() && "Can't splat to smaller bit width!");
623 APInt Val
= V
.zext(NewLen
);
624 for (unsigned I
= V
.getBitWidth(); I
< NewLen
; I
<<= 1)
630 unsigned APInt::countLeadingZerosSlowCase() const {
632 for (int i
= getNumWords()-1; i
>= 0; --i
) {
633 uint64_t V
= U
.pVal
[i
];
635 Count
+= APINT_BITS_PER_WORD
;
637 Count
+= llvm::countl_zero(V
);
641 // Adjust for unused bits in the most significant word (they are zero).
642 unsigned Mod
= BitWidth
% APINT_BITS_PER_WORD
;
643 Count
-= Mod
> 0 ? APINT_BITS_PER_WORD
- Mod
: 0;
647 unsigned APInt::countLeadingOnesSlowCase() const {
648 unsigned highWordBits
= BitWidth
% APINT_BITS_PER_WORD
;
651 highWordBits
= APINT_BITS_PER_WORD
;
654 shift
= APINT_BITS_PER_WORD
- highWordBits
;
656 int i
= getNumWords() - 1;
657 unsigned Count
= llvm::countl_one(U
.pVal
[i
] << shift
);
658 if (Count
== highWordBits
) {
659 for (i
--; i
>= 0; --i
) {
660 if (U
.pVal
[i
] == WORDTYPE_MAX
)
661 Count
+= APINT_BITS_PER_WORD
;
663 Count
+= llvm::countl_one(U
.pVal
[i
]);
671 unsigned APInt::countTrailingZerosSlowCase() const {
674 for (; i
< getNumWords() && U
.pVal
[i
] == 0; ++i
)
675 Count
+= APINT_BITS_PER_WORD
;
676 if (i
< getNumWords())
677 Count
+= llvm::countr_zero(U
.pVal
[i
]);
678 return std::min(Count
, BitWidth
);
681 unsigned APInt::countTrailingOnesSlowCase() const {
684 for (; i
< getNumWords() && U
.pVal
[i
] == WORDTYPE_MAX
; ++i
)
685 Count
+= APINT_BITS_PER_WORD
;
686 if (i
< getNumWords())
687 Count
+= llvm::countr_one(U
.pVal
[i
]);
688 assert(Count
<= BitWidth
);
692 unsigned APInt::countPopulationSlowCase() const {
694 for (unsigned i
= 0; i
< getNumWords(); ++i
)
695 Count
+= llvm::popcount(U
.pVal
[i
]);
699 bool APInt::intersectsSlowCase(const APInt
&RHS
) const {
700 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
701 if ((U
.pVal
[i
] & RHS
.U
.pVal
[i
]) != 0)
707 bool APInt::isSubsetOfSlowCase(const APInt
&RHS
) const {
708 for (unsigned i
= 0, e
= getNumWords(); i
!= e
; ++i
)
709 if ((U
.pVal
[i
] & ~RHS
.U
.pVal
[i
]) != 0)
715 APInt
APInt::byteSwap() const {
716 assert(BitWidth
>= 16 && BitWidth
% 8 == 0 && "Cannot byteswap!");
718 return APInt(BitWidth
, llvm::byteswap
<uint16_t>(U
.VAL
));
720 return APInt(BitWidth
, llvm::byteswap
<uint32_t>(U
.VAL
));
721 if (BitWidth
<= 64) {
722 uint64_t Tmp1
= llvm::byteswap
<uint64_t>(U
.VAL
);
723 Tmp1
>>= (64 - BitWidth
);
724 return APInt(BitWidth
, Tmp1
);
727 APInt
Result(getNumWords() * APINT_BITS_PER_WORD
, 0);
728 for (unsigned I
= 0, N
= getNumWords(); I
!= N
; ++I
)
729 Result
.U
.pVal
[I
] = llvm::byteswap
<uint64_t>(U
.pVal
[N
- I
- 1]);
730 if (Result
.BitWidth
!= BitWidth
) {
731 Result
.lshrInPlace(Result
.BitWidth
- BitWidth
);
732 Result
.BitWidth
= BitWidth
;
737 APInt
APInt::reverseBits() const {
740 return APInt(BitWidth
, llvm::reverseBits
<uint64_t>(U
.VAL
));
742 return APInt(BitWidth
, llvm::reverseBits
<uint32_t>(U
.VAL
));
744 return APInt(BitWidth
, llvm::reverseBits
<uint16_t>(U
.VAL
));
746 return APInt(BitWidth
, llvm::reverseBits
<uint8_t>(U
.VAL
));
754 APInt
Reversed(BitWidth
, 0);
755 unsigned S
= BitWidth
;
757 for (; Val
!= 0; Val
.lshrInPlace(1)) {
767 APInt
llvm::APIntOps::GreatestCommonDivisor(APInt A
, APInt B
) {
768 // Fast-path a common case.
769 if (A
== B
) return A
;
771 // Corner cases: if either operand is zero, the other is the gcd.
775 // Count common powers of 2 and remove all other powers of 2.
778 unsigned Pow2_A
= A
.countr_zero();
779 unsigned Pow2_B
= B
.countr_zero();
780 if (Pow2_A
> Pow2_B
) {
781 A
.lshrInPlace(Pow2_A
- Pow2_B
);
783 } else if (Pow2_B
> Pow2_A
) {
784 B
.lshrInPlace(Pow2_B
- Pow2_A
);
791 // Both operands are odd multiples of 2^Pow_2:
793 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
795 // This is a modified version of Stein's algorithm, taking advantage of
796 // efficient countTrailingZeros().
800 A
.lshrInPlace(A
.countr_zero() - Pow2
);
803 B
.lshrInPlace(B
.countr_zero() - Pow2
);
810 APInt
llvm::APIntOps::RoundDoubleToAPInt(double Double
, unsigned width
) {
811 uint64_t I
= bit_cast
<uint64_t>(Double
);
813 // Get the sign bit from the highest order bit
814 bool isNeg
= I
>> 63;
816 // Get the 11-bit exponent and adjust for the 1023 bit bias
817 int64_t exp
= ((I
>> 52) & 0x7ff) - 1023;
819 // If the exponent is negative, the value is < 0 so just return 0.
821 return APInt(width
, 0u);
823 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
824 uint64_t mantissa
= (I
& (~0ULL >> 12)) | 1ULL << 52;
826 // If the exponent doesn't shift all bits out of the mantissa
828 return isNeg
? -APInt(width
, mantissa
>> (52 - exp
)) :
829 APInt(width
, mantissa
>> (52 - exp
));
831 // If the client didn't provide enough bits for us to shift the mantissa into
832 // then the result is undefined, just return 0
833 if (width
<= exp
- 52)
834 return APInt(width
, 0);
836 // Otherwise, we have to shift the mantissa bits up to the right location
837 APInt
Tmp(width
, mantissa
);
838 Tmp
<<= (unsigned)exp
- 52;
839 return isNeg
? -Tmp
: Tmp
;
842 /// This function converts this APInt to a double.
843 /// The layout for double is as following (IEEE Standard 754):
844 /// --------------------------------------
845 /// | Sign Exponent Fraction Bias |
846 /// |-------------------------------------- |
847 /// | 1[63] 11[62-52] 52[51-00] 1023 |
848 /// --------------------------------------
849 double APInt::roundToDouble(bool isSigned
) const {
851 // Handle the simple case where the value is contained in one uint64_t.
852 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
853 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD
) {
855 int64_t sext
= SignExtend64(getWord(0), BitWidth
);
858 return double(getWord(0));
861 // Determine if the value is negative.
862 bool isNeg
= isSigned
? (*this)[BitWidth
-1] : false;
864 // Construct the absolute value if we're negative.
865 APInt
Tmp(isNeg
? -(*this) : (*this));
867 // Figure out how many bits we're using.
868 unsigned n
= Tmp
.getActiveBits();
870 // The exponent (without bias normalization) is just the number of bits
871 // we are using. Note that the sign bit is gone since we constructed the
875 // Return infinity for exponent overflow
877 if (!isSigned
|| !isNeg
)
878 return std::numeric_limits
<double>::infinity();
880 return -std::numeric_limits
<double>::infinity();
882 exp
+= 1023; // Increment for 1023 bias
884 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
885 // extract the high 52 bits from the correct words in pVal.
887 unsigned hiWord
= whichWord(n
-1);
889 mantissa
= Tmp
.U
.pVal
[0];
891 mantissa
>>= n
- 52; // shift down, we want the top 52 bits.
893 assert(hiWord
> 0 && "huh?");
894 uint64_t hibits
= Tmp
.U
.pVal
[hiWord
] << (52 - n
% APINT_BITS_PER_WORD
);
895 uint64_t lobits
= Tmp
.U
.pVal
[hiWord
-1] >> (11 + n
% APINT_BITS_PER_WORD
);
896 mantissa
= hibits
| lobits
;
899 // The leading bit of mantissa is implicit, so get rid of it.
900 uint64_t sign
= isNeg
? (1ULL << (APINT_BITS_PER_WORD
- 1)) : 0;
901 uint64_t I
= sign
| (exp
<< 52) | mantissa
;
902 return bit_cast
<double>(I
);
905 // Truncate to new width.
906 APInt
APInt::trunc(unsigned width
) const {
907 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
909 if (width
<= APINT_BITS_PER_WORD
)
910 return APInt(width
, getRawData()[0]);
912 if (width
== BitWidth
)
915 APInt
Result(getMemory(getNumWords(width
)), width
);
919 for (i
= 0; i
!= width
/ APINT_BITS_PER_WORD
; i
++)
920 Result
.U
.pVal
[i
] = U
.pVal
[i
];
922 // Truncate and copy any partial word.
923 unsigned bits
= (0 - width
) % APINT_BITS_PER_WORD
;
925 Result
.U
.pVal
[i
] = U
.pVal
[i
] << bits
>> bits
;
930 // Truncate to new width with unsigned saturation.
931 APInt
APInt::truncUSat(unsigned width
) const {
932 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
934 // Can we just losslessly truncate it?
937 // If not, then just return the new limit.
938 return APInt::getMaxValue(width
);
941 // Truncate to new width with signed saturation.
942 APInt
APInt::truncSSat(unsigned width
) const {
943 assert(width
<= BitWidth
&& "Invalid APInt Truncate request");
945 // Can we just losslessly truncate it?
946 if (isSignedIntN(width
))
948 // If not, then just return the new limits.
949 return isNegative() ? APInt::getSignedMinValue(width
)
950 : APInt::getSignedMaxValue(width
);
953 // Sign extend to a new width.
954 APInt
APInt::sext(unsigned Width
) const {
955 assert(Width
>= BitWidth
&& "Invalid APInt SignExtend request");
957 if (Width
<= APINT_BITS_PER_WORD
)
958 return APInt(Width
, SignExtend64(U
.VAL
, BitWidth
));
960 if (Width
== BitWidth
)
963 APInt
Result(getMemory(getNumWords(Width
)), Width
);
966 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
968 // Sign extend the last word since there may be unused bits in the input.
969 Result
.U
.pVal
[getNumWords() - 1] =
970 SignExtend64(Result
.U
.pVal
[getNumWords() - 1],
971 ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
973 // Fill with sign bits.
974 std::memset(Result
.U
.pVal
+ getNumWords(), isNegative() ? -1 : 0,
975 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
976 Result
.clearUnusedBits();
980 // Zero extend to a new width.
981 APInt
APInt::zext(unsigned width
) const {
982 assert(width
>= BitWidth
&& "Invalid APInt ZeroExtend request");
984 if (width
<= APINT_BITS_PER_WORD
)
985 return APInt(width
, U
.VAL
);
987 if (width
== BitWidth
)
990 APInt
Result(getMemory(getNumWords(width
)), width
);
993 std::memcpy(Result
.U
.pVal
, getRawData(), getNumWords() * APINT_WORD_SIZE
);
995 // Zero remaining words.
996 std::memset(Result
.U
.pVal
+ getNumWords(), 0,
997 (Result
.getNumWords() - getNumWords()) * APINT_WORD_SIZE
);
1002 APInt
APInt::zextOrTrunc(unsigned width
) const {
1003 if (BitWidth
< width
)
1005 if (BitWidth
> width
)
1006 return trunc(width
);
1010 APInt
APInt::sextOrTrunc(unsigned width
) const {
1011 if (BitWidth
< width
)
1013 if (BitWidth
> width
)
1014 return trunc(width
);
1018 /// Arithmetic right-shift this APInt by shiftAmt.
1019 /// Arithmetic right-shift function.
1020 void APInt::ashrInPlace(const APInt
&shiftAmt
) {
1021 ashrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
1024 /// Arithmetic right-shift this APInt by shiftAmt.
1025 /// Arithmetic right-shift function.
1026 void APInt::ashrSlowCase(unsigned ShiftAmt
) {
1027 // Don't bother performing a no-op shift.
1031 // Save the original sign bit for later.
1032 bool Negative
= isNegative();
1034 // WordShift is the inter-part shift; BitShift is intra-part shift.
1035 unsigned WordShift
= ShiftAmt
/ APINT_BITS_PER_WORD
;
1036 unsigned BitShift
= ShiftAmt
% APINT_BITS_PER_WORD
;
1038 unsigned WordsToMove
= getNumWords() - WordShift
;
1039 if (WordsToMove
!= 0) {
1040 // Sign extend the last word to fill in the unused bits.
1041 U
.pVal
[getNumWords() - 1] = SignExtend64(
1042 U
.pVal
[getNumWords() - 1], ((BitWidth
- 1) % APINT_BITS_PER_WORD
) + 1);
1044 // Fastpath for moving by whole words.
1045 if (BitShift
== 0) {
1046 std::memmove(U
.pVal
, U
.pVal
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
1048 // Move the words containing significant bits.
1049 for (unsigned i
= 0; i
!= WordsToMove
- 1; ++i
)
1050 U
.pVal
[i
] = (U
.pVal
[i
+ WordShift
] >> BitShift
) |
1051 (U
.pVal
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
));
1053 // Handle the last word which has no high bits to copy.
1054 U
.pVal
[WordsToMove
- 1] = U
.pVal
[WordShift
+ WordsToMove
- 1] >> BitShift
;
1055 // Sign extend one more time.
1056 U
.pVal
[WordsToMove
- 1] =
1057 SignExtend64(U
.pVal
[WordsToMove
- 1], APINT_BITS_PER_WORD
- BitShift
);
1061 // Fill in the remainder based on the original sign.
1062 std::memset(U
.pVal
+ WordsToMove
, Negative
? -1 : 0,
1063 WordShift
* APINT_WORD_SIZE
);
1067 /// Logical right-shift this APInt by shiftAmt.
1068 /// Logical right-shift function.
1069 void APInt::lshrInPlace(const APInt
&shiftAmt
) {
1070 lshrInPlace((unsigned)shiftAmt
.getLimitedValue(BitWidth
));
1073 /// Logical right-shift this APInt by shiftAmt.
1074 /// Logical right-shift function.
1075 void APInt::lshrSlowCase(unsigned ShiftAmt
) {
1076 tcShiftRight(U
.pVal
, getNumWords(), ShiftAmt
);
1079 /// Left-shift this APInt by shiftAmt.
1080 /// Left-shift function.
1081 APInt
&APInt::operator<<=(const APInt
&shiftAmt
) {
1082 // It's undefined behavior in C to shift by BitWidth or greater.
1083 *this <<= (unsigned)shiftAmt
.getLimitedValue(BitWidth
);
1087 void APInt::shlSlowCase(unsigned ShiftAmt
) {
1088 tcShiftLeft(U
.pVal
, getNumWords(), ShiftAmt
);
1092 // Calculate the rotate amount modulo the bit width.
1093 static unsigned rotateModulo(unsigned BitWidth
, const APInt
&rotateAmt
) {
1094 if (LLVM_UNLIKELY(BitWidth
== 0))
1096 unsigned rotBitWidth
= rotateAmt
.getBitWidth();
1097 APInt rot
= rotateAmt
;
1098 if (rotBitWidth
< BitWidth
) {
1099 // Extend the rotate APInt, so that the urem doesn't divide by 0.
1100 // e.g. APInt(1, 32) would give APInt(1, 0).
1101 rot
= rotateAmt
.zext(BitWidth
);
1103 rot
= rot
.urem(APInt(rot
.getBitWidth(), BitWidth
));
1104 return rot
.getLimitedValue(BitWidth
);
1107 APInt
APInt::rotl(const APInt
&rotateAmt
) const {
1108 return rotl(rotateModulo(BitWidth
, rotateAmt
));
1111 APInt
APInt::rotl(unsigned rotateAmt
) const {
1112 if (LLVM_UNLIKELY(BitWidth
== 0))
1114 rotateAmt
%= BitWidth
;
1117 return shl(rotateAmt
) | lshr(BitWidth
- rotateAmt
);
1120 APInt
APInt::rotr(const APInt
&rotateAmt
) const {
1121 return rotr(rotateModulo(BitWidth
, rotateAmt
));
1124 APInt
APInt::rotr(unsigned rotateAmt
) const {
1127 rotateAmt
%= BitWidth
;
1130 return lshr(rotateAmt
) | shl(BitWidth
- rotateAmt
);
1133 /// \returns the nearest log base 2 of this APInt. Ties round up.
1135 /// NOTE: When we have a BitWidth of 1, we define:
1137 /// log2(0) = UINT32_MAX
1140 /// to get around any mathematical concerns resulting from
1141 /// referencing 2 in a space where 2 does no exist.
1142 unsigned APInt::nearestLogBase2() const {
1143 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1144 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1149 // Handle the zero case.
1153 // The non-zero case is handled by computing:
1155 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1157 // where x[i] is referring to the value of the ith bit of x.
1158 unsigned lg
= logBase2();
1159 return lg
+ unsigned((*this)[lg
- 1]);
1162 // Square Root - this method computes and returns the square root of "this".
1163 // Three mechanisms are used for computation. For small values (<= 5 bits),
1164 // a table lookup is done. This gets some performance for common cases. For
1165 // values using less than 52 bits, the value is converted to double and then
1166 // the libc sqrt function is called. The result is rounded and then converted
1167 // back to a uint64_t which is then used to construct the result. Finally,
1168 // the Babylonian method for computing square roots is used.
1169 APInt
APInt::sqrt() const {
1171 // Determine the magnitude of the value.
1172 unsigned magnitude
= getActiveBits();
1174 // Use a fast table for some small values. This also gets rid of some
1175 // rounding errors in libc sqrt for small values.
1176 if (magnitude
<= 5) {
1177 static const uint8_t results
[32] = {
1180 /* 3- 6 */ 2, 2, 2, 2,
1181 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1182 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1183 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1186 return APInt(BitWidth
, results
[ (isSingleWord() ? U
.VAL
: U
.pVal
[0]) ]);
1189 // If the magnitude of the value fits in less than 52 bits (the precision of
1190 // an IEEE double precision floating point value), then we can use the
1191 // libc sqrt function which will probably use a hardware sqrt computation.
1192 // This should be faster than the algorithm below.
1193 if (magnitude
< 52) {
1194 return APInt(BitWidth
,
1195 uint64_t(::round(::sqrt(double(isSingleWord() ? U
.VAL
1199 // Okay, all the short cuts are exhausted. We must compute it. The following
1200 // is a classical Babylonian method for computing the square root. This code
1201 // was adapted to APInt from a wikipedia article on such computations.
1202 // See http://www.wikipedia.org/ and go to the page named
1203 // Calculate_an_integer_square_root.
1204 unsigned nbits
= BitWidth
, i
= 4;
1205 APInt
testy(BitWidth
, 16);
1206 APInt
x_old(BitWidth
, 1);
1207 APInt
x_new(BitWidth
, 0);
1208 APInt
two(BitWidth
, 2);
1210 // Select a good starting value using binary logarithms.
1211 for (;; i
+= 2, testy
= testy
.shl(2))
1212 if (i
>= nbits
|| this->ule(testy
)) {
1213 x_old
= x_old
.shl(i
/ 2);
1217 // Use the Babylonian method to arrive at the integer square root:
1219 x_new
= (this->udiv(x_old
) + x_old
).udiv(two
);
1220 if (x_old
.ule(x_new
))
1225 // Make sure we return the closest approximation
1226 // NOTE: The rounding calculation below is correct. It will produce an
1227 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1228 // determined to be a rounding issue with pari/gp as it begins to use a
1229 // floating point representation after 192 bits. There are no discrepancies
1230 // between this algorithm and pari/gp for bit widths < 192 bits.
1231 APInt
square(x_old
* x_old
);
1232 APInt
nextSquare((x_old
+ 1) * (x_old
+1));
1233 if (this->ult(square
))
1235 assert(this->ule(nextSquare
) && "Error in APInt::sqrt computation");
1236 APInt
midpoint((nextSquare
- square
).udiv(two
));
1237 APInt
offset(*this - square
);
1238 if (offset
.ult(midpoint
))
1243 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1244 /// iterative extended Euclidean algorithm is used to solve for this value,
1245 /// however we simplify it to speed up calculating only the inverse, and take
1246 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1247 /// (potentially large) APInts around.
1248 /// WARNING: a value of '0' may be returned,
1249 /// signifying that no multiplicative inverse exists!
1250 APInt
APInt::multiplicativeInverse(const APInt
& modulo
) const {
1251 assert(ult(modulo
) && "This APInt must be smaller than the modulo");
1253 // Using the properties listed at the following web page (accessed 06/21/08):
1254 // http://www.numbertheory.org/php/euclid.html
1255 // (especially the properties numbered 3, 4 and 9) it can be proved that
1256 // BitWidth bits suffice for all the computations in the algorithm implemented
1257 // below. More precisely, this number of bits suffice if the multiplicative
1258 // inverse exists, but may not suffice for the general extended Euclidean
1261 APInt r
[2] = { modulo
, *this };
1262 APInt t
[2] = { APInt(BitWidth
, 0), APInt(BitWidth
, 1) };
1263 APInt
q(BitWidth
, 0);
1266 for (i
= 0; r
[i
^1] != 0; i
^= 1) {
1267 // An overview of the math without the confusing bit-flipping:
1268 // q = r[i-2] / r[i-1]
1269 // r[i] = r[i-2] % r[i-1]
1270 // t[i] = t[i-2] - t[i-1] * q
1271 udivrem(r
[i
], r
[i
^1], q
, r
[i
]);
1275 // If this APInt and the modulo are not coprime, there is no multiplicative
1276 // inverse, so return 0. We check this by looking at the next-to-last
1277 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1280 return APInt(BitWidth
, 0);
1282 // The next-to-last t is the multiplicative inverse. However, we are
1283 // interested in a positive inverse. Calculate a positive one from a negative
1284 // one if necessary. A simple addition of the modulo suffices because
1285 // abs(t[i]) is known to be less than *this/2 (see the link above).
1286 if (t
[i
].isNegative())
1289 return std::move(t
[i
]);
1292 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1293 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1294 /// variables here have the same names as in the algorithm. Comments explain
1295 /// the algorithm and any deviation from it.
1296 static void KnuthDiv(uint32_t *u
, uint32_t *v
, uint32_t *q
, uint32_t* r
,
1297 unsigned m
, unsigned n
) {
1298 assert(u
&& "Must provide dividend");
1299 assert(v
&& "Must provide divisor");
1300 assert(q
&& "Must provide quotient");
1301 assert(u
!= v
&& u
!= q
&& v
!= q
&& "Must use different memory");
1302 assert(n
>1 && "n must be > 1");
1304 // b denotes the base of the number system. In our case b is 2^32.
1305 const uint64_t b
= uint64_t(1) << 32;
1307 // The DEBUG macros here tend to be spam in the debug output if you're not
1308 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1310 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1312 #define DEBUG_KNUTH(X) do {} while(false)
1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m
<< " n=" << n
<< '\n');
1316 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1317 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1318 DEBUG_KNUTH(dbgs() << " by");
1319 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1320 DEBUG_KNUTH(dbgs() << '\n');
1321 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1322 // u and v by d. Note that we have taken Knuth's advice here to use a power
1323 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1324 // 2 allows us to shift instead of multiply and it is easy to determine the
1325 // shift amount from the leading zeros. We are basically normalizing the u
1326 // and v so that its high bits are shifted to the top of v's range without
1327 // overflow. Note that this can require an extra word in u so that u must
1328 // be of length m+n+1.
1329 unsigned shift
= llvm::countl_zero(v
[n
- 1]);
1330 uint32_t v_carry
= 0;
1331 uint32_t u_carry
= 0;
1333 for (unsigned i
= 0; i
< m
+n
; ++i
) {
1334 uint32_t u_tmp
= u
[i
] >> (32 - shift
);
1335 u
[i
] = (u
[i
] << shift
) | u_carry
;
1338 for (unsigned i
= 0; i
< n
; ++i
) {
1339 uint32_t v_tmp
= v
[i
] >> (32 - shift
);
1340 v
[i
] = (v
[i
] << shift
) | v_carry
;
1346 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1347 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1348 DEBUG_KNUTH(dbgs() << " by");
1349 DEBUG_KNUTH(for (int i
= n
; i
> 0; i
--) dbgs() << " " << v
[i
- 1]);
1350 DEBUG_KNUTH(dbgs() << '\n');
1352 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1355 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j
<< '\n');
1356 // D3. [Calculate q'.].
1357 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1358 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1359 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1360 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1361 // on v[n-2] determines at high speed most of the cases in which the trial
1362 // value qp is one too large, and it eliminates all cases where qp is two
1364 uint64_t dividend
= Make_64(u
[j
+n
], u
[j
+n
-1]);
1365 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend
<< '\n');
1366 uint64_t qp
= dividend
/ v
[n
-1];
1367 uint64_t rp
= dividend
% v
[n
-1];
1368 if (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]) {
1371 if (rp
< b
&& (qp
== b
|| qp
*v
[n
-2] > b
*rp
+ u
[j
+n
-2]))
1374 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp
<< ", rp == " << rp
<< '\n');
1376 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1377 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1378 // consists of a simple multiplication by a one-place number, combined with
1380 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1381 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1382 // true value plus b**(n+1), namely as the b's complement of
1383 // the true value, and a "borrow" to the left should be remembered.
1385 for (unsigned i
= 0; i
< n
; ++i
) {
1386 uint64_t p
= uint64_t(qp
) * uint64_t(v
[i
]);
1387 int64_t subres
= int64_t(u
[j
+i
]) - borrow
- Lo_32(p
);
1388 u
[j
+i
] = Lo_32(subres
);
1389 borrow
= Hi_32(p
) - Hi_32(subres
);
1390 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u
[j
+ i
]
1391 << ", borrow = " << borrow
<< '\n');
1393 bool isNeg
= u
[j
+n
] < borrow
;
1394 u
[j
+n
] -= Lo_32(borrow
);
1396 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1397 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1398 DEBUG_KNUTH(dbgs() << '\n');
1400 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1401 // negative, go to step D6; otherwise go on to step D7.
1404 // D6. [Add back]. The probability that this step is necessary is very
1405 // small, on the order of only 2/b. Make sure that test data accounts for
1406 // this possibility. Decrease q[j] by 1
1408 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1409 // A carry will occur to the left of u[j+n], and it should be ignored
1410 // since it cancels with the borrow that occurred in D4.
1412 for (unsigned i
= 0; i
< n
; i
++) {
1413 uint32_t limit
= std::min(u
[j
+i
],v
[i
]);
1414 u
[j
+i
] += v
[i
] + carry
;
1415 carry
= u
[j
+i
] < limit
|| (carry
&& u
[j
+i
] == limit
);
1419 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1420 DEBUG_KNUTH(for (int i
= m
+ n
; i
>= 0; i
--) dbgs() << " " << u
[i
]);
1421 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q
[j
] << '\n');
1423 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1426 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1427 DEBUG_KNUTH(for (int i
= m
; i
>= 0; i
--) dbgs() << " " << q
[i
]);
1428 DEBUG_KNUTH(dbgs() << '\n');
1430 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1431 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1432 // compute the remainder (urem uses this).
1434 // The value d is expressed by the "shift" value above since we avoided
1435 // multiplication by d by using a shift left. So, all we have to do is
1436 // shift right here.
1439 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1440 for (int i
= n
-1; i
>= 0; i
--) {
1441 r
[i
] = (u
[i
] >> shift
) | carry
;
1442 carry
= u
[i
] << (32 - shift
);
1443 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1446 for (int i
= n
-1; i
>= 0; i
--) {
1448 DEBUG_KNUTH(dbgs() << " " << r
[i
]);
1451 DEBUG_KNUTH(dbgs() << '\n');
1453 DEBUG_KNUTH(dbgs() << '\n');
1456 void APInt::divide(const WordType
*LHS
, unsigned lhsWords
, const WordType
*RHS
,
1457 unsigned rhsWords
, WordType
*Quotient
, WordType
*Remainder
) {
1458 assert(lhsWords
>= rhsWords
&& "Fractional result");
1460 // First, compose the values into an array of 32-bit words instead of
1461 // 64-bit words. This is a necessity of both the "short division" algorithm
1462 // and the Knuth "classical algorithm" which requires there to be native
1463 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1464 // can't use 64-bit operands here because we don't have native results of
1465 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1466 // work on large-endian machines.
1467 unsigned n
= rhsWords
* 2;
1468 unsigned m
= (lhsWords
* 2) - n
;
1470 // Allocate space for the temporary values we need either on the stack, if
1471 // it will fit, or on the heap if it won't.
1472 uint32_t SPACE
[128];
1473 uint32_t *U
= nullptr;
1474 uint32_t *V
= nullptr;
1475 uint32_t *Q
= nullptr;
1476 uint32_t *R
= nullptr;
1477 if ((Remainder
?4:3)*n
+2*m
+1 <= 128) {
1480 Q
= &SPACE
[(m
+n
+1) + n
];
1482 R
= &SPACE
[(m
+n
+1) + n
+ (m
+n
)];
1484 U
= new uint32_t[m
+ n
+ 1];
1485 V
= new uint32_t[n
];
1486 Q
= new uint32_t[m
+n
];
1488 R
= new uint32_t[n
];
1491 // Initialize the dividend
1492 memset(U
, 0, (m
+n
+1)*sizeof(uint32_t));
1493 for (unsigned i
= 0; i
< lhsWords
; ++i
) {
1494 uint64_t tmp
= LHS
[i
];
1495 U
[i
* 2] = Lo_32(tmp
);
1496 U
[i
* 2 + 1] = Hi_32(tmp
);
1498 U
[m
+n
] = 0; // this extra word is for "spill" in the Knuth algorithm.
1500 // Initialize the divisor
1501 memset(V
, 0, (n
)*sizeof(uint32_t));
1502 for (unsigned i
= 0; i
< rhsWords
; ++i
) {
1503 uint64_t tmp
= RHS
[i
];
1504 V
[i
* 2] = Lo_32(tmp
);
1505 V
[i
* 2 + 1] = Hi_32(tmp
);
1508 // initialize the quotient and remainder
1509 memset(Q
, 0, (m
+n
) * sizeof(uint32_t));
1511 memset(R
, 0, n
* sizeof(uint32_t));
1513 // Now, adjust m and n for the Knuth division. n is the number of words in
1514 // the divisor. m is the number of words by which the dividend exceeds the
1515 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1516 // contain any zero words or the Knuth algorithm fails.
1517 for (unsigned i
= n
; i
> 0 && V
[i
-1] == 0; i
--) {
1521 for (unsigned i
= m
+n
; i
> 0 && U
[i
-1] == 0; i
--)
1524 // If we're left with only a single word for the divisor, Knuth doesn't work
1525 // so we implement the short division algorithm here. This is much simpler
1526 // and faster because we are certain that we can divide a 64-bit quantity
1527 // by a 32-bit quantity at hardware speed and short division is simply a
1528 // series of such operations. This is just like doing short division but we
1529 // are using base 2^32 instead of base 10.
1530 assert(n
!= 0 && "Divide by zero?");
1532 uint32_t divisor
= V
[0];
1533 uint32_t remainder
= 0;
1534 for (int i
= m
; i
>= 0; i
--) {
1535 uint64_t partial_dividend
= Make_64(remainder
, U
[i
]);
1536 if (partial_dividend
== 0) {
1539 } else if (partial_dividend
< divisor
) {
1541 remainder
= Lo_32(partial_dividend
);
1542 } else if (partial_dividend
== divisor
) {
1546 Q
[i
] = Lo_32(partial_dividend
/ divisor
);
1547 remainder
= Lo_32(partial_dividend
- (Q
[i
] * divisor
));
1553 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1555 KnuthDiv(U
, V
, Q
, R
, m
, n
);
1558 // If the caller wants the quotient
1560 for (unsigned i
= 0; i
< lhsWords
; ++i
)
1561 Quotient
[i
] = Make_64(Q
[i
*2+1], Q
[i
*2]);
1564 // If the caller wants the remainder
1566 for (unsigned i
= 0; i
< rhsWords
; ++i
)
1567 Remainder
[i
] = Make_64(R
[i
*2+1], R
[i
*2]);
1570 // Clean up the memory we allocated.
1571 if (U
!= &SPACE
[0]) {
1579 APInt
APInt::udiv(const APInt
&RHS
) const {
1580 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1582 // First, deal with the easy case
1583 if (isSingleWord()) {
1584 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1585 return APInt(BitWidth
, U
.VAL
/ RHS
.U
.VAL
);
1588 // Get some facts about the LHS and RHS number of bits and words
1589 unsigned lhsWords
= getNumWords(getActiveBits());
1590 unsigned rhsBits
= RHS
.getActiveBits();
1591 unsigned rhsWords
= getNumWords(rhsBits
);
1592 assert(rhsWords
&& "Divided by zero???");
1594 // Deal with some degenerate cases
1597 return APInt(BitWidth
, 0);
1601 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1602 // X / Y ===> 0, iff X < Y
1603 return APInt(BitWidth
, 0);
1606 return APInt(BitWidth
, 1);
1607 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1608 // All high words are zero, just use native divide
1609 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
.U
.pVal
[0]);
1611 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1612 APInt
Quotient(BitWidth
, 0); // to hold result.
1613 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
, nullptr);
1617 APInt
APInt::udiv(uint64_t RHS
) const {
1618 assert(RHS
!= 0 && "Divide by zero?");
1620 // First, deal with the easy case
1622 return APInt(BitWidth
, U
.VAL
/ RHS
);
1624 // Get some facts about the LHS words.
1625 unsigned lhsWords
= getNumWords(getActiveBits());
1627 // Deal with some degenerate cases
1630 return APInt(BitWidth
, 0);
1635 // X / Y ===> 0, iff X < Y
1636 return APInt(BitWidth
, 0);
1639 return APInt(BitWidth
, 1);
1640 if (lhsWords
== 1) // rhsWords is 1 if lhsWords is 1.
1641 // All high words are zero, just use native divide
1642 return APInt(BitWidth
, this->U
.pVal
[0] / RHS
);
1644 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1645 APInt
Quotient(BitWidth
, 0); // to hold result.
1646 divide(U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, nullptr);
1650 APInt
APInt::sdiv(const APInt
&RHS
) const {
1652 if (RHS
.isNegative())
1653 return (-(*this)).udiv(-RHS
);
1654 return -((-(*this)).udiv(RHS
));
1656 if (RHS
.isNegative())
1657 return -(this->udiv(-RHS
));
1658 return this->udiv(RHS
);
1661 APInt
APInt::sdiv(int64_t RHS
) const {
1664 return (-(*this)).udiv(-RHS
);
1665 return -((-(*this)).udiv(RHS
));
1668 return -(this->udiv(-RHS
));
1669 return this->udiv(RHS
);
1672 APInt
APInt::urem(const APInt
&RHS
) const {
1673 assert(BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1674 if (isSingleWord()) {
1675 assert(RHS
.U
.VAL
!= 0 && "Remainder by zero?");
1676 return APInt(BitWidth
, U
.VAL
% RHS
.U
.VAL
);
1679 // Get some facts about the LHS
1680 unsigned lhsWords
= getNumWords(getActiveBits());
1682 // Get some facts about the RHS
1683 unsigned rhsBits
= RHS
.getActiveBits();
1684 unsigned rhsWords
= getNumWords(rhsBits
);
1685 assert(rhsWords
&& "Performing remainder operation by zero ???");
1687 // Check the degenerate cases
1690 return APInt(BitWidth
, 0);
1693 return APInt(BitWidth
, 0);
1694 if (lhsWords
< rhsWords
|| this->ult(RHS
))
1695 // X % Y ===> X, iff X < Y
1699 return APInt(BitWidth
, 0);
1701 // All high words are zero, just use native remainder
1702 return APInt(BitWidth
, U
.pVal
[0] % RHS
.U
.pVal
[0]);
1704 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1705 APInt
Remainder(BitWidth
, 0);
1706 divide(U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, nullptr, Remainder
.U
.pVal
);
1710 uint64_t APInt::urem(uint64_t RHS
) const {
1711 assert(RHS
!= 0 && "Remainder by zero?");
1716 // Get some facts about the LHS
1717 unsigned lhsWords
= getNumWords(getActiveBits());
1719 // Check the degenerate cases
1727 // X % Y ===> X, iff X < Y
1728 return getZExtValue();
1733 // All high words are zero, just use native remainder
1734 return U
.pVal
[0] % RHS
;
1736 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1738 divide(U
.pVal
, lhsWords
, &RHS
, 1, nullptr, &Remainder
);
1742 APInt
APInt::srem(const APInt
&RHS
) const {
1744 if (RHS
.isNegative())
1745 return -((-(*this)).urem(-RHS
));
1746 return -((-(*this)).urem(RHS
));
1748 if (RHS
.isNegative())
1749 return this->urem(-RHS
);
1750 return this->urem(RHS
);
1753 int64_t APInt::srem(int64_t RHS
) const {
1756 return -((-(*this)).urem(-RHS
));
1757 return -((-(*this)).urem(RHS
));
1760 return this->urem(-RHS
);
1761 return this->urem(RHS
);
1764 void APInt::udivrem(const APInt
&LHS
, const APInt
&RHS
,
1765 APInt
&Quotient
, APInt
&Remainder
) {
1766 assert(LHS
.BitWidth
== RHS
.BitWidth
&& "Bit widths must be the same");
1767 unsigned BitWidth
= LHS
.BitWidth
;
1769 // First, deal with the easy case
1770 if (LHS
.isSingleWord()) {
1771 assert(RHS
.U
.VAL
!= 0 && "Divide by zero?");
1772 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
.U
.VAL
;
1773 uint64_t RemVal
= LHS
.U
.VAL
% RHS
.U
.VAL
;
1774 Quotient
= APInt(BitWidth
, QuotVal
);
1775 Remainder
= APInt(BitWidth
, RemVal
);
1779 // Get some size facts about the dividend and divisor
1780 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1781 unsigned rhsBits
= RHS
.getActiveBits();
1782 unsigned rhsWords
= getNumWords(rhsBits
);
1783 assert(rhsWords
&& "Performing divrem operation by zero ???");
1785 // Check the degenerate cases
1786 if (lhsWords
== 0) {
1787 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1788 Remainder
= APInt(BitWidth
, 0); // 0 % Y ===> 0
1793 Quotient
= LHS
; // X / 1 ===> X
1794 Remainder
= APInt(BitWidth
, 0); // X % 1 ===> 0
1797 if (lhsWords
< rhsWords
|| LHS
.ult(RHS
)) {
1798 Remainder
= LHS
; // X % Y ===> X, iff X < Y
1799 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1804 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1805 Remainder
= APInt(BitWidth
, 0); // X % X ===> 0;
1809 // Make sure there is enough space to hold the results.
1810 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1811 // change the size. This is necessary if Quotient or Remainder is aliased
1813 Quotient
.reallocate(BitWidth
);
1814 Remainder
.reallocate(BitWidth
);
1816 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1817 // There is only one word to consider so use the native versions.
1818 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1819 uint64_t rhsValue
= RHS
.U
.pVal
[0];
1820 Quotient
= lhsValue
/ rhsValue
;
1821 Remainder
= lhsValue
% rhsValue
;
1825 // Okay, lets do it the long way
1826 divide(LHS
.U
.pVal
, lhsWords
, RHS
.U
.pVal
, rhsWords
, Quotient
.U
.pVal
,
1828 // Clear the rest of the Quotient and Remainder.
1829 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1830 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1831 std::memset(Remainder
.U
.pVal
+ rhsWords
, 0,
1832 (getNumWords(BitWidth
) - rhsWords
) * APINT_WORD_SIZE
);
1835 void APInt::udivrem(const APInt
&LHS
, uint64_t RHS
, APInt
&Quotient
,
1836 uint64_t &Remainder
) {
1837 assert(RHS
!= 0 && "Divide by zero?");
1838 unsigned BitWidth
= LHS
.BitWidth
;
1840 // First, deal with the easy case
1841 if (LHS
.isSingleWord()) {
1842 uint64_t QuotVal
= LHS
.U
.VAL
/ RHS
;
1843 Remainder
= LHS
.U
.VAL
% RHS
;
1844 Quotient
= APInt(BitWidth
, QuotVal
);
1848 // Get some size facts about the dividend and divisor
1849 unsigned lhsWords
= getNumWords(LHS
.getActiveBits());
1851 // Check the degenerate cases
1852 if (lhsWords
== 0) {
1853 Quotient
= APInt(BitWidth
, 0); // 0 / Y ===> 0
1854 Remainder
= 0; // 0 % Y ===> 0
1859 Quotient
= LHS
; // X / 1 ===> X
1860 Remainder
= 0; // X % 1 ===> 0
1865 Remainder
= LHS
.getZExtValue(); // X % Y ===> X, iff X < Y
1866 Quotient
= APInt(BitWidth
, 0); // X / Y ===> 0, iff X < Y
1871 Quotient
= APInt(BitWidth
, 1); // X / X ===> 1
1872 Remainder
= 0; // X % X ===> 0;
1876 // Make sure there is enough space to hold the results.
1877 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1878 // change the size. This is necessary if Quotient is aliased with LHS.
1879 Quotient
.reallocate(BitWidth
);
1881 if (lhsWords
== 1) { // rhsWords is 1 if lhsWords is 1.
1882 // There is only one word to consider so use the native versions.
1883 uint64_t lhsValue
= LHS
.U
.pVal
[0];
1884 Quotient
= lhsValue
/ RHS
;
1885 Remainder
= lhsValue
% RHS
;
1889 // Okay, lets do it the long way
1890 divide(LHS
.U
.pVal
, lhsWords
, &RHS
, 1, Quotient
.U
.pVal
, &Remainder
);
1891 // Clear the rest of the Quotient.
1892 std::memset(Quotient
.U
.pVal
+ lhsWords
, 0,
1893 (getNumWords(BitWidth
) - lhsWords
) * APINT_WORD_SIZE
);
1896 void APInt::sdivrem(const APInt
&LHS
, const APInt
&RHS
,
1897 APInt
&Quotient
, APInt
&Remainder
) {
1898 if (LHS
.isNegative()) {
1899 if (RHS
.isNegative())
1900 APInt::udivrem(-LHS
, -RHS
, Quotient
, Remainder
);
1902 APInt::udivrem(-LHS
, RHS
, Quotient
, Remainder
);
1906 } else if (RHS
.isNegative()) {
1907 APInt::udivrem(LHS
, -RHS
, Quotient
, Remainder
);
1910 APInt::udivrem(LHS
, RHS
, Quotient
, Remainder
);
1914 void APInt::sdivrem(const APInt
&LHS
, int64_t RHS
,
1915 APInt
&Quotient
, int64_t &Remainder
) {
1916 uint64_t R
= Remainder
;
1917 if (LHS
.isNegative()) {
1919 APInt::udivrem(-LHS
, -RHS
, Quotient
, R
);
1921 APInt::udivrem(-LHS
, RHS
, Quotient
, R
);
1925 } else if (RHS
< 0) {
1926 APInt::udivrem(LHS
, -RHS
, Quotient
, R
);
1929 APInt::udivrem(LHS
, RHS
, Quotient
, R
);
1934 APInt
APInt::sadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1935 APInt Res
= *this+RHS
;
1936 Overflow
= isNonNegative() == RHS
.isNonNegative() &&
1937 Res
.isNonNegative() != isNonNegative();
1941 APInt
APInt::uadd_ov(const APInt
&RHS
, bool &Overflow
) const {
1942 APInt Res
= *this+RHS
;
1943 Overflow
= Res
.ult(RHS
);
1947 APInt
APInt::ssub_ov(const APInt
&RHS
, bool &Overflow
) const {
1948 APInt Res
= *this - RHS
;
1949 Overflow
= isNonNegative() != RHS
.isNonNegative() &&
1950 Res
.isNonNegative() != isNonNegative();
1954 APInt
APInt::usub_ov(const APInt
&RHS
, bool &Overflow
) const {
1955 APInt Res
= *this-RHS
;
1956 Overflow
= Res
.ugt(*this);
1960 APInt
APInt::sdiv_ov(const APInt
&RHS
, bool &Overflow
) const {
1961 // MININT/-1 --> overflow.
1962 Overflow
= isMinSignedValue() && RHS
.isAllOnes();
1966 APInt
APInt::smul_ov(const APInt
&RHS
, bool &Overflow
) const {
1967 APInt Res
= *this * RHS
;
1970 Overflow
= Res
.sdiv(RHS
) != *this ||
1971 (isMinSignedValue() && RHS
.isAllOnes());
1977 APInt
APInt::umul_ov(const APInt
&RHS
, bool &Overflow
) const {
1978 if (countl_zero() + RHS
.countl_zero() + 2 <= BitWidth
) {
1983 APInt Res
= lshr(1) * RHS
;
1984 Overflow
= Res
.isNegative();
1994 APInt
APInt::sshl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
1995 return sshl_ov(ShAmt
.getLimitedValue(getBitWidth()), Overflow
);
1998 APInt
APInt::sshl_ov(unsigned ShAmt
, bool &Overflow
) const {
1999 Overflow
= ShAmt
>= getBitWidth();
2001 return APInt(BitWidth
, 0);
2003 if (isNonNegative()) // Don't allow sign change.
2004 Overflow
= ShAmt
>= countl_zero();
2006 Overflow
= ShAmt
>= countl_one();
2008 return *this << ShAmt
;
2011 APInt
APInt::ushl_ov(const APInt
&ShAmt
, bool &Overflow
) const {
2012 return ushl_ov(ShAmt
.getLimitedValue(getBitWidth()), Overflow
);
2015 APInt
APInt::ushl_ov(unsigned ShAmt
, bool &Overflow
) const {
2016 Overflow
= ShAmt
>= getBitWidth();
2018 return APInt(BitWidth
, 0);
2020 Overflow
= ShAmt
> countl_zero();
2022 return *this << ShAmt
;
2025 APInt
APInt::sadd_sat(const APInt
&RHS
) const {
2027 APInt Res
= sadd_ov(RHS
, Overflow
);
2031 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2032 : APInt::getSignedMaxValue(BitWidth
);
2035 APInt
APInt::uadd_sat(const APInt
&RHS
) const {
2037 APInt Res
= uadd_ov(RHS
, Overflow
);
2041 return APInt::getMaxValue(BitWidth
);
2044 APInt
APInt::ssub_sat(const APInt
&RHS
) const {
2046 APInt Res
= ssub_ov(RHS
, Overflow
);
2050 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2051 : APInt::getSignedMaxValue(BitWidth
);
2054 APInt
APInt::usub_sat(const APInt
&RHS
) const {
2056 APInt Res
= usub_ov(RHS
, Overflow
);
2060 return APInt(BitWidth
, 0);
2063 APInt
APInt::smul_sat(const APInt
&RHS
) const {
2065 APInt Res
= smul_ov(RHS
, Overflow
);
2069 // The result is negative if one and only one of inputs is negative.
2070 bool ResIsNegative
= isNegative() ^ RHS
.isNegative();
2072 return ResIsNegative
? APInt::getSignedMinValue(BitWidth
)
2073 : APInt::getSignedMaxValue(BitWidth
);
2076 APInt
APInt::umul_sat(const APInt
&RHS
) const {
2078 APInt Res
= umul_ov(RHS
, Overflow
);
2082 return APInt::getMaxValue(BitWidth
);
2085 APInt
APInt::sshl_sat(const APInt
&RHS
) const {
2086 return sshl_sat(RHS
.getLimitedValue(getBitWidth()));
2089 APInt
APInt::sshl_sat(unsigned RHS
) const {
2091 APInt Res
= sshl_ov(RHS
, Overflow
);
2095 return isNegative() ? APInt::getSignedMinValue(BitWidth
)
2096 : APInt::getSignedMaxValue(BitWidth
);
2099 APInt
APInt::ushl_sat(const APInt
&RHS
) const {
2100 return ushl_sat(RHS
.getLimitedValue(getBitWidth()));
2103 APInt
APInt::ushl_sat(unsigned RHS
) const {
2105 APInt Res
= ushl_ov(RHS
, Overflow
);
2109 return APInt::getMaxValue(BitWidth
);
2112 void APInt::fromString(unsigned numbits
, StringRef str
, uint8_t radix
) {
2113 // Check our assumptions here
2114 assert(!str
.empty() && "Invalid string length");
2115 assert((radix
== 10 || radix
== 8 || radix
== 16 || radix
== 2 ||
2117 "Radix should be 2, 8, 10, 16, or 36!");
2119 StringRef::iterator p
= str
.begin();
2120 size_t slen
= str
.size();
2121 bool isNeg
= *p
== '-';
2122 if (*p
== '-' || *p
== '+') {
2125 assert(slen
&& "String is only a sign, needs a value.");
2127 assert((slen
<= numbits
|| radix
!= 2) && "Insufficient bit width");
2128 assert(((slen
-1)*3 <= numbits
|| radix
!= 8) && "Insufficient bit width");
2129 assert(((slen
-1)*4 <= numbits
|| radix
!= 16) && "Insufficient bit width");
2130 assert((((slen
-1)*64)/22 <= numbits
|| radix
!= 10) &&
2131 "Insufficient bit width");
2133 // Allocate memory if needed
2137 U
.pVal
= getClearedMemory(getNumWords());
2139 // Figure out if we can shift instead of multiply
2140 unsigned shift
= (radix
== 16 ? 4 : radix
== 8 ? 3 : radix
== 2 ? 1 : 0);
2142 // Enter digit traversal loop
2143 for (StringRef::iterator e
= str
.end(); p
!= e
; ++p
) {
2144 unsigned digit
= getDigit(*p
, radix
);
2145 assert(digit
< radix
&& "Invalid character in digit string");
2147 // Shift or multiply the value by the radix
2155 // Add in the digit we just interpreted
2158 // If its negative, put it in two's complement form
2163 void APInt::toString(SmallVectorImpl
<char> &Str
, unsigned Radix
, bool Signed
,
2164 bool formatAsCLiteral
, bool UpperCase
) const {
2165 assert((Radix
== 10 || Radix
== 8 || Radix
== 16 || Radix
== 2 ||
2167 "Radix should be 2, 8, 10, 16, or 36!");
2169 const char *Prefix
= "";
2170 if (formatAsCLiteral
) {
2173 // Binary literals are a non-standard extension added in gcc 4.3:
2174 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2186 llvm_unreachable("Invalid radix!");
2190 // First, check for a zero value and just short circuit the logic below.
2193 Str
.push_back(*Prefix
);
2200 static const char BothDigits
[] = "0123456789abcdefghijklmnopqrstuvwxyz"
2201 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2202 const char *Digits
= BothDigits
+ (UpperCase
? 36 : 0);
2204 if (isSingleWord()) {
2206 char *BufPtr
= std::end(Buffer
);
2212 int64_t I
= getSExtValue();
2222 Str
.push_back(*Prefix
);
2227 *--BufPtr
= Digits
[N
% Radix
];
2230 Str
.append(BufPtr
, std::end(Buffer
));
2236 if (Signed
&& isNegative()) {
2237 // They want to print the signed version and it is a negative value
2238 // Flip the bits and add one to turn it into the equivalent positive
2239 // value and put a '-' in the result.
2245 Str
.push_back(*Prefix
);
2249 // We insert the digits backward, then reverse them to get the right order.
2250 unsigned StartDig
= Str
.size();
2252 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2253 // because the number of bits per digit (1, 3 and 4 respectively) divides
2254 // equally. We just shift until the value is zero.
2255 if (Radix
== 2 || Radix
== 8 || Radix
== 16) {
2256 // Just shift tmp right for each digit width until it becomes zero
2257 unsigned ShiftAmt
= (Radix
== 16 ? 4 : (Radix
== 8 ? 3 : 1));
2258 unsigned MaskAmt
= Radix
- 1;
2260 while (Tmp
.getBoolValue()) {
2261 unsigned Digit
= unsigned(Tmp
.getRawData()[0]) & MaskAmt
;
2262 Str
.push_back(Digits
[Digit
]);
2263 Tmp
.lshrInPlace(ShiftAmt
);
2266 while (Tmp
.getBoolValue()) {
2268 udivrem(Tmp
, Radix
, Tmp
, Digit
);
2269 assert(Digit
< Radix
&& "divide failed");
2270 Str
.push_back(Digits
[Digit
]);
2274 // Reverse the digits before returning.
2275 std::reverse(Str
.begin()+StartDig
, Str
.end());
2278 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2279 LLVM_DUMP_METHOD
void APInt::dump() const {
2280 SmallString
<40> S
, U
;
2281 this->toStringUnsigned(U
);
2282 this->toStringSigned(S
);
2283 dbgs() << "APInt(" << BitWidth
<< "b, "
2284 << U
<< "u " << S
<< "s)\n";
2288 void APInt::print(raw_ostream
&OS
, bool isSigned
) const {
2290 this->toString(S
, 10, isSigned
, /* formatAsCLiteral = */false);
2294 // This implements a variety of operations on a representation of
2295 // arbitrary precision, two's-complement, bignum integer values.
2297 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2298 // and unrestricting assumption.
2299 static_assert(APInt::APINT_BITS_PER_WORD
% 2 == 0,
2300 "Part width must be divisible by 2!");
2302 // Returns the integer part with the least significant BITS set.
2303 // BITS cannot be zero.
2304 static inline APInt::WordType
lowBitMask(unsigned bits
) {
2305 assert(bits
!= 0 && bits
<= APInt::APINT_BITS_PER_WORD
);
2306 return ~(APInt::WordType
) 0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
2309 /// Returns the value of the lower half of PART.
2310 static inline APInt::WordType
lowHalf(APInt::WordType part
) {
2311 return part
& lowBitMask(APInt::APINT_BITS_PER_WORD
/ 2);
2314 /// Returns the value of the upper half of PART.
2315 static inline APInt::WordType
highHalf(APInt::WordType part
) {
2316 return part
>> (APInt::APINT_BITS_PER_WORD
/ 2);
2319 /// Sets the least significant part of a bignum to the input value, and zeroes
2320 /// out higher parts.
2321 void APInt::tcSet(WordType
*dst
, WordType part
, unsigned parts
) {
2324 for (unsigned i
= 1; i
< parts
; i
++)
2328 /// Assign one bignum to another.
2329 void APInt::tcAssign(WordType
*dst
, const WordType
*src
, unsigned parts
) {
2330 for (unsigned i
= 0; i
< parts
; i
++)
2334 /// Returns true if a bignum is zero, false otherwise.
2335 bool APInt::tcIsZero(const WordType
*src
, unsigned parts
) {
2336 for (unsigned i
= 0; i
< parts
; i
++)
2343 /// Extract the given bit of a bignum; returns 0 or 1.
2344 int APInt::tcExtractBit(const WordType
*parts
, unsigned bit
) {
2345 return (parts
[whichWord(bit
)] & maskBit(bit
)) != 0;
2348 /// Set the given bit of a bignum.
2349 void APInt::tcSetBit(WordType
*parts
, unsigned bit
) {
2350 parts
[whichWord(bit
)] |= maskBit(bit
);
2353 /// Clears the given bit of a bignum.
2354 void APInt::tcClearBit(WordType
*parts
, unsigned bit
) {
2355 parts
[whichWord(bit
)] &= ~maskBit(bit
);
2358 /// Returns the bit number of the least significant set bit of a number. If the
2359 /// input number has no bits set UINT_MAX is returned.
2360 unsigned APInt::tcLSB(const WordType
*parts
, unsigned n
) {
2361 for (unsigned i
= 0; i
< n
; i
++) {
2362 if (parts
[i
] != 0) {
2363 unsigned lsb
= llvm::countr_zero(parts
[i
]);
2364 return lsb
+ i
* APINT_BITS_PER_WORD
;
2371 /// Returns the bit number of the most significant set bit of a number.
2372 /// If the input number has no bits set UINT_MAX is returned.
2373 unsigned APInt::tcMSB(const WordType
*parts
, unsigned n
) {
2377 if (parts
[n
] != 0) {
2378 static_assert(sizeof(parts
[n
]) <= sizeof(uint64_t));
2379 unsigned msb
= llvm::Log2_64(parts
[n
]);
2381 return msb
+ n
* APINT_BITS_PER_WORD
;
2388 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
2389 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
2390 /// significant bit of DST. All high bits above srcBITS in DST are zero-filled.
2393 APInt::tcExtract(WordType
*dst
, unsigned dstCount
, const WordType
*src
,
2394 unsigned srcBits
, unsigned srcLSB
) {
2395 unsigned dstParts
= (srcBits
+ APINT_BITS_PER_WORD
- 1) / APINT_BITS_PER_WORD
;
2396 assert(dstParts
<= dstCount
);
2398 unsigned firstSrcPart
= srcLSB
/ APINT_BITS_PER_WORD
;
2399 tcAssign(dst
, src
+ firstSrcPart
, dstParts
);
2401 unsigned shift
= srcLSB
% APINT_BITS_PER_WORD
;
2402 tcShiftRight(dst
, dstParts
, shift
);
2404 // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2405 // in DST. If this is less that srcBits, append the rest, else
2406 // clear the high bits.
2407 unsigned n
= dstParts
* APINT_BITS_PER_WORD
- shift
;
2409 WordType mask
= lowBitMask (srcBits
- n
);
2410 dst
[dstParts
- 1] |= ((src
[firstSrcPart
+ dstParts
] & mask
)
2411 << n
% APINT_BITS_PER_WORD
);
2412 } else if (n
> srcBits
) {
2413 if (srcBits
% APINT_BITS_PER_WORD
)
2414 dst
[dstParts
- 1] &= lowBitMask (srcBits
% APINT_BITS_PER_WORD
);
2417 // Clear high parts.
2418 while (dstParts
< dstCount
)
2419 dst
[dstParts
++] = 0;
2422 //// DST += RHS + C where C is zero or one. Returns the carry flag.
2423 APInt::WordType
APInt::tcAdd(WordType
*dst
, const WordType
*rhs
,
2424 WordType c
, unsigned parts
) {
2427 for (unsigned i
= 0; i
< parts
; i
++) {
2428 WordType l
= dst
[i
];
2430 dst
[i
] += rhs
[i
] + 1;
2441 /// This function adds a single "word" integer, src, to the multiple
2442 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2443 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2444 /// @returns the carry of the addition.
2445 APInt::WordType
APInt::tcAddPart(WordType
*dst
, WordType src
,
2447 for (unsigned i
= 0; i
< parts
; ++i
) {
2450 return 0; // No need to carry so exit early.
2451 src
= 1; // Carry one to next digit.
2457 /// DST -= RHS + C where C is zero or one. Returns the carry flag.
2458 APInt::WordType
APInt::tcSubtract(WordType
*dst
, const WordType
*rhs
,
2459 WordType c
, unsigned parts
) {
2462 for (unsigned i
= 0; i
< parts
; i
++) {
2463 WordType l
= dst
[i
];
2465 dst
[i
] -= rhs
[i
] + 1;
2476 /// This function subtracts a single "word" (64-bit word), src, from
2477 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2478 /// no further borrowing is needed or it runs out of "words" in dst. The result
2479 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2480 /// exhausted. In other words, if src > dst then this function returns 1,
2482 /// @returns the borrow out of the subtraction
2483 APInt::WordType
APInt::tcSubtractPart(WordType
*dst
, WordType src
,
2485 for (unsigned i
= 0; i
< parts
; ++i
) {
2486 WordType Dst
= dst
[i
];
2489 return 0; // No need to borrow so exit early.
2490 src
= 1; // We have to "borrow 1" from next "word"
2496 /// Negate a bignum in-place.
2497 void APInt::tcNegate(WordType
*dst
, unsigned parts
) {
2498 tcComplement(dst
, parts
);
2499 tcIncrement(dst
, parts
);
2502 /// DST += SRC * MULTIPLIER + CARRY if add is true
2503 /// DST = SRC * MULTIPLIER + CARRY if add is false
2504 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2505 /// they must start at the same point, i.e. DST == SRC.
2506 /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2507 /// returned. Otherwise DST is filled with the least significant
2508 /// DSTPARTS parts of the result, and if all of the omitted higher
2509 /// parts were zero return zero, otherwise overflow occurred and
2511 int APInt::tcMultiplyPart(WordType
*dst
, const WordType
*src
,
2512 WordType multiplier
, WordType carry
,
2513 unsigned srcParts
, unsigned dstParts
,
2515 // Otherwise our writes of DST kill our later reads of SRC.
2516 assert(dst
<= src
|| dst
>= src
+ srcParts
);
2517 assert(dstParts
<= srcParts
+ 1);
2519 // N loops; minimum of dstParts and srcParts.
2520 unsigned n
= std::min(dstParts
, srcParts
);
2522 for (unsigned i
= 0; i
< n
; i
++) {
2523 // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2524 // This cannot overflow, because:
2525 // (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2526 // which is less than n^2.
2527 WordType srcPart
= src
[i
];
2528 WordType low
, mid
, high
;
2529 if (multiplier
== 0 || srcPart
== 0) {
2533 low
= lowHalf(srcPart
) * lowHalf(multiplier
);
2534 high
= highHalf(srcPart
) * highHalf(multiplier
);
2536 mid
= lowHalf(srcPart
) * highHalf(multiplier
);
2537 high
+= highHalf(mid
);
2538 mid
<<= APINT_BITS_PER_WORD
/ 2;
2539 if (low
+ mid
< low
)
2543 mid
= highHalf(srcPart
) * lowHalf(multiplier
);
2544 high
+= highHalf(mid
);
2545 mid
<<= APINT_BITS_PER_WORD
/ 2;
2546 if (low
+ mid
< low
)
2551 if (low
+ carry
< low
)
2557 // And now DST[i], and store the new low part there.
2558 if (low
+ dst
[i
] < low
)
2567 if (srcParts
< dstParts
) {
2568 // Full multiplication, there is no overflow.
2569 assert(srcParts
+ 1 == dstParts
);
2570 dst
[srcParts
] = carry
;
2574 // We overflowed if there is carry.
2578 // We would overflow if any significant unwritten parts would be
2579 // non-zero. This is true if any remaining src parts are non-zero
2580 // and the multiplier is non-zero.
2582 for (unsigned i
= dstParts
; i
< srcParts
; i
++)
2586 // We fitted in the narrow destination.
2590 /// DST = LHS * RHS, where DST has the same width as the operands and
2591 /// is filled with the least significant parts of the result. Returns
2592 /// one if overflow occurred, otherwise zero. DST must be disjoint
2593 /// from both operands.
2594 int APInt::tcMultiply(WordType
*dst
, const WordType
*lhs
,
2595 const WordType
*rhs
, unsigned parts
) {
2596 assert(dst
!= lhs
&& dst
!= rhs
);
2599 tcSet(dst
, 0, parts
);
2601 for (unsigned i
= 0; i
< parts
; i
++)
2602 overflow
|= tcMultiplyPart(&dst
[i
], lhs
, rhs
[i
], 0, parts
,
2608 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2609 /// operands. No overflow occurs. DST must be disjoint from both operands.
2610 void APInt::tcFullMultiply(WordType
*dst
, const WordType
*lhs
,
2611 const WordType
*rhs
, unsigned lhsParts
,
2612 unsigned rhsParts
) {
2613 // Put the narrower number on the LHS for less loops below.
2614 if (lhsParts
> rhsParts
)
2615 return tcFullMultiply (dst
, rhs
, lhs
, rhsParts
, lhsParts
);
2617 assert(dst
!= lhs
&& dst
!= rhs
);
2619 tcSet(dst
, 0, rhsParts
);
2621 for (unsigned i
= 0; i
< lhsParts
; i
++)
2622 tcMultiplyPart(&dst
[i
], rhs
, lhs
[i
], 0, rhsParts
, rhsParts
+ 1, true);
2625 // If RHS is zero LHS and REMAINDER are left unchanged, return one.
2626 // Otherwise set LHS to LHS / RHS with the fractional part discarded,
2627 // set REMAINDER to the remainder, return zero. i.e.
2629 // OLD_LHS = RHS * LHS + REMAINDER
2631 // SCRATCH is a bignum of the same size as the operands and result for
2632 // use by the routine; its contents need not be initialized and are
2633 // destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2634 int APInt::tcDivide(WordType
*lhs
, const WordType
*rhs
,
2635 WordType
*remainder
, WordType
*srhs
,
2637 assert(lhs
!= remainder
&& lhs
!= srhs
&& remainder
!= srhs
);
2639 unsigned shiftCount
= tcMSB(rhs
, parts
) + 1;
2640 if (shiftCount
== 0)
2643 shiftCount
= parts
* APINT_BITS_PER_WORD
- shiftCount
;
2644 unsigned n
= shiftCount
/ APINT_BITS_PER_WORD
;
2645 WordType mask
= (WordType
) 1 << (shiftCount
% APINT_BITS_PER_WORD
);
2647 tcAssign(srhs
, rhs
, parts
);
2648 tcShiftLeft(srhs
, parts
, shiftCount
);
2649 tcAssign(remainder
, lhs
, parts
);
2650 tcSet(lhs
, 0, parts
);
2652 // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
2655 int compare
= tcCompare(remainder
, srhs
, parts
);
2657 tcSubtract(remainder
, srhs
, 0, parts
);
2661 if (shiftCount
== 0)
2664 tcShiftRight(srhs
, parts
, 1);
2665 if ((mask
>>= 1) == 0) {
2666 mask
= (WordType
) 1 << (APINT_BITS_PER_WORD
- 1);
2674 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2675 /// no restrictions on Count.
2676 void APInt::tcShiftLeft(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2677 // Don't bother performing a no-op shift.
2681 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2682 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2683 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2685 // Fastpath for moving by whole words.
2686 if (BitShift
== 0) {
2687 std::memmove(Dst
+ WordShift
, Dst
, (Words
- WordShift
) * APINT_WORD_SIZE
);
2689 while (Words
-- > WordShift
) {
2690 Dst
[Words
] = Dst
[Words
- WordShift
] << BitShift
;
2691 if (Words
> WordShift
)
2693 Dst
[Words
- WordShift
- 1] >> (APINT_BITS_PER_WORD
- BitShift
);
2697 // Fill in the remainder with 0s.
2698 std::memset(Dst
, 0, WordShift
* APINT_WORD_SIZE
);
2701 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2702 /// are no restrictions on Count.
2703 void APInt::tcShiftRight(WordType
*Dst
, unsigned Words
, unsigned Count
) {
2704 // Don't bother performing a no-op shift.
2708 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2709 unsigned WordShift
= std::min(Count
/ APINT_BITS_PER_WORD
, Words
);
2710 unsigned BitShift
= Count
% APINT_BITS_PER_WORD
;
2712 unsigned WordsToMove
= Words
- WordShift
;
2713 // Fastpath for moving by whole words.
2714 if (BitShift
== 0) {
2715 std::memmove(Dst
, Dst
+ WordShift
, WordsToMove
* APINT_WORD_SIZE
);
2717 for (unsigned i
= 0; i
!= WordsToMove
; ++i
) {
2718 Dst
[i
] = Dst
[i
+ WordShift
] >> BitShift
;
2719 if (i
+ 1 != WordsToMove
)
2720 Dst
[i
] |= Dst
[i
+ WordShift
+ 1] << (APINT_BITS_PER_WORD
- BitShift
);
2724 // Fill in the remainder with 0s.
2725 std::memset(Dst
+ WordsToMove
, 0, WordShift
* APINT_WORD_SIZE
);
2728 // Comparison (unsigned) of two bignums.
2729 int APInt::tcCompare(const WordType
*lhs
, const WordType
*rhs
,
2733 if (lhs
[parts
] != rhs
[parts
])
2734 return (lhs
[parts
] > rhs
[parts
]) ? 1 : -1;
2740 APInt
llvm::APIntOps::RoundingUDiv(const APInt
&A
, const APInt
&B
,
2741 APInt::Rounding RM
) {
2742 // Currently udivrem always rounds down.
2744 case APInt::Rounding::DOWN
:
2745 case APInt::Rounding::TOWARD_ZERO
:
2747 case APInt::Rounding::UP
: {
2749 APInt::udivrem(A
, B
, Quo
, Rem
);
2755 llvm_unreachable("Unknown APInt::Rounding enum");
2758 APInt
llvm::APIntOps::RoundingSDiv(const APInt
&A
, const APInt
&B
,
2759 APInt::Rounding RM
) {
2761 case APInt::Rounding::DOWN
:
2762 case APInt::Rounding::UP
: {
2764 APInt::sdivrem(A
, B
, Quo
, Rem
);
2767 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2768 // We want to check whether the non-integer part of the mathematical value
2769 // is negative or not. If the non-integer part is negative, we need to round
2770 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2771 // already rounded down.
2772 if (RM
== APInt::Rounding::DOWN
) {
2773 if (Rem
.isNegative() != B
.isNegative())
2777 if (Rem
.isNegative() != B
.isNegative())
2781 // Currently sdiv rounds towards zero.
2782 case APInt::Rounding::TOWARD_ZERO
:
2785 llvm_unreachable("Unknown APInt::Rounding enum");
2788 std::optional
<APInt
>
2789 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A
, APInt B
, APInt C
,
2790 unsigned RangeWidth
) {
2791 unsigned CoeffWidth
= A
.getBitWidth();
2792 assert(CoeffWidth
== B
.getBitWidth() && CoeffWidth
== C
.getBitWidth());
2793 assert(RangeWidth
<= CoeffWidth
&&
2794 "Value range width should be less than coefficient width");
2795 assert(RangeWidth
> 1 && "Value range bit width should be > 1");
2797 LLVM_DEBUG(dbgs() << __func__
<< ": solving " << A
<< "x^2 + " << B
2798 << "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2800 // Identify 0 as a (non)solution immediately.
2801 if (C
.sextOrTrunc(RangeWidth
).isZero()) {
2802 LLVM_DEBUG(dbgs() << __func__
<< ": zero solution\n");
2803 return APInt(CoeffWidth
, 0);
2806 // The result of APInt arithmetic has the same bit width as the operands,
2807 // so it can actually lose high bits. A product of two n-bit integers needs
2808 // 2n-1 bits to represent the full value.
2809 // The operation done below (on quadratic coefficients) that can produce
2810 // the largest value is the evaluation of the equation during bisection,
2811 // which needs 3 times the bitwidth of the coefficient, so the total number
2812 // of required bits is 3n.
2814 // The purpose of this extension is to simulate the set Z of all integers,
2815 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2816 // and negative numbers (not so much in a modulo arithmetic). The method
2817 // used to solve the equation is based on the standard formula for real
2818 // numbers, and uses the concepts of "positive" and "negative" with their
2821 A
= A
.sext(CoeffWidth
);
2822 B
= B
.sext(CoeffWidth
);
2823 C
= C
.sext(CoeffWidth
);
2825 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2826 // the bit width has increased.
2827 if (A
.isNegative()) {
2833 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2834 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2835 // and R = 2^BitWidth.
2836 // Since we're trying not only to find exact solutions, but also values
2837 // that "wrap around", such a set will always have a solution, i.e. an x
2838 // that satisfies at least one of the equations, or such that |q(x)|
2839 // exceeds kR, while |q(x-1)| for the same k does not.
2841 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2842 // positive solution n (in the above sense), and also such that the n
2843 // will be the least among all solutions corresponding to k = 0, 1, ...
2844 // (more precisely, the least element in the set
2845 // { n(k) | k is such that a solution n(k) exists }).
2847 // Consider the parabola (over real numbers) that corresponds to the
2848 // quadratic equation. Since A > 0, the arms of the parabola will point
2849 // up. Picking different values of k will shift it up and down by R.
2851 // We want to shift the parabola in such a way as to reduce the problem
2852 // of solving q(x) = kR to solving shifted_q(x) = 0.
2853 // (The interesting solutions are the ceilings of the real number
2855 APInt R
= APInt::getOneBitSet(CoeffWidth
, RangeWidth
);
2860 auto RoundUp
= [] (const APInt
&V
, const APInt
&A
) -> APInt
{
2861 assert(A
.isStrictlyPositive());
2862 APInt T
= V
.abs().urem(A
);
2865 return V
.isNegative() ? V
+T
: V
+(A
-T
);
2868 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2869 // iff B is positive.
2870 if (B
.isNonNegative()) {
2871 // If B >= 0, the vertex it at a negative location (or at 0), so in
2872 // order to have a non-negative solution we need to pick k that makes
2873 // C-kR negative. To satisfy all the requirements for the solution
2874 // that we are looking for, it needs to be closest to 0 of all k.
2876 if (C
.isStrictlyPositive())
2878 // Pick the greater solution.
2881 // If B < 0, the vertex is at a positive location. For any solution
2882 // to exist, the discriminant must be non-negative. This means that
2883 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2884 // lower bound on values of k: kR >= C - B^2/4A.
2885 APInt LowkR
= C
- SqrB
.udiv(2*TwoA
); // udiv because all values > 0.
2886 // Round LowkR up (towards +inf) to the nearest kR.
2887 LowkR
= RoundUp(LowkR
, R
);
2889 // If there exists k meeting the condition above, and such that
2890 // C-kR > 0, there will be two positive real number solutions of
2891 // q(x) = kR. Out of all such values of k, pick the one that makes
2892 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2893 // In other words, find maximum k such that LowkR <= kR < C.
2895 // If LowkR < C, then such a k is guaranteed to exist because
2896 // LowkR itself is a multiple of R.
2897 C
-= -RoundUp(-C
, R
); // C = C - RoundDown(C, R)
2898 // Pick the smaller solution.
2901 // If C-kR < 0 for all potential k's, it means that one solution
2902 // will be negative, while the other will be positive. The positive
2903 // solution will shift towards 0 if the parabola is moved up.
2904 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2905 // to 0, or in other words, out of all parabolas that have solutions,
2906 // pick the one that is the farthest "up").
2907 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2909 // Pick the greater solution.
2914 LLVM_DEBUG(dbgs() << __func__
<< ": updated coefficients " << A
<< "x^2 + "
2915 << B
<< "x + " << C
<< ", rw:" << RangeWidth
<< '\n');
2917 APInt D
= SqrB
- 4*A
*C
;
2918 assert(D
.isNonNegative() && "Negative discriminant");
2919 APInt SQ
= D
.sqrt();
2922 bool InexactSQ
= Q
!= D
;
2923 // The calculated SQ may actually be greater than the exact (non-integer)
2924 // value. If that's the case, decrement SQ to get a value that is lower.
2931 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2932 // When using the quadratic formula directly, the calculated low root
2933 // may be greater than the exact one, since we would be subtracting SQ.
2934 // To make sure that the calculated root is not greater than the exact
2935 // one, subtract SQ+1 when calculating the low root (for inexact value
2938 APInt::sdivrem(-B
- (SQ
+InexactSQ
), TwoA
, X
, Rem
);
2940 APInt::sdivrem(-B
+ SQ
, TwoA
, X
, Rem
);
2942 // The updated coefficients should be such that the (exact) solution is
2943 // positive. Since APInt division rounds towards 0, the calculated one
2944 // can be 0, but cannot be negative.
2945 assert(X
.isNonNegative() && "Solution should be non-negative");
2947 if (!InexactSQ
&& Rem
.isZero()) {
2948 LLVM_DEBUG(dbgs() << __func__
<< ": solution (root): " << X
<< '\n');
2952 assert((SQ
*SQ
).sle(D
) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2953 // The exact value of the square root of D should be between SQ and SQ+1.
2954 // This implies that the solution should be between that corresponding to
2955 // SQ (i.e. X) and that corresponding to SQ+1.
2957 // The calculated X cannot be greater than the exact (real) solution.
2958 // Actually it must be strictly less than the exact solution, while
2959 // X+1 will be greater than or equal to it.
2961 APInt VX
= (A
*X
+ B
)*X
+ C
;
2962 APInt VY
= VX
+ TwoA
*X
+ A
+ B
;
2964 VX
.isNegative() != VY
.isNegative() || VX
.isZero() != VY
.isZero();
2965 // If the sign did not change between X and X+1, X is not a valid solution.
2966 // This could happen when the actual (exact) roots don't have an integer
2967 // between them, so they would both be contained between X and X+1.
2969 LLVM_DEBUG(dbgs() << __func__
<< ": no valid solution\n");
2970 return std::nullopt
;
2974 LLVM_DEBUG(dbgs() << __func__
<< ": solution (wrap): " << X
<< '\n');
2978 std::optional
<unsigned>
2979 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt
&A
, const APInt
&B
) {
2980 assert(A
.getBitWidth() == B
.getBitWidth() && "Must have the same bitwidth");
2982 return std::nullopt
;
2983 return A
.getBitWidth() - ((A
^ B
).countl_zero() + 1);
2986 APInt
llvm::APIntOps::ScaleBitMask(const APInt
&A
, unsigned NewBitWidth
,
2987 bool MatchAllBits
) {
2988 unsigned OldBitWidth
= A
.getBitWidth();
2989 assert((((OldBitWidth
% NewBitWidth
) == 0) ||
2990 ((NewBitWidth
% OldBitWidth
) == 0)) &&
2991 "One size should be a multiple of the other one. "
2992 "Can't do fractional scaling.");
2994 // Check for matching bitwidths.
2995 if (OldBitWidth
== NewBitWidth
)
2998 APInt NewA
= APInt::getZero(NewBitWidth
);
3000 // Check for null input.
3004 if (NewBitWidth
> OldBitWidth
) {
3006 unsigned Scale
= NewBitWidth
/ OldBitWidth
;
3007 for (unsigned i
= 0; i
!= OldBitWidth
; ++i
)
3009 NewA
.setBits(i
* Scale
, (i
+ 1) * Scale
);
3011 unsigned Scale
= OldBitWidth
/ NewBitWidth
;
3012 for (unsigned i
= 0; i
!= NewBitWidth
; ++i
) {
3014 if (A
.extractBits(Scale
, i
* Scale
).isAllOnes())
3017 if (!A
.extractBits(Scale
, i
* Scale
).isZero())
3026 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3027 /// with the integer held in IntVal.
3028 void llvm::StoreIntToMemory(const APInt
&IntVal
, uint8_t *Dst
,
3029 unsigned StoreBytes
) {
3030 assert((IntVal
.getBitWidth()+7)/8 >= StoreBytes
&& "Integer too small!");
3031 const uint8_t *Src
= (const uint8_t *)IntVal
.getRawData();
3033 if (sys::IsLittleEndianHost
) {
3034 // Little-endian host - the source is ordered from LSB to MSB. Order the
3035 // destination from LSB to MSB: Do a straight copy.
3036 memcpy(Dst
, Src
, StoreBytes
);
3038 // Big-endian host - the source is an array of 64 bit words ordered from
3039 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
3040 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3041 while (StoreBytes
> sizeof(uint64_t)) {
3042 StoreBytes
-= sizeof(uint64_t);
3043 // May not be aligned so use memcpy.
3044 memcpy(Dst
+ StoreBytes
, Src
, sizeof(uint64_t));
3045 Src
+= sizeof(uint64_t);
3048 memcpy(Dst
, Src
+ sizeof(uint64_t) - StoreBytes
, StoreBytes
);
3052 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3053 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3054 void llvm::LoadIntFromMemory(APInt
&IntVal
, const uint8_t *Src
,
3055 unsigned LoadBytes
) {
3056 assert((IntVal
.getBitWidth()+7)/8 >= LoadBytes
&& "Integer too small!");
3057 uint8_t *Dst
= reinterpret_cast<uint8_t *>(
3058 const_cast<uint64_t *>(IntVal
.getRawData()));
3060 if (sys::IsLittleEndianHost
)
3061 // Little-endian host - the destination must be ordered from LSB to MSB.
3062 // The source is ordered from LSB to MSB: Do a straight copy.
3063 memcpy(Dst
, Src
, LoadBytes
);
3065 // Big-endian - the destination is an array of 64 bit words ordered from
3066 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
3067 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3069 while (LoadBytes
> sizeof(uint64_t)) {
3070 LoadBytes
-= sizeof(uint64_t);
3071 // May not be aligned so use memcpy.
3072 memcpy(Dst
, Src
+ LoadBytes
, sizeof(uint64_t));
3073 Dst
+= sizeof(uint64_t);
3076 memcpy(Dst
+ sizeof(uint64_t) - LoadBytes
, Src
, LoadBytes
);