[llvm-exegesis][NFC] Pass Instruction instead of bare Opcode
[llvm-core.git] / lib / Support / APInt.cpp
blobf2f5cca946f3706314a97f8c005aa26c90ce5b44
1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision integer
11 // constant values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/ADT/bit.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <climits>
29 #include <cmath>
30 #include <cstdlib>
31 #include <cstring>
32 using namespace llvm;
34 #define DEBUG_TYPE "apint"
36 /// A utility function for allocating memory, checking for allocation failures,
37 /// and ensuring the contents are zeroed.
38 inline static uint64_t* getClearedMemory(unsigned numWords) {
39 uint64_t *result = new uint64_t[numWords];
40 memset(result, 0, numWords * sizeof(uint64_t));
41 return result;
44 /// A utility function for allocating memory and checking for allocation
45 /// failure. The content is not zeroed.
46 inline static uint64_t* getMemory(unsigned numWords) {
47 return new uint64_t[numWords];
50 /// A utility function that converts a character to a digit.
51 inline static unsigned getDigit(char cdigit, uint8_t radix) {
52 unsigned r;
54 if (radix == 16 || radix == 36) {
55 r = cdigit - '0';
56 if (r <= 9)
57 return r;
59 r = cdigit - 'A';
60 if (r <= radix - 11U)
61 return r + 10;
63 r = cdigit - 'a';
64 if (r <= radix - 11U)
65 return r + 10;
67 radix = 10;
70 r = cdigit - '0';
71 if (r < radix)
72 return r;
74 return -1U;
78 void APInt::initSlowCase(uint64_t val, bool isSigned) {
79 U.pVal = getClearedMemory(getNumWords());
80 U.pVal[0] = val;
81 if (isSigned && int64_t(val) < 0)
82 for (unsigned i = 1; i < getNumWords(); ++i)
83 U.pVal[i] = WORDTYPE_MAX;
84 clearUnusedBits();
87 void APInt::initSlowCase(const APInt& that) {
88 U.pVal = getMemory(getNumWords());
89 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
92 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
93 assert(BitWidth && "Bitwidth too small");
94 assert(bigVal.data() && "Null pointer detected!");
95 if (isSingleWord())
96 U.VAL = bigVal[0];
97 else {
98 // Get memory, cleared to 0
99 U.pVal = getClearedMemory(getNumWords());
100 // Calculate the number of words to copy
101 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
102 // Copy the words from bigVal to pVal
103 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
105 // Make sure unused high bits are cleared
106 clearUnusedBits();
109 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
110 : BitWidth(numBits) {
111 initFromArray(bigVal);
114 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
115 : BitWidth(numBits) {
116 initFromArray(makeArrayRef(bigVal, numWords));
119 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
120 : BitWidth(numbits) {
121 assert(BitWidth && "Bitwidth too small");
122 fromString(numbits, Str, radix);
125 void APInt::reallocate(unsigned NewBitWidth) {
126 // If the number of words is the same we can just change the width and stop.
127 if (getNumWords() == getNumWords(NewBitWidth)) {
128 BitWidth = NewBitWidth;
129 return;
132 // If we have an allocation, delete it.
133 if (!isSingleWord())
134 delete [] U.pVal;
136 // Update BitWidth.
137 BitWidth = NewBitWidth;
139 // If we are supposed to have an allocation, create it.
140 if (!isSingleWord())
141 U.pVal = getMemory(getNumWords());
144 void APInt::AssignSlowCase(const APInt& RHS) {
145 // Don't do anything for X = X
146 if (this == &RHS)
147 return;
149 // Adjust the bit width and handle allocations as necessary.
150 reallocate(RHS.getBitWidth());
152 // Copy the data.
153 if (isSingleWord())
154 U.VAL = RHS.U.VAL;
155 else
156 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
159 /// This method 'profiles' an APInt for use with FoldingSet.
160 void APInt::Profile(FoldingSetNodeID& ID) const {
161 ID.AddInteger(BitWidth);
163 if (isSingleWord()) {
164 ID.AddInteger(U.VAL);
165 return;
168 unsigned NumWords = getNumWords();
169 for (unsigned i = 0; i < NumWords; ++i)
170 ID.AddInteger(U.pVal[i]);
173 /// Prefix increment operator. Increments the APInt by one.
174 APInt& APInt::operator++() {
175 if (isSingleWord())
176 ++U.VAL;
177 else
178 tcIncrement(U.pVal, getNumWords());
179 return clearUnusedBits();
182 /// Prefix decrement operator. Decrements the APInt by one.
183 APInt& APInt::operator--() {
184 if (isSingleWord())
185 --U.VAL;
186 else
187 tcDecrement(U.pVal, getNumWords());
188 return clearUnusedBits();
191 /// Adds the RHS APint to this APInt.
192 /// @returns this, after addition of RHS.
193 /// Addition assignment operator.
194 APInt& APInt::operator+=(const APInt& RHS) {
195 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
196 if (isSingleWord())
197 U.VAL += RHS.U.VAL;
198 else
199 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
200 return clearUnusedBits();
203 APInt& APInt::operator+=(uint64_t RHS) {
204 if (isSingleWord())
205 U.VAL += RHS;
206 else
207 tcAddPart(U.pVal, RHS, getNumWords());
208 return clearUnusedBits();
211 /// Subtracts the RHS APInt from this APInt
212 /// @returns this, after subtraction
213 /// Subtraction assignment operator.
214 APInt& APInt::operator-=(const APInt& RHS) {
215 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
216 if (isSingleWord())
217 U.VAL -= RHS.U.VAL;
218 else
219 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
220 return clearUnusedBits();
223 APInt& APInt::operator-=(uint64_t RHS) {
224 if (isSingleWord())
225 U.VAL -= RHS;
226 else
227 tcSubtractPart(U.pVal, RHS, getNumWords());
228 return clearUnusedBits();
231 APInt APInt::operator*(const APInt& RHS) const {
232 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
233 if (isSingleWord())
234 return APInt(BitWidth, U.VAL * RHS.U.VAL);
236 APInt Result(getMemory(getNumWords()), getBitWidth());
238 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
240 Result.clearUnusedBits();
241 return Result;
244 void APInt::AndAssignSlowCase(const APInt& RHS) {
245 tcAnd(U.pVal, RHS.U.pVal, getNumWords());
248 void APInt::OrAssignSlowCase(const APInt& RHS) {
249 tcOr(U.pVal, RHS.U.pVal, getNumWords());
252 void APInt::XorAssignSlowCase(const APInt& RHS) {
253 tcXor(U.pVal, RHS.U.pVal, getNumWords());
256 APInt& APInt::operator*=(const APInt& RHS) {
257 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
258 *this = *this * RHS;
259 return *this;
262 APInt& APInt::operator*=(uint64_t RHS) {
263 if (isSingleWord()) {
264 U.VAL *= RHS;
265 } else {
266 unsigned NumWords = getNumWords();
267 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
269 return clearUnusedBits();
272 bool APInt::EqualSlowCase(const APInt& RHS) const {
273 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
276 int APInt::compare(const APInt& RHS) const {
277 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
278 if (isSingleWord())
279 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
281 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
284 int APInt::compareSigned(const APInt& RHS) const {
285 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
286 if (isSingleWord()) {
287 int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
288 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
289 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
292 bool lhsNeg = isNegative();
293 bool rhsNeg = RHS.isNegative();
295 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
296 if (lhsNeg != rhsNeg)
297 return lhsNeg ? -1 : 1;
299 // Otherwise we can just use an unsigned comparison, because even negative
300 // numbers compare correctly this way if both have the same signed-ness.
301 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
304 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
305 unsigned loWord = whichWord(loBit);
306 unsigned hiWord = whichWord(hiBit);
308 // Create an initial mask for the low word with zeros below loBit.
309 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
311 // If hiBit is not aligned, we need a high mask.
312 unsigned hiShiftAmt = whichBit(hiBit);
313 if (hiShiftAmt != 0) {
314 // Create a high mask with zeros above hiBit.
315 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
316 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
317 // set the bits in hiWord.
318 if (hiWord == loWord)
319 loMask &= hiMask;
320 else
321 U.pVal[hiWord] |= hiMask;
323 // Apply the mask to the low word.
324 U.pVal[loWord] |= loMask;
326 // Fill any words between loWord and hiWord with all ones.
327 for (unsigned word = loWord + 1; word < hiWord; ++word)
328 U.pVal[word] = WORDTYPE_MAX;
331 /// Toggle every bit to its opposite value.
332 void APInt::flipAllBitsSlowCase() {
333 tcComplement(U.pVal, getNumWords());
334 clearUnusedBits();
337 /// Toggle a given bit to its opposite value whose position is given
338 /// as "bitPosition".
339 /// Toggles a given bit to its opposite value.
340 void APInt::flipBit(unsigned bitPosition) {
341 assert(bitPosition < BitWidth && "Out of the bit-width range!");
342 if ((*this)[bitPosition]) clearBit(bitPosition);
343 else setBit(bitPosition);
346 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
347 unsigned subBitWidth = subBits.getBitWidth();
348 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
349 "Illegal bit insertion");
351 // Insertion is a direct copy.
352 if (subBitWidth == BitWidth) {
353 *this = subBits;
354 return;
357 // Single word result can be done as a direct bitmask.
358 if (isSingleWord()) {
359 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
360 U.VAL &= ~(mask << bitPosition);
361 U.VAL |= (subBits.U.VAL << bitPosition);
362 return;
365 unsigned loBit = whichBit(bitPosition);
366 unsigned loWord = whichWord(bitPosition);
367 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
369 // Insertion within a single word can be done as a direct bitmask.
370 if (loWord == hi1Word) {
371 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
372 U.pVal[loWord] &= ~(mask << loBit);
373 U.pVal[loWord] |= (subBits.U.VAL << loBit);
374 return;
377 // Insert on word boundaries.
378 if (loBit == 0) {
379 // Direct copy whole words.
380 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
381 memcpy(U.pVal + loWord, subBits.getRawData(),
382 numWholeSubWords * APINT_WORD_SIZE);
384 // Mask+insert remaining bits.
385 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
386 if (remainingBits != 0) {
387 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
388 U.pVal[hi1Word] &= ~mask;
389 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
391 return;
394 // General case - set/clear individual bits in dst based on src.
395 // TODO - there is scope for optimization here, but at the moment this code
396 // path is barely used so prefer readability over performance.
397 for (unsigned i = 0; i != subBitWidth; ++i) {
398 if (subBits[i])
399 setBit(bitPosition + i);
400 else
401 clearBit(bitPosition + i);
405 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
406 assert(numBits > 0 && "Can't extract zero bits");
407 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
408 "Illegal bit extraction");
410 if (isSingleWord())
411 return APInt(numBits, U.VAL >> bitPosition);
413 unsigned loBit = whichBit(bitPosition);
414 unsigned loWord = whichWord(bitPosition);
415 unsigned hiWord = whichWord(bitPosition + numBits - 1);
417 // Single word result extracting bits from a single word source.
418 if (loWord == hiWord)
419 return APInt(numBits, U.pVal[loWord] >> loBit);
421 // Extracting bits that start on a source word boundary can be done
422 // as a fast memory copy.
423 if (loBit == 0)
424 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
426 // General case - shift + copy source words directly into place.
427 APInt Result(numBits, 0);
428 unsigned NumSrcWords = getNumWords();
429 unsigned NumDstWords = Result.getNumWords();
431 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
432 for (unsigned word = 0; word < NumDstWords; ++word) {
433 uint64_t w0 = U.pVal[loWord + word];
434 uint64_t w1 =
435 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
436 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
439 return Result.clearUnusedBits();
442 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
443 assert(!str.empty() && "Invalid string length");
444 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
445 radix == 36) &&
446 "Radix should be 2, 8, 10, 16, or 36!");
448 size_t slen = str.size();
450 // Each computation below needs to know if it's negative.
451 StringRef::iterator p = str.begin();
452 unsigned isNegative = *p == '-';
453 if (*p == '-' || *p == '+') {
454 p++;
455 slen--;
456 assert(slen && "String is only a sign, needs a value.");
459 // For radixes of power-of-two values, the bits required is accurately and
460 // easily computed
461 if (radix == 2)
462 return slen + isNegative;
463 if (radix == 8)
464 return slen * 3 + isNegative;
465 if (radix == 16)
466 return slen * 4 + isNegative;
468 // FIXME: base 36
470 // This is grossly inefficient but accurate. We could probably do something
471 // with a computation of roughly slen*64/20 and then adjust by the value of
472 // the first few digits. But, I'm not sure how accurate that could be.
474 // Compute a sufficient number of bits that is always large enough but might
475 // be too large. This avoids the assertion in the constructor. This
476 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
477 // bits in that case.
478 unsigned sufficient
479 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
480 : (slen == 1 ? 7 : slen * 16/3);
482 // Convert to the actual binary value.
483 APInt tmp(sufficient, StringRef(p, slen), radix);
485 // Compute how many bits are required. If the log is infinite, assume we need
486 // just bit.
487 unsigned log = tmp.logBase2();
488 if (log == (unsigned)-1) {
489 return isNegative + 1;
490 } else {
491 return isNegative + log + 1;
495 hash_code llvm::hash_value(const APInt &Arg) {
496 if (Arg.isSingleWord())
497 return hash_combine(Arg.U.VAL);
499 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502 bool APInt::isSplat(unsigned SplatSizeInBits) const {
503 assert(getBitWidth() % SplatSizeInBits == 0 &&
504 "SplatSizeInBits must divide width!");
505 // We can check that all parts of an integer are equal by making use of a
506 // little trick: rotate and check if it's still the same value.
507 return *this == rotl(SplatSizeInBits);
510 /// This function returns the high "numBits" bits of this APInt.
511 APInt APInt::getHiBits(unsigned numBits) const {
512 return this->lshr(BitWidth - numBits);
515 /// This function returns the low "numBits" bits of this APInt.
516 APInt APInt::getLoBits(unsigned numBits) const {
517 APInt Result(getLowBitsSet(BitWidth, numBits));
518 Result &= *this;
519 return Result;
522 /// Return a value containing V broadcasted over NewLen bits.
523 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
524 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
526 APInt Val = V.zextOrSelf(NewLen);
527 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
528 Val |= Val << I;
530 return Val;
533 unsigned APInt::countLeadingZerosSlowCase() const {
534 unsigned Count = 0;
535 for (int i = getNumWords()-1; i >= 0; --i) {
536 uint64_t V = U.pVal[i];
537 if (V == 0)
538 Count += APINT_BITS_PER_WORD;
539 else {
540 Count += llvm::countLeadingZeros(V);
541 break;
544 // Adjust for unused bits in the most significant word (they are zero).
545 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
546 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
547 return Count;
550 unsigned APInt::countLeadingOnesSlowCase() const {
551 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
552 unsigned shift;
553 if (!highWordBits) {
554 highWordBits = APINT_BITS_PER_WORD;
555 shift = 0;
556 } else {
557 shift = APINT_BITS_PER_WORD - highWordBits;
559 int i = getNumWords() - 1;
560 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
561 if (Count == highWordBits) {
562 for (i--; i >= 0; --i) {
563 if (U.pVal[i] == WORDTYPE_MAX)
564 Count += APINT_BITS_PER_WORD;
565 else {
566 Count += llvm::countLeadingOnes(U.pVal[i]);
567 break;
571 return Count;
574 unsigned APInt::countTrailingZerosSlowCase() const {
575 unsigned Count = 0;
576 unsigned i = 0;
577 for (; i < getNumWords() && U.pVal[i] == 0; ++i)
578 Count += APINT_BITS_PER_WORD;
579 if (i < getNumWords())
580 Count += llvm::countTrailingZeros(U.pVal[i]);
581 return std::min(Count, BitWidth);
584 unsigned APInt::countTrailingOnesSlowCase() const {
585 unsigned Count = 0;
586 unsigned i = 0;
587 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
588 Count += APINT_BITS_PER_WORD;
589 if (i < getNumWords())
590 Count += llvm::countTrailingOnes(U.pVal[i]);
591 assert(Count <= BitWidth);
592 return Count;
595 unsigned APInt::countPopulationSlowCase() const {
596 unsigned Count = 0;
597 for (unsigned i = 0; i < getNumWords(); ++i)
598 Count += llvm::countPopulation(U.pVal[i]);
599 return Count;
602 bool APInt::intersectsSlowCase(const APInt &RHS) const {
603 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
604 if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
605 return true;
607 return false;
610 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
611 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
612 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
613 return false;
615 return true;
618 APInt APInt::byteSwap() const {
619 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
620 if (BitWidth == 16)
621 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
622 if (BitWidth == 32)
623 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
624 if (BitWidth == 48) {
625 unsigned Tmp1 = unsigned(U.VAL >> 16);
626 Tmp1 = ByteSwap_32(Tmp1);
627 uint16_t Tmp2 = uint16_t(U.VAL);
628 Tmp2 = ByteSwap_16(Tmp2);
629 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
631 if (BitWidth == 64)
632 return APInt(BitWidth, ByteSwap_64(U.VAL));
634 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
635 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
636 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
637 if (Result.BitWidth != BitWidth) {
638 Result.lshrInPlace(Result.BitWidth - BitWidth);
639 Result.BitWidth = BitWidth;
641 return Result;
644 APInt APInt::reverseBits() const {
645 switch (BitWidth) {
646 case 64:
647 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
648 case 32:
649 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
650 case 16:
651 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
652 case 8:
653 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
654 default:
655 break;
658 APInt Val(*this);
659 APInt Reversed(BitWidth, 0);
660 unsigned S = BitWidth;
662 for (; Val != 0; Val.lshrInPlace(1)) {
663 Reversed <<= 1;
664 Reversed |= Val[0];
665 --S;
668 Reversed <<= S;
669 return Reversed;
672 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
673 // Fast-path a common case.
674 if (A == B) return A;
676 // Corner cases: if either operand is zero, the other is the gcd.
677 if (!A) return B;
678 if (!B) return A;
680 // Count common powers of 2 and remove all other powers of 2.
681 unsigned Pow2;
683 unsigned Pow2_A = A.countTrailingZeros();
684 unsigned Pow2_B = B.countTrailingZeros();
685 if (Pow2_A > Pow2_B) {
686 A.lshrInPlace(Pow2_A - Pow2_B);
687 Pow2 = Pow2_B;
688 } else if (Pow2_B > Pow2_A) {
689 B.lshrInPlace(Pow2_B - Pow2_A);
690 Pow2 = Pow2_A;
691 } else {
692 Pow2 = Pow2_A;
696 // Both operands are odd multiples of 2^Pow_2:
698 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
700 // This is a modified version of Stein's algorithm, taking advantage of
701 // efficient countTrailingZeros().
702 while (A != B) {
703 if (A.ugt(B)) {
704 A -= B;
705 A.lshrInPlace(A.countTrailingZeros() - Pow2);
706 } else {
707 B -= A;
708 B.lshrInPlace(B.countTrailingZeros() - Pow2);
712 return A;
715 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
716 uint64_t I = bit_cast<uint64_t>(Double);
718 // Get the sign bit from the highest order bit
719 bool isNeg = I >> 63;
721 // Get the 11-bit exponent and adjust for the 1023 bit bias
722 int64_t exp = ((I >> 52) & 0x7ff) - 1023;
724 // If the exponent is negative, the value is < 0 so just return 0.
725 if (exp < 0)
726 return APInt(width, 0u);
728 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
729 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
731 // If the exponent doesn't shift all bits out of the mantissa
732 if (exp < 52)
733 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
734 APInt(width, mantissa >> (52 - exp));
736 // If the client didn't provide enough bits for us to shift the mantissa into
737 // then the result is undefined, just return 0
738 if (width <= exp - 52)
739 return APInt(width, 0);
741 // Otherwise, we have to shift the mantissa bits up to the right location
742 APInt Tmp(width, mantissa);
743 Tmp <<= (unsigned)exp - 52;
744 return isNeg ? -Tmp : Tmp;
747 /// This function converts this APInt to a double.
748 /// The layout for double is as following (IEEE Standard 754):
749 /// --------------------------------------
750 /// | Sign Exponent Fraction Bias |
751 /// |-------------------------------------- |
752 /// | 1[63] 11[62-52] 52[51-00] 1023 |
753 /// --------------------------------------
754 double APInt::roundToDouble(bool isSigned) const {
756 // Handle the simple case where the value is contained in one uint64_t.
757 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
758 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
759 if (isSigned) {
760 int64_t sext = SignExtend64(getWord(0), BitWidth);
761 return double(sext);
762 } else
763 return double(getWord(0));
766 // Determine if the value is negative.
767 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
769 // Construct the absolute value if we're negative.
770 APInt Tmp(isNeg ? -(*this) : (*this));
772 // Figure out how many bits we're using.
773 unsigned n = Tmp.getActiveBits();
775 // The exponent (without bias normalization) is just the number of bits
776 // we are using. Note that the sign bit is gone since we constructed the
777 // absolute value.
778 uint64_t exp = n;
780 // Return infinity for exponent overflow
781 if (exp > 1023) {
782 if (!isSigned || !isNeg)
783 return std::numeric_limits<double>::infinity();
784 else
785 return -std::numeric_limits<double>::infinity();
787 exp += 1023; // Increment for 1023 bias
789 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
790 // extract the high 52 bits from the correct words in pVal.
791 uint64_t mantissa;
792 unsigned hiWord = whichWord(n-1);
793 if (hiWord == 0) {
794 mantissa = Tmp.U.pVal[0];
795 if (n > 52)
796 mantissa >>= n - 52; // shift down, we want the top 52 bits.
797 } else {
798 assert(hiWord > 0 && "huh?");
799 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
800 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
801 mantissa = hibits | lobits;
804 // The leading bit of mantissa is implicit, so get rid of it.
805 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
806 uint64_t I = sign | (exp << 52) | mantissa;
807 return bit_cast<double>(I);
810 // Truncate to new width.
811 APInt APInt::trunc(unsigned width) const {
812 assert(width < BitWidth && "Invalid APInt Truncate request");
813 assert(width && "Can't truncate to 0 bits");
815 if (width <= APINT_BITS_PER_WORD)
816 return APInt(width, getRawData()[0]);
818 APInt Result(getMemory(getNumWords(width)), width);
820 // Copy full words.
821 unsigned i;
822 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
823 Result.U.pVal[i] = U.pVal[i];
825 // Truncate and copy any partial word.
826 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
827 if (bits != 0)
828 Result.U.pVal[i] = U.pVal[i] << bits >> bits;
830 return Result;
833 // Sign extend to a new width.
834 APInt APInt::sext(unsigned Width) const {
835 assert(Width > BitWidth && "Invalid APInt SignExtend request");
837 if (Width <= APINT_BITS_PER_WORD)
838 return APInt(Width, SignExtend64(U.VAL, BitWidth));
840 APInt Result(getMemory(getNumWords(Width)), Width);
842 // Copy words.
843 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
845 // Sign extend the last word since there may be unused bits in the input.
846 Result.U.pVal[getNumWords() - 1] =
847 SignExtend64(Result.U.pVal[getNumWords() - 1],
848 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
850 // Fill with sign bits.
851 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
852 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
853 Result.clearUnusedBits();
854 return Result;
857 // Zero extend to a new width.
858 APInt APInt::zext(unsigned width) const {
859 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
861 if (width <= APINT_BITS_PER_WORD)
862 return APInt(width, U.VAL);
864 APInt Result(getMemory(getNumWords(width)), width);
866 // Copy words.
867 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
869 // Zero remaining words.
870 std::memset(Result.U.pVal + getNumWords(), 0,
871 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
873 return Result;
876 APInt APInt::zextOrTrunc(unsigned width) const {
877 if (BitWidth < width)
878 return zext(width);
879 if (BitWidth > width)
880 return trunc(width);
881 return *this;
884 APInt APInt::sextOrTrunc(unsigned width) const {
885 if (BitWidth < width)
886 return sext(width);
887 if (BitWidth > width)
888 return trunc(width);
889 return *this;
892 APInt APInt::zextOrSelf(unsigned width) const {
893 if (BitWidth < width)
894 return zext(width);
895 return *this;
898 APInt APInt::sextOrSelf(unsigned width) const {
899 if (BitWidth < width)
900 return sext(width);
901 return *this;
904 /// Arithmetic right-shift this APInt by shiftAmt.
905 /// Arithmetic right-shift function.
906 void APInt::ashrInPlace(const APInt &shiftAmt) {
907 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910 /// Arithmetic right-shift this APInt by shiftAmt.
911 /// Arithmetic right-shift function.
912 void APInt::ashrSlowCase(unsigned ShiftAmt) {
913 // Don't bother performing a no-op shift.
914 if (!ShiftAmt)
915 return;
917 // Save the original sign bit for later.
918 bool Negative = isNegative();
920 // WordShift is the inter-part shift; BitShift is intra-part shift.
921 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
922 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
924 unsigned WordsToMove = getNumWords() - WordShift;
925 if (WordsToMove != 0) {
926 // Sign extend the last word to fill in the unused bits.
927 U.pVal[getNumWords() - 1] = SignExtend64(
928 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
930 // Fastpath for moving by whole words.
931 if (BitShift == 0) {
932 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
933 } else {
934 // Move the words containing significant bits.
935 for (unsigned i = 0; i != WordsToMove - 1; ++i)
936 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
937 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
939 // Handle the last word which has no high bits to copy.
940 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
941 // Sign extend one more time.
942 U.pVal[WordsToMove - 1] =
943 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
947 // Fill in the remainder based on the original sign.
948 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
949 WordShift * APINT_WORD_SIZE);
950 clearUnusedBits();
953 /// Logical right-shift this APInt by shiftAmt.
954 /// Logical right-shift function.
955 void APInt::lshrInPlace(const APInt &shiftAmt) {
956 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959 /// Logical right-shift this APInt by shiftAmt.
960 /// Logical right-shift function.
961 void APInt::lshrSlowCase(unsigned ShiftAmt) {
962 tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965 /// Left-shift this APInt by shiftAmt.
966 /// Left-shift function.
967 APInt &APInt::operator<<=(const APInt &shiftAmt) {
968 // It's undefined behavior in C to shift by BitWidth or greater.
969 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
970 return *this;
973 void APInt::shlSlowCase(unsigned ShiftAmt) {
974 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
975 clearUnusedBits();
978 // Calculate the rotate amount modulo the bit width.
979 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
980 unsigned rotBitWidth = rotateAmt.getBitWidth();
981 APInt rot = rotateAmt;
982 if (rotBitWidth < BitWidth) {
983 // Extend the rotate APInt, so that the urem doesn't divide by 0.
984 // e.g. APInt(1, 32) would give APInt(1, 0).
985 rot = rotateAmt.zext(BitWidth);
987 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
988 return rot.getLimitedValue(BitWidth);
991 APInt APInt::rotl(const APInt &rotateAmt) const {
992 return rotl(rotateModulo(BitWidth, rotateAmt));
995 APInt APInt::rotl(unsigned rotateAmt) const {
996 rotateAmt %= BitWidth;
997 if (rotateAmt == 0)
998 return *this;
999 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002 APInt APInt::rotr(const APInt &rotateAmt) const {
1003 return rotr(rotateModulo(BitWidth, rotateAmt));
1006 APInt APInt::rotr(unsigned rotateAmt) const {
1007 rotateAmt %= BitWidth;
1008 if (rotateAmt == 0)
1009 return *this;
1010 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013 // Square Root - this method computes and returns the square root of "this".
1014 // Three mechanisms are used for computation. For small values (<= 5 bits),
1015 // a table lookup is done. This gets some performance for common cases. For
1016 // values using less than 52 bits, the value is converted to double and then
1017 // the libc sqrt function is called. The result is rounded and then converted
1018 // back to a uint64_t which is then used to construct the result. Finally,
1019 // the Babylonian method for computing square roots is used.
1020 APInt APInt::sqrt() const {
1022 // Determine the magnitude of the value.
1023 unsigned magnitude = getActiveBits();
1025 // Use a fast table for some small values. This also gets rid of some
1026 // rounding errors in libc sqrt for small values.
1027 if (magnitude <= 5) {
1028 static const uint8_t results[32] = {
1029 /* 0 */ 0,
1030 /* 1- 2 */ 1, 1,
1031 /* 3- 6 */ 2, 2, 2, 2,
1032 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1033 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1034 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1035 /* 31 */ 6
1037 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040 // If the magnitude of the value fits in less than 52 bits (the precision of
1041 // an IEEE double precision floating point value), then we can use the
1042 // libc sqrt function which will probably use a hardware sqrt computation.
1043 // This should be faster than the algorithm below.
1044 if (magnitude < 52) {
1045 return APInt(BitWidth,
1046 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1047 : U.pVal[0])))));
1050 // Okay, all the short cuts are exhausted. We must compute it. The following
1051 // is a classical Babylonian method for computing the square root. This code
1052 // was adapted to APInt from a wikipedia article on such computations.
1053 // See http://www.wikipedia.org/ and go to the page named
1054 // Calculate_an_integer_square_root.
1055 unsigned nbits = BitWidth, i = 4;
1056 APInt testy(BitWidth, 16);
1057 APInt x_old(BitWidth, 1);
1058 APInt x_new(BitWidth, 0);
1059 APInt two(BitWidth, 2);
1061 // Select a good starting value using binary logarithms.
1062 for (;; i += 2, testy = testy.shl(2))
1063 if (i >= nbits || this->ule(testy)) {
1064 x_old = x_old.shl(i / 2);
1065 break;
1068 // Use the Babylonian method to arrive at the integer square root:
1069 for (;;) {
1070 x_new = (this->udiv(x_old) + x_old).udiv(two);
1071 if (x_old.ule(x_new))
1072 break;
1073 x_old = x_new;
1076 // Make sure we return the closest approximation
1077 // NOTE: The rounding calculation below is correct. It will produce an
1078 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1079 // determined to be a rounding issue with pari/gp as it begins to use a
1080 // floating point representation after 192 bits. There are no discrepancies
1081 // between this algorithm and pari/gp for bit widths < 192 bits.
1082 APInt square(x_old * x_old);
1083 APInt nextSquare((x_old + 1) * (x_old +1));
1084 if (this->ult(square))
1085 return x_old;
1086 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1087 APInt midpoint((nextSquare - square).udiv(two));
1088 APInt offset(*this - square);
1089 if (offset.ult(midpoint))
1090 return x_old;
1091 return x_old + 1;
1094 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1095 /// iterative extended Euclidean algorithm is used to solve for this value,
1096 /// however we simplify it to speed up calculating only the inverse, and take
1097 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1098 /// (potentially large) APInts around.
1099 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1100 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1102 // Using the properties listed at the following web page (accessed 06/21/08):
1103 // http://www.numbertheory.org/php/euclid.html
1104 // (especially the properties numbered 3, 4 and 9) it can be proved that
1105 // BitWidth bits suffice for all the computations in the algorithm implemented
1106 // below. More precisely, this number of bits suffice if the multiplicative
1107 // inverse exists, but may not suffice for the general extended Euclidean
1108 // algorithm.
1110 APInt r[2] = { modulo, *this };
1111 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1112 APInt q(BitWidth, 0);
1114 unsigned i;
1115 for (i = 0; r[i^1] != 0; i ^= 1) {
1116 // An overview of the math without the confusing bit-flipping:
1117 // q = r[i-2] / r[i-1]
1118 // r[i] = r[i-2] % r[i-1]
1119 // t[i] = t[i-2] - t[i-1] * q
1120 udivrem(r[i], r[i^1], q, r[i]);
1121 t[i] -= t[i^1] * q;
1124 // If this APInt and the modulo are not coprime, there is no multiplicative
1125 // inverse, so return 0. We check this by looking at the next-to-last
1126 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1127 // algorithm.
1128 if (r[i] != 1)
1129 return APInt(BitWidth, 0);
1131 // The next-to-last t is the multiplicative inverse. However, we are
1132 // interested in a positive inverse. Calculate a positive one from a negative
1133 // one if necessary. A simple addition of the modulo suffices because
1134 // abs(t[i]) is known to be less than *this/2 (see the link above).
1135 if (t[i].isNegative())
1136 t[i] += modulo;
1138 return std::move(t[i]);
1141 /// Calculate the magic numbers required to implement a signed integer division
1142 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1143 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1144 /// Warren, Jr., chapter 10.
1145 APInt::ms APInt::magic() const {
1146 const APInt& d = *this;
1147 unsigned p;
1148 APInt ad, anc, delta, q1, r1, q2, r2, t;
1149 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1150 struct ms mag;
1152 ad = d.abs();
1153 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1154 anc = t - 1 - t.urem(ad); // absolute value of nc
1155 p = d.getBitWidth() - 1; // initialize p
1156 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1157 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1158 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1159 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1160 do {
1161 p = p + 1;
1162 q1 = q1<<1; // update q1 = 2p/abs(nc)
1163 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1164 if (r1.uge(anc)) { // must be unsigned comparison
1165 q1 = q1 + 1;
1166 r1 = r1 - anc;
1168 q2 = q2<<1; // update q2 = 2p/abs(d)
1169 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1170 if (r2.uge(ad)) { // must be unsigned comparison
1171 q2 = q2 + 1;
1172 r2 = r2 - ad;
1174 delta = ad - r2;
1175 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1177 mag.m = q2 + 1;
1178 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1179 mag.s = p - d.getBitWidth(); // resulting shift
1180 return mag;
1183 /// Calculate the magic numbers required to implement an unsigned integer
1184 /// division by a constant as a sequence of multiplies, adds and shifts.
1185 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1186 /// S. Warren, Jr., chapter 10.
1187 /// LeadingZeros can be used to simplify the calculation if the upper bits
1188 /// of the divided value are known zero.
1189 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1190 const APInt& d = *this;
1191 unsigned p;
1192 APInt nc, delta, q1, r1, q2, r2;
1193 struct mu magu;
1194 magu.a = 0; // initialize "add" indicator
1195 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1196 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1197 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1199 nc = allOnes - (allOnes - d).urem(d);
1200 p = d.getBitWidth() - 1; // initialize p
1201 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1202 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1203 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1204 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1205 do {
1206 p = p + 1;
1207 if (r1.uge(nc - r1)) {
1208 q1 = q1 + q1 + 1; // update q1
1209 r1 = r1 + r1 - nc; // update r1
1211 else {
1212 q1 = q1+q1; // update q1
1213 r1 = r1+r1; // update r1
1215 if ((r2 + 1).uge(d - r2)) {
1216 if (q2.uge(signedMax)) magu.a = 1;
1217 q2 = q2+q2 + 1; // update q2
1218 r2 = r2+r2 + 1 - d; // update r2
1220 else {
1221 if (q2.uge(signedMin)) magu.a = 1;
1222 q2 = q2+q2; // update q2
1223 r2 = r2+r2 + 1; // update r2
1225 delta = d - 1 - r2;
1226 } while (p < d.getBitWidth()*2 &&
1227 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1228 magu.m = q2 + 1; // resulting magic number
1229 magu.s = p - d.getBitWidth(); // resulting shift
1230 return magu;
1233 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1234 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1235 /// variables here have the same names as in the algorithm. Comments explain
1236 /// the algorithm and any deviation from it.
1237 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1238 unsigned m, unsigned n) {
1239 assert(u && "Must provide dividend");
1240 assert(v && "Must provide divisor");
1241 assert(q && "Must provide quotient");
1242 assert(u != v && u != q && v != q && "Must use different memory");
1243 assert(n>1 && "n must be > 1");
1245 // b denotes the base of the number system. In our case b is 2^32.
1246 const uint64_t b = uint64_t(1) << 32;
1248 // The DEBUG macros here tend to be spam in the debug output if you're not
1249 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1250 #ifdef KNUTH_DEBUG
1251 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1252 #else
1253 #define DEBUG_KNUTH(X) do {} while(false)
1254 #endif
1256 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1257 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1258 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1259 DEBUG_KNUTH(dbgs() << " by");
1260 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1261 DEBUG_KNUTH(dbgs() << '\n');
1262 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1263 // u and v by d. Note that we have taken Knuth's advice here to use a power
1264 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1265 // 2 allows us to shift instead of multiply and it is easy to determine the
1266 // shift amount from the leading zeros. We are basically normalizing the u
1267 // and v so that its high bits are shifted to the top of v's range without
1268 // overflow. Note that this can require an extra word in u so that u must
1269 // be of length m+n+1.
1270 unsigned shift = countLeadingZeros(v[n-1]);
1271 uint32_t v_carry = 0;
1272 uint32_t u_carry = 0;
1273 if (shift) {
1274 for (unsigned i = 0; i < m+n; ++i) {
1275 uint32_t u_tmp = u[i] >> (32 - shift);
1276 u[i] = (u[i] << shift) | u_carry;
1277 u_carry = u_tmp;
1279 for (unsigned i = 0; i < n; ++i) {
1280 uint32_t v_tmp = v[i] >> (32 - shift);
1281 v[i] = (v[i] << shift) | v_carry;
1282 v_carry = v_tmp;
1285 u[m+n] = u_carry;
1287 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1288 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1289 DEBUG_KNUTH(dbgs() << " by");
1290 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1291 DEBUG_KNUTH(dbgs() << '\n');
1293 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1294 int j = m;
1295 do {
1296 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1297 // D3. [Calculate q'.].
1298 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1299 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1300 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1301 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1302 // on v[n-2] determines at high speed most of the cases in which the trial
1303 // value qp is one too large, and it eliminates all cases where qp is two
1304 // too large.
1305 uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1306 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1307 uint64_t qp = dividend / v[n-1];
1308 uint64_t rp = dividend % v[n-1];
1309 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1310 qp--;
1311 rp += v[n-1];
1312 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1313 qp--;
1315 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1317 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1318 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1319 // consists of a simple multiplication by a one-place number, combined with
1320 // a subtraction.
1321 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1322 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1323 // true value plus b**(n+1), namely as the b's complement of
1324 // the true value, and a "borrow" to the left should be remembered.
1325 int64_t borrow = 0;
1326 for (unsigned i = 0; i < n; ++i) {
1327 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1328 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1329 u[j+i] = Lo_32(subres);
1330 borrow = Hi_32(p) - Hi_32(subres);
1331 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1332 << ", borrow = " << borrow << '\n');
1334 bool isNeg = u[j+n] < borrow;
1335 u[j+n] -= Lo_32(borrow);
1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339 DEBUG_KNUTH(dbgs() << '\n');
1341 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1342 // negative, go to step D6; otherwise go on to step D7.
1343 q[j] = Lo_32(qp);
1344 if (isNeg) {
1345 // D6. [Add back]. The probability that this step is necessary is very
1346 // small, on the order of only 2/b. Make sure that test data accounts for
1347 // this possibility. Decrease q[j] by 1
1348 q[j]--;
1349 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1350 // A carry will occur to the left of u[j+n], and it should be ignored
1351 // since it cancels with the borrow that occurred in D4.
1352 bool carry = false;
1353 for (unsigned i = 0; i < n; i++) {
1354 uint32_t limit = std::min(u[j+i],v[i]);
1355 u[j+i] += v[i] + carry;
1356 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1358 u[j+n] += carry;
1360 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1361 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1362 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1364 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1365 } while (--j >= 0);
1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1368 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1369 DEBUG_KNUTH(dbgs() << '\n');
1371 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1372 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1373 // compute the remainder (urem uses this).
1374 if (r) {
1375 // The value d is expressed by the "shift" value above since we avoided
1376 // multiplication by d by using a shift left. So, all we have to do is
1377 // shift right here.
1378 if (shift) {
1379 uint32_t carry = 0;
1380 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1381 for (int i = n-1; i >= 0; i--) {
1382 r[i] = (u[i] >> shift) | carry;
1383 carry = u[i] << (32 - shift);
1384 DEBUG_KNUTH(dbgs() << " " << r[i]);
1386 } else {
1387 for (int i = n-1; i >= 0; i--) {
1388 r[i] = u[i];
1389 DEBUG_KNUTH(dbgs() << " " << r[i]);
1392 DEBUG_KNUTH(dbgs() << '\n');
1394 DEBUG_KNUTH(dbgs() << '\n');
1397 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1398 unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1399 assert(lhsWords >= rhsWords && "Fractional result");
1401 // First, compose the values into an array of 32-bit words instead of
1402 // 64-bit words. This is a necessity of both the "short division" algorithm
1403 // and the Knuth "classical algorithm" which requires there to be native
1404 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1405 // can't use 64-bit operands here because we don't have native results of
1406 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1407 // work on large-endian machines.
1408 unsigned n = rhsWords * 2;
1409 unsigned m = (lhsWords * 2) - n;
1411 // Allocate space for the temporary values we need either on the stack, if
1412 // it will fit, or on the heap if it won't.
1413 uint32_t SPACE[128];
1414 uint32_t *U = nullptr;
1415 uint32_t *V = nullptr;
1416 uint32_t *Q = nullptr;
1417 uint32_t *R = nullptr;
1418 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1419 U = &SPACE[0];
1420 V = &SPACE[m+n+1];
1421 Q = &SPACE[(m+n+1) + n];
1422 if (Remainder)
1423 R = &SPACE[(m+n+1) + n + (m+n)];
1424 } else {
1425 U = new uint32_t[m + n + 1];
1426 V = new uint32_t[n];
1427 Q = new uint32_t[m+n];
1428 if (Remainder)
1429 R = new uint32_t[n];
1432 // Initialize the dividend
1433 memset(U, 0, (m+n+1)*sizeof(uint32_t));
1434 for (unsigned i = 0; i < lhsWords; ++i) {
1435 uint64_t tmp = LHS[i];
1436 U[i * 2] = Lo_32(tmp);
1437 U[i * 2 + 1] = Hi_32(tmp);
1439 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1441 // Initialize the divisor
1442 memset(V, 0, (n)*sizeof(uint32_t));
1443 for (unsigned i = 0; i < rhsWords; ++i) {
1444 uint64_t tmp = RHS[i];
1445 V[i * 2] = Lo_32(tmp);
1446 V[i * 2 + 1] = Hi_32(tmp);
1449 // initialize the quotient and remainder
1450 memset(Q, 0, (m+n) * sizeof(uint32_t));
1451 if (Remainder)
1452 memset(R, 0, n * sizeof(uint32_t));
1454 // Now, adjust m and n for the Knuth division. n is the number of words in
1455 // the divisor. m is the number of words by which the dividend exceeds the
1456 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1457 // contain any zero words or the Knuth algorithm fails.
1458 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1459 n--;
1460 m++;
1462 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1463 m--;
1465 // If we're left with only a single word for the divisor, Knuth doesn't work
1466 // so we implement the short division algorithm here. This is much simpler
1467 // and faster because we are certain that we can divide a 64-bit quantity
1468 // by a 32-bit quantity at hardware speed and short division is simply a
1469 // series of such operations. This is just like doing short division but we
1470 // are using base 2^32 instead of base 10.
1471 assert(n != 0 && "Divide by zero?");
1472 if (n == 1) {
1473 uint32_t divisor = V[0];
1474 uint32_t remainder = 0;
1475 for (int i = m; i >= 0; i--) {
1476 uint64_t partial_dividend = Make_64(remainder, U[i]);
1477 if (partial_dividend == 0) {
1478 Q[i] = 0;
1479 remainder = 0;
1480 } else if (partial_dividend < divisor) {
1481 Q[i] = 0;
1482 remainder = Lo_32(partial_dividend);
1483 } else if (partial_dividend == divisor) {
1484 Q[i] = 1;
1485 remainder = 0;
1486 } else {
1487 Q[i] = Lo_32(partial_dividend / divisor);
1488 remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1491 if (R)
1492 R[0] = remainder;
1493 } else {
1494 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1495 // case n > 1.
1496 KnuthDiv(U, V, Q, R, m, n);
1499 // If the caller wants the quotient
1500 if (Quotient) {
1501 for (unsigned i = 0; i < lhsWords; ++i)
1502 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1505 // If the caller wants the remainder
1506 if (Remainder) {
1507 for (unsigned i = 0; i < rhsWords; ++i)
1508 Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1511 // Clean up the memory we allocated.
1512 if (U != &SPACE[0]) {
1513 delete [] U;
1514 delete [] V;
1515 delete [] Q;
1516 delete [] R;
1520 APInt APInt::udiv(const APInt &RHS) const {
1521 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1523 // First, deal with the easy case
1524 if (isSingleWord()) {
1525 assert(RHS.U.VAL != 0 && "Divide by zero?");
1526 return APInt(BitWidth, U.VAL / RHS.U.VAL);
1529 // Get some facts about the LHS and RHS number of bits and words
1530 unsigned lhsWords = getNumWords(getActiveBits());
1531 unsigned rhsBits = RHS.getActiveBits();
1532 unsigned rhsWords = getNumWords(rhsBits);
1533 assert(rhsWords && "Divided by zero???");
1535 // Deal with some degenerate cases
1536 if (!lhsWords)
1537 // 0 / X ===> 0
1538 return APInt(BitWidth, 0);
1539 if (rhsBits == 1)
1540 // X / 1 ===> X
1541 return *this;
1542 if (lhsWords < rhsWords || this->ult(RHS))
1543 // X / Y ===> 0, iff X < Y
1544 return APInt(BitWidth, 0);
1545 if (*this == RHS)
1546 // X / X ===> 1
1547 return APInt(BitWidth, 1);
1548 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1549 // All high words are zero, just use native divide
1550 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1552 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1553 APInt Quotient(BitWidth, 0); // to hold result.
1554 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1555 return Quotient;
1558 APInt APInt::udiv(uint64_t RHS) const {
1559 assert(RHS != 0 && "Divide by zero?");
1561 // First, deal with the easy case
1562 if (isSingleWord())
1563 return APInt(BitWidth, U.VAL / RHS);
1565 // Get some facts about the LHS words.
1566 unsigned lhsWords = getNumWords(getActiveBits());
1568 // Deal with some degenerate cases
1569 if (!lhsWords)
1570 // 0 / X ===> 0
1571 return APInt(BitWidth, 0);
1572 if (RHS == 1)
1573 // X / 1 ===> X
1574 return *this;
1575 if (this->ult(RHS))
1576 // X / Y ===> 0, iff X < Y
1577 return APInt(BitWidth, 0);
1578 if (*this == RHS)
1579 // X / X ===> 1
1580 return APInt(BitWidth, 1);
1581 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1582 // All high words are zero, just use native divide
1583 return APInt(BitWidth, this->U.pVal[0] / RHS);
1585 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1586 APInt Quotient(BitWidth, 0); // to hold result.
1587 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1588 return Quotient;
1591 APInt APInt::sdiv(const APInt &RHS) const {
1592 if (isNegative()) {
1593 if (RHS.isNegative())
1594 return (-(*this)).udiv(-RHS);
1595 return -((-(*this)).udiv(RHS));
1597 if (RHS.isNegative())
1598 return -(this->udiv(-RHS));
1599 return this->udiv(RHS);
1602 APInt APInt::sdiv(int64_t RHS) const {
1603 if (isNegative()) {
1604 if (RHS < 0)
1605 return (-(*this)).udiv(-RHS);
1606 return -((-(*this)).udiv(RHS));
1608 if (RHS < 0)
1609 return -(this->udiv(-RHS));
1610 return this->udiv(RHS);
1613 APInt APInt::urem(const APInt &RHS) const {
1614 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1615 if (isSingleWord()) {
1616 assert(RHS.U.VAL != 0 && "Remainder by zero?");
1617 return APInt(BitWidth, U.VAL % RHS.U.VAL);
1620 // Get some facts about the LHS
1621 unsigned lhsWords = getNumWords(getActiveBits());
1623 // Get some facts about the RHS
1624 unsigned rhsBits = RHS.getActiveBits();
1625 unsigned rhsWords = getNumWords(rhsBits);
1626 assert(rhsWords && "Performing remainder operation by zero ???");
1628 // Check the degenerate cases
1629 if (lhsWords == 0)
1630 // 0 % Y ===> 0
1631 return APInt(BitWidth, 0);
1632 if (rhsBits == 1)
1633 // X % 1 ===> 0
1634 return APInt(BitWidth, 0);
1635 if (lhsWords < rhsWords || this->ult(RHS))
1636 // X % Y ===> X, iff X < Y
1637 return *this;
1638 if (*this == RHS)
1639 // X % X == 0;
1640 return APInt(BitWidth, 0);
1641 if (lhsWords == 1)
1642 // All high words are zero, just use native remainder
1643 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1645 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1646 APInt Remainder(BitWidth, 0);
1647 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1648 return Remainder;
1651 uint64_t APInt::urem(uint64_t RHS) const {
1652 assert(RHS != 0 && "Remainder by zero?");
1654 if (isSingleWord())
1655 return U.VAL % RHS;
1657 // Get some facts about the LHS
1658 unsigned lhsWords = getNumWords(getActiveBits());
1660 // Check the degenerate cases
1661 if (lhsWords == 0)
1662 // 0 % Y ===> 0
1663 return 0;
1664 if (RHS == 1)
1665 // X % 1 ===> 0
1666 return 0;
1667 if (this->ult(RHS))
1668 // X % Y ===> X, iff X < Y
1669 return getZExtValue();
1670 if (*this == RHS)
1671 // X % X == 0;
1672 return 0;
1673 if (lhsWords == 1)
1674 // All high words are zero, just use native remainder
1675 return U.pVal[0] % RHS;
1677 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1678 uint64_t Remainder;
1679 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1680 return Remainder;
1683 APInt APInt::srem(const APInt &RHS) const {
1684 if (isNegative()) {
1685 if (RHS.isNegative())
1686 return -((-(*this)).urem(-RHS));
1687 return -((-(*this)).urem(RHS));
1689 if (RHS.isNegative())
1690 return this->urem(-RHS);
1691 return this->urem(RHS);
1694 int64_t APInt::srem(int64_t RHS) const {
1695 if (isNegative()) {
1696 if (RHS < 0)
1697 return -((-(*this)).urem(-RHS));
1698 return -((-(*this)).urem(RHS));
1700 if (RHS < 0)
1701 return this->urem(-RHS);
1702 return this->urem(RHS);
1705 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1706 APInt &Quotient, APInt &Remainder) {
1707 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1708 unsigned BitWidth = LHS.BitWidth;
1710 // First, deal with the easy case
1711 if (LHS.isSingleWord()) {
1712 assert(RHS.U.VAL != 0 && "Divide by zero?");
1713 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1714 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1715 Quotient = APInt(BitWidth, QuotVal);
1716 Remainder = APInt(BitWidth, RemVal);
1717 return;
1720 // Get some size facts about the dividend and divisor
1721 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1722 unsigned rhsBits = RHS.getActiveBits();
1723 unsigned rhsWords = getNumWords(rhsBits);
1724 assert(rhsWords && "Performing divrem operation by zero ???");
1726 // Check the degenerate cases
1727 if (lhsWords == 0) {
1728 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1729 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
1730 return;
1733 if (rhsBits == 1) {
1734 Quotient = LHS; // X / 1 ===> X
1735 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
1738 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1739 Remainder = LHS; // X % Y ===> X, iff X < Y
1740 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1741 return;
1744 if (LHS == RHS) {
1745 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1746 Remainder = APInt(BitWidth, 0); // X % X ===> 0;
1747 return;
1750 // Make sure there is enough space to hold the results.
1751 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1752 // change the size. This is necessary if Quotient or Remainder is aliased
1753 // with LHS or RHS.
1754 Quotient.reallocate(BitWidth);
1755 Remainder.reallocate(BitWidth);
1757 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1758 // There is only one word to consider so use the native versions.
1759 uint64_t lhsValue = LHS.U.pVal[0];
1760 uint64_t rhsValue = RHS.U.pVal[0];
1761 Quotient = lhsValue / rhsValue;
1762 Remainder = lhsValue % rhsValue;
1763 return;
1766 // Okay, lets do it the long way
1767 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1768 Remainder.U.pVal);
1769 // Clear the rest of the Quotient and Remainder.
1770 std::memset(Quotient.U.pVal + lhsWords, 0,
1771 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1772 std::memset(Remainder.U.pVal + rhsWords, 0,
1773 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1776 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1777 uint64_t &Remainder) {
1778 assert(RHS != 0 && "Divide by zero?");
1779 unsigned BitWidth = LHS.BitWidth;
1781 // First, deal with the easy case
1782 if (LHS.isSingleWord()) {
1783 uint64_t QuotVal = LHS.U.VAL / RHS;
1784 Remainder = LHS.U.VAL % RHS;
1785 Quotient = APInt(BitWidth, QuotVal);
1786 return;
1789 // Get some size facts about the dividend and divisor
1790 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1792 // Check the degenerate cases
1793 if (lhsWords == 0) {
1794 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1795 Remainder = 0; // 0 % Y ===> 0
1796 return;
1799 if (RHS == 1) {
1800 Quotient = LHS; // X / 1 ===> X
1801 Remainder = 0; // X % 1 ===> 0
1802 return;
1805 if (LHS.ult(RHS)) {
1806 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1807 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1808 return;
1811 if (LHS == RHS) {
1812 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1813 Remainder = 0; // X % X ===> 0;
1814 return;
1817 // Make sure there is enough space to hold the results.
1818 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1819 // change the size. This is necessary if Quotient is aliased with LHS.
1820 Quotient.reallocate(BitWidth);
1822 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1823 // There is only one word to consider so use the native versions.
1824 uint64_t lhsValue = LHS.U.pVal[0];
1825 Quotient = lhsValue / RHS;
1826 Remainder = lhsValue % RHS;
1827 return;
1830 // Okay, lets do it the long way
1831 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1832 // Clear the rest of the Quotient.
1833 std::memset(Quotient.U.pVal + lhsWords, 0,
1834 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1837 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1838 APInt &Quotient, APInt &Remainder) {
1839 if (LHS.isNegative()) {
1840 if (RHS.isNegative())
1841 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1842 else {
1843 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1844 Quotient.negate();
1846 Remainder.negate();
1847 } else if (RHS.isNegative()) {
1848 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1849 Quotient.negate();
1850 } else {
1851 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1855 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1856 APInt &Quotient, int64_t &Remainder) {
1857 uint64_t R = Remainder;
1858 if (LHS.isNegative()) {
1859 if (RHS < 0)
1860 APInt::udivrem(-LHS, -RHS, Quotient, R);
1861 else {
1862 APInt::udivrem(-LHS, RHS, Quotient, R);
1863 Quotient.negate();
1865 R = -R;
1866 } else if (RHS < 0) {
1867 APInt::udivrem(LHS, -RHS, Quotient, R);
1868 Quotient.negate();
1869 } else {
1870 APInt::udivrem(LHS, RHS, Quotient, R);
1872 Remainder = R;
1875 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1876 APInt Res = *this+RHS;
1877 Overflow = isNonNegative() == RHS.isNonNegative() &&
1878 Res.isNonNegative() != isNonNegative();
1879 return Res;
1882 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1883 APInt Res = *this+RHS;
1884 Overflow = Res.ult(RHS);
1885 return Res;
1888 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1889 APInt Res = *this - RHS;
1890 Overflow = isNonNegative() != RHS.isNonNegative() &&
1891 Res.isNonNegative() != isNonNegative();
1892 return Res;
1895 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1896 APInt Res = *this-RHS;
1897 Overflow = Res.ugt(*this);
1898 return Res;
1901 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1902 // MININT/-1 --> overflow.
1903 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1904 return sdiv(RHS);
1907 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1908 APInt Res = *this * RHS;
1910 if (*this != 0 && RHS != 0)
1911 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1912 else
1913 Overflow = false;
1914 return Res;
1917 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1918 APInt Res = *this * RHS;
1920 if (*this != 0 && RHS != 0)
1921 Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS;
1922 else
1923 Overflow = false;
1924 return Res;
1927 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1928 Overflow = ShAmt.uge(getBitWidth());
1929 if (Overflow)
1930 return APInt(BitWidth, 0);
1932 if (isNonNegative()) // Don't allow sign change.
1933 Overflow = ShAmt.uge(countLeadingZeros());
1934 else
1935 Overflow = ShAmt.uge(countLeadingOnes());
1937 return *this << ShAmt;
1940 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1941 Overflow = ShAmt.uge(getBitWidth());
1942 if (Overflow)
1943 return APInt(BitWidth, 0);
1945 Overflow = ShAmt.ugt(countLeadingZeros());
1947 return *this << ShAmt;
1953 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
1954 // Check our assumptions here
1955 assert(!str.empty() && "Invalid string length");
1956 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
1957 radix == 36) &&
1958 "Radix should be 2, 8, 10, 16, or 36!");
1960 StringRef::iterator p = str.begin();
1961 size_t slen = str.size();
1962 bool isNeg = *p == '-';
1963 if (*p == '-' || *p == '+') {
1964 p++;
1965 slen--;
1966 assert(slen && "String is only a sign, needs a value.");
1968 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
1969 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
1970 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
1971 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
1972 "Insufficient bit width");
1974 // Allocate memory if needed
1975 if (isSingleWord())
1976 U.VAL = 0;
1977 else
1978 U.pVal = getClearedMemory(getNumWords());
1980 // Figure out if we can shift instead of multiply
1981 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
1983 // Enter digit traversal loop
1984 for (StringRef::iterator e = str.end(); p != e; ++p) {
1985 unsigned digit = getDigit(*p, radix);
1986 assert(digit < radix && "Invalid character in digit string");
1988 // Shift or multiply the value by the radix
1989 if (slen > 1) {
1990 if (shift)
1991 *this <<= shift;
1992 else
1993 *this *= radix;
1996 // Add in the digit we just interpreted
1997 *this += digit;
1999 // If its negative, put it in two's complement form
2000 if (isNeg)
2001 this->negate();
2004 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2005 bool Signed, bool formatAsCLiteral) const {
2006 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2007 Radix == 36) &&
2008 "Radix should be 2, 8, 10, 16, or 36!");
2010 const char *Prefix = "";
2011 if (formatAsCLiteral) {
2012 switch (Radix) {
2013 case 2:
2014 // Binary literals are a non-standard extension added in gcc 4.3:
2015 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2016 Prefix = "0b";
2017 break;
2018 case 8:
2019 Prefix = "0";
2020 break;
2021 case 10:
2022 break; // No prefix
2023 case 16:
2024 Prefix = "0x";
2025 break;
2026 default:
2027 llvm_unreachable("Invalid radix!");
2031 // First, check for a zero value and just short circuit the logic below.
2032 if (*this == 0) {
2033 while (*Prefix) {
2034 Str.push_back(*Prefix);
2035 ++Prefix;
2037 Str.push_back('0');
2038 return;
2041 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2043 if (isSingleWord()) {
2044 char Buffer[65];
2045 char *BufPtr = std::end(Buffer);
2047 uint64_t N;
2048 if (!Signed) {
2049 N = getZExtValue();
2050 } else {
2051 int64_t I = getSExtValue();
2052 if (I >= 0) {
2053 N = I;
2054 } else {
2055 Str.push_back('-');
2056 N = -(uint64_t)I;
2060 while (*Prefix) {
2061 Str.push_back(*Prefix);
2062 ++Prefix;
2065 while (N) {
2066 *--BufPtr = Digits[N % Radix];
2067 N /= Radix;
2069 Str.append(BufPtr, std::end(Buffer));
2070 return;
2073 APInt Tmp(*this);
2075 if (Signed && isNegative()) {
2076 // They want to print the signed version and it is a negative value
2077 // Flip the bits and add one to turn it into the equivalent positive
2078 // value and put a '-' in the result.
2079 Tmp.negate();
2080 Str.push_back('-');
2083 while (*Prefix) {
2084 Str.push_back(*Prefix);
2085 ++Prefix;
2088 // We insert the digits backward, then reverse them to get the right order.
2089 unsigned StartDig = Str.size();
2091 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2092 // because the number of bits per digit (1, 3 and 4 respectively) divides
2093 // equally. We just shift until the value is zero.
2094 if (Radix == 2 || Radix == 8 || Radix == 16) {
2095 // Just shift tmp right for each digit width until it becomes zero
2096 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2097 unsigned MaskAmt = Radix - 1;
2099 while (Tmp.getBoolValue()) {
2100 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2101 Str.push_back(Digits[Digit]);
2102 Tmp.lshrInPlace(ShiftAmt);
2104 } else {
2105 while (Tmp.getBoolValue()) {
2106 uint64_t Digit;
2107 udivrem(Tmp, Radix, Tmp, Digit);
2108 assert(Digit < Radix && "divide failed");
2109 Str.push_back(Digits[Digit]);
2113 // Reverse the digits before returning.
2114 std::reverse(Str.begin()+StartDig, Str.end());
2117 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2118 /// It is better to pass in a SmallVector/SmallString to the methods above.
2119 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2120 SmallString<40> S;
2121 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2122 return S.str();
2125 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2126 LLVM_DUMP_METHOD void APInt::dump() const {
2127 SmallString<40> S, U;
2128 this->toStringUnsigned(U);
2129 this->toStringSigned(S);
2130 dbgs() << "APInt(" << BitWidth << "b, "
2131 << U << "u " << S << "s)\n";
2133 #endif
2135 void APInt::print(raw_ostream &OS, bool isSigned) const {
2136 SmallString<40> S;
2137 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2138 OS << S;
2141 // This implements a variety of operations on a representation of
2142 // arbitrary precision, two's-complement, bignum integer values.
2144 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2145 // and unrestricting assumption.
2146 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2147 "Part width must be divisible by 2!");
2149 /* Some handy functions local to this file. */
2151 /* Returns the integer part with the least significant BITS set.
2152 BITS cannot be zero. */
2153 static inline APInt::WordType lowBitMask(unsigned bits) {
2154 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2156 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2159 /* Returns the value of the lower half of PART. */
2160 static inline APInt::WordType lowHalf(APInt::WordType part) {
2161 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2164 /* Returns the value of the upper half of PART. */
2165 static inline APInt::WordType highHalf(APInt::WordType part) {
2166 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2169 /* Returns the bit number of the most significant set bit of a part.
2170 If the input number has no bits set -1U is returned. */
2171 static unsigned partMSB(APInt::WordType value) {
2172 return findLastSet(value, ZB_Max);
2175 /* Returns the bit number of the least significant set bit of a
2176 part. If the input number has no bits set -1U is returned. */
2177 static unsigned partLSB(APInt::WordType value) {
2178 return findFirstSet(value, ZB_Max);
2181 /* Sets the least significant part of a bignum to the input value, and
2182 zeroes out higher parts. */
2183 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2184 assert(parts > 0);
2186 dst[0] = part;
2187 for (unsigned i = 1; i < parts; i++)
2188 dst[i] = 0;
2191 /* Assign one bignum to another. */
2192 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2193 for (unsigned i = 0; i < parts; i++)
2194 dst[i] = src[i];
2197 /* Returns true if a bignum is zero, false otherwise. */
2198 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2199 for (unsigned i = 0; i < parts; i++)
2200 if (src[i])
2201 return false;
2203 return true;
2206 /* Extract the given bit of a bignum; returns 0 or 1. */
2207 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2208 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2211 /* Set the given bit of a bignum. */
2212 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2213 parts[whichWord(bit)] |= maskBit(bit);
2216 /* Clears the given bit of a bignum. */
2217 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2218 parts[whichWord(bit)] &= ~maskBit(bit);
2221 /* Returns the bit number of the least significant set bit of a
2222 number. If the input number has no bits set -1U is returned. */
2223 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2224 for (unsigned i = 0; i < n; i++) {
2225 if (parts[i] != 0) {
2226 unsigned lsb = partLSB(parts[i]);
2228 return lsb + i * APINT_BITS_PER_WORD;
2232 return -1U;
2235 /* Returns the bit number of the most significant set bit of a number.
2236 If the input number has no bits set -1U is returned. */
2237 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2238 do {
2239 --n;
2241 if (parts[n] != 0) {
2242 unsigned msb = partMSB(parts[n]);
2244 return msb + n * APINT_BITS_PER_WORD;
2246 } while (n);
2248 return -1U;
2251 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2252 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2253 the least significant bit of DST. All high bits above srcBITS in
2254 DST are zero-filled. */
2255 void
2256 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2257 unsigned srcBits, unsigned srcLSB) {
2258 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2259 assert(dstParts <= dstCount);
2261 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2262 tcAssign (dst, src + firstSrcPart, dstParts);
2264 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2265 tcShiftRight (dst, dstParts, shift);
2267 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2268 in DST. If this is less that srcBits, append the rest, else
2269 clear the high bits. */
2270 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2271 if (n < srcBits) {
2272 WordType mask = lowBitMask (srcBits - n);
2273 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2274 << n % APINT_BITS_PER_WORD);
2275 } else if (n > srcBits) {
2276 if (srcBits % APINT_BITS_PER_WORD)
2277 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2280 /* Clear high parts. */
2281 while (dstParts < dstCount)
2282 dst[dstParts++] = 0;
2285 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2286 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2287 WordType c, unsigned parts) {
2288 assert(c <= 1);
2290 for (unsigned i = 0; i < parts; i++) {
2291 WordType l = dst[i];
2292 if (c) {
2293 dst[i] += rhs[i] + 1;
2294 c = (dst[i] <= l);
2295 } else {
2296 dst[i] += rhs[i];
2297 c = (dst[i] < l);
2301 return c;
2304 /// This function adds a single "word" integer, src, to the multiple
2305 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2306 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2307 /// @returns the carry of the addition.
2308 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2309 unsigned parts) {
2310 for (unsigned i = 0; i < parts; ++i) {
2311 dst[i] += src;
2312 if (dst[i] >= src)
2313 return 0; // No need to carry so exit early.
2314 src = 1; // Carry one to next digit.
2317 return 1;
2320 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2321 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2322 WordType c, unsigned parts) {
2323 assert(c <= 1);
2325 for (unsigned i = 0; i < parts; i++) {
2326 WordType l = dst[i];
2327 if (c) {
2328 dst[i] -= rhs[i] + 1;
2329 c = (dst[i] >= l);
2330 } else {
2331 dst[i] -= rhs[i];
2332 c = (dst[i] > l);
2336 return c;
2339 /// This function subtracts a single "word" (64-bit word), src, from
2340 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2341 /// no further borrowing is needed or it runs out of "words" in dst. The result
2342 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2343 /// exhausted. In other words, if src > dst then this function returns 1,
2344 /// otherwise 0.
2345 /// @returns the borrow out of the subtraction
2346 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2347 unsigned parts) {
2348 for (unsigned i = 0; i < parts; ++i) {
2349 WordType Dst = dst[i];
2350 dst[i] -= src;
2351 if (src <= Dst)
2352 return 0; // No need to borrow so exit early.
2353 src = 1; // We have to "borrow 1" from next "word"
2356 return 1;
2359 /* Negate a bignum in-place. */
2360 void APInt::tcNegate(WordType *dst, unsigned parts) {
2361 tcComplement(dst, parts);
2362 tcIncrement(dst, parts);
2365 /* DST += SRC * MULTIPLIER + CARRY if add is true
2366 DST = SRC * MULTIPLIER + CARRY if add is false
2368 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2369 they must start at the same point, i.e. DST == SRC.
2371 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2372 returned. Otherwise DST is filled with the least significant
2373 DSTPARTS parts of the result, and if all of the omitted higher
2374 parts were zero return zero, otherwise overflow occurred and
2375 return one. */
2376 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2377 WordType multiplier, WordType carry,
2378 unsigned srcParts, unsigned dstParts,
2379 bool add) {
2380 /* Otherwise our writes of DST kill our later reads of SRC. */
2381 assert(dst <= src || dst >= src + srcParts);
2382 assert(dstParts <= srcParts + 1);
2384 /* N loops; minimum of dstParts and srcParts. */
2385 unsigned n = std::min(dstParts, srcParts);
2387 for (unsigned i = 0; i < n; i++) {
2388 WordType low, mid, high, srcPart;
2390 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2392 This cannot overflow, because
2394 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2396 which is less than n^2. */
2398 srcPart = src[i];
2400 if (multiplier == 0 || srcPart == 0) {
2401 low = carry;
2402 high = 0;
2403 } else {
2404 low = lowHalf(srcPart) * lowHalf(multiplier);
2405 high = highHalf(srcPart) * highHalf(multiplier);
2407 mid = lowHalf(srcPart) * highHalf(multiplier);
2408 high += highHalf(mid);
2409 mid <<= APINT_BITS_PER_WORD / 2;
2410 if (low + mid < low)
2411 high++;
2412 low += mid;
2414 mid = highHalf(srcPart) * lowHalf(multiplier);
2415 high += highHalf(mid);
2416 mid <<= APINT_BITS_PER_WORD / 2;
2417 if (low + mid < low)
2418 high++;
2419 low += mid;
2421 /* Now add carry. */
2422 if (low + carry < low)
2423 high++;
2424 low += carry;
2427 if (add) {
2428 /* And now DST[i], and store the new low part there. */
2429 if (low + dst[i] < low)
2430 high++;
2431 dst[i] += low;
2432 } else
2433 dst[i] = low;
2435 carry = high;
2438 if (srcParts < dstParts) {
2439 /* Full multiplication, there is no overflow. */
2440 assert(srcParts + 1 == dstParts);
2441 dst[srcParts] = carry;
2442 return 0;
2445 /* We overflowed if there is carry. */
2446 if (carry)
2447 return 1;
2449 /* We would overflow if any significant unwritten parts would be
2450 non-zero. This is true if any remaining src parts are non-zero
2451 and the multiplier is non-zero. */
2452 if (multiplier)
2453 for (unsigned i = dstParts; i < srcParts; i++)
2454 if (src[i])
2455 return 1;
2457 /* We fitted in the narrow destination. */
2458 return 0;
2461 /* DST = LHS * RHS, where DST has the same width as the operands and
2462 is filled with the least significant parts of the result. Returns
2463 one if overflow occurred, otherwise zero. DST must be disjoint
2464 from both operands. */
2465 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2466 const WordType *rhs, unsigned parts) {
2467 assert(dst != lhs && dst != rhs);
2469 int overflow = 0;
2470 tcSet(dst, 0, parts);
2472 for (unsigned i = 0; i < parts; i++)
2473 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2474 parts - i, true);
2476 return overflow;
2479 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2480 /// operands. No overflow occurs. DST must be disjoint from both operands.
2481 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2482 const WordType *rhs, unsigned lhsParts,
2483 unsigned rhsParts) {
2484 /* Put the narrower number on the LHS for less loops below. */
2485 if (lhsParts > rhsParts)
2486 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2488 assert(dst != lhs && dst != rhs);
2490 tcSet(dst, 0, rhsParts);
2492 for (unsigned i = 0; i < lhsParts; i++)
2493 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2496 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2497 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2498 set REMAINDER to the remainder, return zero. i.e.
2500 OLD_LHS = RHS * LHS + REMAINDER
2502 SCRATCH is a bignum of the same size as the operands and result for
2503 use by the routine; its contents need not be initialized and are
2504 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2506 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2507 WordType *remainder, WordType *srhs,
2508 unsigned parts) {
2509 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2511 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2512 if (shiftCount == 0)
2513 return true;
2515 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2516 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2517 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2519 tcAssign(srhs, rhs, parts);
2520 tcShiftLeft(srhs, parts, shiftCount);
2521 tcAssign(remainder, lhs, parts);
2522 tcSet(lhs, 0, parts);
2524 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2525 the total. */
2526 for (;;) {
2527 int compare = tcCompare(remainder, srhs, parts);
2528 if (compare >= 0) {
2529 tcSubtract(remainder, srhs, 0, parts);
2530 lhs[n] |= mask;
2533 if (shiftCount == 0)
2534 break;
2535 shiftCount--;
2536 tcShiftRight(srhs, parts, 1);
2537 if ((mask >>= 1) == 0) {
2538 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2539 n--;
2543 return false;
2546 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2547 /// no restrictions on Count.
2548 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2549 // Don't bother performing a no-op shift.
2550 if (!Count)
2551 return;
2553 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2554 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2555 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2557 // Fastpath for moving by whole words.
2558 if (BitShift == 0) {
2559 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2560 } else {
2561 while (Words-- > WordShift) {
2562 Dst[Words] = Dst[Words - WordShift] << BitShift;
2563 if (Words > WordShift)
2564 Dst[Words] |=
2565 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2569 // Fill in the remainder with 0s.
2570 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2573 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2574 /// are no restrictions on Count.
2575 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2576 // Don't bother performing a no-op shift.
2577 if (!Count)
2578 return;
2580 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2581 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2582 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2584 unsigned WordsToMove = Words - WordShift;
2585 // Fastpath for moving by whole words.
2586 if (BitShift == 0) {
2587 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2588 } else {
2589 for (unsigned i = 0; i != WordsToMove; ++i) {
2590 Dst[i] = Dst[i + WordShift] >> BitShift;
2591 if (i + 1 != WordsToMove)
2592 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2596 // Fill in the remainder with 0s.
2597 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2600 /* Bitwise and of two bignums. */
2601 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2602 for (unsigned i = 0; i < parts; i++)
2603 dst[i] &= rhs[i];
2606 /* Bitwise inclusive or of two bignums. */
2607 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2608 for (unsigned i = 0; i < parts; i++)
2609 dst[i] |= rhs[i];
2612 /* Bitwise exclusive or of two bignums. */
2613 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2614 for (unsigned i = 0; i < parts; i++)
2615 dst[i] ^= rhs[i];
2618 /* Complement a bignum in-place. */
2619 void APInt::tcComplement(WordType *dst, unsigned parts) {
2620 for (unsigned i = 0; i < parts; i++)
2621 dst[i] = ~dst[i];
2624 /* Comparison (unsigned) of two bignums. */
2625 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2626 unsigned parts) {
2627 while (parts) {
2628 parts--;
2629 if (lhs[parts] != rhs[parts])
2630 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2633 return 0;
2636 /* Set the least significant BITS bits of a bignum, clear the
2637 rest. */
2638 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2639 unsigned bits) {
2640 unsigned i = 0;
2641 while (bits > APINT_BITS_PER_WORD) {
2642 dst[i++] = ~(WordType) 0;
2643 bits -= APINT_BITS_PER_WORD;
2646 if (bits)
2647 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2649 while (i < parts)
2650 dst[i++] = 0;
2653 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2654 APInt::Rounding RM) {
2655 // Currently udivrem always rounds down.
2656 switch (RM) {
2657 case APInt::Rounding::DOWN:
2658 case APInt::Rounding::TOWARD_ZERO:
2659 return A.udiv(B);
2660 case APInt::Rounding::UP: {
2661 APInt Quo, Rem;
2662 APInt::udivrem(A, B, Quo, Rem);
2663 if (Rem == 0)
2664 return Quo;
2665 return Quo + 1;
2668 llvm_unreachable("Unknown APInt::Rounding enum");
2671 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2672 APInt::Rounding RM) {
2673 switch (RM) {
2674 case APInt::Rounding::DOWN:
2675 case APInt::Rounding::UP: {
2676 APInt Quo, Rem;
2677 APInt::sdivrem(A, B, Quo, Rem);
2678 if (Rem == 0)
2679 return Quo;
2680 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2681 // We want to check whether the non-integer part of the mathematical value
2682 // is negative or not. If the non-integer part is negative, we need to round
2683 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2684 // already rounded down.
2685 if (RM == APInt::Rounding::DOWN) {
2686 if (Rem.isNegative() != B.isNegative())
2687 return Quo - 1;
2688 return Quo;
2690 if (Rem.isNegative() != B.isNegative())
2691 return Quo;
2692 return Quo + 1;
2694 // Currently sdiv rounds twards zero.
2695 case APInt::Rounding::TOWARD_ZERO:
2696 return A.sdiv(B);
2698 llvm_unreachable("Unknown APInt::Rounding enum");
2701 Optional<APInt>
2702 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2703 unsigned RangeWidth) {
2704 unsigned CoeffWidth = A.getBitWidth();
2705 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2706 assert(RangeWidth <= CoeffWidth &&
2707 "Value range width should be less than coefficient width");
2708 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2710 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2711 << "x + " << C << ", rw:" << RangeWidth << '\n');
2713 // Identify 0 as a (non)solution immediately.
2714 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2715 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2716 return APInt(CoeffWidth, 0);
2719 // The result of APInt arithmetic has the same bit width as the operands,
2720 // so it can actually lose high bits. A product of two n-bit integers needs
2721 // 2n-1 bits to represent the full value.
2722 // The operation done below (on quadratic coefficients) that can produce
2723 // the largest value is the evaluation of the equation during bisection,
2724 // which needs 3 times the bitwidth of the coefficient, so the total number
2725 // of required bits is 3n.
2727 // The purpose of this extension is to simulate the set Z of all integers,
2728 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2729 // and negative numbers (not so much in a modulo arithmetic). The method
2730 // used to solve the equation is based on the standard formula for real
2731 // numbers, and uses the concepts of "positive" and "negative" with their
2732 // usual meanings.
2733 CoeffWidth *= 3;
2734 A = A.sext(CoeffWidth);
2735 B = B.sext(CoeffWidth);
2736 C = C.sext(CoeffWidth);
2738 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2739 // the bit width has increased.
2740 if (A.isNegative()) {
2741 A.negate();
2742 B.negate();
2743 C.negate();
2746 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2747 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2748 // and R = 2^BitWidth.
2749 // Since we're trying not only to find exact solutions, but also values
2750 // that "wrap around", such a set will always have a solution, i.e. an x
2751 // that satisfies at least one of the equations, or such that |q(x)|
2752 // exceeds kR, while |q(x-1)| for the same k does not.
2754 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2755 // positive solution n (in the above sense), and also such that the n
2756 // will be the least among all solutions corresponding to k = 0, 1, ...
2757 // (more precisely, the least element in the set
2758 // { n(k) | k is such that a solution n(k) exists }).
2760 // Consider the parabola (over real numbers) that corresponds to the
2761 // quadratic equation. Since A > 0, the arms of the parabola will point
2762 // up. Picking different values of k will shift it up and down by R.
2764 // We want to shift the parabola in such a way as to reduce the problem
2765 // of solving q(x) = kR to solving shifted_q(x) = 0.
2766 // (The interesting solutions are the ceilings of the real number
2767 // solutions.)
2768 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2769 APInt TwoA = 2 * A;
2770 APInt SqrB = B * B;
2771 bool PickLow;
2773 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2774 assert(A.isStrictlyPositive());
2775 APInt T = V.abs().urem(A);
2776 if (T.isNullValue())
2777 return V;
2778 return V.isNegative() ? V+T : V+(A-T);
2781 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2782 // iff B is positive.
2783 if (B.isNonNegative()) {
2784 // If B >= 0, the vertex it at a negative location (or at 0), so in
2785 // order to have a non-negative solution we need to pick k that makes
2786 // C-kR negative. To satisfy all the requirements for the solution
2787 // that we are looking for, it needs to be closest to 0 of all k.
2788 C = C.srem(R);
2789 if (C.isStrictlyPositive())
2790 C -= R;
2791 // Pick the greater solution.
2792 PickLow = false;
2793 } else {
2794 // If B < 0, the vertex is at a positive location. For any solution
2795 // to exist, the discriminant must be non-negative. This means that
2796 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2797 // lower bound on values of k: kR >= C - B^2/4A.
2798 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2799 // Round LowkR up (towards +inf) to the nearest kR.
2800 LowkR = RoundUp(LowkR, R);
2802 // If there exists k meeting the condition above, and such that
2803 // C-kR > 0, there will be two positive real number solutions of
2804 // q(x) = kR. Out of all such values of k, pick the one that makes
2805 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2806 // In other words, find maximum k such that LowkR <= kR < C.
2807 if (C.sgt(LowkR)) {
2808 // If LowkR < C, then such a k is guaranteed to exist because
2809 // LowkR itself is a multiple of R.
2810 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2811 // Pick the smaller solution.
2812 PickLow = true;
2813 } else {
2814 // If C-kR < 0 for all potential k's, it means that one solution
2815 // will be negative, while the other will be positive. The positive
2816 // solution will shift towards 0 if the parabola is moved up.
2817 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2818 // to 0, or in other words, out of all parabolas that have solutions,
2819 // pick the one that is the farthest "up").
2820 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2821 C -= LowkR;
2822 // Pick the greater solution.
2823 PickLow = false;
2827 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2828 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2830 APInt D = SqrB - 4*A*C;
2831 assert(D.isNonNegative() && "Negative discriminant");
2832 APInt SQ = D.sqrt();
2834 APInt Q = SQ * SQ;
2835 bool InexactSQ = Q != D;
2836 // The calculated SQ may actually be greater than the exact (non-integer)
2837 // value. If that's the case, decremement SQ to get a value that is lower.
2838 if (Q.sgt(D))
2839 SQ -= 1;
2841 APInt X;
2842 APInt Rem;
2844 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2845 // When using the quadratic formula directly, the calculated low root
2846 // may be greater than the exact one, since we would be subtracting SQ.
2847 // To make sure that the calculated root is not greater than the exact
2848 // one, subtract SQ+1 when calculating the low root (for inexact value
2849 // of SQ).
2850 if (PickLow)
2851 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2852 else
2853 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2855 // The updated coefficients should be such that the (exact) solution is
2856 // positive. Since APInt division rounds towards 0, the calculated one
2857 // can be 0, but cannot be negative.
2858 assert(X.isNonNegative() && "Solution should be non-negative");
2860 if (!InexactSQ && Rem.isNullValue()) {
2861 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2862 return X;
2865 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2866 // The exact value of the square root of D should be between SQ and SQ+1.
2867 // This implies that the solution should be between that corresponding to
2868 // SQ (i.e. X) and that corresponding to SQ+1.
2870 // The calculated X cannot be greater than the exact (real) solution.
2871 // Actually it must be strictly less than the exact solution, while
2872 // X+1 will be greater than or equal to it.
2874 APInt VX = (A*X + B)*X + C;
2875 APInt VY = VX + TwoA*X + A + B;
2876 bool SignChange = VX.isNegative() != VY.isNegative() ||
2877 VX.isNullValue() != VY.isNullValue();
2878 // If the sign did not change between X and X+1, X is not a valid solution.
2879 // This could happen when the actual (exact) roots don't have an integer
2880 // between them, so they would both be contained between X and X+1.
2881 if (!SignChange) {
2882 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2883 return None;
2886 X += 1;
2887 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2888 return X;