[ARM] Rejig MVE load store tests. NFC
[llvm-core.git] / lib / Support / APInt.cpp
blob43173311cd8029caf08fa54d7f9bb8e3c144e8ca
1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
33 #define DEBUG_TYPE "apint"
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38 uint64_t *result = new uint64_t[numWords];
39 memset(result, 0, numWords * sizeof(uint64_t));
40 return result;
43 /// A utility function for allocating memory and checking for allocation
44 /// failure. The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46 return new uint64_t[numWords];
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51 unsigned r;
53 if (radix == 16 || radix == 36) {
54 r = cdigit - '0';
55 if (r <= 9)
56 return r;
58 r = cdigit - 'A';
59 if (r <= radix - 11U)
60 return r + 10;
62 r = cdigit - 'a';
63 if (r <= radix - 11U)
64 return r + 10;
66 radix = 10;
69 r = cdigit - '0';
70 if (r < radix)
71 return r;
73 return -1U;
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78 U.pVal = getClearedMemory(getNumWords());
79 U.pVal[0] = val;
80 if (isSigned && int64_t(val) < 0)
81 for (unsigned i = 1; i < getNumWords(); ++i)
82 U.pVal[i] = WORDTYPE_MAX;
83 clearUnusedBits();
86 void APInt::initSlowCase(const APInt& that) {
87 U.pVal = getMemory(getNumWords());
88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92 assert(BitWidth && "Bitwidth too small");
93 assert(bigVal.data() && "Null pointer detected!");
94 if (isSingleWord())
95 U.VAL = bigVal[0];
96 else {
97 // Get memory, cleared to 0
98 U.pVal = getClearedMemory(getNumWords());
99 // Calculate the number of words to copy
100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101 // Copy the words from bigVal to pVal
102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
104 // Make sure unused high bits are cleared
105 clearUnusedBits();
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109 : BitWidth(numBits) {
110 initFromArray(bigVal);
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114 : BitWidth(numBits) {
115 initFromArray(makeArrayRef(bigVal, numWords));
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119 : BitWidth(numbits) {
120 assert(BitWidth && "Bitwidth too small");
121 fromString(numbits, Str, radix);
124 void APInt::reallocate(unsigned NewBitWidth) {
125 // If the number of words is the same we can just change the width and stop.
126 if (getNumWords() == getNumWords(NewBitWidth)) {
127 BitWidth = NewBitWidth;
128 return;
131 // If we have an allocation, delete it.
132 if (!isSingleWord())
133 delete [] U.pVal;
135 // Update BitWidth.
136 BitWidth = NewBitWidth;
138 // If we are supposed to have an allocation, create it.
139 if (!isSingleWord())
140 U.pVal = getMemory(getNumWords());
143 void APInt::AssignSlowCase(const APInt& RHS) {
144 // Don't do anything for X = X
145 if (this == &RHS)
146 return;
148 // Adjust the bit width and handle allocations as necessary.
149 reallocate(RHS.getBitWidth());
151 // Copy the data.
152 if (isSingleWord())
153 U.VAL = RHS.U.VAL;
154 else
155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160 ID.AddInteger(BitWidth);
162 if (isSingleWord()) {
163 ID.AddInteger(U.VAL);
164 return;
167 unsigned NumWords = getNumWords();
168 for (unsigned i = 0; i < NumWords; ++i)
169 ID.AddInteger(U.pVal[i]);
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174 if (isSingleWord())
175 ++U.VAL;
176 else
177 tcIncrement(U.pVal, getNumWords());
178 return clearUnusedBits();
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183 if (isSingleWord())
184 --U.VAL;
185 else
186 tcDecrement(U.pVal, getNumWords());
187 return clearUnusedBits();
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195 if (isSingleWord())
196 U.VAL += RHS.U.VAL;
197 else
198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199 return clearUnusedBits();
202 APInt& APInt::operator+=(uint64_t RHS) {
203 if (isSingleWord())
204 U.VAL += RHS;
205 else
206 tcAddPart(U.pVal, RHS, getNumWords());
207 return clearUnusedBits();
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215 if (isSingleWord())
216 U.VAL -= RHS.U.VAL;
217 else
218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219 return clearUnusedBits();
222 APInt& APInt::operator-=(uint64_t RHS) {
223 if (isSingleWord())
224 U.VAL -= RHS;
225 else
226 tcSubtractPart(U.pVal, RHS, getNumWords());
227 return clearUnusedBits();
230 APInt APInt::operator*(const APInt& RHS) const {
231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232 if (isSingleWord())
233 return APInt(BitWidth, U.VAL * RHS.U.VAL);
235 APInt Result(getMemory(getNumWords()), getBitWidth());
237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
239 Result.clearUnusedBits();
240 return Result;
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244 tcAnd(U.pVal, RHS.U.pVal, getNumWords());
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248 tcOr(U.pVal, RHS.U.pVal, getNumWords());
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252 tcXor(U.pVal, RHS.U.pVal, getNumWords());
255 APInt& APInt::operator*=(const APInt& RHS) {
256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257 *this = *this * RHS;
258 return *this;
261 APInt& APInt::operator*=(uint64_t RHS) {
262 if (isSingleWord()) {
263 U.VAL *= RHS;
264 } else {
265 unsigned NumWords = getNumWords();
266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
268 return clearUnusedBits();
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
275 int APInt::compare(const APInt& RHS) const {
276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277 if (isSingleWord())
278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
283 int APInt::compareSigned(const APInt& RHS) const {
284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285 if (isSingleWord()) {
286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
291 bool lhsNeg = isNegative();
292 bool rhsNeg = RHS.isNegative();
294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295 if (lhsNeg != rhsNeg)
296 return lhsNeg ? -1 : 1;
298 // Otherwise we can just use an unsigned comparison, because even negative
299 // numbers compare correctly this way if both have the same signed-ness.
300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304 unsigned loWord = whichWord(loBit);
305 unsigned hiWord = whichWord(hiBit);
307 // Create an initial mask for the low word with zeros below loBit.
308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
310 // If hiBit is not aligned, we need a high mask.
311 unsigned hiShiftAmt = whichBit(hiBit);
312 if (hiShiftAmt != 0) {
313 // Create a high mask with zeros above hiBit.
314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315 // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316 // set the bits in hiWord.
317 if (hiWord == loWord)
318 loMask &= hiMask;
319 else
320 U.pVal[hiWord] |= hiMask;
322 // Apply the mask to the low word.
323 U.pVal[loWord] |= loMask;
325 // Fill any words between loWord and hiWord with all ones.
326 for (unsigned word = loWord + 1; word < hiWord; ++word)
327 U.pVal[word] = WORDTYPE_MAX;
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332 tcComplement(U.pVal, getNumWords());
333 clearUnusedBits();
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340 assert(bitPosition < BitWidth && "Out of the bit-width range!");
341 if ((*this)[bitPosition]) clearBit(bitPosition);
342 else setBit(bitPosition);
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346 unsigned subBitWidth = subBits.getBitWidth();
347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348 "Illegal bit insertion");
350 // Insertion is a direct copy.
351 if (subBitWidth == BitWidth) {
352 *this = subBits;
353 return;
356 // Single word result can be done as a direct bitmask.
357 if (isSingleWord()) {
358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359 U.VAL &= ~(mask << bitPosition);
360 U.VAL |= (subBits.U.VAL << bitPosition);
361 return;
364 unsigned loBit = whichBit(bitPosition);
365 unsigned loWord = whichWord(bitPosition);
366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
368 // Insertion within a single word can be done as a direct bitmask.
369 if (loWord == hi1Word) {
370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371 U.pVal[loWord] &= ~(mask << loBit);
372 U.pVal[loWord] |= (subBits.U.VAL << loBit);
373 return;
376 // Insert on word boundaries.
377 if (loBit == 0) {
378 // Direct copy whole words.
379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380 memcpy(U.pVal + loWord, subBits.getRawData(),
381 numWholeSubWords * APINT_WORD_SIZE);
383 // Mask+insert remaining bits.
384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385 if (remainingBits != 0) {
386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387 U.pVal[hi1Word] &= ~mask;
388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
390 return;
393 // General case - set/clear individual bits in dst based on src.
394 // TODO - there is scope for optimization here, but at the moment this code
395 // path is barely used so prefer readability over performance.
396 for (unsigned i = 0; i != subBitWidth; ++i) {
397 if (subBits[i])
398 setBit(bitPosition + i);
399 else
400 clearBit(bitPosition + i);
404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405 assert(numBits > 0 && "Can't extract zero bits");
406 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407 "Illegal bit extraction");
409 if (isSingleWord())
410 return APInt(numBits, U.VAL >> bitPosition);
412 unsigned loBit = whichBit(bitPosition);
413 unsigned loWord = whichWord(bitPosition);
414 unsigned hiWord = whichWord(bitPosition + numBits - 1);
416 // Single word result extracting bits from a single word source.
417 if (loWord == hiWord)
418 return APInt(numBits, U.pVal[loWord] >> loBit);
420 // Extracting bits that start on a source word boundary can be done
421 // as a fast memory copy.
422 if (loBit == 0)
423 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
425 // General case - shift + copy source words directly into place.
426 APInt Result(numBits, 0);
427 unsigned NumSrcWords = getNumWords();
428 unsigned NumDstWords = Result.getNumWords();
430 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431 for (unsigned word = 0; word < NumDstWords; ++word) {
432 uint64_t w0 = U.pVal[loWord + word];
433 uint64_t w1 =
434 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
438 return Result.clearUnusedBits();
441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442 assert(!str.empty() && "Invalid string length");
443 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444 radix == 36) &&
445 "Radix should be 2, 8, 10, 16, or 36!");
447 size_t slen = str.size();
449 // Each computation below needs to know if it's negative.
450 StringRef::iterator p = str.begin();
451 unsigned isNegative = *p == '-';
452 if (*p == '-' || *p == '+') {
453 p++;
454 slen--;
455 assert(slen && "String is only a sign, needs a value.");
458 // For radixes of power-of-two values, the bits required is accurately and
459 // easily computed
460 if (radix == 2)
461 return slen + isNegative;
462 if (radix == 8)
463 return slen * 3 + isNegative;
464 if (radix == 16)
465 return slen * 4 + isNegative;
467 // FIXME: base 36
469 // This is grossly inefficient but accurate. We could probably do something
470 // with a computation of roughly slen*64/20 and then adjust by the value of
471 // the first few digits. But, I'm not sure how accurate that could be.
473 // Compute a sufficient number of bits that is always large enough but might
474 // be too large. This avoids the assertion in the constructor. This
475 // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476 // bits in that case.
477 unsigned sufficient
478 = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479 : (slen == 1 ? 7 : slen * 16/3);
481 // Convert to the actual binary value.
482 APInt tmp(sufficient, StringRef(p, slen), radix);
484 // Compute how many bits are required. If the log is infinite, assume we need
485 // just bit. If the log is exact and value is negative, then the value is
486 // MinSignedValue with (log + 1) bits.
487 unsigned log = tmp.logBase2();
488 if (log == (unsigned)-1) {
489 return isNegative + 1;
490 } else if (isNegative && tmp.isPowerOf2()) {
491 return isNegative + log;
492 } else {
493 return isNegative + log + 1;
497 hash_code llvm::hash_value(const APInt &Arg) {
498 if (Arg.isSingleWord())
499 return hash_combine(Arg.U.VAL);
501 return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
504 bool APInt::isSplat(unsigned SplatSizeInBits) const {
505 assert(getBitWidth() % SplatSizeInBits == 0 &&
506 "SplatSizeInBits must divide width!");
507 // We can check that all parts of an integer are equal by making use of a
508 // little trick: rotate and check if it's still the same value.
509 return *this == rotl(SplatSizeInBits);
512 /// This function returns the high "numBits" bits of this APInt.
513 APInt APInt::getHiBits(unsigned numBits) const {
514 return this->lshr(BitWidth - numBits);
517 /// This function returns the low "numBits" bits of this APInt.
518 APInt APInt::getLoBits(unsigned numBits) const {
519 APInt Result(getLowBitsSet(BitWidth, numBits));
520 Result &= *this;
521 return Result;
524 /// Return a value containing V broadcasted over NewLen bits.
525 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
526 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
528 APInt Val = V.zextOrSelf(NewLen);
529 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
530 Val |= Val << I;
532 return Val;
535 unsigned APInt::countLeadingZerosSlowCase() const {
536 unsigned Count = 0;
537 for (int i = getNumWords()-1; i >= 0; --i) {
538 uint64_t V = U.pVal[i];
539 if (V == 0)
540 Count += APINT_BITS_PER_WORD;
541 else {
542 Count += llvm::countLeadingZeros(V);
543 break;
546 // Adjust for unused bits in the most significant word (they are zero).
547 unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
548 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
549 return Count;
552 unsigned APInt::countLeadingOnesSlowCase() const {
553 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554 unsigned shift;
555 if (!highWordBits) {
556 highWordBits = APINT_BITS_PER_WORD;
557 shift = 0;
558 } else {
559 shift = APINT_BITS_PER_WORD - highWordBits;
561 int i = getNumWords() - 1;
562 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
563 if (Count == highWordBits) {
564 for (i--; i >= 0; --i) {
565 if (U.pVal[i] == WORDTYPE_MAX)
566 Count += APINT_BITS_PER_WORD;
567 else {
568 Count += llvm::countLeadingOnes(U.pVal[i]);
569 break;
573 return Count;
576 unsigned APInt::countTrailingZerosSlowCase() const {
577 unsigned Count = 0;
578 unsigned i = 0;
579 for (; i < getNumWords() && U.pVal[i] == 0; ++i)
580 Count += APINT_BITS_PER_WORD;
581 if (i < getNumWords())
582 Count += llvm::countTrailingZeros(U.pVal[i]);
583 return std::min(Count, BitWidth);
586 unsigned APInt::countTrailingOnesSlowCase() const {
587 unsigned Count = 0;
588 unsigned i = 0;
589 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
590 Count += APINT_BITS_PER_WORD;
591 if (i < getNumWords())
592 Count += llvm::countTrailingOnes(U.pVal[i]);
593 assert(Count <= BitWidth);
594 return Count;
597 unsigned APInt::countPopulationSlowCase() const {
598 unsigned Count = 0;
599 for (unsigned i = 0; i < getNumWords(); ++i)
600 Count += llvm::countPopulation(U.pVal[i]);
601 return Count;
604 bool APInt::intersectsSlowCase(const APInt &RHS) const {
605 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
606 if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
607 return true;
609 return false;
612 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
613 for (unsigned i = 0, e = getNumWords(); i != e; ++i)
614 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
615 return false;
617 return true;
620 APInt APInt::byteSwap() const {
621 assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
622 if (BitWidth == 16)
623 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
624 if (BitWidth == 32)
625 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
626 if (BitWidth == 48) {
627 unsigned Tmp1 = unsigned(U.VAL >> 16);
628 Tmp1 = ByteSwap_32(Tmp1);
629 uint16_t Tmp2 = uint16_t(U.VAL);
630 Tmp2 = ByteSwap_16(Tmp2);
631 return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
633 if (BitWidth == 64)
634 return APInt(BitWidth, ByteSwap_64(U.VAL));
636 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
637 for (unsigned I = 0, N = getNumWords(); I != N; ++I)
638 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
639 if (Result.BitWidth != BitWidth) {
640 Result.lshrInPlace(Result.BitWidth - BitWidth);
641 Result.BitWidth = BitWidth;
643 return Result;
646 APInt APInt::reverseBits() const {
647 switch (BitWidth) {
648 case 64:
649 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
650 case 32:
651 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
652 case 16:
653 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
654 case 8:
655 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
656 default:
657 break;
660 APInt Val(*this);
661 APInt Reversed(BitWidth, 0);
662 unsigned S = BitWidth;
664 for (; Val != 0; Val.lshrInPlace(1)) {
665 Reversed <<= 1;
666 Reversed |= Val[0];
667 --S;
670 Reversed <<= S;
671 return Reversed;
674 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
675 // Fast-path a common case.
676 if (A == B) return A;
678 // Corner cases: if either operand is zero, the other is the gcd.
679 if (!A) return B;
680 if (!B) return A;
682 // Count common powers of 2 and remove all other powers of 2.
683 unsigned Pow2;
685 unsigned Pow2_A = A.countTrailingZeros();
686 unsigned Pow2_B = B.countTrailingZeros();
687 if (Pow2_A > Pow2_B) {
688 A.lshrInPlace(Pow2_A - Pow2_B);
689 Pow2 = Pow2_B;
690 } else if (Pow2_B > Pow2_A) {
691 B.lshrInPlace(Pow2_B - Pow2_A);
692 Pow2 = Pow2_A;
693 } else {
694 Pow2 = Pow2_A;
698 // Both operands are odd multiples of 2^Pow_2:
700 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
702 // This is a modified version of Stein's algorithm, taking advantage of
703 // efficient countTrailingZeros().
704 while (A != B) {
705 if (A.ugt(B)) {
706 A -= B;
707 A.lshrInPlace(A.countTrailingZeros() - Pow2);
708 } else {
709 B -= A;
710 B.lshrInPlace(B.countTrailingZeros() - Pow2);
714 return A;
717 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
718 uint64_t I = bit_cast<uint64_t>(Double);
720 // Get the sign bit from the highest order bit
721 bool isNeg = I >> 63;
723 // Get the 11-bit exponent and adjust for the 1023 bit bias
724 int64_t exp = ((I >> 52) & 0x7ff) - 1023;
726 // If the exponent is negative, the value is < 0 so just return 0.
727 if (exp < 0)
728 return APInt(width, 0u);
730 // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
733 // If the exponent doesn't shift all bits out of the mantissa
734 if (exp < 52)
735 return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
736 APInt(width, mantissa >> (52 - exp));
738 // If the client didn't provide enough bits for us to shift the mantissa into
739 // then the result is undefined, just return 0
740 if (width <= exp - 52)
741 return APInt(width, 0);
743 // Otherwise, we have to shift the mantissa bits up to the right location
744 APInt Tmp(width, mantissa);
745 Tmp <<= (unsigned)exp - 52;
746 return isNeg ? -Tmp : Tmp;
749 /// This function converts this APInt to a double.
750 /// The layout for double is as following (IEEE Standard 754):
751 /// --------------------------------------
752 /// | Sign Exponent Fraction Bias |
753 /// |-------------------------------------- |
754 /// | 1[63] 11[62-52] 52[51-00] 1023 |
755 /// --------------------------------------
756 double APInt::roundToDouble(bool isSigned) const {
758 // Handle the simple case where the value is contained in one uint64_t.
759 // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
761 if (isSigned) {
762 int64_t sext = SignExtend64(getWord(0), BitWidth);
763 return double(sext);
764 } else
765 return double(getWord(0));
768 // Determine if the value is negative.
769 bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
771 // Construct the absolute value if we're negative.
772 APInt Tmp(isNeg ? -(*this) : (*this));
774 // Figure out how many bits we're using.
775 unsigned n = Tmp.getActiveBits();
777 // The exponent (without bias normalization) is just the number of bits
778 // we are using. Note that the sign bit is gone since we constructed the
779 // absolute value.
780 uint64_t exp = n;
782 // Return infinity for exponent overflow
783 if (exp > 1023) {
784 if (!isSigned || !isNeg)
785 return std::numeric_limits<double>::infinity();
786 else
787 return -std::numeric_limits<double>::infinity();
789 exp += 1023; // Increment for 1023 bias
791 // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792 // extract the high 52 bits from the correct words in pVal.
793 uint64_t mantissa;
794 unsigned hiWord = whichWord(n-1);
795 if (hiWord == 0) {
796 mantissa = Tmp.U.pVal[0];
797 if (n > 52)
798 mantissa >>= n - 52; // shift down, we want the top 52 bits.
799 } else {
800 assert(hiWord > 0 && "huh?");
801 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803 mantissa = hibits | lobits;
806 // The leading bit of mantissa is implicit, so get rid of it.
807 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808 uint64_t I = sign | (exp << 52) | mantissa;
809 return bit_cast<double>(I);
812 // Truncate to new width.
813 APInt APInt::trunc(unsigned width) const {
814 assert(width < BitWidth && "Invalid APInt Truncate request");
815 assert(width && "Can't truncate to 0 bits");
817 if (width <= APINT_BITS_PER_WORD)
818 return APInt(width, getRawData()[0]);
820 APInt Result(getMemory(getNumWords(width)), width);
822 // Copy full words.
823 unsigned i;
824 for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
825 Result.U.pVal[i] = U.pVal[i];
827 // Truncate and copy any partial word.
828 unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
829 if (bits != 0)
830 Result.U.pVal[i] = U.pVal[i] << bits >> bits;
832 return Result;
835 // Sign extend to a new width.
836 APInt APInt::sext(unsigned Width) const {
837 assert(Width > BitWidth && "Invalid APInt SignExtend request");
839 if (Width <= APINT_BITS_PER_WORD)
840 return APInt(Width, SignExtend64(U.VAL, BitWidth));
842 APInt Result(getMemory(getNumWords(Width)), Width);
844 // Copy words.
845 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
847 // Sign extend the last word since there may be unused bits in the input.
848 Result.U.pVal[getNumWords() - 1] =
849 SignExtend64(Result.U.pVal[getNumWords() - 1],
850 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
852 // Fill with sign bits.
853 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
854 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
855 Result.clearUnusedBits();
856 return Result;
859 // Zero extend to a new width.
860 APInt APInt::zext(unsigned width) const {
861 assert(width > BitWidth && "Invalid APInt ZeroExtend request");
863 if (width <= APINT_BITS_PER_WORD)
864 return APInt(width, U.VAL);
866 APInt Result(getMemory(getNumWords(width)), width);
868 // Copy words.
869 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
871 // Zero remaining words.
872 std::memset(Result.U.pVal + getNumWords(), 0,
873 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
875 return Result;
878 APInt APInt::zextOrTrunc(unsigned width) const {
879 if (BitWidth < width)
880 return zext(width);
881 if (BitWidth > width)
882 return trunc(width);
883 return *this;
886 APInt APInt::sextOrTrunc(unsigned width) const {
887 if (BitWidth < width)
888 return sext(width);
889 if (BitWidth > width)
890 return trunc(width);
891 return *this;
894 APInt APInt::zextOrSelf(unsigned width) const {
895 if (BitWidth < width)
896 return zext(width);
897 return *this;
900 APInt APInt::sextOrSelf(unsigned width) const {
901 if (BitWidth < width)
902 return sext(width);
903 return *this;
906 /// Arithmetic right-shift this APInt by shiftAmt.
907 /// Arithmetic right-shift function.
908 void APInt::ashrInPlace(const APInt &shiftAmt) {
909 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
912 /// Arithmetic right-shift this APInt by shiftAmt.
913 /// Arithmetic right-shift function.
914 void APInt::ashrSlowCase(unsigned ShiftAmt) {
915 // Don't bother performing a no-op shift.
916 if (!ShiftAmt)
917 return;
919 // Save the original sign bit for later.
920 bool Negative = isNegative();
922 // WordShift is the inter-part shift; BitShift is intra-part shift.
923 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
924 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
926 unsigned WordsToMove = getNumWords() - WordShift;
927 if (WordsToMove != 0) {
928 // Sign extend the last word to fill in the unused bits.
929 U.pVal[getNumWords() - 1] = SignExtend64(
930 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
932 // Fastpath for moving by whole words.
933 if (BitShift == 0) {
934 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
935 } else {
936 // Move the words containing significant bits.
937 for (unsigned i = 0; i != WordsToMove - 1; ++i)
938 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
939 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
941 // Handle the last word which has no high bits to copy.
942 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
943 // Sign extend one more time.
944 U.pVal[WordsToMove - 1] =
945 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
949 // Fill in the remainder based on the original sign.
950 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
951 WordShift * APINT_WORD_SIZE);
952 clearUnusedBits();
955 /// Logical right-shift this APInt by shiftAmt.
956 /// Logical right-shift function.
957 void APInt::lshrInPlace(const APInt &shiftAmt) {
958 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
961 /// Logical right-shift this APInt by shiftAmt.
962 /// Logical right-shift function.
963 void APInt::lshrSlowCase(unsigned ShiftAmt) {
964 tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
967 /// Left-shift this APInt by shiftAmt.
968 /// Left-shift function.
969 APInt &APInt::operator<<=(const APInt &shiftAmt) {
970 // It's undefined behavior in C to shift by BitWidth or greater.
971 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
972 return *this;
975 void APInt::shlSlowCase(unsigned ShiftAmt) {
976 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
977 clearUnusedBits();
980 // Calculate the rotate amount modulo the bit width.
981 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
982 unsigned rotBitWidth = rotateAmt.getBitWidth();
983 APInt rot = rotateAmt;
984 if (rotBitWidth < BitWidth) {
985 // Extend the rotate APInt, so that the urem doesn't divide by 0.
986 // e.g. APInt(1, 32) would give APInt(1, 0).
987 rot = rotateAmt.zext(BitWidth);
989 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
990 return rot.getLimitedValue(BitWidth);
993 APInt APInt::rotl(const APInt &rotateAmt) const {
994 return rotl(rotateModulo(BitWidth, rotateAmt));
997 APInt APInt::rotl(unsigned rotateAmt) const {
998 rotateAmt %= BitWidth;
999 if (rotateAmt == 0)
1000 return *this;
1001 return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1004 APInt APInt::rotr(const APInt &rotateAmt) const {
1005 return rotr(rotateModulo(BitWidth, rotateAmt));
1008 APInt APInt::rotr(unsigned rotateAmt) const {
1009 rotateAmt %= BitWidth;
1010 if (rotateAmt == 0)
1011 return *this;
1012 return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1015 // Square Root - this method computes and returns the square root of "this".
1016 // Three mechanisms are used for computation. For small values (<= 5 bits),
1017 // a table lookup is done. This gets some performance for common cases. For
1018 // values using less than 52 bits, the value is converted to double and then
1019 // the libc sqrt function is called. The result is rounded and then converted
1020 // back to a uint64_t which is then used to construct the result. Finally,
1021 // the Babylonian method for computing square roots is used.
1022 APInt APInt::sqrt() const {
1024 // Determine the magnitude of the value.
1025 unsigned magnitude = getActiveBits();
1027 // Use a fast table for some small values. This also gets rid of some
1028 // rounding errors in libc sqrt for small values.
1029 if (magnitude <= 5) {
1030 static const uint8_t results[32] = {
1031 /* 0 */ 0,
1032 /* 1- 2 */ 1, 1,
1033 /* 3- 6 */ 2, 2, 2, 2,
1034 /* 7-12 */ 3, 3, 3, 3, 3, 3,
1035 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037 /* 31 */ 6
1039 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1042 // If the magnitude of the value fits in less than 52 bits (the precision of
1043 // an IEEE double precision floating point value), then we can use the
1044 // libc sqrt function which will probably use a hardware sqrt computation.
1045 // This should be faster than the algorithm below.
1046 if (magnitude < 52) {
1047 return APInt(BitWidth,
1048 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1049 : U.pVal[0])))));
1052 // Okay, all the short cuts are exhausted. We must compute it. The following
1053 // is a classical Babylonian method for computing the square root. This code
1054 // was adapted to APInt from a wikipedia article on such computations.
1055 // See http://www.wikipedia.org/ and go to the page named
1056 // Calculate_an_integer_square_root.
1057 unsigned nbits = BitWidth, i = 4;
1058 APInt testy(BitWidth, 16);
1059 APInt x_old(BitWidth, 1);
1060 APInt x_new(BitWidth, 0);
1061 APInt two(BitWidth, 2);
1063 // Select a good starting value using binary logarithms.
1064 for (;; i += 2, testy = testy.shl(2))
1065 if (i >= nbits || this->ule(testy)) {
1066 x_old = x_old.shl(i / 2);
1067 break;
1070 // Use the Babylonian method to arrive at the integer square root:
1071 for (;;) {
1072 x_new = (this->udiv(x_old) + x_old).udiv(two);
1073 if (x_old.ule(x_new))
1074 break;
1075 x_old = x_new;
1078 // Make sure we return the closest approximation
1079 // NOTE: The rounding calculation below is correct. It will produce an
1080 // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081 // determined to be a rounding issue with pari/gp as it begins to use a
1082 // floating point representation after 192 bits. There are no discrepancies
1083 // between this algorithm and pari/gp for bit widths < 192 bits.
1084 APInt square(x_old * x_old);
1085 APInt nextSquare((x_old + 1) * (x_old +1));
1086 if (this->ult(square))
1087 return x_old;
1088 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089 APInt midpoint((nextSquare - square).udiv(two));
1090 APInt offset(*this - square);
1091 if (offset.ult(midpoint))
1092 return x_old;
1093 return x_old + 1;
1096 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1097 /// iterative extended Euclidean algorithm is used to solve for this value,
1098 /// however we simplify it to speed up calculating only the inverse, and take
1099 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1100 /// (potentially large) APInts around.
1101 /// WARNING: a value of '0' may be returned,
1102 /// signifying that no multiplicative inverse exists!
1103 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104 assert(ult(modulo) && "This APInt must be smaller than the modulo");
1106 // Using the properties listed at the following web page (accessed 06/21/08):
1107 // http://www.numbertheory.org/php/euclid.html
1108 // (especially the properties numbered 3, 4 and 9) it can be proved that
1109 // BitWidth bits suffice for all the computations in the algorithm implemented
1110 // below. More precisely, this number of bits suffice if the multiplicative
1111 // inverse exists, but may not suffice for the general extended Euclidean
1112 // algorithm.
1114 APInt r[2] = { modulo, *this };
1115 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116 APInt q(BitWidth, 0);
1118 unsigned i;
1119 for (i = 0; r[i^1] != 0; i ^= 1) {
1120 // An overview of the math without the confusing bit-flipping:
1121 // q = r[i-2] / r[i-1]
1122 // r[i] = r[i-2] % r[i-1]
1123 // t[i] = t[i-2] - t[i-1] * q
1124 udivrem(r[i], r[i^1], q, r[i]);
1125 t[i] -= t[i^1] * q;
1128 // If this APInt and the modulo are not coprime, there is no multiplicative
1129 // inverse, so return 0. We check this by looking at the next-to-last
1130 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131 // algorithm.
1132 if (r[i] != 1)
1133 return APInt(BitWidth, 0);
1135 // The next-to-last t is the multiplicative inverse. However, we are
1136 // interested in a positive inverse. Calculate a positive one from a negative
1137 // one if necessary. A simple addition of the modulo suffices because
1138 // abs(t[i]) is known to be less than *this/2 (see the link above).
1139 if (t[i].isNegative())
1140 t[i] += modulo;
1142 return std::move(t[i]);
1145 /// Calculate the magic numbers required to implement a signed integer division
1146 /// by a constant as a sequence of multiplies, adds and shifts. Requires that
1147 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
1148 /// Warren, Jr., chapter 10.
1149 APInt::ms APInt::magic() const {
1150 const APInt& d = *this;
1151 unsigned p;
1152 APInt ad, anc, delta, q1, r1, q2, r2, t;
1153 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154 struct ms mag;
1156 ad = d.abs();
1157 t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158 anc = t - 1 - t.urem(ad); // absolute value of nc
1159 p = d.getBitWidth() - 1; // initialize p
1160 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
1161 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
1162 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
1163 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
1164 do {
1165 p = p + 1;
1166 q1 = q1<<1; // update q1 = 2p/abs(nc)
1167 r1 = r1<<1; // update r1 = rem(2p/abs(nc))
1168 if (r1.uge(anc)) { // must be unsigned comparison
1169 q1 = q1 + 1;
1170 r1 = r1 - anc;
1172 q2 = q2<<1; // update q2 = 2p/abs(d)
1173 r2 = r2<<1; // update r2 = rem(2p/abs(d))
1174 if (r2.uge(ad)) { // must be unsigned comparison
1175 q2 = q2 + 1;
1176 r2 = r2 - ad;
1178 delta = ad - r2;
1179 } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1181 mag.m = q2 + 1;
1182 if (d.isNegative()) mag.m = -mag.m; // resulting magic number
1183 mag.s = p - d.getBitWidth(); // resulting shift
1184 return mag;
1187 /// Calculate the magic numbers required to implement an unsigned integer
1188 /// division by a constant as a sequence of multiplies, adds and shifts.
1189 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
1190 /// S. Warren, Jr., chapter 10.
1191 /// LeadingZeros can be used to simplify the calculation if the upper bits
1192 /// of the divided value are known zero.
1193 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194 const APInt& d = *this;
1195 unsigned p;
1196 APInt nc, delta, q1, r1, q2, r2;
1197 struct mu magu;
1198 magu.a = 0; // initialize "add" indicator
1199 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1203 nc = allOnes - (allOnes - d).urem(d);
1204 p = d.getBitWidth() - 1; // initialize p
1205 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
1206 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
1207 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
1208 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
1209 do {
1210 p = p + 1;
1211 if (r1.uge(nc - r1)) {
1212 q1 = q1 + q1 + 1; // update q1
1213 r1 = r1 + r1 - nc; // update r1
1215 else {
1216 q1 = q1+q1; // update q1
1217 r1 = r1+r1; // update r1
1219 if ((r2 + 1).uge(d - r2)) {
1220 if (q2.uge(signedMax)) magu.a = 1;
1221 q2 = q2+q2 + 1; // update q2
1222 r2 = r2+r2 + 1 - d; // update r2
1224 else {
1225 if (q2.uge(signedMin)) magu.a = 1;
1226 q2 = q2+q2; // update q2
1227 r2 = r2+r2 + 1; // update r2
1229 delta = d - 1 - r2;
1230 } while (p < d.getBitWidth()*2 &&
1231 (q1.ult(delta) || (q1 == delta && r1 == 0)));
1232 magu.m = q2 + 1; // resulting magic number
1233 magu.s = p - d.getBitWidth(); // resulting shift
1234 return magu;
1237 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239 /// variables here have the same names as in the algorithm. Comments explain
1240 /// the algorithm and any deviation from it.
1241 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242 unsigned m, unsigned n) {
1243 assert(u && "Must provide dividend");
1244 assert(v && "Must provide divisor");
1245 assert(q && "Must provide quotient");
1246 assert(u != v && u != q && v != q && "Must use different memory");
1247 assert(n>1 && "n must be > 1");
1249 // b denotes the base of the number system. In our case b is 2^32.
1250 const uint64_t b = uint64_t(1) << 32;
1252 // The DEBUG macros here tend to be spam in the debug output if you're not
1253 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254 #ifdef KNUTH_DEBUG
1255 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256 #else
1257 #define DEBUG_KNUTH(X) do {} while(false)
1258 #endif
1260 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263 DEBUG_KNUTH(dbgs() << " by");
1264 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265 DEBUG_KNUTH(dbgs() << '\n');
1266 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267 // u and v by d. Note that we have taken Knuth's advice here to use a power
1268 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269 // 2 allows us to shift instead of multiply and it is easy to determine the
1270 // shift amount from the leading zeros. We are basically normalizing the u
1271 // and v so that its high bits are shifted to the top of v's range without
1272 // overflow. Note that this can require an extra word in u so that u must
1273 // be of length m+n+1.
1274 unsigned shift = countLeadingZeros(v[n-1]);
1275 uint32_t v_carry = 0;
1276 uint32_t u_carry = 0;
1277 if (shift) {
1278 for (unsigned i = 0; i < m+n; ++i) {
1279 uint32_t u_tmp = u[i] >> (32 - shift);
1280 u[i] = (u[i] << shift) | u_carry;
1281 u_carry = u_tmp;
1283 for (unsigned i = 0; i < n; ++i) {
1284 uint32_t v_tmp = v[i] >> (32 - shift);
1285 v[i] = (v[i] << shift) | v_carry;
1286 v_carry = v_tmp;
1289 u[m+n] = u_carry;
1291 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:");
1292 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293 DEBUG_KNUTH(dbgs() << " by");
1294 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295 DEBUG_KNUTH(dbgs() << '\n');
1297 // D2. [Initialize j.] Set j to m. This is the loop counter over the places.
1298 int j = m;
1299 do {
1300 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301 // D3. [Calculate q'.].
1302 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306 // on v[n-2] determines at high speed most of the cases in which the trial
1307 // value qp is one too large, and it eliminates all cases where qp is two
1308 // too large.
1309 uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311 uint64_t qp = dividend / v[n-1];
1312 uint64_t rp = dividend % v[n-1];
1313 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1314 qp--;
1315 rp += v[n-1];
1316 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1317 qp--;
1319 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1321 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323 // consists of a simple multiplication by a one-place number, combined with
1324 // a subtraction.
1325 // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326 // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327 // true value plus b**(n+1), namely as the b's complement of
1328 // the true value, and a "borrow" to the left should be remembered.
1329 int64_t borrow = 0;
1330 for (unsigned i = 0; i < n; ++i) {
1331 uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333 u[j+i] = Lo_32(subres);
1334 borrow = Hi_32(p) - Hi_32(subres);
1335 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336 << ", borrow = " << borrow << '\n');
1338 bool isNeg = u[j+n] < borrow;
1339 u[j+n] -= Lo_32(borrow);
1341 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343 DEBUG_KNUTH(dbgs() << '\n');
1345 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346 // negative, go to step D6; otherwise go on to step D7.
1347 q[j] = Lo_32(qp);
1348 if (isNeg) {
1349 // D6. [Add back]. The probability that this step is necessary is very
1350 // small, on the order of only 2/b. Make sure that test data accounts for
1351 // this possibility. Decrease q[j] by 1
1352 q[j]--;
1353 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354 // A carry will occur to the left of u[j+n], and it should be ignored
1355 // since it cancels with the borrow that occurred in D4.
1356 bool carry = false;
1357 for (unsigned i = 0; i < n; i++) {
1358 uint32_t limit = std::min(u[j+i],v[i]);
1359 u[j+i] += v[i] + carry;
1360 carry = u[j+i] < limit || (carry && u[j+i] == limit);
1362 u[j+n] += carry;
1364 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1368 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3.
1369 } while (--j >= 0);
1371 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373 DEBUG_KNUTH(dbgs() << '\n');
1375 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376 // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377 // compute the remainder (urem uses this).
1378 if (r) {
1379 // The value d is expressed by the "shift" value above since we avoided
1380 // multiplication by d by using a shift left. So, all we have to do is
1381 // shift right here.
1382 if (shift) {
1383 uint32_t carry = 0;
1384 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385 for (int i = n-1; i >= 0; i--) {
1386 r[i] = (u[i] >> shift) | carry;
1387 carry = u[i] << (32 - shift);
1388 DEBUG_KNUTH(dbgs() << " " << r[i]);
1390 } else {
1391 for (int i = n-1; i >= 0; i--) {
1392 r[i] = u[i];
1393 DEBUG_KNUTH(dbgs() << " " << r[i]);
1396 DEBUG_KNUTH(dbgs() << '\n');
1398 DEBUG_KNUTH(dbgs() << '\n');
1401 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402 unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403 assert(lhsWords >= rhsWords && "Fractional result");
1405 // First, compose the values into an array of 32-bit words instead of
1406 // 64-bit words. This is a necessity of both the "short division" algorithm
1407 // and the Knuth "classical algorithm" which requires there to be native
1408 // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409 // can't use 64-bit operands here because we don't have native results of
1410 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411 // work on large-endian machines.
1412 unsigned n = rhsWords * 2;
1413 unsigned m = (lhsWords * 2) - n;
1415 // Allocate space for the temporary values we need either on the stack, if
1416 // it will fit, or on the heap if it won't.
1417 uint32_t SPACE[128];
1418 uint32_t *U = nullptr;
1419 uint32_t *V = nullptr;
1420 uint32_t *Q = nullptr;
1421 uint32_t *R = nullptr;
1422 if ((Remainder?4:3)*n+2*m+1 <= 128) {
1423 U = &SPACE[0];
1424 V = &SPACE[m+n+1];
1425 Q = &SPACE[(m+n+1) + n];
1426 if (Remainder)
1427 R = &SPACE[(m+n+1) + n + (m+n)];
1428 } else {
1429 U = new uint32_t[m + n + 1];
1430 V = new uint32_t[n];
1431 Q = new uint32_t[m+n];
1432 if (Remainder)
1433 R = new uint32_t[n];
1436 // Initialize the dividend
1437 memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438 for (unsigned i = 0; i < lhsWords; ++i) {
1439 uint64_t tmp = LHS[i];
1440 U[i * 2] = Lo_32(tmp);
1441 U[i * 2 + 1] = Hi_32(tmp);
1443 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1445 // Initialize the divisor
1446 memset(V, 0, (n)*sizeof(uint32_t));
1447 for (unsigned i = 0; i < rhsWords; ++i) {
1448 uint64_t tmp = RHS[i];
1449 V[i * 2] = Lo_32(tmp);
1450 V[i * 2 + 1] = Hi_32(tmp);
1453 // initialize the quotient and remainder
1454 memset(Q, 0, (m+n) * sizeof(uint32_t));
1455 if (Remainder)
1456 memset(R, 0, n * sizeof(uint32_t));
1458 // Now, adjust m and n for the Knuth division. n is the number of words in
1459 // the divisor. m is the number of words by which the dividend exceeds the
1460 // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461 // contain any zero words or the Knuth algorithm fails.
1462 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1463 n--;
1464 m++;
1466 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1467 m--;
1469 // If we're left with only a single word for the divisor, Knuth doesn't work
1470 // so we implement the short division algorithm here. This is much simpler
1471 // and faster because we are certain that we can divide a 64-bit quantity
1472 // by a 32-bit quantity at hardware speed and short division is simply a
1473 // series of such operations. This is just like doing short division but we
1474 // are using base 2^32 instead of base 10.
1475 assert(n != 0 && "Divide by zero?");
1476 if (n == 1) {
1477 uint32_t divisor = V[0];
1478 uint32_t remainder = 0;
1479 for (int i = m; i >= 0; i--) {
1480 uint64_t partial_dividend = Make_64(remainder, U[i]);
1481 if (partial_dividend == 0) {
1482 Q[i] = 0;
1483 remainder = 0;
1484 } else if (partial_dividend < divisor) {
1485 Q[i] = 0;
1486 remainder = Lo_32(partial_dividend);
1487 } else if (partial_dividend == divisor) {
1488 Q[i] = 1;
1489 remainder = 0;
1490 } else {
1491 Q[i] = Lo_32(partial_dividend / divisor);
1492 remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1495 if (R)
1496 R[0] = remainder;
1497 } else {
1498 // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499 // case n > 1.
1500 KnuthDiv(U, V, Q, R, m, n);
1503 // If the caller wants the quotient
1504 if (Quotient) {
1505 for (unsigned i = 0; i < lhsWords; ++i)
1506 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1509 // If the caller wants the remainder
1510 if (Remainder) {
1511 for (unsigned i = 0; i < rhsWords; ++i)
1512 Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1515 // Clean up the memory we allocated.
1516 if (U != &SPACE[0]) {
1517 delete [] U;
1518 delete [] V;
1519 delete [] Q;
1520 delete [] R;
1524 APInt APInt::udiv(const APInt &RHS) const {
1525 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1527 // First, deal with the easy case
1528 if (isSingleWord()) {
1529 assert(RHS.U.VAL != 0 && "Divide by zero?");
1530 return APInt(BitWidth, U.VAL / RHS.U.VAL);
1533 // Get some facts about the LHS and RHS number of bits and words
1534 unsigned lhsWords = getNumWords(getActiveBits());
1535 unsigned rhsBits = RHS.getActiveBits();
1536 unsigned rhsWords = getNumWords(rhsBits);
1537 assert(rhsWords && "Divided by zero???");
1539 // Deal with some degenerate cases
1540 if (!lhsWords)
1541 // 0 / X ===> 0
1542 return APInt(BitWidth, 0);
1543 if (rhsBits == 1)
1544 // X / 1 ===> X
1545 return *this;
1546 if (lhsWords < rhsWords || this->ult(RHS))
1547 // X / Y ===> 0, iff X < Y
1548 return APInt(BitWidth, 0);
1549 if (*this == RHS)
1550 // X / X ===> 1
1551 return APInt(BitWidth, 1);
1552 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553 // All high words are zero, just use native divide
1554 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1556 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557 APInt Quotient(BitWidth, 0); // to hold result.
1558 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559 return Quotient;
1562 APInt APInt::udiv(uint64_t RHS) const {
1563 assert(RHS != 0 && "Divide by zero?");
1565 // First, deal with the easy case
1566 if (isSingleWord())
1567 return APInt(BitWidth, U.VAL / RHS);
1569 // Get some facts about the LHS words.
1570 unsigned lhsWords = getNumWords(getActiveBits());
1572 // Deal with some degenerate cases
1573 if (!lhsWords)
1574 // 0 / X ===> 0
1575 return APInt(BitWidth, 0);
1576 if (RHS == 1)
1577 // X / 1 ===> X
1578 return *this;
1579 if (this->ult(RHS))
1580 // X / Y ===> 0, iff X < Y
1581 return APInt(BitWidth, 0);
1582 if (*this == RHS)
1583 // X / X ===> 1
1584 return APInt(BitWidth, 1);
1585 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586 // All high words are zero, just use native divide
1587 return APInt(BitWidth, this->U.pVal[0] / RHS);
1589 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590 APInt Quotient(BitWidth, 0); // to hold result.
1591 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592 return Quotient;
1595 APInt APInt::sdiv(const APInt &RHS) const {
1596 if (isNegative()) {
1597 if (RHS.isNegative())
1598 return (-(*this)).udiv(-RHS);
1599 return -((-(*this)).udiv(RHS));
1601 if (RHS.isNegative())
1602 return -(this->udiv(-RHS));
1603 return this->udiv(RHS);
1606 APInt APInt::sdiv(int64_t RHS) const {
1607 if (isNegative()) {
1608 if (RHS < 0)
1609 return (-(*this)).udiv(-RHS);
1610 return -((-(*this)).udiv(RHS));
1612 if (RHS < 0)
1613 return -(this->udiv(-RHS));
1614 return this->udiv(RHS);
1617 APInt APInt::urem(const APInt &RHS) const {
1618 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619 if (isSingleWord()) {
1620 assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621 return APInt(BitWidth, U.VAL % RHS.U.VAL);
1624 // Get some facts about the LHS
1625 unsigned lhsWords = getNumWords(getActiveBits());
1627 // Get some facts about the RHS
1628 unsigned rhsBits = RHS.getActiveBits();
1629 unsigned rhsWords = getNumWords(rhsBits);
1630 assert(rhsWords && "Performing remainder operation by zero ???");
1632 // Check the degenerate cases
1633 if (lhsWords == 0)
1634 // 0 % Y ===> 0
1635 return APInt(BitWidth, 0);
1636 if (rhsBits == 1)
1637 // X % 1 ===> 0
1638 return APInt(BitWidth, 0);
1639 if (lhsWords < rhsWords || this->ult(RHS))
1640 // X % Y ===> X, iff X < Y
1641 return *this;
1642 if (*this == RHS)
1643 // X % X == 0;
1644 return APInt(BitWidth, 0);
1645 if (lhsWords == 1)
1646 // All high words are zero, just use native remainder
1647 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1649 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650 APInt Remainder(BitWidth, 0);
1651 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652 return Remainder;
1655 uint64_t APInt::urem(uint64_t RHS) const {
1656 assert(RHS != 0 && "Remainder by zero?");
1658 if (isSingleWord())
1659 return U.VAL % RHS;
1661 // Get some facts about the LHS
1662 unsigned lhsWords = getNumWords(getActiveBits());
1664 // Check the degenerate cases
1665 if (lhsWords == 0)
1666 // 0 % Y ===> 0
1667 return 0;
1668 if (RHS == 1)
1669 // X % 1 ===> 0
1670 return 0;
1671 if (this->ult(RHS))
1672 // X % Y ===> X, iff X < Y
1673 return getZExtValue();
1674 if (*this == RHS)
1675 // X % X == 0;
1676 return 0;
1677 if (lhsWords == 1)
1678 // All high words are zero, just use native remainder
1679 return U.pVal[0] % RHS;
1681 // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682 uint64_t Remainder;
1683 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684 return Remainder;
1687 APInt APInt::srem(const APInt &RHS) const {
1688 if (isNegative()) {
1689 if (RHS.isNegative())
1690 return -((-(*this)).urem(-RHS));
1691 return -((-(*this)).urem(RHS));
1693 if (RHS.isNegative())
1694 return this->urem(-RHS);
1695 return this->urem(RHS);
1698 int64_t APInt::srem(int64_t RHS) const {
1699 if (isNegative()) {
1700 if (RHS < 0)
1701 return -((-(*this)).urem(-RHS));
1702 return -((-(*this)).urem(RHS));
1704 if (RHS < 0)
1705 return this->urem(-RHS);
1706 return this->urem(RHS);
1709 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710 APInt &Quotient, APInt &Remainder) {
1711 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712 unsigned BitWidth = LHS.BitWidth;
1714 // First, deal with the easy case
1715 if (LHS.isSingleWord()) {
1716 assert(RHS.U.VAL != 0 && "Divide by zero?");
1717 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719 Quotient = APInt(BitWidth, QuotVal);
1720 Remainder = APInt(BitWidth, RemVal);
1721 return;
1724 // Get some size facts about the dividend and divisor
1725 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726 unsigned rhsBits = RHS.getActiveBits();
1727 unsigned rhsWords = getNumWords(rhsBits);
1728 assert(rhsWords && "Performing divrem operation by zero ???");
1730 // Check the degenerate cases
1731 if (lhsWords == 0) {
1732 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1733 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0
1734 return;
1737 if (rhsBits == 1) {
1738 Quotient = LHS; // X / 1 ===> X
1739 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0
1742 if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743 Remainder = LHS; // X % Y ===> X, iff X < Y
1744 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1745 return;
1748 if (LHS == RHS) {
1749 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1750 Remainder = APInt(BitWidth, 0); // X % X ===> 0;
1751 return;
1754 // Make sure there is enough space to hold the results.
1755 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756 // change the size. This is necessary if Quotient or Remainder is aliased
1757 // with LHS or RHS.
1758 Quotient.reallocate(BitWidth);
1759 Remainder.reallocate(BitWidth);
1761 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762 // There is only one word to consider so use the native versions.
1763 uint64_t lhsValue = LHS.U.pVal[0];
1764 uint64_t rhsValue = RHS.U.pVal[0];
1765 Quotient = lhsValue / rhsValue;
1766 Remainder = lhsValue % rhsValue;
1767 return;
1770 // Okay, lets do it the long way
1771 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772 Remainder.U.pVal);
1773 // Clear the rest of the Quotient and Remainder.
1774 std::memset(Quotient.U.pVal + lhsWords, 0,
1775 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776 std::memset(Remainder.U.pVal + rhsWords, 0,
1777 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1780 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781 uint64_t &Remainder) {
1782 assert(RHS != 0 && "Divide by zero?");
1783 unsigned BitWidth = LHS.BitWidth;
1785 // First, deal with the easy case
1786 if (LHS.isSingleWord()) {
1787 uint64_t QuotVal = LHS.U.VAL / RHS;
1788 Remainder = LHS.U.VAL % RHS;
1789 Quotient = APInt(BitWidth, QuotVal);
1790 return;
1793 // Get some size facts about the dividend and divisor
1794 unsigned lhsWords = getNumWords(LHS.getActiveBits());
1796 // Check the degenerate cases
1797 if (lhsWords == 0) {
1798 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0
1799 Remainder = 0; // 0 % Y ===> 0
1800 return;
1803 if (RHS == 1) {
1804 Quotient = LHS; // X / 1 ===> X
1805 Remainder = 0; // X % 1 ===> 0
1806 return;
1809 if (LHS.ult(RHS)) {
1810 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y
1811 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y
1812 return;
1815 if (LHS == RHS) {
1816 Quotient = APInt(BitWidth, 1); // X / X ===> 1
1817 Remainder = 0; // X % X ===> 0;
1818 return;
1821 // Make sure there is enough space to hold the results.
1822 // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823 // change the size. This is necessary if Quotient is aliased with LHS.
1824 Quotient.reallocate(BitWidth);
1826 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827 // There is only one word to consider so use the native versions.
1828 uint64_t lhsValue = LHS.U.pVal[0];
1829 Quotient = lhsValue / RHS;
1830 Remainder = lhsValue % RHS;
1831 return;
1834 // Okay, lets do it the long way
1835 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836 // Clear the rest of the Quotient.
1837 std::memset(Quotient.U.pVal + lhsWords, 0,
1838 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1841 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842 APInt &Quotient, APInt &Remainder) {
1843 if (LHS.isNegative()) {
1844 if (RHS.isNegative())
1845 APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846 else {
1847 APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848 Quotient.negate();
1850 Remainder.negate();
1851 } else if (RHS.isNegative()) {
1852 APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853 Quotient.negate();
1854 } else {
1855 APInt::udivrem(LHS, RHS, Quotient, Remainder);
1859 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860 APInt &Quotient, int64_t &Remainder) {
1861 uint64_t R = Remainder;
1862 if (LHS.isNegative()) {
1863 if (RHS < 0)
1864 APInt::udivrem(-LHS, -RHS, Quotient, R);
1865 else {
1866 APInt::udivrem(-LHS, RHS, Quotient, R);
1867 Quotient.negate();
1869 R = -R;
1870 } else if (RHS < 0) {
1871 APInt::udivrem(LHS, -RHS, Quotient, R);
1872 Quotient.negate();
1873 } else {
1874 APInt::udivrem(LHS, RHS, Quotient, R);
1876 Remainder = R;
1879 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880 APInt Res = *this+RHS;
1881 Overflow = isNonNegative() == RHS.isNonNegative() &&
1882 Res.isNonNegative() != isNonNegative();
1883 return Res;
1886 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887 APInt Res = *this+RHS;
1888 Overflow = Res.ult(RHS);
1889 return Res;
1892 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893 APInt Res = *this - RHS;
1894 Overflow = isNonNegative() != RHS.isNonNegative() &&
1895 Res.isNonNegative() != isNonNegative();
1896 return Res;
1899 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900 APInt Res = *this-RHS;
1901 Overflow = Res.ugt(*this);
1902 return Res;
1905 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906 // MININT/-1 --> overflow.
1907 Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1908 return sdiv(RHS);
1911 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912 APInt Res = *this * RHS;
1914 if (*this != 0 && RHS != 0)
1915 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1916 else
1917 Overflow = false;
1918 return Res;
1921 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923 Overflow = true;
1924 return *this * RHS;
1927 APInt Res = lshr(1) * RHS;
1928 Overflow = Res.isNegative();
1929 Res <<= 1;
1930 if ((*this)[0]) {
1931 Res += RHS;
1932 if (Res.ult(RHS))
1933 Overflow = true;
1935 return Res;
1938 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939 Overflow = ShAmt.uge(getBitWidth());
1940 if (Overflow)
1941 return APInt(BitWidth, 0);
1943 if (isNonNegative()) // Don't allow sign change.
1944 Overflow = ShAmt.uge(countLeadingZeros());
1945 else
1946 Overflow = ShAmt.uge(countLeadingOnes());
1948 return *this << ShAmt;
1951 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952 Overflow = ShAmt.uge(getBitWidth());
1953 if (Overflow)
1954 return APInt(BitWidth, 0);
1956 Overflow = ShAmt.ugt(countLeadingZeros());
1958 return *this << ShAmt;
1961 APInt APInt::sadd_sat(const APInt &RHS) const {
1962 bool Overflow;
1963 APInt Res = sadd_ov(RHS, Overflow);
1964 if (!Overflow)
1965 return Res;
1967 return isNegative() ? APInt::getSignedMinValue(BitWidth)
1968 : APInt::getSignedMaxValue(BitWidth);
1971 APInt APInt::uadd_sat(const APInt &RHS) const {
1972 bool Overflow;
1973 APInt Res = uadd_ov(RHS, Overflow);
1974 if (!Overflow)
1975 return Res;
1977 return APInt::getMaxValue(BitWidth);
1980 APInt APInt::ssub_sat(const APInt &RHS) const {
1981 bool Overflow;
1982 APInt Res = ssub_ov(RHS, Overflow);
1983 if (!Overflow)
1984 return Res;
1986 return isNegative() ? APInt::getSignedMinValue(BitWidth)
1987 : APInt::getSignedMaxValue(BitWidth);
1990 APInt APInt::usub_sat(const APInt &RHS) const {
1991 bool Overflow;
1992 APInt Res = usub_ov(RHS, Overflow);
1993 if (!Overflow)
1994 return Res;
1996 return APInt(BitWidth, 0);
2000 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001 // Check our assumptions here
2002 assert(!str.empty() && "Invalid string length");
2003 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004 radix == 36) &&
2005 "Radix should be 2, 8, 10, 16, or 36!");
2007 StringRef::iterator p = str.begin();
2008 size_t slen = str.size();
2009 bool isNeg = *p == '-';
2010 if (*p == '-' || *p == '+') {
2011 p++;
2012 slen--;
2013 assert(slen && "String is only a sign, needs a value.");
2015 assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018 assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019 "Insufficient bit width");
2021 // Allocate memory if needed
2022 if (isSingleWord())
2023 U.VAL = 0;
2024 else
2025 U.pVal = getClearedMemory(getNumWords());
2027 // Figure out if we can shift instead of multiply
2028 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2030 // Enter digit traversal loop
2031 for (StringRef::iterator e = str.end(); p != e; ++p) {
2032 unsigned digit = getDigit(*p, radix);
2033 assert(digit < radix && "Invalid character in digit string");
2035 // Shift or multiply the value by the radix
2036 if (slen > 1) {
2037 if (shift)
2038 *this <<= shift;
2039 else
2040 *this *= radix;
2043 // Add in the digit we just interpreted
2044 *this += digit;
2046 // If its negative, put it in two's complement form
2047 if (isNeg)
2048 this->negate();
2051 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052 bool Signed, bool formatAsCLiteral) const {
2053 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054 Radix == 36) &&
2055 "Radix should be 2, 8, 10, 16, or 36!");
2057 const char *Prefix = "";
2058 if (formatAsCLiteral) {
2059 switch (Radix) {
2060 case 2:
2061 // Binary literals are a non-standard extension added in gcc 4.3:
2062 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063 Prefix = "0b";
2064 break;
2065 case 8:
2066 Prefix = "0";
2067 break;
2068 case 10:
2069 break; // No prefix
2070 case 16:
2071 Prefix = "0x";
2072 break;
2073 default:
2074 llvm_unreachable("Invalid radix!");
2078 // First, check for a zero value and just short circuit the logic below.
2079 if (*this == 0) {
2080 while (*Prefix) {
2081 Str.push_back(*Prefix);
2082 ++Prefix;
2084 Str.push_back('0');
2085 return;
2088 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2090 if (isSingleWord()) {
2091 char Buffer[65];
2092 char *BufPtr = std::end(Buffer);
2094 uint64_t N;
2095 if (!Signed) {
2096 N = getZExtValue();
2097 } else {
2098 int64_t I = getSExtValue();
2099 if (I >= 0) {
2100 N = I;
2101 } else {
2102 Str.push_back('-');
2103 N = -(uint64_t)I;
2107 while (*Prefix) {
2108 Str.push_back(*Prefix);
2109 ++Prefix;
2112 while (N) {
2113 *--BufPtr = Digits[N % Radix];
2114 N /= Radix;
2116 Str.append(BufPtr, std::end(Buffer));
2117 return;
2120 APInt Tmp(*this);
2122 if (Signed && isNegative()) {
2123 // They want to print the signed version and it is a negative value
2124 // Flip the bits and add one to turn it into the equivalent positive
2125 // value and put a '-' in the result.
2126 Tmp.negate();
2127 Str.push_back('-');
2130 while (*Prefix) {
2131 Str.push_back(*Prefix);
2132 ++Prefix;
2135 // We insert the digits backward, then reverse them to get the right order.
2136 unsigned StartDig = Str.size();
2138 // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139 // because the number of bits per digit (1, 3 and 4 respectively) divides
2140 // equally. We just shift until the value is zero.
2141 if (Radix == 2 || Radix == 8 || Radix == 16) {
2142 // Just shift tmp right for each digit width until it becomes zero
2143 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2144 unsigned MaskAmt = Radix - 1;
2146 while (Tmp.getBoolValue()) {
2147 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148 Str.push_back(Digits[Digit]);
2149 Tmp.lshrInPlace(ShiftAmt);
2151 } else {
2152 while (Tmp.getBoolValue()) {
2153 uint64_t Digit;
2154 udivrem(Tmp, Radix, Tmp, Digit);
2155 assert(Digit < Radix && "divide failed");
2156 Str.push_back(Digits[Digit]);
2160 // Reverse the digits before returning.
2161 std::reverse(Str.begin()+StartDig, Str.end());
2164 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2165 /// It is better to pass in a SmallVector/SmallString to the methods above.
2166 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167 SmallString<40> S;
2168 toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169 return S.str();
2172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173 LLVM_DUMP_METHOD void APInt::dump() const {
2174 SmallString<40> S, U;
2175 this->toStringUnsigned(U);
2176 this->toStringSigned(S);
2177 dbgs() << "APInt(" << BitWidth << "b, "
2178 << U << "u " << S << "s)\n";
2180 #endif
2182 void APInt::print(raw_ostream &OS, bool isSigned) const {
2183 SmallString<40> S;
2184 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185 OS << S;
2188 // This implements a variety of operations on a representation of
2189 // arbitrary precision, two's-complement, bignum integer values.
2191 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe
2192 // and unrestricting assumption.
2193 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194 "Part width must be divisible by 2!");
2196 /* Some handy functions local to this file. */
2198 /* Returns the integer part with the least significant BITS set.
2199 BITS cannot be zero. */
2200 static inline APInt::WordType lowBitMask(unsigned bits) {
2201 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2203 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2206 /* Returns the value of the lower half of PART. */
2207 static inline APInt::WordType lowHalf(APInt::WordType part) {
2208 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2211 /* Returns the value of the upper half of PART. */
2212 static inline APInt::WordType highHalf(APInt::WordType part) {
2213 return part >> (APInt::APINT_BITS_PER_WORD / 2);
2216 /* Returns the bit number of the most significant set bit of a part.
2217 If the input number has no bits set -1U is returned. */
2218 static unsigned partMSB(APInt::WordType value) {
2219 return findLastSet(value, ZB_Max);
2222 /* Returns the bit number of the least significant set bit of a
2223 part. If the input number has no bits set -1U is returned. */
2224 static unsigned partLSB(APInt::WordType value) {
2225 return findFirstSet(value, ZB_Max);
2228 /* Sets the least significant part of a bignum to the input value, and
2229 zeroes out higher parts. */
2230 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231 assert(parts > 0);
2233 dst[0] = part;
2234 for (unsigned i = 1; i < parts; i++)
2235 dst[i] = 0;
2238 /* Assign one bignum to another. */
2239 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240 for (unsigned i = 0; i < parts; i++)
2241 dst[i] = src[i];
2244 /* Returns true if a bignum is zero, false otherwise. */
2245 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246 for (unsigned i = 0; i < parts; i++)
2247 if (src[i])
2248 return false;
2250 return true;
2253 /* Extract the given bit of a bignum; returns 0 or 1. */
2254 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255 return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2258 /* Set the given bit of a bignum. */
2259 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260 parts[whichWord(bit)] |= maskBit(bit);
2263 /* Clears the given bit of a bignum. */
2264 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265 parts[whichWord(bit)] &= ~maskBit(bit);
2268 /* Returns the bit number of the least significant set bit of a
2269 number. If the input number has no bits set -1U is returned. */
2270 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271 for (unsigned i = 0; i < n; i++) {
2272 if (parts[i] != 0) {
2273 unsigned lsb = partLSB(parts[i]);
2275 return lsb + i * APINT_BITS_PER_WORD;
2279 return -1U;
2282 /* Returns the bit number of the most significant set bit of a number.
2283 If the input number has no bits set -1U is returned. */
2284 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285 do {
2286 --n;
2288 if (parts[n] != 0) {
2289 unsigned msb = partMSB(parts[n]);
2291 return msb + n * APINT_BITS_PER_WORD;
2293 } while (n);
2295 return -1U;
2298 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2299 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300 the least significant bit of DST. All high bits above srcBITS in
2301 DST are zero-filled. */
2302 void
2303 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304 unsigned srcBits, unsigned srcLSB) {
2305 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306 assert(dstParts <= dstCount);
2308 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309 tcAssign (dst, src + firstSrcPart, dstParts);
2311 unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312 tcShiftRight (dst, dstParts, shift);
2314 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315 in DST. If this is less that srcBits, append the rest, else
2316 clear the high bits. */
2317 unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318 if (n < srcBits) {
2319 WordType mask = lowBitMask (srcBits - n);
2320 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321 << n % APINT_BITS_PER_WORD);
2322 } else if (n > srcBits) {
2323 if (srcBits % APINT_BITS_PER_WORD)
2324 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2327 /* Clear high parts. */
2328 while (dstParts < dstCount)
2329 dst[dstParts++] = 0;
2332 /* DST += RHS + C where C is zero or one. Returns the carry flag. */
2333 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334 WordType c, unsigned parts) {
2335 assert(c <= 1);
2337 for (unsigned i = 0; i < parts; i++) {
2338 WordType l = dst[i];
2339 if (c) {
2340 dst[i] += rhs[i] + 1;
2341 c = (dst[i] <= l);
2342 } else {
2343 dst[i] += rhs[i];
2344 c = (dst[i] < l);
2348 return c;
2351 /// This function adds a single "word" integer, src, to the multiple
2352 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2354 /// @returns the carry of the addition.
2355 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356 unsigned parts) {
2357 for (unsigned i = 0; i < parts; ++i) {
2358 dst[i] += src;
2359 if (dst[i] >= src)
2360 return 0; // No need to carry so exit early.
2361 src = 1; // Carry one to next digit.
2364 return 1;
2367 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */
2368 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369 WordType c, unsigned parts) {
2370 assert(c <= 1);
2372 for (unsigned i = 0; i < parts; i++) {
2373 WordType l = dst[i];
2374 if (c) {
2375 dst[i] -= rhs[i] + 1;
2376 c = (dst[i] >= l);
2377 } else {
2378 dst[i] -= rhs[i];
2379 c = (dst[i] > l);
2383 return c;
2386 /// This function subtracts a single "word" (64-bit word), src, from
2387 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388 /// no further borrowing is needed or it runs out of "words" in dst. The result
2389 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390 /// exhausted. In other words, if src > dst then this function returns 1,
2391 /// otherwise 0.
2392 /// @returns the borrow out of the subtraction
2393 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394 unsigned parts) {
2395 for (unsigned i = 0; i < parts; ++i) {
2396 WordType Dst = dst[i];
2397 dst[i] -= src;
2398 if (src <= Dst)
2399 return 0; // No need to borrow so exit early.
2400 src = 1; // We have to "borrow 1" from next "word"
2403 return 1;
2406 /* Negate a bignum in-place. */
2407 void APInt::tcNegate(WordType *dst, unsigned parts) {
2408 tcComplement(dst, parts);
2409 tcIncrement(dst, parts);
2412 /* DST += SRC * MULTIPLIER + CARRY if add is true
2413 DST = SRC * MULTIPLIER + CARRY if add is false
2415 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
2416 they must start at the same point, i.e. DST == SRC.
2418 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419 returned. Otherwise DST is filled with the least significant
2420 DSTPARTS parts of the result, and if all of the omitted higher
2421 parts were zero return zero, otherwise overflow occurred and
2422 return one. */
2423 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424 WordType multiplier, WordType carry,
2425 unsigned srcParts, unsigned dstParts,
2426 bool add) {
2427 /* Otherwise our writes of DST kill our later reads of SRC. */
2428 assert(dst <= src || dst >= src + srcParts);
2429 assert(dstParts <= srcParts + 1);
2431 /* N loops; minimum of dstParts and srcParts. */
2432 unsigned n = std::min(dstParts, srcParts);
2434 for (unsigned i = 0; i < n; i++) {
2435 WordType low, mid, high, srcPart;
2437 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2439 This cannot overflow, because
2441 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2443 which is less than n^2. */
2445 srcPart = src[i];
2447 if (multiplier == 0 || srcPart == 0) {
2448 low = carry;
2449 high = 0;
2450 } else {
2451 low = lowHalf(srcPart) * lowHalf(multiplier);
2452 high = highHalf(srcPart) * highHalf(multiplier);
2454 mid = lowHalf(srcPart) * highHalf(multiplier);
2455 high += highHalf(mid);
2456 mid <<= APINT_BITS_PER_WORD / 2;
2457 if (low + mid < low)
2458 high++;
2459 low += mid;
2461 mid = highHalf(srcPart) * lowHalf(multiplier);
2462 high += highHalf(mid);
2463 mid <<= APINT_BITS_PER_WORD / 2;
2464 if (low + mid < low)
2465 high++;
2466 low += mid;
2468 /* Now add carry. */
2469 if (low + carry < low)
2470 high++;
2471 low += carry;
2474 if (add) {
2475 /* And now DST[i], and store the new low part there. */
2476 if (low + dst[i] < low)
2477 high++;
2478 dst[i] += low;
2479 } else
2480 dst[i] = low;
2482 carry = high;
2485 if (srcParts < dstParts) {
2486 /* Full multiplication, there is no overflow. */
2487 assert(srcParts + 1 == dstParts);
2488 dst[srcParts] = carry;
2489 return 0;
2492 /* We overflowed if there is carry. */
2493 if (carry)
2494 return 1;
2496 /* We would overflow if any significant unwritten parts would be
2497 non-zero. This is true if any remaining src parts are non-zero
2498 and the multiplier is non-zero. */
2499 if (multiplier)
2500 for (unsigned i = dstParts; i < srcParts; i++)
2501 if (src[i])
2502 return 1;
2504 /* We fitted in the narrow destination. */
2505 return 0;
2508 /* DST = LHS * RHS, where DST has the same width as the operands and
2509 is filled with the least significant parts of the result. Returns
2510 one if overflow occurred, otherwise zero. DST must be disjoint
2511 from both operands. */
2512 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513 const WordType *rhs, unsigned parts) {
2514 assert(dst != lhs && dst != rhs);
2516 int overflow = 0;
2517 tcSet(dst, 0, parts);
2519 for (unsigned i = 0; i < parts; i++)
2520 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521 parts - i, true);
2523 return overflow;
2526 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2527 /// operands. No overflow occurs. DST must be disjoint from both operands.
2528 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529 const WordType *rhs, unsigned lhsParts,
2530 unsigned rhsParts) {
2531 /* Put the narrower number on the LHS for less loops below. */
2532 if (lhsParts > rhsParts)
2533 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2535 assert(dst != lhs && dst != rhs);
2537 tcSet(dst, 0, rhsParts);
2539 for (unsigned i = 0; i < lhsParts; i++)
2540 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2543 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544 Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545 set REMAINDER to the remainder, return zero. i.e.
2547 OLD_LHS = RHS * LHS + REMAINDER
2549 SCRATCH is a bignum of the same size as the operands and result for
2550 use by the routine; its contents need not be initialized and are
2551 destroyed. LHS, REMAINDER and SCRATCH must be distinct.
2553 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554 WordType *remainder, WordType *srhs,
2555 unsigned parts) {
2556 assert(lhs != remainder && lhs != srhs && remainder != srhs);
2558 unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559 if (shiftCount == 0)
2560 return true;
2562 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563 unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2566 tcAssign(srhs, rhs, parts);
2567 tcShiftLeft(srhs, parts, shiftCount);
2568 tcAssign(remainder, lhs, parts);
2569 tcSet(lhs, 0, parts);
2571 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572 the total. */
2573 for (;;) {
2574 int compare = tcCompare(remainder, srhs, parts);
2575 if (compare >= 0) {
2576 tcSubtract(remainder, srhs, 0, parts);
2577 lhs[n] |= mask;
2580 if (shiftCount == 0)
2581 break;
2582 shiftCount--;
2583 tcShiftRight(srhs, parts, 1);
2584 if ((mask >>= 1) == 0) {
2585 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586 n--;
2590 return false;
2593 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594 /// no restrictions on Count.
2595 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596 // Don't bother performing a no-op shift.
2597 if (!Count)
2598 return;
2600 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2604 // Fastpath for moving by whole words.
2605 if (BitShift == 0) {
2606 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607 } else {
2608 while (Words-- > WordShift) {
2609 Dst[Words] = Dst[Words - WordShift] << BitShift;
2610 if (Words > WordShift)
2611 Dst[Words] |=
2612 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2616 // Fill in the remainder with 0s.
2617 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2620 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621 /// are no restrictions on Count.
2622 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623 // Don't bother performing a no-op shift.
2624 if (!Count)
2625 return;
2627 // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629 unsigned BitShift = Count % APINT_BITS_PER_WORD;
2631 unsigned WordsToMove = Words - WordShift;
2632 // Fastpath for moving by whole words.
2633 if (BitShift == 0) {
2634 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635 } else {
2636 for (unsigned i = 0; i != WordsToMove; ++i) {
2637 Dst[i] = Dst[i + WordShift] >> BitShift;
2638 if (i + 1 != WordsToMove)
2639 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2643 // Fill in the remainder with 0s.
2644 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2647 /* Bitwise and of two bignums. */
2648 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649 for (unsigned i = 0; i < parts; i++)
2650 dst[i] &= rhs[i];
2653 /* Bitwise inclusive or of two bignums. */
2654 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655 for (unsigned i = 0; i < parts; i++)
2656 dst[i] |= rhs[i];
2659 /* Bitwise exclusive or of two bignums. */
2660 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661 for (unsigned i = 0; i < parts; i++)
2662 dst[i] ^= rhs[i];
2665 /* Complement a bignum in-place. */
2666 void APInt::tcComplement(WordType *dst, unsigned parts) {
2667 for (unsigned i = 0; i < parts; i++)
2668 dst[i] = ~dst[i];
2671 /* Comparison (unsigned) of two bignums. */
2672 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673 unsigned parts) {
2674 while (parts) {
2675 parts--;
2676 if (lhs[parts] != rhs[parts])
2677 return (lhs[parts] > rhs[parts]) ? 1 : -1;
2680 return 0;
2683 /* Set the least significant BITS bits of a bignum, clear the
2684 rest. */
2685 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686 unsigned bits) {
2687 unsigned i = 0;
2688 while (bits > APINT_BITS_PER_WORD) {
2689 dst[i++] = ~(WordType) 0;
2690 bits -= APINT_BITS_PER_WORD;
2693 if (bits)
2694 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2696 while (i < parts)
2697 dst[i++] = 0;
2700 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701 APInt::Rounding RM) {
2702 // Currently udivrem always rounds down.
2703 switch (RM) {
2704 case APInt::Rounding::DOWN:
2705 case APInt::Rounding::TOWARD_ZERO:
2706 return A.udiv(B);
2707 case APInt::Rounding::UP: {
2708 APInt Quo, Rem;
2709 APInt::udivrem(A, B, Quo, Rem);
2710 if (Rem == 0)
2711 return Quo;
2712 return Quo + 1;
2715 llvm_unreachable("Unknown APInt::Rounding enum");
2718 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719 APInt::Rounding RM) {
2720 switch (RM) {
2721 case APInt::Rounding::DOWN:
2722 case APInt::Rounding::UP: {
2723 APInt Quo, Rem;
2724 APInt::sdivrem(A, B, Quo, Rem);
2725 if (Rem == 0)
2726 return Quo;
2727 // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728 // We want to check whether the non-integer part of the mathematical value
2729 // is negative or not. If the non-integer part is negative, we need to round
2730 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731 // already rounded down.
2732 if (RM == APInt::Rounding::DOWN) {
2733 if (Rem.isNegative() != B.isNegative())
2734 return Quo - 1;
2735 return Quo;
2737 if (Rem.isNegative() != B.isNegative())
2738 return Quo;
2739 return Quo + 1;
2741 // Currently sdiv rounds twards zero.
2742 case APInt::Rounding::TOWARD_ZERO:
2743 return A.sdiv(B);
2745 llvm_unreachable("Unknown APInt::Rounding enum");
2748 Optional<APInt>
2749 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750 unsigned RangeWidth) {
2751 unsigned CoeffWidth = A.getBitWidth();
2752 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753 assert(RangeWidth <= CoeffWidth &&
2754 "Value range width should be less than coefficient width");
2755 assert(RangeWidth > 1 && "Value range bit width should be > 1");
2757 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758 << "x + " << C << ", rw:" << RangeWidth << '\n');
2760 // Identify 0 as a (non)solution immediately.
2761 if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763 return APInt(CoeffWidth, 0);
2766 // The result of APInt arithmetic has the same bit width as the operands,
2767 // so it can actually lose high bits. A product of two n-bit integers needs
2768 // 2n-1 bits to represent the full value.
2769 // The operation done below (on quadratic coefficients) that can produce
2770 // the largest value is the evaluation of the equation during bisection,
2771 // which needs 3 times the bitwidth of the coefficient, so the total number
2772 // of required bits is 3n.
2774 // The purpose of this extension is to simulate the set Z of all integers,
2775 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776 // and negative numbers (not so much in a modulo arithmetic). The method
2777 // used to solve the equation is based on the standard formula for real
2778 // numbers, and uses the concepts of "positive" and "negative" with their
2779 // usual meanings.
2780 CoeffWidth *= 3;
2781 A = A.sext(CoeffWidth);
2782 B = B.sext(CoeffWidth);
2783 C = C.sext(CoeffWidth);
2785 // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786 // the bit width has increased.
2787 if (A.isNegative()) {
2788 A.negate();
2789 B.negate();
2790 C.negate();
2793 // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795 // and R = 2^BitWidth.
2796 // Since we're trying not only to find exact solutions, but also values
2797 // that "wrap around", such a set will always have a solution, i.e. an x
2798 // that satisfies at least one of the equations, or such that |q(x)|
2799 // exceeds kR, while |q(x-1)| for the same k does not.
2801 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802 // positive solution n (in the above sense), and also such that the n
2803 // will be the least among all solutions corresponding to k = 0, 1, ...
2804 // (more precisely, the least element in the set
2805 // { n(k) | k is such that a solution n(k) exists }).
2807 // Consider the parabola (over real numbers) that corresponds to the
2808 // quadratic equation. Since A > 0, the arms of the parabola will point
2809 // up. Picking different values of k will shift it up and down by R.
2811 // We want to shift the parabola in such a way as to reduce the problem
2812 // of solving q(x) = kR to solving shifted_q(x) = 0.
2813 // (The interesting solutions are the ceilings of the real number
2814 // solutions.)
2815 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816 APInt TwoA = 2 * A;
2817 APInt SqrB = B * B;
2818 bool PickLow;
2820 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821 assert(A.isStrictlyPositive());
2822 APInt T = V.abs().urem(A);
2823 if (T.isNullValue())
2824 return V;
2825 return V.isNegative() ? V+T : V+(A-T);
2828 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829 // iff B is positive.
2830 if (B.isNonNegative()) {
2831 // If B >= 0, the vertex it at a negative location (or at 0), so in
2832 // order to have a non-negative solution we need to pick k that makes
2833 // C-kR negative. To satisfy all the requirements for the solution
2834 // that we are looking for, it needs to be closest to 0 of all k.
2835 C = C.srem(R);
2836 if (C.isStrictlyPositive())
2837 C -= R;
2838 // Pick the greater solution.
2839 PickLow = false;
2840 } else {
2841 // If B < 0, the vertex is at a positive location. For any solution
2842 // to exist, the discriminant must be non-negative. This means that
2843 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844 // lower bound on values of k: kR >= C - B^2/4A.
2845 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846 // Round LowkR up (towards +inf) to the nearest kR.
2847 LowkR = RoundUp(LowkR, R);
2849 // If there exists k meeting the condition above, and such that
2850 // C-kR > 0, there will be two positive real number solutions of
2851 // q(x) = kR. Out of all such values of k, pick the one that makes
2852 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853 // In other words, find maximum k such that LowkR <= kR < C.
2854 if (C.sgt(LowkR)) {
2855 // If LowkR < C, then such a k is guaranteed to exist because
2856 // LowkR itself is a multiple of R.
2857 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R)
2858 // Pick the smaller solution.
2859 PickLow = true;
2860 } else {
2861 // If C-kR < 0 for all potential k's, it means that one solution
2862 // will be negative, while the other will be positive. The positive
2863 // solution will shift towards 0 if the parabola is moved up.
2864 // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865 // to 0, or in other words, out of all parabolas that have solutions,
2866 // pick the one that is the farthest "up").
2867 // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868 C -= LowkR;
2869 // Pick the greater solution.
2870 PickLow = false;
2874 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875 << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2877 APInt D = SqrB - 4*A*C;
2878 assert(D.isNonNegative() && "Negative discriminant");
2879 APInt SQ = D.sqrt();
2881 APInt Q = SQ * SQ;
2882 bool InexactSQ = Q != D;
2883 // The calculated SQ may actually be greater than the exact (non-integer)
2884 // value. If that's the case, decremement SQ to get a value that is lower.
2885 if (Q.sgt(D))
2886 SQ -= 1;
2888 APInt X;
2889 APInt Rem;
2891 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892 // When using the quadratic formula directly, the calculated low root
2893 // may be greater than the exact one, since we would be subtracting SQ.
2894 // To make sure that the calculated root is not greater than the exact
2895 // one, subtract SQ+1 when calculating the low root (for inexact value
2896 // of SQ).
2897 if (PickLow)
2898 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899 else
2900 APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2902 // The updated coefficients should be such that the (exact) solution is
2903 // positive. Since APInt division rounds towards 0, the calculated one
2904 // can be 0, but cannot be negative.
2905 assert(X.isNonNegative() && "Solution should be non-negative");
2907 if (!InexactSQ && Rem.isNullValue()) {
2908 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909 return X;
2912 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913 // The exact value of the square root of D should be between SQ and SQ+1.
2914 // This implies that the solution should be between that corresponding to
2915 // SQ (i.e. X) and that corresponding to SQ+1.
2917 // The calculated X cannot be greater than the exact (real) solution.
2918 // Actually it must be strictly less than the exact solution, while
2919 // X+1 will be greater than or equal to it.
2921 APInt VX = (A*X + B)*X + C;
2922 APInt VY = VX + TwoA*X + A + B;
2923 bool SignChange = VX.isNegative() != VY.isNegative() ||
2924 VX.isNullValue() != VY.isNullValue();
2925 // If the sign did not change between X and X+1, X is not a valid solution.
2926 // This could happen when the actual (exact) roots don't have an integer
2927 // between them, so they would both be contained between X and X+1.
2928 if (!SignChange) {
2929 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930 return None;
2933 X += 1;
2934 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935 return X;
2938 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939 /// with the integer held in IntVal.
2940 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941 unsigned StoreBytes) {
2942 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943 const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2945 if (sys::IsLittleEndianHost) {
2946 // Little-endian host - the source is ordered from LSB to MSB. Order the
2947 // destination from LSB to MSB: Do a straight copy.
2948 memcpy(Dst, Src, StoreBytes);
2949 } else {
2950 // Big-endian host - the source is an array of 64 bit words ordered from
2951 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination
2952 // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953 while (StoreBytes > sizeof(uint64_t)) {
2954 StoreBytes -= sizeof(uint64_t);
2955 // May not be aligned so use memcpy.
2956 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957 Src += sizeof(uint64_t);
2960 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2964 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968 uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969 const_cast<uint64_t *>(IntVal.getRawData()));
2971 if (sys::IsLittleEndianHost)
2972 // Little-endian host - the destination must be ordered from LSB to MSB.
2973 // The source is ordered from LSB to MSB: Do a straight copy.
2974 memcpy(Dst, Src, LoadBytes);
2975 else {
2976 // Big-endian - the destination is an array of 64 bit words ordered from
2977 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is
2978 // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979 // a word.
2980 while (LoadBytes > sizeof(uint64_t)) {
2981 LoadBytes -= sizeof(uint64_t);
2982 // May not be aligned so use memcpy.
2983 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984 Dst += sizeof(uint64_t);
2987 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);