llvm/lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision integer
  10 // constant values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APInt.h"
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/FoldingSet.h"
  17 #include "llvm/ADT/Hashing.h"
  18 #include "llvm/ADT/Optional.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/ADT/bit.h"
  22 #include "llvm/Config/llvm-config.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/ErrorHandling.h"
  25 #include "llvm/Support/MathExtras.h"
  26 #include "llvm/Support/raw_ostream.h"
  27 #include <climits>
  28 #include <cmath>
  29 #include <cstdlib>
  30 #include <cstring>
  31 using namespace llvm;
  32
  33 #define DEBUG_TYPE "apint"
  34
  35 /// A utility function for allocating memory, checking for allocation failures,
  36 /// and ensuring the contents are zeroed.
  37 inline static uint64_t* getClearedMemory(unsigned numWords) {
  38   uint64_t *result = new uint64_t[numWords];
  39   memset(result, 0, numWords * sizeof(uint64_t));
  40   return result;
  41 }
  42
  43 /// A utility function for allocating memory and checking for allocation
  44 /// failure.  The content is not zeroed.
  45 inline static uint64_t* getMemory(unsigned numWords) {
  46   return new uint64_t[numWords];
  47 }
  48
  49 /// A utility function that converts a character to a digit.
  50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  51   unsigned r;
  52
  53   if (radix == 16 || radix == 36) {
  54     r = cdigit - '0';
  55     if (r <= 9)
  56       return r;
  57
  58     r = cdigit - 'A';
  59     if (r <= radix - 11U)
  60       return r + 10;
  61
  62     r = cdigit - 'a';
  63     if (r <= radix - 11U)
  64       return r + 10;
  65
  66     radix = 10;
  67   }
  68
  69   r = cdigit - '0';
  70   if (r < radix)
  71     return r;
  72
  73   return -1U;
  74 }
  75
  76
  77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  78   U.pVal = getClearedMemory(getNumWords());
  79   U.pVal[0] = val;
  80   if (isSigned && int64_t(val) < 0)
  81     for (unsigned i = 1; i < getNumWords(); ++i)
  82       U.pVal[i] = WORDTYPE_MAX;
  83   clearUnusedBits();
  84 }
  85
  86 void APInt::initSlowCase(const APInt& that) {
  87   U.pVal = getMemory(getNumWords());
  88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  89 }
  90
  91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  92   assert(BitWidth && "Bitwidth too small");
  93   assert(bigVal.data() && "Null pointer detected!");
  94   if (isSingleWord())
  95     U.VAL = bigVal[0];
  96   else {
  97     // Get memory, cleared to 0
  98     U.pVal = getClearedMemory(getNumWords());
  99     // Calculate the number of words to copy
 100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 101     // Copy the words from bigVal to pVal
 102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 103   }
 104   // Make sure unused high bits are cleared
 105   clearUnusedBits();
 106 }
 107
 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 109   : BitWidth(numBits) {
 110   initFromArray(bigVal);
 111 }
 112
 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 114   : BitWidth(numBits) {
 115   initFromArray(makeArrayRef(bigVal, numWords));
 116 }
 117
 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 119   : BitWidth(numbits) {
 120   assert(BitWidth && "Bitwidth too small");
 121   fromString(numbits, Str, radix);
 122 }
 123
 124 void APInt::reallocate(unsigned NewBitWidth) {
 125   // If the number of words is the same we can just change the width and stop.
 126   if (getNumWords() == getNumWords(NewBitWidth)) {
 127     BitWidth = NewBitWidth;
 128     return;
 129   }
 130
 131   // If we have an allocation, delete it.
 132   if (!isSingleWord())
 133     delete [] U.pVal;
 134
 135   // Update BitWidth.
 136   BitWidth = NewBitWidth;
 137
 138   // If we are supposed to have an allocation, create it.
 139   if (!isSingleWord())
 140     U.pVal = getMemory(getNumWords());
 141 }
 142
 143 void APInt::AssignSlowCase(const APInt& RHS) {
 144   // Don't do anything for X = X
 145   if (this == &RHS)
 146     return;
 147
 148   // Adjust the bit width and handle allocations as necessary.
 149   reallocate(RHS.getBitWidth());
 150
 151   // Copy the data.
 152   if (isSingleWord())
 153     U.VAL = RHS.U.VAL;
 154   else
 155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 156 }
 157
 158 /// This method 'profiles' an APInt for use with FoldingSet.
 159 void APInt::Profile(FoldingSetNodeID& ID) const {
 160   ID.AddInteger(BitWidth);
 161
 162   if (isSingleWord()) {
 163     ID.AddInteger(U.VAL);
 164     return;
 165   }
 166
 167   unsigned NumWords = getNumWords();
 168   for (unsigned i = 0; i < NumWords; ++i)
 169     ID.AddInteger(U.pVal[i]);
 170 }
 171
 172 /// Prefix increment operator. Increments the APInt by one.
 173 APInt& APInt::operator++() {
 174   if (isSingleWord())
 175     ++U.VAL;
 176   else
 177     tcIncrement(U.pVal, getNumWords());
 178   return clearUnusedBits();
 179 }
 180
 181 /// Prefix decrement operator. Decrements the APInt by one.
 182 APInt& APInt::operator--() {
 183   if (isSingleWord())
 184     --U.VAL;
 185   else
 186     tcDecrement(U.pVal, getNumWords());
 187   return clearUnusedBits();
 188 }
 189
 190 /// Adds the RHS APInt to this APInt.
 191 /// @returns this, after addition of RHS.
 192 /// Addition assignment operator.
 193 APInt& APInt::operator+=(const APInt& RHS) {
 194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 195   if (isSingleWord())
 196     U.VAL += RHS.U.VAL;
 197   else
 198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 199   return clearUnusedBits();
 200 }
 201
 202 APInt& APInt::operator+=(uint64_t RHS) {
 203   if (isSingleWord())
 204     U.VAL += RHS;
 205   else
 206     tcAddPart(U.pVal, RHS, getNumWords());
 207   return clearUnusedBits();
 208 }
 209
 210 /// Subtracts the RHS APInt from this APInt
 211 /// @returns this, after subtraction
 212 /// Subtraction assignment operator.
 213 APInt& APInt::operator-=(const APInt& RHS) {
 214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 215   if (isSingleWord())
 216     U.VAL -= RHS.U.VAL;
 217   else
 218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 219   return clearUnusedBits();
 220 }
 221
 222 APInt& APInt::operator-=(uint64_t RHS) {
 223   if (isSingleWord())
 224     U.VAL -= RHS;
 225   else
 226     tcSubtractPart(U.pVal, RHS, getNumWords());
 227   return clearUnusedBits();
 228 }
 229
 230 APInt APInt::operator*(const APInt& RHS) const {
 231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 232   if (isSingleWord())
 233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 234
 235   APInt Result(getMemory(getNumWords()), getBitWidth());
 236
 237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 238
 239   Result.clearUnusedBits();
 240   return Result;
 241 }
 242
 243 void APInt::AndAssignSlowCase(const APInt& RHS) {
 244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 245 }
 246
 247 void APInt::OrAssignSlowCase(const APInt& RHS) {
 248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 249 }
 250
 251 void APInt::XorAssignSlowCase(const APInt& RHS) {
 252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 253 }
 254
 255 APInt& APInt::operator*=(const APInt& RHS) {
 256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 257   *this = *this * RHS;
 258   return *this;
 259 }
 260
 261 APInt& APInt::operator*=(uint64_t RHS) {
 262   if (isSingleWord()) {
 263     U.VAL *= RHS;
 264   } else {
 265     unsigned NumWords = getNumWords();
 266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 267   }
 268   return clearUnusedBits();
 269 }
 270
 271 bool APInt::EqualSlowCase(const APInt& RHS) const {
 272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 273 }
 274
 275 int APInt::compare(const APInt& RHS) const {
 276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 277   if (isSingleWord())
 278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 279
 280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 281 }
 282
 283 int APInt::compareSigned(const APInt& RHS) const {
 284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 285   if (isSingleWord()) {
 286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 289   }
 290
 291   bool lhsNeg = isNegative();
 292   bool rhsNeg = RHS.isNegative();
 293
 294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 295   if (lhsNeg != rhsNeg)
 296     return lhsNeg ? -1 : 1;
 297
 298   // Otherwise we can just use an unsigned comparison, because even negative
 299   // numbers compare correctly this way if both have the same signed-ness.
 300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 301 }
 302
 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 304   unsigned loWord = whichWord(loBit);
 305   unsigned hiWord = whichWord(hiBit);
 306
 307   // Create an initial mask for the low word with zeros below loBit.
 308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
 309
 310   // If hiBit is not aligned, we need a high mask.
 311   unsigned hiShiftAmt = whichBit(hiBit);
 312   if (hiShiftAmt != 0) {
 313     // Create a high mask with zeros above hiBit.
 314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 316     // set the bits in hiWord.
 317     if (hiWord == loWord)
 318       loMask &= hiMask;
 319     else
 320       U.pVal[hiWord] |= hiMask;
 321   }
 322   // Apply the mask to the low word.
 323   U.pVal[loWord] |= loMask;
 324
 325   // Fill any words between loWord and hiWord with all ones.
 326   for (unsigned word = loWord + 1; word < hiWord; ++word)
 327     U.pVal[word] = WORDTYPE_MAX;
 328 }
 329
 330 /// Toggle every bit to its opposite value.
 331 void APInt::flipAllBitsSlowCase() {
 332   tcComplement(U.pVal, getNumWords());
 333   clearUnusedBits();
 334 }
 335
 336 /// Toggle a given bit to its opposite value whose position is given
 337 /// as "bitPosition".
 338 /// Toggles a given bit to its opposite value.
 339 void APInt::flipBit(unsigned bitPosition) {
 340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 341   setBitVal(bitPosition, !(*this)[bitPosition]);
 342 }
 343
 344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 345   unsigned subBitWidth = subBits.getBitWidth();
 346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 347          "Illegal bit insertion");
 348
 349   // Insertion is a direct copy.
 350   if (subBitWidth == BitWidth) {
 351     *this = subBits;
 352     return;
 353   }
 354
 355   // Single word result can be done as a direct bitmask.
 356   if (isSingleWord()) {
 357     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 358     U.VAL &= ~(mask << bitPosition);
 359     U.VAL |= (subBits.U.VAL << bitPosition);
 360     return;
 361   }
 362
 363   unsigned loBit = whichBit(bitPosition);
 364   unsigned loWord = whichWord(bitPosition);
 365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 366
 367   // Insertion within a single word can be done as a direct bitmask.
 368   if (loWord == hi1Word) {
 369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 370     U.pVal[loWord] &= ~(mask << loBit);
 371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 372     return;
 373   }
 374
 375   // Insert on word boundaries.
 376   if (loBit == 0) {
 377     // Direct copy whole words.
 378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 379     memcpy(U.pVal + loWord, subBits.getRawData(),
 380            numWholeSubWords * APINT_WORD_SIZE);
 381
 382     // Mask+insert remaining bits.
 383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 384     if (remainingBits != 0) {
 385       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 386       U.pVal[hi1Word] &= ~mask;
 387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 388     }
 389     return;
 390   }
 391
 392   // General case - set/clear individual bits in dst based on src.
 393   // TODO - there is scope for optimization here, but at the moment this code
 394   // path is barely used so prefer readability over performance.
 395   for (unsigned i = 0; i != subBitWidth; ++i)
 396     setBitVal(bitPosition + i, subBits[i]);
 397 }
 398
 399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
 400   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 401   subBits &= maskBits;
 402   if (isSingleWord()) {
 403     U.VAL &= ~(maskBits << bitPosition);
 404     U.VAL |= subBits << bitPosition;
 405     return;
 406   }
 407
 408   unsigned loBit = whichBit(bitPosition);
 409   unsigned loWord = whichWord(bitPosition);
 410   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 411   if (loWord == hiWord) {
 412     U.pVal[loWord] &= ~(maskBits << loBit);
 413     U.pVal[loWord] |= subBits << loBit;
 414     return;
 415   }
 416
 417   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 418   unsigned wordBits = 8 * sizeof(WordType);
 419   U.pVal[loWord] &= ~(maskBits << loBit);
 420   U.pVal[loWord] |= subBits << loBit;
 421
 422   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
 423   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
 424 }
 425
 426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 427   assert(numBits > 0 && "Can't extract zero bits");
 428   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 429          "Illegal bit extraction");
 430
 431   if (isSingleWord())
 432     return APInt(numBits, U.VAL >> bitPosition);
 433
 434   unsigned loBit = whichBit(bitPosition);
 435   unsigned loWord = whichWord(bitPosition);
 436   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 437
 438   // Single word result extracting bits from a single word source.
 439   if (loWord == hiWord)
 440     return APInt(numBits, U.pVal[loWord] >> loBit);
 441
 442   // Extracting bits that start on a source word boundary can be done
 443   // as a fast memory copy.
 444   if (loBit == 0)
 445     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 446
 447   // General case - shift + copy source words directly into place.
 448   APInt Result(numBits, 0);
 449   unsigned NumSrcWords = getNumWords();
 450   unsigned NumDstWords = Result.getNumWords();
 451
 452   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
 453   for (unsigned word = 0; word < NumDstWords; ++word) {
 454     uint64_t w0 = U.pVal[loWord + word];
 455     uint64_t w1 =
 456         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 457     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 458   }
 459
 460   return Result.clearUnusedBits();
 461 }
 462
 463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
 464                                        unsigned bitPosition) const {
 465   assert(numBits > 0 && "Can't extract zero bits");
 466   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 467          "Illegal bit extraction");
 468   assert(numBits <= 64 && "Illegal bit extraction");
 469
 470   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
 471   if (isSingleWord())
 472     return (U.VAL >> bitPosition) & maskBits;
 473
 474   unsigned loBit = whichBit(bitPosition);
 475   unsigned loWord = whichWord(bitPosition);
 476   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 477   if (loWord == hiWord)
 478     return (U.pVal[loWord] >> loBit) & maskBits;
 479
 480   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
 481   unsigned wordBits = 8 * sizeof(WordType);
 482   uint64_t retBits = U.pVal[loWord] >> loBit;
 483   retBits |= U.pVal[hiWord] << (wordBits - loBit);
 484   retBits &= maskBits;
 485   return retBits;
 486 }
 487
 488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 489   assert(!str.empty() && "Invalid string length");
 490   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 491           radix == 36) &&
 492          "Radix should be 2, 8, 10, 16, or 36!");
 493
 494   size_t slen = str.size();
 495
 496   // Each computation below needs to know if it's negative.
 497   StringRef::iterator p = str.begin();
 498   unsigned isNegative = *p == '-';
 499   if (*p == '-' || *p == '+') {
 500     p++;
 501     slen--;
 502     assert(slen && "String is only a sign, needs a value.");
 503   }
 504
 505   // For radixes of power-of-two values, the bits required is accurately and
 506   // easily computed
 507   if (radix == 2)
 508     return slen + isNegative;
 509   if (radix == 8)
 510     return slen * 3 + isNegative;
 511   if (radix == 16)
 512     return slen * 4 + isNegative;
 513
 514   // FIXME: base 36
 515
 516   // This is grossly inefficient but accurate. We could probably do something
 517   // with a computation of roughly slen*64/20 and then adjust by the value of
 518   // the first few digits. But, I'm not sure how accurate that could be.
 519
 520   // Compute a sufficient number of bits that is always large enough but might
 521   // be too large. This avoids the assertion in the constructor. This
 522   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 523   // bits in that case.
 524   unsigned sufficient
 525     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 526                  : (slen == 1 ? 7 : slen * 16/3);
 527
 528   // Convert to the actual binary value.
 529   APInt tmp(sufficient, StringRef(p, slen), radix);
 530
 531   // Compute how many bits are required. If the log is infinite, assume we need
 532   // just bit. If the log is exact and value is negative, then the value is
 533   // MinSignedValue with (log + 1) bits.
 534   unsigned log = tmp.logBase2();
 535   if (log == (unsigned)-1) {
 536     return isNegative + 1;
 537   } else if (isNegative && tmp.isPowerOf2()) {
 538     return isNegative + log;
 539   } else {
 540     return isNegative + log + 1;
 541   }
 542 }
 543
 544 hash_code llvm::hash_value(const APInt &Arg) {
 545   if (Arg.isSingleWord())
 546     return hash_combine(Arg.BitWidth, Arg.U.VAL);
 547
 548   return hash_combine(
 549       Arg.BitWidth,
 550       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
 551 }
 552
 553 unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
 554   return static_cast<unsigned>(hash_value(Key));
 555 }
 556
 557 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 558   assert(getBitWidth() % SplatSizeInBits == 0 &&
 559          "SplatSizeInBits must divide width!");
 560   // We can check that all parts of an integer are equal by making use of a
 561   // little trick: rotate and check if it's still the same value.
 562   return *this == rotl(SplatSizeInBits);
 563 }
 564
 565 /// This function returns the high "numBits" bits of this APInt.
 566 APInt APInt::getHiBits(unsigned numBits) const {
 567   return this->lshr(BitWidth - numBits);
 568 }
 569
 570 /// This function returns the low "numBits" bits of this APInt.
 571 APInt APInt::getLoBits(unsigned numBits) const {
 572   APInt Result(getLowBitsSet(BitWidth, numBits));
 573   Result &= *this;
 574   return Result;
 575 }
 576
 577 /// Return a value containing V broadcasted over NewLen bits.
 578 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 579   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 580
 581   APInt Val = V.zextOrSelf(NewLen);
 582   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 583     Val |= Val << I;
 584
 585   return Val;
 586 }
 587
 588 unsigned APInt::countLeadingZerosSlowCase() const {
 589   unsigned Count = 0;
 590   for (int i = getNumWords()-1; i >= 0; --i) {
 591     uint64_t V = U.pVal[i];
 592     if (V == 0)
 593       Count += APINT_BITS_PER_WORD;
 594     else {
 595       Count += llvm::countLeadingZeros(V);
 596       break;
 597     }
 598   }
 599   // Adjust for unused bits in the most significant word (they are zero).
 600   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 601   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 602   return Count;
 603 }
 604
 605 unsigned APInt::countLeadingOnesSlowCase() const {
 606   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 607   unsigned shift;
 608   if (!highWordBits) {
 609     highWordBits = APINT_BITS_PER_WORD;
 610     shift = 0;
 611   } else {
 612     shift = APINT_BITS_PER_WORD - highWordBits;
 613   }
 614   int i = getNumWords() - 1;
 615   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 616   if (Count == highWordBits) {
 617     for (i--; i >= 0; --i) {
 618       if (U.pVal[i] == WORDTYPE_MAX)
 619         Count += APINT_BITS_PER_WORD;
 620       else {
 621         Count += llvm::countLeadingOnes(U.pVal[i]);
 622         break;
 623       }
 624     }
 625   }
 626   return Count;
 627 }
 628
 629 unsigned APInt::countTrailingZerosSlowCase() const {
 630   unsigned Count = 0;
 631   unsigned i = 0;
 632   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 633     Count += APINT_BITS_PER_WORD;
 634   if (i < getNumWords())
 635     Count += llvm::countTrailingZeros(U.pVal[i]);
 636   return std::min(Count, BitWidth);
 637 }
 638
 639 unsigned APInt::countTrailingOnesSlowCase() const {
 640   unsigned Count = 0;
 641   unsigned i = 0;
 642   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
 643     Count += APINT_BITS_PER_WORD;
 644   if (i < getNumWords())
 645     Count += llvm::countTrailingOnes(U.pVal[i]);
 646   assert(Count <= BitWidth);
 647   return Count;
 648 }
 649
 650 unsigned APInt::countPopulationSlowCase() const {
 651   unsigned Count = 0;
 652   for (unsigned i = 0; i < getNumWords(); ++i)
 653     Count += llvm::countPopulation(U.pVal[i]);
 654   return Count;
 655 }
 656
 657 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 658   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 659     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 660       return true;
 661
 662   return false;
 663 }
 664
 665 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 666   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 667     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 668       return false;
 669
 670   return true;
 671 }
 672
 673 APInt APInt::byteSwap() const {
 674   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
 675   if (BitWidth == 16)
 676     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 677   if (BitWidth == 32)
 678     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 679   if (BitWidth <= 64) {
 680     uint64_t Tmp1 = ByteSwap_64(U.VAL);
 681     Tmp1 >>= (64 - BitWidth);
 682     return APInt(BitWidth, Tmp1);
 683   }
 684
 685   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 686   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 687     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 688   if (Result.BitWidth != BitWidth) {
 689     Result.lshrInPlace(Result.BitWidth - BitWidth);
 690     Result.BitWidth = BitWidth;
 691   }
 692   return Result;
 693 }
 694
 695 APInt APInt::reverseBits() const {
 696   switch (BitWidth) {
 697   case 64:
 698     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 699   case 32:
 700     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 701   case 16:
 702     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 703   case 8:
 704     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 705   default:
 706     break;
 707   }
 708
 709   APInt Val(*this);
 710   APInt Reversed(BitWidth, 0);
 711   unsigned S = BitWidth;
 712
 713   for (; Val != 0; Val.lshrInPlace(1)) {
 714     Reversed <<= 1;
 715     Reversed |= Val[0];
 716     --S;
 717   }
 718
 719   Reversed <<= S;
 720   return Reversed;
 721 }
 722
 723 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 724   // Fast-path a common case.
 725   if (A == B) return A;
 726
 727   // Corner cases: if either operand is zero, the other is the gcd.
 728   if (!A) return B;
 729   if (!B) return A;
 730
 731   // Count common powers of 2 and remove all other powers of 2.
 732   unsigned Pow2;
 733   {
 734     unsigned Pow2_A = A.countTrailingZeros();
 735     unsigned Pow2_B = B.countTrailingZeros();
 736     if (Pow2_A > Pow2_B) {
 737       A.lshrInPlace(Pow2_A - Pow2_B);
 738       Pow2 = Pow2_B;
 739     } else if (Pow2_B > Pow2_A) {
 740       B.lshrInPlace(Pow2_B - Pow2_A);
 741       Pow2 = Pow2_A;
 742     } else {
 743       Pow2 = Pow2_A;
 744     }
 745   }
 746
 747   // Both operands are odd multiples of 2^Pow_2:
 748   //
 749   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 750   //
 751   // This is a modified version of Stein's algorithm, taking advantage of
 752   // efficient countTrailingZeros().
 753   while (A != B) {
 754     if (A.ugt(B)) {
 755       A -= B;
 756       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 757     } else {
 758       B -= A;
 759       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 760     }
 761   }
 762
 763   return A;
 764 }
 765
 766 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 767   uint64_t I = bit_cast<uint64_t>(Double);
 768
 769   // Get the sign bit from the highest order bit
 770   bool isNeg = I >> 63;
 771
 772   // Get the 11-bit exponent and adjust for the 1023 bit bias
 773   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
 774
 775   // If the exponent is negative, the value is < 0 so just return 0.
 776   if (exp < 0)
 777     return APInt(width, 0u);
 778
 779   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 780   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
 781
 782   // If the exponent doesn't shift all bits out of the mantissa
 783   if (exp < 52)
 784     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 785                     APInt(width, mantissa >> (52 - exp));
 786
 787   // If the client didn't provide enough bits for us to shift the mantissa into
 788   // then the result is undefined, just return 0
 789   if (width <= exp - 52)
 790     return APInt(width, 0);
 791
 792   // Otherwise, we have to shift the mantissa bits up to the right location
 793   APInt Tmp(width, mantissa);
 794   Tmp <<= (unsigned)exp - 52;
 795   return isNeg ? -Tmp : Tmp;
 796 }
 797
 798 /// This function converts this APInt to a double.
 799 /// The layout for double is as following (IEEE Standard 754):
 800 ///  --------------------------------------
 801 /// |  Sign    Exponent    Fraction    Bias |
 802 /// |-------------------------------------- |
 803 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 804 ///  --------------------------------------
 805 double APInt::roundToDouble(bool isSigned) const {
 806
 807   // Handle the simple case where the value is contained in one uint64_t.
 808   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 809   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 810     if (isSigned) {
 811       int64_t sext = SignExtend64(getWord(0), BitWidth);
 812       return double(sext);
 813     } else
 814       return double(getWord(0));
 815   }
 816
 817   // Determine if the value is negative.
 818   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 819
 820   // Construct the absolute value if we're negative.
 821   APInt Tmp(isNeg ? -(*this) : (*this));
 822
 823   // Figure out how many bits we're using.
 824   unsigned n = Tmp.getActiveBits();
 825
 826   // The exponent (without bias normalization) is just the number of bits
 827   // we are using. Note that the sign bit is gone since we constructed the
 828   // absolute value.
 829   uint64_t exp = n;
 830
 831   // Return infinity for exponent overflow
 832   if (exp > 1023) {
 833     if (!isSigned || !isNeg)
 834       return std::numeric_limits<double>::infinity();
 835     else
 836       return -std::numeric_limits<double>::infinity();
 837   }
 838   exp += 1023; // Increment for 1023 bias
 839
 840   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 841   // extract the high 52 bits from the correct words in pVal.
 842   uint64_t mantissa;
 843   unsigned hiWord = whichWord(n-1);
 844   if (hiWord == 0) {
 845     mantissa = Tmp.U.pVal[0];
 846     if (n > 52)
 847       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 848   } else {
 849     assert(hiWord > 0 && "huh?");
 850     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 851     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 852     mantissa = hibits | lobits;
 853   }
 854
 855   // The leading bit of mantissa is implicit, so get rid of it.
 856   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 857   uint64_t I = sign | (exp << 52) | mantissa;
 858   return bit_cast<double>(I);
 859 }
 860
 861 // Truncate to new width.
 862 APInt APInt::trunc(unsigned width) const {
 863   assert(width < BitWidth && "Invalid APInt Truncate request");
 864   assert(width && "Can't truncate to 0 bits");
 865
 866   if (width <= APINT_BITS_PER_WORD)
 867     return APInt(width, getRawData()[0]);
 868
 869   APInt Result(getMemory(getNumWords(width)), width);
 870
 871   // Copy full words.
 872   unsigned i;
 873   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 874     Result.U.pVal[i] = U.pVal[i];
 875
 876   // Truncate and copy any partial word.
 877   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 878   if (bits != 0)
 879     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 880
 881   return Result;
 882 }
 883
 884 // Truncate to new width with unsigned saturation.
 885 APInt APInt::truncUSat(unsigned width) const {
 886   assert(width < BitWidth && "Invalid APInt Truncate request");
 887   assert(width && "Can't truncate to 0 bits");
 888
 889   // Can we just losslessly truncate it?
 890   if (isIntN(width))
 891     return trunc(width);
 892   // If not, then just return the new limit.
 893   return APInt::getMaxValue(width);
 894 }
 895
 896 // Truncate to new width with signed saturation.
 897 APInt APInt::truncSSat(unsigned width) const {
 898   assert(width < BitWidth && "Invalid APInt Truncate request");
 899   assert(width && "Can't truncate to 0 bits");
 900
 901   // Can we just losslessly truncate it?
 902   if (isSignedIntN(width))
 903     return trunc(width);
 904   // If not, then just return the new limits.
 905   return isNegative() ? APInt::getSignedMinValue(width)
 906                       : APInt::getSignedMaxValue(width);
 907 }
 908
 909 // Sign extend to a new width.
 910 APInt APInt::sext(unsigned Width) const {
 911   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 912
 913   if (Width <= APINT_BITS_PER_WORD)
 914     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 915
 916   APInt Result(getMemory(getNumWords(Width)), Width);
 917
 918   // Copy words.
 919   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 920
 921   // Sign extend the last word since there may be unused bits in the input.
 922   Result.U.pVal[getNumWords() - 1] =
 923       SignExtend64(Result.U.pVal[getNumWords() - 1],
 924                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 925
 926   // Fill with sign bits.
 927   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 928               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 929   Result.clearUnusedBits();
 930   return Result;
 931 }
 932
 933 //  Zero extend to a new width.
 934 APInt APInt::zext(unsigned width) const {
 935   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 936
 937   if (width <= APINT_BITS_PER_WORD)
 938     return APInt(width, U.VAL);
 939
 940   APInt Result(getMemory(getNumWords(width)), width);
 941
 942   // Copy words.
 943   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 944
 945   // Zero remaining words.
 946   std::memset(Result.U.pVal + getNumWords(), 0,
 947               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 948
 949   return Result;
 950 }
 951
 952 APInt APInt::zextOrTrunc(unsigned width) const {
 953   if (BitWidth < width)
 954     return zext(width);
 955   if (BitWidth > width)
 956     return trunc(width);
 957   return *this;
 958 }
 959
 960 APInt APInt::sextOrTrunc(unsigned width) const {
 961   if (BitWidth < width)
 962     return sext(width);
 963   if (BitWidth > width)
 964     return trunc(width);
 965   return *this;
 966 }
 967
 968 APInt APInt::truncOrSelf(unsigned width) const {
 969   if (BitWidth > width)
 970     return trunc(width);
 971   return *this;
 972 }
 973
 974 APInt APInt::zextOrSelf(unsigned width) const {
 975   if (BitWidth < width)
 976     return zext(width);
 977   return *this;
 978 }
 979
 980 APInt APInt::sextOrSelf(unsigned width) const {
 981   if (BitWidth < width)
 982     return sext(width);
 983   return *this;
 984 }
 985
 986 /// Arithmetic right-shift this APInt by shiftAmt.
 987 /// Arithmetic right-shift function.
 988 void APInt::ashrInPlace(const APInt &shiftAmt) {
 989   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 990 }
 991
 992 /// Arithmetic right-shift this APInt by shiftAmt.
 993 /// Arithmetic right-shift function.
 994 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 995   // Don't bother performing a no-op shift.
 996   if (!ShiftAmt)
 997     return;
 998
 999   // Save the original sign bit for later.
1000   bool Negative = isNegative();
1001
1002   // WordShift is the inter-part shift; BitShift is intra-part shift.
1003   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1004   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1005
1006   unsigned WordsToMove = getNumWords() - WordShift;
1007   if (WordsToMove != 0) {
1008     // Sign extend the last word to fill in the unused bits.
1009     U.pVal[getNumWords() - 1] = SignExtend64(
1010         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1011
1012     // Fastpath for moving by whole words.
1013     if (BitShift == 0) {
1014       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1015     } else {
1016       // Move the words containing significant bits.
1017       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1018         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1019                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1020
1021       // Handle the last word which has no high bits to copy.
1022       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1023       // Sign extend one more time.
1024       U.pVal[WordsToMove - 1] =
1025           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1026     }
1027   }
1028
1029   // Fill in the remainder based on the original sign.
1030   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1031               WordShift * APINT_WORD_SIZE);
1032   clearUnusedBits();
1033 }
1034
1035 /// Logical right-shift this APInt by shiftAmt.
1036 /// Logical right-shift function.
1037 void APInt::lshrInPlace(const APInt &shiftAmt) {
1038   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1039 }
1040
1041 /// Logical right-shift this APInt by shiftAmt.
1042 /// Logical right-shift function.
1043 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1044   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1045 }
1046
1047 /// Left-shift this APInt by shiftAmt.
1048 /// Left-shift function.
1049 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1050   // It's undefined behavior in C to shift by BitWidth or greater.
1051   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1052   return *this;
1053 }
1054
1055 void APInt::shlSlowCase(unsigned ShiftAmt) {
1056   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1057   clearUnusedBits();
1058 }
1059
1060 // Calculate the rotate amount modulo the bit width.
1061 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1062   unsigned rotBitWidth = rotateAmt.getBitWidth();
1063   APInt rot = rotateAmt;
1064   if (rotBitWidth < BitWidth) {
1065     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1066     // e.g. APInt(1, 32) would give APInt(1, 0).
1067     rot = rotateAmt.zext(BitWidth);
1068   }
1069   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1070   return rot.getLimitedValue(BitWidth);
1071 }
1072
1073 APInt APInt::rotl(const APInt &rotateAmt) const {
1074   return rotl(rotateModulo(BitWidth, rotateAmt));
1075 }
1076
1077 APInt APInt::rotl(unsigned rotateAmt) const {
1078   rotateAmt %= BitWidth;
1079   if (rotateAmt == 0)
1080     return *this;
1081   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1082 }
1083
1084 APInt APInt::rotr(const APInt &rotateAmt) const {
1085   return rotr(rotateModulo(BitWidth, rotateAmt));
1086 }
1087
1088 APInt APInt::rotr(unsigned rotateAmt) const {
1089   rotateAmt %= BitWidth;
1090   if (rotateAmt == 0)
1091     return *this;
1092   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1093 }
1094
1095 // Square Root - this method computes and returns the square root of "this".
1096 // Three mechanisms are used for computation. For small values (<= 5 bits),
1097 // a table lookup is done. This gets some performance for common cases. For
1098 // values using less than 52 bits, the value is converted to double and then
1099 // the libc sqrt function is called. The result is rounded and then converted
1100 // back to a uint64_t which is then used to construct the result. Finally,
1101 // the Babylonian method for computing square roots is used.
1102 APInt APInt::sqrt() const {
1103
1104   // Determine the magnitude of the value.
1105   unsigned magnitude = getActiveBits();
1106
1107   // Use a fast table for some small values. This also gets rid of some
1108   // rounding errors in libc sqrt for small values.
1109   if (magnitude <= 5) {
1110     static const uint8_t results[32] = {
1111       /*     0 */ 0,
1112       /*  1- 2 */ 1, 1,
1113       /*  3- 6 */ 2, 2, 2, 2,
1114       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1115       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1116       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1117       /*    31 */ 6
1118     };
1119     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1120   }
1121
1122   // If the magnitude of the value fits in less than 52 bits (the precision of
1123   // an IEEE double precision floating point value), then we can use the
1124   // libc sqrt function which will probably use a hardware sqrt computation.
1125   // This should be faster than the algorithm below.
1126   if (magnitude < 52) {
1127     return APInt(BitWidth,
1128                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1129                                                                : U.pVal[0])))));
1130   }
1131
1132   // Okay, all the short cuts are exhausted. We must compute it. The following
1133   // is a classical Babylonian method for computing the square root. This code
1134   // was adapted to APInt from a wikipedia article on such computations.
1135   // See http://www.wikipedia.org/ and go to the page named
1136   // Calculate_an_integer_square_root.
1137   unsigned nbits = BitWidth, i = 4;
1138   APInt testy(BitWidth, 16);
1139   APInt x_old(BitWidth, 1);
1140   APInt x_new(BitWidth, 0);
1141   APInt two(BitWidth, 2);
1142
1143   // Select a good starting value using binary logarithms.
1144   for (;; i += 2, testy = testy.shl(2))
1145     if (i >= nbits || this->ule(testy)) {
1146       x_old = x_old.shl(i / 2);
1147       break;
1148     }
1149
1150   // Use the Babylonian method to arrive at the integer square root:
1151   for (;;) {
1152     x_new = (this->udiv(x_old) + x_old).udiv(two);
1153     if (x_old.ule(x_new))
1154       break;
1155     x_old = x_new;
1156   }
1157
1158   // Make sure we return the closest approximation
1159   // NOTE: The rounding calculation below is correct. It will produce an
1160   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1161   // determined to be a rounding issue with pari/gp as it begins to use a
1162   // floating point representation after 192 bits. There are no discrepancies
1163   // between this algorithm and pari/gp for bit widths < 192 bits.
1164   APInt square(x_old * x_old);
1165   APInt nextSquare((x_old + 1) * (x_old +1));
1166   if (this->ult(square))
1167     return x_old;
1168   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1169   APInt midpoint((nextSquare - square).udiv(two));
1170   APInt offset(*this - square);
1171   if (offset.ult(midpoint))
1172     return x_old;
1173   return x_old + 1;
1174 }
1175
1176 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1177 /// iterative extended Euclidean algorithm is used to solve for this value,
1178 /// however we simplify it to speed up calculating only the inverse, and take
1179 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1180 /// (potentially large) APInts around.
1181 /// WARNING: a value of '0' may be returned,
1182 ///          signifying that no multiplicative inverse exists!
1183 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1184   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1185
1186   // Using the properties listed at the following web page (accessed 06/21/08):
1187   //   http://www.numbertheory.org/php/euclid.html
1188   // (especially the properties numbered 3, 4 and 9) it can be proved that
1189   // BitWidth bits suffice for all the computations in the algorithm implemented
1190   // below. More precisely, this number of bits suffice if the multiplicative
1191   // inverse exists, but may not suffice for the general extended Euclidean
1192   // algorithm.
1193
1194   APInt r[2] = { modulo, *this };
1195   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1196   APInt q(BitWidth, 0);
1197
1198   unsigned i;
1199   for (i = 0; r[i^1] != 0; i ^= 1) {
1200     // An overview of the math without the confusing bit-flipping:
1201     // q = r[i-2] / r[i-1]
1202     // r[i] = r[i-2] % r[i-1]
1203     // t[i] = t[i-2] - t[i-1] * q
1204     udivrem(r[i], r[i^1], q, r[i]);
1205     t[i] -= t[i^1] * q;
1206   }
1207
1208   // If this APInt and the modulo are not coprime, there is no multiplicative
1209   // inverse, so return 0. We check this by looking at the next-to-last
1210   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1211   // algorithm.
1212   if (r[i] != 1)
1213     return APInt(BitWidth, 0);
1214
1215   // The next-to-last t is the multiplicative inverse.  However, we are
1216   // interested in a positive inverse. Calculate a positive one from a negative
1217   // one if necessary. A simple addition of the modulo suffices because
1218   // abs(t[i]) is known to be less than *this/2 (see the link above).
1219   if (t[i].isNegative())
1220     t[i] += modulo;
1221
1222   return std::move(t[i]);
1223 }
1224
1225 /// Calculate the magic numbers required to implement a signed integer division
1226 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1227 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1228 /// Warren, Jr., chapter 10.
1229 APInt::ms APInt::magic() const {
1230   const APInt& d = *this;
1231   unsigned p;
1232   APInt ad, anc, delta, q1, r1, q2, r2, t;
1233   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1234   struct ms mag;
1235
1236   ad = d.abs();
1237   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1238   anc = t - 1 - t.urem(ad);   // absolute value of nc
1239   p = d.getBitWidth() - 1;    // initialize p
1240   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1241   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1242   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1243   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1244   do {
1245     p = p + 1;
1246     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1247     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1248     if (r1.uge(anc)) {  // must be unsigned comparison
1249       q1 = q1 + 1;
1250       r1 = r1 - anc;
1251     }
1252     q2 = q2<<1;          // update q2 = 2p/abs(d)
1253     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1254     if (r2.uge(ad)) {   // must be unsigned comparison
1255       q2 = q2 + 1;
1256       r2 = r2 - ad;
1257     }
1258     delta = ad - r2;
1259   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1260
1261   mag.m = q2 + 1;
1262   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1263   mag.s = p - d.getBitWidth();          // resulting shift
1264   return mag;
1265 }
1266
1267 /// Calculate the magic numbers required to implement an unsigned integer
1268 /// division by a constant as a sequence of multiplies, adds and shifts.
1269 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1270 /// S. Warren, Jr., chapter 10.
1271 /// LeadingZeros can be used to simplify the calculation if the upper bits
1272 /// of the divided value are known zero.
1273 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1274   const APInt& d = *this;
1275   unsigned p;
1276   APInt nc, delta, q1, r1, q2, r2;
1277   struct mu magu;
1278   magu.a = 0;               // initialize "add" indicator
1279   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1280   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1281   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1282
1283   nc = allOnes - (allOnes - d).urem(d);
1284   p = d.getBitWidth() - 1;  // initialize p
1285   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1286   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1287   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1288   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1289   do {
1290     p = p + 1;
1291     if (r1.uge(nc - r1)) {
1292       q1 = q1 + q1 + 1;  // update q1
1293       r1 = r1 + r1 - nc; // update r1
1294     }
1295     else {
1296       q1 = q1+q1; // update q1
1297       r1 = r1+r1; // update r1
1298     }
1299     if ((r2 + 1).uge(d - r2)) {
1300       if (q2.uge(signedMax)) magu.a = 1;
1301       q2 = q2+q2 + 1;     // update q2
1302       r2 = r2+r2 + 1 - d; // update r2
1303     }
1304     else {
1305       if (q2.uge(signedMin)) magu.a = 1;
1306       q2 = q2+q2;     // update q2
1307       r2 = r2+r2 + 1; // update r2
1308     }
1309     delta = d - 1 - r2;
1310   } while (p < d.getBitWidth()*2 &&
1311            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1312   magu.m = q2 + 1; // resulting magic number
1313   magu.s = p - d.getBitWidth();  // resulting shift
1314   return magu;
1315 }
1316
1317 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1318 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1319 /// variables here have the same names as in the algorithm. Comments explain
1320 /// the algorithm and any deviation from it.
1321 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1322                      unsigned m, unsigned n) {
1323   assert(u && "Must provide dividend");
1324   assert(v && "Must provide divisor");
1325   assert(q && "Must provide quotient");
1326   assert(u != v && u != q && v != q && "Must use different memory");
1327   assert(n>1 && "n must be > 1");
1328
1329   // b denotes the base of the number system. In our case b is 2^32.
1330   const uint64_t b = uint64_t(1) << 32;
1331
1332 // The DEBUG macros here tend to be spam in the debug output if you're not
1333 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1334 #ifdef KNUTH_DEBUG
1335 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1336 #else
1337 #define DEBUG_KNUTH(X) do {} while(false)
1338 #endif
1339
1340   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1341   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1342   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343   DEBUG_KNUTH(dbgs() << " by");
1344   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1345   DEBUG_KNUTH(dbgs() << '\n');
1346   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1347   // u and v by d. Note that we have taken Knuth's advice here to use a power
1348   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1349   // 2 allows us to shift instead of multiply and it is easy to determine the
1350   // shift amount from the leading zeros.  We are basically normalizing the u
1351   // and v so that its high bits are shifted to the top of v's range without
1352   // overflow. Note that this can require an extra word in u so that u must
1353   // be of length m+n+1.
1354   unsigned shift = countLeadingZeros(v[n-1]);
1355   uint32_t v_carry = 0;
1356   uint32_t u_carry = 0;
1357   if (shift) {
1358     for (unsigned i = 0; i < m+n; ++i) {
1359       uint32_t u_tmp = u[i] >> (32 - shift);
1360       u[i] = (u[i] << shift) | u_carry;
1361       u_carry = u_tmp;
1362     }
1363     for (unsigned i = 0; i < n; ++i) {
1364       uint32_t v_tmp = v[i] >> (32 - shift);
1365       v[i] = (v[i] << shift) | v_carry;
1366       v_carry = v_tmp;
1367     }
1368   }
1369   u[m+n] = u_carry;
1370
1371   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1372   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1373   DEBUG_KNUTH(dbgs() << " by");
1374   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1375   DEBUG_KNUTH(dbgs() << '\n');
1376
1377   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1378   int j = m;
1379   do {
1380     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1381     // D3. [Calculate q'.].
1382     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1383     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1384     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1385     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1386     // on v[n-2] determines at high speed most of the cases in which the trial
1387     // value qp is one too large, and it eliminates all cases where qp is two
1388     // too large.
1389     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1390     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1391     uint64_t qp = dividend / v[n-1];
1392     uint64_t rp = dividend % v[n-1];
1393     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1394       qp--;
1395       rp += v[n-1];
1396       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1397         qp--;
1398     }
1399     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1400
1401     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1402     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1403     // consists of a simple multiplication by a one-place number, combined with
1404     // a subtraction.
1405     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1406     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1407     // true value plus b**(n+1), namely as the b's complement of
1408     // the true value, and a "borrow" to the left should be remembered.
1409     int64_t borrow = 0;
1410     for (unsigned i = 0; i < n; ++i) {
1411       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1412       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1413       u[j+i] = Lo_32(subres);
1414       borrow = Hi_32(p) - Hi_32(subres);
1415       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1416                         << ", borrow = " << borrow << '\n');
1417     }
1418     bool isNeg = u[j+n] < borrow;
1419     u[j+n] -= Lo_32(borrow);
1420
1421     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1422     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1423     DEBUG_KNUTH(dbgs() << '\n');
1424
1425     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1426     // negative, go to step D6; otherwise go on to step D7.
1427     q[j] = Lo_32(qp);
1428     if (isNeg) {
1429       // D6. [Add back]. The probability that this step is necessary is very
1430       // small, on the order of only 2/b. Make sure that test data accounts for
1431       // this possibility. Decrease q[j] by 1
1432       q[j]--;
1433       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1434       // A carry will occur to the left of u[j+n], and it should be ignored
1435       // since it cancels with the borrow that occurred in D4.
1436       bool carry = false;
1437       for (unsigned i = 0; i < n; i++) {
1438         uint32_t limit = std::min(u[j+i],v[i]);
1439         u[j+i] += v[i] + carry;
1440         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1441       }
1442       u[j+n] += carry;
1443     }
1444     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1445     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1446     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1447
1448     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1449   } while (--j >= 0);
1450
1451   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1452   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1453   DEBUG_KNUTH(dbgs() << '\n');
1454
1455   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1456   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1457   // compute the remainder (urem uses this).
1458   if (r) {
1459     // The value d is expressed by the "shift" value above since we avoided
1460     // multiplication by d by using a shift left. So, all we have to do is
1461     // shift right here.
1462     if (shift) {
1463       uint32_t carry = 0;
1464       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1465       for (int i = n-1; i >= 0; i--) {
1466         r[i] = (u[i] >> shift) | carry;
1467         carry = u[i] << (32 - shift);
1468         DEBUG_KNUTH(dbgs() << " " << r[i]);
1469       }
1470     } else {
1471       for (int i = n-1; i >= 0; i--) {
1472         r[i] = u[i];
1473         DEBUG_KNUTH(dbgs() << " " << r[i]);
1474       }
1475     }
1476     DEBUG_KNUTH(dbgs() << '\n');
1477   }
1478   DEBUG_KNUTH(dbgs() << '\n');
1479 }
1480
1481 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1482                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1483   assert(lhsWords >= rhsWords && "Fractional result");
1484
1485   // First, compose the values into an array of 32-bit words instead of
1486   // 64-bit words. This is a necessity of both the "short division" algorithm
1487   // and the Knuth "classical algorithm" which requires there to be native
1488   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1489   // can't use 64-bit operands here because we don't have native results of
1490   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1491   // work on large-endian machines.
1492   unsigned n = rhsWords * 2;
1493   unsigned m = (lhsWords * 2) - n;
1494
1495   // Allocate space for the temporary values we need either on the stack, if
1496   // it will fit, or on the heap if it won't.
1497   uint32_t SPACE[128];
1498   uint32_t *U = nullptr;
1499   uint32_t *V = nullptr;
1500   uint32_t *Q = nullptr;
1501   uint32_t *R = nullptr;
1502   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1503     U = &SPACE[0];
1504     V = &SPACE[m+n+1];
1505     Q = &SPACE[(m+n+1) + n];
1506     if (Remainder)
1507       R = &SPACE[(m+n+1) + n + (m+n)];
1508   } else {
1509     U = new uint32_t[m + n + 1];
1510     V = new uint32_t[n];
1511     Q = new uint32_t[m+n];
1512     if (Remainder)
1513       R = new uint32_t[n];
1514   }
1515
1516   // Initialize the dividend
1517   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1518   for (unsigned i = 0; i < lhsWords; ++i) {
1519     uint64_t tmp = LHS[i];
1520     U[i * 2] = Lo_32(tmp);
1521     U[i * 2 + 1] = Hi_32(tmp);
1522   }
1523   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1524
1525   // Initialize the divisor
1526   memset(V, 0, (n)*sizeof(uint32_t));
1527   for (unsigned i = 0; i < rhsWords; ++i) {
1528     uint64_t tmp = RHS[i];
1529     V[i * 2] = Lo_32(tmp);
1530     V[i * 2 + 1] = Hi_32(tmp);
1531   }
1532
1533   // initialize the quotient and remainder
1534   memset(Q, 0, (m+n) * sizeof(uint32_t));
1535   if (Remainder)
1536     memset(R, 0, n * sizeof(uint32_t));
1537
1538   // Now, adjust m and n for the Knuth division. n is the number of words in
1539   // the divisor. m is the number of words by which the dividend exceeds the
1540   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1541   // contain any zero words or the Knuth algorithm fails.
1542   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1543     n--;
1544     m++;
1545   }
1546   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1547     m--;
1548
1549   // If we're left with only a single word for the divisor, Knuth doesn't work
1550   // so we implement the short division algorithm here. This is much simpler
1551   // and faster because we are certain that we can divide a 64-bit quantity
1552   // by a 32-bit quantity at hardware speed and short division is simply a
1553   // series of such operations. This is just like doing short division but we
1554   // are using base 2^32 instead of base 10.
1555   assert(n != 0 && "Divide by zero?");
1556   if (n == 1) {
1557     uint32_t divisor = V[0];
1558     uint32_t remainder = 0;
1559     for (int i = m; i >= 0; i--) {
1560       uint64_t partial_dividend = Make_64(remainder, U[i]);
1561       if (partial_dividend == 0) {
1562         Q[i] = 0;
1563         remainder = 0;
1564       } else if (partial_dividend < divisor) {
1565         Q[i] = 0;
1566         remainder = Lo_32(partial_dividend);
1567       } else if (partial_dividend == divisor) {
1568         Q[i] = 1;
1569         remainder = 0;
1570       } else {
1571         Q[i] = Lo_32(partial_dividend / divisor);
1572         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1573       }
1574     }
1575     if (R)
1576       R[0] = remainder;
1577   } else {
1578     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1579     // case n > 1.
1580     KnuthDiv(U, V, Q, R, m, n);
1581   }
1582
1583   // If the caller wants the quotient
1584   if (Quotient) {
1585     for (unsigned i = 0; i < lhsWords; ++i)
1586       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1587   }
1588
1589   // If the caller wants the remainder
1590   if (Remainder) {
1591     for (unsigned i = 0; i < rhsWords; ++i)
1592       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1593   }
1594
1595   // Clean up the memory we allocated.
1596   if (U != &SPACE[0]) {
1597     delete [] U;
1598     delete [] V;
1599     delete [] Q;
1600     delete [] R;
1601   }
1602 }
1603
1604 APInt APInt::udiv(const APInt &RHS) const {
1605   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1606
1607   // First, deal with the easy case
1608   if (isSingleWord()) {
1609     assert(RHS.U.VAL != 0 && "Divide by zero?");
1610     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1611   }
1612
1613   // Get some facts about the LHS and RHS number of bits and words
1614   unsigned lhsWords = getNumWords(getActiveBits());
1615   unsigned rhsBits  = RHS.getActiveBits();
1616   unsigned rhsWords = getNumWords(rhsBits);
1617   assert(rhsWords && "Divided by zero???");
1618
1619   // Deal with some degenerate cases
1620   if (!lhsWords)
1621     // 0 / X ===> 0
1622     return APInt(BitWidth, 0);
1623   if (rhsBits == 1)
1624     // X / 1 ===> X
1625     return *this;
1626   if (lhsWords < rhsWords || this->ult(RHS))
1627     // X / Y ===> 0, iff X < Y
1628     return APInt(BitWidth, 0);
1629   if (*this == RHS)
1630     // X / X ===> 1
1631     return APInt(BitWidth, 1);
1632   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1633     // All high words are zero, just use native divide
1634     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1635
1636   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1637   APInt Quotient(BitWidth, 0); // to hold result.
1638   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1639   return Quotient;
1640 }
1641
1642 APInt APInt::udiv(uint64_t RHS) const {
1643   assert(RHS != 0 && "Divide by zero?");
1644
1645   // First, deal with the easy case
1646   if (isSingleWord())
1647     return APInt(BitWidth, U.VAL / RHS);
1648
1649   // Get some facts about the LHS words.
1650   unsigned lhsWords = getNumWords(getActiveBits());
1651
1652   // Deal with some degenerate cases
1653   if (!lhsWords)
1654     // 0 / X ===> 0
1655     return APInt(BitWidth, 0);
1656   if (RHS == 1)
1657     // X / 1 ===> X
1658     return *this;
1659   if (this->ult(RHS))
1660     // X / Y ===> 0, iff X < Y
1661     return APInt(BitWidth, 0);
1662   if (*this == RHS)
1663     // X / X ===> 1
1664     return APInt(BitWidth, 1);
1665   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1666     // All high words are zero, just use native divide
1667     return APInt(BitWidth, this->U.pVal[0] / RHS);
1668
1669   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1670   APInt Quotient(BitWidth, 0); // to hold result.
1671   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1672   return Quotient;
1673 }
1674
1675 APInt APInt::sdiv(const APInt &RHS) const {
1676   if (isNegative()) {
1677     if (RHS.isNegative())
1678       return (-(*this)).udiv(-RHS);
1679     return -((-(*this)).udiv(RHS));
1680   }
1681   if (RHS.isNegative())
1682     return -(this->udiv(-RHS));
1683   return this->udiv(RHS);
1684 }
1685
1686 APInt APInt::sdiv(int64_t RHS) const {
1687   if (isNegative()) {
1688     if (RHS < 0)
1689       return (-(*this)).udiv(-RHS);
1690     return -((-(*this)).udiv(RHS));
1691   }
1692   if (RHS < 0)
1693     return -(this->udiv(-RHS));
1694   return this->udiv(RHS);
1695 }
1696
1697 APInt APInt::urem(const APInt &RHS) const {
1698   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1699   if (isSingleWord()) {
1700     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1701     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1702   }
1703
1704   // Get some facts about the LHS
1705   unsigned lhsWords = getNumWords(getActiveBits());
1706
1707   // Get some facts about the RHS
1708   unsigned rhsBits = RHS.getActiveBits();
1709   unsigned rhsWords = getNumWords(rhsBits);
1710   assert(rhsWords && "Performing remainder operation by zero ???");
1711
1712   // Check the degenerate cases
1713   if (lhsWords == 0)
1714     // 0 % Y ===> 0
1715     return APInt(BitWidth, 0);
1716   if (rhsBits == 1)
1717     // X % 1 ===> 0
1718     return APInt(BitWidth, 0);
1719   if (lhsWords < rhsWords || this->ult(RHS))
1720     // X % Y ===> X, iff X < Y
1721     return *this;
1722   if (*this == RHS)
1723     // X % X == 0;
1724     return APInt(BitWidth, 0);
1725   if (lhsWords == 1)
1726     // All high words are zero, just use native remainder
1727     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1728
1729   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1730   APInt Remainder(BitWidth, 0);
1731   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1732   return Remainder;
1733 }
1734
1735 uint64_t APInt::urem(uint64_t RHS) const {
1736   assert(RHS != 0 && "Remainder by zero?");
1737
1738   if (isSingleWord())
1739     return U.VAL % RHS;
1740
1741   // Get some facts about the LHS
1742   unsigned lhsWords = getNumWords(getActiveBits());
1743
1744   // Check the degenerate cases
1745   if (lhsWords == 0)
1746     // 0 % Y ===> 0
1747     return 0;
1748   if (RHS == 1)
1749     // X % 1 ===> 0
1750     return 0;
1751   if (this->ult(RHS))
1752     // X % Y ===> X, iff X < Y
1753     return getZExtValue();
1754   if (*this == RHS)
1755     // X % X == 0;
1756     return 0;
1757   if (lhsWords == 1)
1758     // All high words are zero, just use native remainder
1759     return U.pVal[0] % RHS;
1760
1761   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1762   uint64_t Remainder;
1763   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1764   return Remainder;
1765 }
1766
1767 APInt APInt::srem(const APInt &RHS) const {
1768   if (isNegative()) {
1769     if (RHS.isNegative())
1770       return -((-(*this)).urem(-RHS));
1771     return -((-(*this)).urem(RHS));
1772   }
1773   if (RHS.isNegative())
1774     return this->urem(-RHS);
1775   return this->urem(RHS);
1776 }
1777
1778 int64_t APInt::srem(int64_t RHS) const {
1779   if (isNegative()) {
1780     if (RHS < 0)
1781       return -((-(*this)).urem(-RHS));
1782     return -((-(*this)).urem(RHS));
1783   }
1784   if (RHS < 0)
1785     return this->urem(-RHS);
1786   return this->urem(RHS);
1787 }
1788
1789 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1790                     APInt &Quotient, APInt &Remainder) {
1791   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1792   unsigned BitWidth = LHS.BitWidth;
1793
1794   // First, deal with the easy case
1795   if (LHS.isSingleWord()) {
1796     assert(RHS.U.VAL != 0 && "Divide by zero?");
1797     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1798     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1799     Quotient = APInt(BitWidth, QuotVal);
1800     Remainder = APInt(BitWidth, RemVal);
1801     return;
1802   }
1803
1804   // Get some size facts about the dividend and divisor
1805   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1806   unsigned rhsBits  = RHS.getActiveBits();
1807   unsigned rhsWords = getNumWords(rhsBits);
1808   assert(rhsWords && "Performing divrem operation by zero ???");
1809
1810   // Check the degenerate cases
1811   if (lhsWords == 0) {
1812     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1813     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1814     return;
1815   }
1816
1817   if (rhsBits == 1) {
1818     Quotient = LHS;                   // X / 1 ===> X
1819     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1820   }
1821
1822   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1823     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1824     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1825     return;
1826   }
1827
1828   if (LHS == RHS) {
1829     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1830     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1831     return;
1832   }
1833
1834   // Make sure there is enough space to hold the results.
1835   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1836   // change the size. This is necessary if Quotient or Remainder is aliased
1837   // with LHS or RHS.
1838   Quotient.reallocate(BitWidth);
1839   Remainder.reallocate(BitWidth);
1840
1841   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1842     // There is only one word to consider so use the native versions.
1843     uint64_t lhsValue = LHS.U.pVal[0];
1844     uint64_t rhsValue = RHS.U.pVal[0];
1845     Quotient = lhsValue / rhsValue;
1846     Remainder = lhsValue % rhsValue;
1847     return;
1848   }
1849
1850   // Okay, lets do it the long way
1851   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1852          Remainder.U.pVal);
1853   // Clear the rest of the Quotient and Remainder.
1854   std::memset(Quotient.U.pVal + lhsWords, 0,
1855               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1856   std::memset(Remainder.U.pVal + rhsWords, 0,
1857               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1858 }
1859
1860 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1861                     uint64_t &Remainder) {
1862   assert(RHS != 0 && "Divide by zero?");
1863   unsigned BitWidth = LHS.BitWidth;
1864
1865   // First, deal with the easy case
1866   if (LHS.isSingleWord()) {
1867     uint64_t QuotVal = LHS.U.VAL / RHS;
1868     Remainder = LHS.U.VAL % RHS;
1869     Quotient = APInt(BitWidth, QuotVal);
1870     return;
1871   }
1872
1873   // Get some size facts about the dividend and divisor
1874   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1875
1876   // Check the degenerate cases
1877   if (lhsWords == 0) {
1878     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1879     Remainder = 0;                    // 0 % Y ===> 0
1880     return;
1881   }
1882
1883   if (RHS == 1) {
1884     Quotient = LHS;                   // X / 1 ===> X
1885     Remainder = 0;                    // X % 1 ===> 0
1886     return;
1887   }
1888
1889   if (LHS.ult(RHS)) {
1890     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1891     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1892     return;
1893   }
1894
1895   if (LHS == RHS) {
1896     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1897     Remainder = 0;                    // X % X ===> 0;
1898     return;
1899   }
1900
1901   // Make sure there is enough space to hold the results.
1902   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1903   // change the size. This is necessary if Quotient is aliased with LHS.
1904   Quotient.reallocate(BitWidth);
1905
1906   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1907     // There is only one word to consider so use the native versions.
1908     uint64_t lhsValue = LHS.U.pVal[0];
1909     Quotient = lhsValue / RHS;
1910     Remainder = lhsValue % RHS;
1911     return;
1912   }
1913
1914   // Okay, lets do it the long way
1915   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1916   // Clear the rest of the Quotient.
1917   std::memset(Quotient.U.pVal + lhsWords, 0,
1918               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1919 }
1920
1921 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1922                     APInt &Quotient, APInt &Remainder) {
1923   if (LHS.isNegative()) {
1924     if (RHS.isNegative())
1925       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1926     else {
1927       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1928       Quotient.negate();
1929     }
1930     Remainder.negate();
1931   } else if (RHS.isNegative()) {
1932     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1933     Quotient.negate();
1934   } else {
1935     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1936   }
1937 }
1938
1939 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1940                     APInt &Quotient, int64_t &Remainder) {
1941   uint64_t R = Remainder;
1942   if (LHS.isNegative()) {
1943     if (RHS < 0)
1944       APInt::udivrem(-LHS, -RHS, Quotient, R);
1945     else {
1946       APInt::udivrem(-LHS, RHS, Quotient, R);
1947       Quotient.negate();
1948     }
1949     R = -R;
1950   } else if (RHS < 0) {
1951     APInt::udivrem(LHS, -RHS, Quotient, R);
1952     Quotient.negate();
1953   } else {
1954     APInt::udivrem(LHS, RHS, Quotient, R);
1955   }
1956   Remainder = R;
1957 }
1958
1959 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1960   APInt Res = *this+RHS;
1961   Overflow = isNonNegative() == RHS.isNonNegative() &&
1962              Res.isNonNegative() != isNonNegative();
1963   return Res;
1964 }
1965
1966 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1967   APInt Res = *this+RHS;
1968   Overflow = Res.ult(RHS);
1969   return Res;
1970 }
1971
1972 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1973   APInt Res = *this - RHS;
1974   Overflow = isNonNegative() != RHS.isNonNegative() &&
1975              Res.isNonNegative() != isNonNegative();
1976   return Res;
1977 }
1978
1979 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1980   APInt Res = *this-RHS;
1981   Overflow = Res.ugt(*this);
1982   return Res;
1983 }
1984
1985 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1986   // MININT/-1  -->  overflow.
1987   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1988   return sdiv(RHS);
1989 }
1990
1991 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1992   APInt Res = *this * RHS;
1993
1994   if (*this != 0 && RHS != 0)
1995     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1996   else
1997     Overflow = false;
1998   return Res;
1999 }
2000
2001 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
2002   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
2003     Overflow = true;
2004     return *this * RHS;
2005   }
2006
2007   APInt Res = lshr(1) * RHS;
2008   Overflow = Res.isNegative();
2009   Res <<= 1;
2010   if ((*this)[0]) {
2011     Res += RHS;
2012     if (Res.ult(RHS))
2013       Overflow = true;
2014   }
2015   return Res;
2016 }
2017
2018 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2019   Overflow = ShAmt.uge(getBitWidth());
2020   if (Overflow)
2021     return APInt(BitWidth, 0);
2022
2023   if (isNonNegative()) // Don't allow sign change.
2024     Overflow = ShAmt.uge(countLeadingZeros());
2025   else
2026     Overflow = ShAmt.uge(countLeadingOnes());
2027
2028   return *this << ShAmt;
2029 }
2030
2031 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2032   Overflow = ShAmt.uge(getBitWidth());
2033   if (Overflow)
2034     return APInt(BitWidth, 0);
2035
2036   Overflow = ShAmt.ugt(countLeadingZeros());
2037
2038   return *this << ShAmt;
2039 }
2040
2041 APInt APInt::sadd_sat(const APInt &RHS) const {
2042   bool Overflow;
2043   APInt Res = sadd_ov(RHS, Overflow);
2044   if (!Overflow)
2045     return Res;
2046
2047   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2048                       : APInt::getSignedMaxValue(BitWidth);
2049 }
2050
2051 APInt APInt::uadd_sat(const APInt &RHS) const {
2052   bool Overflow;
2053   APInt Res = uadd_ov(RHS, Overflow);
2054   if (!Overflow)
2055     return Res;
2056
2057   return APInt::getMaxValue(BitWidth);
2058 }
2059
2060 APInt APInt::ssub_sat(const APInt &RHS) const {
2061   bool Overflow;
2062   APInt Res = ssub_ov(RHS, Overflow);
2063   if (!Overflow)
2064     return Res;
2065
2066   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2067                       : APInt::getSignedMaxValue(BitWidth);
2068 }
2069
2070 APInt APInt::usub_sat(const APInt &RHS) const {
2071   bool Overflow;
2072   APInt Res = usub_ov(RHS, Overflow);
2073   if (!Overflow)
2074     return Res;
2075
2076   return APInt(BitWidth, 0);
2077 }
2078
2079 APInt APInt::smul_sat(const APInt &RHS) const {
2080   bool Overflow;
2081   APInt Res = smul_ov(RHS, Overflow);
2082   if (!Overflow)
2083     return Res;
2084
2085   // The result is negative if one and only one of inputs is negative.
2086   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2087
2088   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2089                        : APInt::getSignedMaxValue(BitWidth);
2090 }
2091
2092 APInt APInt::umul_sat(const APInt &RHS) const {
2093   bool Overflow;
2094   APInt Res = umul_ov(RHS, Overflow);
2095   if (!Overflow)
2096     return Res;
2097
2098   return APInt::getMaxValue(BitWidth);
2099 }
2100
2101 APInt APInt::sshl_sat(const APInt &RHS) const {
2102   bool Overflow;
2103   APInt Res = sshl_ov(RHS, Overflow);
2104   if (!Overflow)
2105     return Res;
2106
2107   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2108                       : APInt::getSignedMaxValue(BitWidth);
2109 }
2110
2111 APInt APInt::ushl_sat(const APInt &RHS) const {
2112   bool Overflow;
2113   APInt Res = ushl_ov(RHS, Overflow);
2114   if (!Overflow)
2115     return Res;
2116
2117   return APInt::getMaxValue(BitWidth);
2118 }
2119
2120 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2121   // Check our assumptions here
2122   assert(!str.empty() && "Invalid string length");
2123   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2124           radix == 36) &&
2125          "Radix should be 2, 8, 10, 16, or 36!");
2126
2127   StringRef::iterator p = str.begin();
2128   size_t slen = str.size();
2129   bool isNeg = *p == '-';
2130   if (*p == '-' || *p == '+') {
2131     p++;
2132     slen--;
2133     assert(slen && "String is only a sign, needs a value.");
2134   }
2135   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2136   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2137   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2138   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2139          "Insufficient bit width");
2140
2141   // Allocate memory if needed
2142   if (isSingleWord())
2143     U.VAL = 0;
2144   else
2145     U.pVal = getClearedMemory(getNumWords());
2146
2147   // Figure out if we can shift instead of multiply
2148   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2149
2150   // Enter digit traversal loop
2151   for (StringRef::iterator e = str.end(); p != e; ++p) {
2152     unsigned digit = getDigit(*p, radix);
2153     assert(digit < radix && "Invalid character in digit string");
2154
2155     // Shift or multiply the value by the radix
2156     if (slen > 1) {
2157       if (shift)
2158         *this <<= shift;
2159       else
2160         *this *= radix;
2161     }
2162
2163     // Add in the digit we just interpreted
2164     *this += digit;
2165   }
2166   // If its negative, put it in two's complement form
2167   if (isNeg)
2168     this->negate();
2169 }
2170
2171 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2172                      bool Signed, bool formatAsCLiteral) const {
2173   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2174           Radix == 36) &&
2175          "Radix should be 2, 8, 10, 16, or 36!");
2176
2177   const char *Prefix = "";
2178   if (formatAsCLiteral) {
2179     switch (Radix) {
2180       case 2:
2181         // Binary literals are a non-standard extension added in gcc 4.3:
2182         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2183         Prefix = "0b";
2184         break;
2185       case 8:
2186         Prefix = "0";
2187         break;
2188       case 10:
2189         break; // No prefix
2190       case 16:
2191         Prefix = "0x";
2192         break;
2193       default:
2194         llvm_unreachable("Invalid radix!");
2195     }
2196   }
2197
2198   // First, check for a zero value and just short circuit the logic below.
2199   if (*this == 0) {
2200     while (*Prefix) {
2201       Str.push_back(*Prefix);
2202       ++Prefix;
2203     };
2204     Str.push_back('0');
2205     return;
2206   }
2207
2208   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2209
2210   if (isSingleWord()) {
2211     char Buffer[65];
2212     char *BufPtr = std::end(Buffer);
2213
2214     uint64_t N;
2215     if (!Signed) {
2216       N = getZExtValue();
2217     } else {
2218       int64_t I = getSExtValue();
2219       if (I >= 0) {
2220         N = I;
2221       } else {
2222         Str.push_back('-');
2223         N = -(uint64_t)I;
2224       }
2225     }
2226
2227     while (*Prefix) {
2228       Str.push_back(*Prefix);
2229       ++Prefix;
2230     };
2231
2232     while (N) {
2233       *--BufPtr = Digits[N % Radix];
2234       N /= Radix;
2235     }
2236     Str.append(BufPtr, std::end(Buffer));
2237     return;
2238   }
2239
2240   APInt Tmp(*this);
2241
2242   if (Signed && isNegative()) {
2243     // They want to print the signed version and it is a negative value
2244     // Flip the bits and add one to turn it into the equivalent positive
2245     // value and put a '-' in the result.
2246     Tmp.negate();
2247     Str.push_back('-');
2248   }
2249
2250   while (*Prefix) {
2251     Str.push_back(*Prefix);
2252     ++Prefix;
2253   };
2254
2255   // We insert the digits backward, then reverse them to get the right order.
2256   unsigned StartDig = Str.size();
2257
2258   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2259   // because the number of bits per digit (1, 3 and 4 respectively) divides
2260   // equally.  We just shift until the value is zero.
2261   if (Radix == 2 || Radix == 8 || Radix == 16) {
2262     // Just shift tmp right for each digit width until it becomes zero
2263     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2264     unsigned MaskAmt = Radix - 1;
2265
2266     while (Tmp.getBoolValue()) {
2267       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2268       Str.push_back(Digits[Digit]);
2269       Tmp.lshrInPlace(ShiftAmt);
2270     }
2271   } else {
2272     while (Tmp.getBoolValue()) {
2273       uint64_t Digit;
2274       udivrem(Tmp, Radix, Tmp, Digit);
2275       assert(Digit < Radix && "divide failed");
2276       Str.push_back(Digits[Digit]);
2277     }
2278   }
2279
2280   // Reverse the digits before returning.
2281   std::reverse(Str.begin()+StartDig, Str.end());
2282 }
2283
2284 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2285 LLVM_DUMP_METHOD void APInt::dump() const {
2286   SmallString<40> S, U;
2287   this->toStringUnsigned(U);
2288   this->toStringSigned(S);
2289   dbgs() << "APInt(" << BitWidth << "b, "
2290          << U << "u " << S << "s)\n";
2291 }
2292 #endif
2293
2294 void APInt::print(raw_ostream &OS, bool isSigned) const {
2295   SmallString<40> S;
2296   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2297   OS << S;
2298 }
2299
2300 // This implements a variety of operations on a representation of
2301 // arbitrary precision, two's-complement, bignum integer values.
2302
2303 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2304 // and unrestricting assumption.
2305 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2306               "Part width must be divisible by 2!");
2307
2308 /* Some handy functions local to this file.  */
2309
2310 /* Returns the integer part with the least significant BITS set.
2311    BITS cannot be zero.  */
2312 static inline APInt::WordType lowBitMask(unsigned bits) {
2313   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2314
2315   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2316 }
2317
2318 /* Returns the value of the lower half of PART.  */
2319 static inline APInt::WordType lowHalf(APInt::WordType part) {
2320   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2321 }
2322
2323 /* Returns the value of the upper half of PART.  */
2324 static inline APInt::WordType highHalf(APInt::WordType part) {
2325   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2326 }
2327
2328 /* Returns the bit number of the most significant set bit of a part.
2329    If the input number has no bits set -1U is returned.  */
2330 static unsigned partMSB(APInt::WordType value) {
2331   return findLastSet(value, ZB_Max);
2332 }
2333
2334 /* Returns the bit number of the least significant set bit of a
2335    part.  If the input number has no bits set -1U is returned.  */
2336 static unsigned partLSB(APInt::WordType value) {
2337   return findFirstSet(value, ZB_Max);
2338 }
2339
2340 /* Sets the least significant part of a bignum to the input value, and
2341    zeroes out higher parts.  */
2342 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2343   assert(parts > 0);
2344
2345   dst[0] = part;
2346   for (unsigned i = 1; i < parts; i++)
2347     dst[i] = 0;
2348 }
2349
2350 /* Assign one bignum to another.  */
2351 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2352   for (unsigned i = 0; i < parts; i++)
2353     dst[i] = src[i];
2354 }
2355
2356 /* Returns true if a bignum is zero, false otherwise.  */
2357 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2358   for (unsigned i = 0; i < parts; i++)
2359     if (src[i])
2360       return false;
2361
2362   return true;
2363 }
2364
2365 /* Extract the given bit of a bignum; returns 0 or 1.  */
2366 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2367   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2368 }
2369
2370 /* Set the given bit of a bignum. */
2371 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2372   parts[whichWord(bit)] |= maskBit(bit);
2373 }
2374
2375 /* Clears the given bit of a bignum. */
2376 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2377   parts[whichWord(bit)] &= ~maskBit(bit);
2378 }
2379
2380 /* Returns the bit number of the least significant set bit of a
2381    number.  If the input number has no bits set -1U is returned.  */
2382 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2383   for (unsigned i = 0; i < n; i++) {
2384     if (parts[i] != 0) {
2385       unsigned lsb = partLSB(parts[i]);
2386
2387       return lsb + i * APINT_BITS_PER_WORD;
2388     }
2389   }
2390
2391   return -1U;
2392 }
2393
2394 /* Returns the bit number of the most significant set bit of a number.
2395    If the input number has no bits set -1U is returned.  */
2396 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2397   do {
2398     --n;
2399
2400     if (parts[n] != 0) {
2401       unsigned msb = partMSB(parts[n]);
2402
2403       return msb + n * APINT_BITS_PER_WORD;
2404     }
2405   } while (n);
2406
2407   return -1U;
2408 }
2409
2410 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2411    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2412    the least significant bit of DST.  All high bits above srcBITS in
2413    DST are zero-filled.  */
2414 void
2415 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2416                  unsigned srcBits, unsigned srcLSB) {
2417   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2418   assert(dstParts <= dstCount);
2419
2420   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2421   tcAssign (dst, src + firstSrcPart, dstParts);
2422
2423   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2424   tcShiftRight (dst, dstParts, shift);
2425
2426   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2427      in DST.  If this is less that srcBits, append the rest, else
2428      clear the high bits.  */
2429   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2430   if (n < srcBits) {
2431     WordType mask = lowBitMask (srcBits - n);
2432     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2433                           << n % APINT_BITS_PER_WORD);
2434   } else if (n > srcBits) {
2435     if (srcBits % APINT_BITS_PER_WORD)
2436       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2437   }
2438
2439   /* Clear high parts.  */
2440   while (dstParts < dstCount)
2441     dst[dstParts++] = 0;
2442 }
2443
2444 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2445 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2446                              WordType c, unsigned parts) {
2447   assert(c <= 1);
2448
2449   for (unsigned i = 0; i < parts; i++) {
2450     WordType l = dst[i];
2451     if (c) {
2452       dst[i] += rhs[i] + 1;
2453       c = (dst[i] <= l);
2454     } else {
2455       dst[i] += rhs[i];
2456       c = (dst[i] < l);
2457     }
2458   }
2459
2460   return c;
2461 }
2462
2463 /// This function adds a single "word" integer, src, to the multiple
2464 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2465 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2466 /// @returns the carry of the addition.
2467 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2468                                  unsigned parts) {
2469   for (unsigned i = 0; i < parts; ++i) {
2470     dst[i] += src;
2471     if (dst[i] >= src)
2472       return 0; // No need to carry so exit early.
2473     src = 1; // Carry one to next digit.
2474   }
2475
2476   return 1;
2477 }
2478
2479 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2480 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2481                                   WordType c, unsigned parts) {
2482   assert(c <= 1);
2483
2484   for (unsigned i = 0; i < parts; i++) {
2485     WordType l = dst[i];
2486     if (c) {
2487       dst[i] -= rhs[i] + 1;
2488       c = (dst[i] >= l);
2489     } else {
2490       dst[i] -= rhs[i];
2491       c = (dst[i] > l);
2492     }
2493   }
2494
2495   return c;
2496 }
2497
2498 /// This function subtracts a single "word" (64-bit word), src, from
2499 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2500 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2501 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2502 /// exhausted. In other words, if src > dst then this function returns 1,
2503 /// otherwise 0.
2504 /// @returns the borrow out of the subtraction
2505 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2506                                       unsigned parts) {
2507   for (unsigned i = 0; i < parts; ++i) {
2508     WordType Dst = dst[i];
2509     dst[i] -= src;
2510     if (src <= Dst)
2511       return 0; // No need to borrow so exit early.
2512     src = 1; // We have to "borrow 1" from next "word"
2513   }
2514
2515   return 1;
2516 }
2517
2518 /* Negate a bignum in-place.  */
2519 void APInt::tcNegate(WordType *dst, unsigned parts) {
2520   tcComplement(dst, parts);
2521   tcIncrement(dst, parts);
2522 }
2523
2524 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2525     DST  = SRC * MULTIPLIER + CARRY   if add is false
2526
2527     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2528     they must start at the same point, i.e. DST == SRC.
2529
2530     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2531     returned.  Otherwise DST is filled with the least significant
2532     DSTPARTS parts of the result, and if all of the omitted higher
2533     parts were zero return zero, otherwise overflow occurred and
2534     return one.  */
2535 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2536                           WordType multiplier, WordType carry,
2537                           unsigned srcParts, unsigned dstParts,
2538                           bool add) {
2539   /* Otherwise our writes of DST kill our later reads of SRC.  */
2540   assert(dst <= src || dst >= src + srcParts);
2541   assert(dstParts <= srcParts + 1);
2542
2543   /* N loops; minimum of dstParts and srcParts.  */
2544   unsigned n = std::min(dstParts, srcParts);
2545
2546   for (unsigned i = 0; i < n; i++) {
2547     WordType low, mid, high, srcPart;
2548
2549       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2550
2551          This cannot overflow, because
2552
2553          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2554
2555          which is less than n^2.  */
2556
2557     srcPart = src[i];
2558
2559     if (multiplier == 0 || srcPart == 0) {
2560       low = carry;
2561       high = 0;
2562     } else {
2563       low = lowHalf(srcPart) * lowHalf(multiplier);
2564       high = highHalf(srcPart) * highHalf(multiplier);
2565
2566       mid = lowHalf(srcPart) * highHalf(multiplier);
2567       high += highHalf(mid);
2568       mid <<= APINT_BITS_PER_WORD / 2;
2569       if (low + mid < low)
2570         high++;
2571       low += mid;
2572
2573       mid = highHalf(srcPart) * lowHalf(multiplier);
2574       high += highHalf(mid);
2575       mid <<= APINT_BITS_PER_WORD / 2;
2576       if (low + mid < low)
2577         high++;
2578       low += mid;
2579
2580       /* Now add carry.  */
2581       if (low + carry < low)
2582         high++;
2583       low += carry;
2584     }
2585
2586     if (add) {
2587       /* And now DST[i], and store the new low part there.  */
2588       if (low + dst[i] < low)
2589         high++;
2590       dst[i] += low;
2591     } else
2592       dst[i] = low;
2593
2594     carry = high;
2595   }
2596
2597   if (srcParts < dstParts) {
2598     /* Full multiplication, there is no overflow.  */
2599     assert(srcParts + 1 == dstParts);
2600     dst[srcParts] = carry;
2601     return 0;
2602   }
2603
2604   /* We overflowed if there is carry.  */
2605   if (carry)
2606     return 1;
2607
2608   /* We would overflow if any significant unwritten parts would be
2609      non-zero.  This is true if any remaining src parts are non-zero
2610      and the multiplier is non-zero.  */
2611   if (multiplier)
2612     for (unsigned i = dstParts; i < srcParts; i++)
2613       if (src[i])
2614         return 1;
2615
2616   /* We fitted in the narrow destination.  */
2617   return 0;
2618 }
2619
2620 /* DST = LHS * RHS, where DST has the same width as the operands and
2621    is filled with the least significant parts of the result.  Returns
2622    one if overflow occurred, otherwise zero.  DST must be disjoint
2623    from both operands.  */
2624 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2625                       const WordType *rhs, unsigned parts) {
2626   assert(dst != lhs && dst != rhs);
2627
2628   int overflow = 0;
2629   tcSet(dst, 0, parts);
2630
2631   for (unsigned i = 0; i < parts; i++)
2632     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2633                                parts - i, true);
2634
2635   return overflow;
2636 }
2637
2638 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2639 /// operands. No overflow occurs. DST must be disjoint from both operands.
2640 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2641                            const WordType *rhs, unsigned lhsParts,
2642                            unsigned rhsParts) {
2643   /* Put the narrower number on the LHS for less loops below.  */
2644   if (lhsParts > rhsParts)
2645     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2646
2647   assert(dst != lhs && dst != rhs);
2648
2649   tcSet(dst, 0, rhsParts);
2650
2651   for (unsigned i = 0; i < lhsParts; i++)
2652     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2653 }
2654
2655 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2656    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2657    set REMAINDER to the remainder, return zero.  i.e.
2658
2659    OLD_LHS = RHS * LHS + REMAINDER
2660
2661    SCRATCH is a bignum of the same size as the operands and result for
2662    use by the routine; its contents need not be initialized and are
2663    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2664 */
2665 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2666                     WordType *remainder, WordType *srhs,
2667                     unsigned parts) {
2668   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2669
2670   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2671   if (shiftCount == 0)
2672     return true;
2673
2674   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2675   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2676   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2677
2678   tcAssign(srhs, rhs, parts);
2679   tcShiftLeft(srhs, parts, shiftCount);
2680   tcAssign(remainder, lhs, parts);
2681   tcSet(lhs, 0, parts);
2682
2683   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2684      the total.  */
2685   for (;;) {
2686     int compare = tcCompare(remainder, srhs, parts);
2687     if (compare >= 0) {
2688       tcSubtract(remainder, srhs, 0, parts);
2689       lhs[n] |= mask;
2690     }
2691
2692     if (shiftCount == 0)
2693       break;
2694     shiftCount--;
2695     tcShiftRight(srhs, parts, 1);
2696     if ((mask >>= 1) == 0) {
2697       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2698       n--;
2699     }
2700   }
2701
2702   return false;
2703 }
2704
2705 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2706 /// no restrictions on Count.
2707 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2708   // Don't bother performing a no-op shift.
2709   if (!Count)
2710     return;
2711
2712   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2713   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2714   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2715
2716   // Fastpath for moving by whole words.
2717   if (BitShift == 0) {
2718     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2719   } else {
2720     while (Words-- > WordShift) {
2721       Dst[Words] = Dst[Words - WordShift] << BitShift;
2722       if (Words > WordShift)
2723         Dst[Words] |=
2724           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2725     }
2726   }
2727
2728   // Fill in the remainder with 0s.
2729   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2730 }
2731
2732 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2733 /// are no restrictions on Count.
2734 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2735   // Don't bother performing a no-op shift.
2736   if (!Count)
2737     return;
2738
2739   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2740   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2741   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2742
2743   unsigned WordsToMove = Words - WordShift;
2744   // Fastpath for moving by whole words.
2745   if (BitShift == 0) {
2746     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2747   } else {
2748     for (unsigned i = 0; i != WordsToMove; ++i) {
2749       Dst[i] = Dst[i + WordShift] >> BitShift;
2750       if (i + 1 != WordsToMove)
2751         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2752     }
2753   }
2754
2755   // Fill in the remainder with 0s.
2756   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2757 }
2758
2759 /* Bitwise and of two bignums.  */
2760 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2761   for (unsigned i = 0; i < parts; i++)
2762     dst[i] &= rhs[i];
2763 }
2764
2765 /* Bitwise inclusive or of two bignums.  */
2766 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2767   for (unsigned i = 0; i < parts; i++)
2768     dst[i] |= rhs[i];
2769 }
2770
2771 /* Bitwise exclusive or of two bignums.  */
2772 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2773   for (unsigned i = 0; i < parts; i++)
2774     dst[i] ^= rhs[i];
2775 }
2776
2777 /* Complement a bignum in-place.  */
2778 void APInt::tcComplement(WordType *dst, unsigned parts) {
2779   for (unsigned i = 0; i < parts; i++)
2780     dst[i] = ~dst[i];
2781 }
2782
2783 /* Comparison (unsigned) of two bignums.  */
2784 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2785                      unsigned parts) {
2786   while (parts) {
2787     parts--;
2788     if (lhs[parts] != rhs[parts])
2789       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2790   }
2791
2792   return 0;
2793 }
2794
2795 /* Set the least significant BITS bits of a bignum, clear the
2796    rest.  */
2797 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2798                                       unsigned bits) {
2799   unsigned i = 0;
2800   while (bits > APINT_BITS_PER_WORD) {
2801     dst[i++] = ~(WordType) 0;
2802     bits -= APINT_BITS_PER_WORD;
2803   }
2804
2805   if (bits)
2806     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2807
2808   while (i < parts)
2809     dst[i++] = 0;
2810 }
2811
2812 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2813                                    APInt::Rounding RM) {
2814   // Currently udivrem always rounds down.
2815   switch (RM) {
2816   case APInt::Rounding::DOWN:
2817   case APInt::Rounding::TOWARD_ZERO:
2818     return A.udiv(B);
2819   case APInt::Rounding::UP: {
2820     APInt Quo, Rem;
2821     APInt::udivrem(A, B, Quo, Rem);
2822     if (Rem == 0)
2823       return Quo;
2824     return Quo + 1;
2825   }
2826   }
2827   llvm_unreachable("Unknown APInt::Rounding enum");
2828 }
2829
2830 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2831                                    APInt::Rounding RM) {
2832   switch (RM) {
2833   case APInt::Rounding::DOWN:
2834   case APInt::Rounding::UP: {
2835     APInt Quo, Rem;
2836     APInt::sdivrem(A, B, Quo, Rem);
2837     if (Rem == 0)
2838       return Quo;
2839     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2840     // We want to check whether the non-integer part of the mathematical value
2841     // is negative or not. If the non-integer part is negative, we need to round
2842     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2843     // already rounded down.
2844     if (RM == APInt::Rounding::DOWN) {
2845       if (Rem.isNegative() != B.isNegative())
2846         return Quo - 1;
2847       return Quo;
2848     }
2849     if (Rem.isNegative() != B.isNegative())
2850       return Quo;
2851     return Quo + 1;
2852   }
2853   // Currently sdiv rounds towards zero.
2854   case APInt::Rounding::TOWARD_ZERO:
2855     return A.sdiv(B);
2856   }
2857   llvm_unreachable("Unknown APInt::Rounding enum");
2858 }
2859
2860 Optional<APInt>
2861 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2862                                            unsigned RangeWidth) {
2863   unsigned CoeffWidth = A.getBitWidth();
2864   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2865   assert(RangeWidth <= CoeffWidth &&
2866          "Value range width should be less than coefficient width");
2867   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2868
2869   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2870                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2871
2872   // Identify 0 as a (non)solution immediately.
2873   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2874     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2875     return APInt(CoeffWidth, 0);
2876   }
2877
2878   // The result of APInt arithmetic has the same bit width as the operands,
2879   // so it can actually lose high bits. A product of two n-bit integers needs
2880   // 2n-1 bits to represent the full value.
2881   // The operation done below (on quadratic coefficients) that can produce
2882   // the largest value is the evaluation of the equation during bisection,
2883   // which needs 3 times the bitwidth of the coefficient, so the total number
2884   // of required bits is 3n.
2885   //
2886   // The purpose of this extension is to simulate the set Z of all integers,
2887   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2888   // and negative numbers (not so much in a modulo arithmetic). The method
2889   // used to solve the equation is based on the standard formula for real
2890   // numbers, and uses the concepts of "positive" and "negative" with their
2891   // usual meanings.
2892   CoeffWidth *= 3;
2893   A = A.sext(CoeffWidth);
2894   B = B.sext(CoeffWidth);
2895   C = C.sext(CoeffWidth);
2896
2897   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2898   // the bit width has increased.
2899   if (A.isNegative()) {
2900     A.negate();
2901     B.negate();
2902     C.negate();
2903   }
2904
2905   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2906   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2907   // and R = 2^BitWidth.
2908   // Since we're trying not only to find exact solutions, but also values
2909   // that "wrap around", such a set will always have a solution, i.e. an x
2910   // that satisfies at least one of the equations, or such that |q(x)|
2911   // exceeds kR, while |q(x-1)| for the same k does not.
2912   //
2913   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2914   // positive solution n (in the above sense), and also such that the n
2915   // will be the least among all solutions corresponding to k = 0, 1, ...
2916   // (more precisely, the least element in the set
2917   //   { n(k) | k is such that a solution n(k) exists }).
2918   //
2919   // Consider the parabola (over real numbers) that corresponds to the
2920   // quadratic equation. Since A > 0, the arms of the parabola will point
2921   // up. Picking different values of k will shift it up and down by R.
2922   //
2923   // We want to shift the parabola in such a way as to reduce the problem
2924   // of solving q(x) = kR to solving shifted_q(x) = 0.
2925   // (The interesting solutions are the ceilings of the real number
2926   // solutions.)
2927   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2928   APInt TwoA = 2 * A;
2929   APInt SqrB = B * B;
2930   bool PickLow;
2931
2932   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2933     assert(A.isStrictlyPositive());
2934     APInt T = V.abs().urem(A);
2935     if (T.isNullValue())
2936       return V;
2937     return V.isNegative() ? V+T : V+(A-T);
2938   };
2939
2940   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2941   // iff B is positive.
2942   if (B.isNonNegative()) {
2943     // If B >= 0, the vertex it at a negative location (or at 0), so in
2944     // order to have a non-negative solution we need to pick k that makes
2945     // C-kR negative. To satisfy all the requirements for the solution
2946     // that we are looking for, it needs to be closest to 0 of all k.
2947     C = C.srem(R);
2948     if (C.isStrictlyPositive())
2949       C -= R;
2950     // Pick the greater solution.
2951     PickLow = false;
2952   } else {
2953     // If B < 0, the vertex is at a positive location. For any solution
2954     // to exist, the discriminant must be non-negative. This means that
2955     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2956     // lower bound on values of k: kR >= C - B^2/4A.
2957     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2958     // Round LowkR up (towards +inf) to the nearest kR.
2959     LowkR = RoundUp(LowkR, R);
2960
2961     // If there exists k meeting the condition above, and such that
2962     // C-kR > 0, there will be two positive real number solutions of
2963     // q(x) = kR. Out of all such values of k, pick the one that makes
2964     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2965     // In other words, find maximum k such that LowkR <= kR < C.
2966     if (C.sgt(LowkR)) {
2967       // If LowkR < C, then such a k is guaranteed to exist because
2968       // LowkR itself is a multiple of R.
2969       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2970       // Pick the smaller solution.
2971       PickLow = true;
2972     } else {
2973       // If C-kR < 0 for all potential k's, it means that one solution
2974       // will be negative, while the other will be positive. The positive
2975       // solution will shift towards 0 if the parabola is moved up.
2976       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2977       // to 0, or in other words, out of all parabolas that have solutions,
2978       // pick the one that is the farthest "up").
2979       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2980       C -= LowkR;
2981       // Pick the greater solution.
2982       PickLow = false;
2983     }
2984   }
2985
2986   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2987                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2988
2989   APInt D = SqrB - 4*A*C;
2990   assert(D.isNonNegative() && "Negative discriminant");
2991   APInt SQ = D.sqrt();
2992
2993   APInt Q = SQ * SQ;
2994   bool InexactSQ = Q != D;
2995   // The calculated SQ may actually be greater than the exact (non-integer)
2996   // value. If that's the case, decrement SQ to get a value that is lower.
2997   if (Q.sgt(D))
2998     SQ -= 1;
2999
3000   APInt X;
3001   APInt Rem;
3002
3003   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3004   // When using the quadratic formula directly, the calculated low root
3005   // may be greater than the exact one, since we would be subtracting SQ.
3006   // To make sure that the calculated root is not greater than the exact
3007   // one, subtract SQ+1 when calculating the low root (for inexact value
3008   // of SQ).
3009   if (PickLow)
3010     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3011   else
3012     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3013
3014   // The updated coefficients should be such that the (exact) solution is
3015   // positive. Since APInt division rounds towards 0, the calculated one
3016   // can be 0, but cannot be negative.
3017   assert(X.isNonNegative() && "Solution should be non-negative");
3018
3019   if (!InexactSQ && Rem.isNullValue()) {
3020     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3021     return X;
3022   }
3023
3024   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3025   // The exact value of the square root of D should be between SQ and SQ+1.
3026   // This implies that the solution should be between that corresponding to
3027   // SQ (i.e. X) and that corresponding to SQ+1.
3028   //
3029   // The calculated X cannot be greater than the exact (real) solution.
3030   // Actually it must be strictly less than the exact solution, while
3031   // X+1 will be greater than or equal to it.
3032
3033   APInt VX = (A*X + B)*X + C;
3034   APInt VY = VX + TwoA*X + A + B;
3035   bool SignChange = VX.isNegative() != VY.isNegative() ||
3036                     VX.isNullValue() != VY.isNullValue();
3037   // If the sign did not change between X and X+1, X is not a valid solution.
3038   // This could happen when the actual (exact) roots don't have an integer
3039   // between them, so they would both be contained between X and X+1.
3040   if (!SignChange) {
3041     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3042     return None;
3043   }
3044
3045   X += 1;
3046   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3047   return X;
3048 }
3049
3050 Optional<unsigned>
3051 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3052   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3053   if (A == B)
3054     return llvm::None;
3055   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3056 }
3057
3058 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3059 /// with the integer held in IntVal.
3060 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3061                             unsigned StoreBytes) {
3062   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3063   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3064
3065   if (sys::IsLittleEndianHost) {
3066     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3067     // destination from LSB to MSB: Do a straight copy.
3068     memcpy(Dst, Src, StoreBytes);
3069   } else {
3070     // Big-endian host - the source is an array of 64 bit words ordered from
3071     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3072     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3073     while (StoreBytes > sizeof(uint64_t)) {
3074       StoreBytes -= sizeof(uint64_t);
3075       // May not be aligned so use memcpy.
3076       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3077       Src += sizeof(uint64_t);
3078     }
3079
3080     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3081   }
3082 }
3083
3084 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3085 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3086 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3087                              unsigned LoadBytes) {
3088   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3089   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3090                    const_cast<uint64_t *>(IntVal.getRawData()));
3091
3092   if (sys::IsLittleEndianHost)
3093     // Little-endian host - the destination must be ordered from LSB to MSB.
3094     // The source is ordered from LSB to MSB: Do a straight copy.
3095     memcpy(Dst, Src, LoadBytes);
3096   else {
3097     // Big-endian - the destination is an array of 64 bit words ordered from
3098     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3099     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3100     // a word.
3101     while (LoadBytes > sizeof(uint64_t)) {
3102       LoadBytes -= sizeof(uint64_t);
3103       // May not be aligned so use memcpy.
3104       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3105       Dst += sizeof(uint64_t);
3106     }
3107
3108     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3109   }
3110 }