lib/Support/APInt.cpp

   1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision integer
  10 // constant values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APInt.h"
  15 #include "llvm/ADT/ArrayRef.h"
  16 #include "llvm/ADT/FoldingSet.h"
  17 #include "llvm/ADT/Hashing.h"
  18 #include "llvm/ADT/Optional.h"
  19 #include "llvm/ADT/SmallString.h"
  20 #include "llvm/ADT/StringRef.h"
  21 #include "llvm/ADT/bit.h"
  22 #include "llvm/Config/llvm-config.h"
  23 #include "llvm/Support/Debug.h"
  24 #include "llvm/Support/ErrorHandling.h"
  25 #include "llvm/Support/MathExtras.h"
  26 #include "llvm/Support/raw_ostream.h"
  27 #include <climits>
  28 #include <cmath>
  29 #include <cstdlib>
  30 #include <cstring>
  31 using namespace llvm;
  32
  33 #define DEBUG_TYPE "apint"
  34
  35 /// A utility function for allocating memory, checking for allocation failures,
  36 /// and ensuring the contents are zeroed.
  37 inline static uint64_t* getClearedMemory(unsigned numWords) {
  38   uint64_t *result = new uint64_t[numWords];
  39   memset(result, 0, numWords * sizeof(uint64_t));
  40   return result;
  41 }
  42
  43 /// A utility function for allocating memory and checking for allocation
  44 /// failure.  The content is not zeroed.
  45 inline static uint64_t* getMemory(unsigned numWords) {
  46   return new uint64_t[numWords];
  47 }
  48
  49 /// A utility function that converts a character to a digit.
  50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
  51   unsigned r;
  52
  53   if (radix == 16 || radix == 36) {
  54     r = cdigit - '0';
  55     if (r <= 9)
  56       return r;
  57
  58     r = cdigit - 'A';
  59     if (r <= radix - 11U)
  60       return r + 10;
  61
  62     r = cdigit - 'a';
  63     if (r <= radix - 11U)
  64       return r + 10;
  65
  66     radix = 10;
  67   }
  68
  69   r = cdigit - '0';
  70   if (r < radix)
  71     return r;
  72
  73   return -1U;
  74 }
  75
  76
  77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
  78   U.pVal = getClearedMemory(getNumWords());
  79   U.pVal[0] = val;
  80   if (isSigned && int64_t(val) < 0)
  81     for (unsigned i = 1; i < getNumWords(); ++i)
  82       U.pVal[i] = WORDTYPE_MAX;
  83   clearUnusedBits();
  84 }
  85
  86 void APInt::initSlowCase(const APInt& that) {
  87   U.pVal = getMemory(getNumWords());
  88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
  89 }
  90
  91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
  92   assert(BitWidth && "Bitwidth too small");
  93   assert(bigVal.data() && "Null pointer detected!");
  94   if (isSingleWord())
  95     U.VAL = bigVal[0];
  96   else {
  97     // Get memory, cleared to 0
  98     U.pVal = getClearedMemory(getNumWords());
  99     // Calculate the number of words to copy
 100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
 101     // Copy the words from bigVal to pVal
 102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
 103   }
 104   // Make sure unused high bits are cleared
 105   clearUnusedBits();
 106 }
 107
 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
 109   : BitWidth(numBits) {
 110   initFromArray(bigVal);
 111 }
 112
 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
 114   : BitWidth(numBits) {
 115   initFromArray(makeArrayRef(bigVal, numWords));
 116 }
 117
 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
 119   : BitWidth(numbits) {
 120   assert(BitWidth && "Bitwidth too small");
 121   fromString(numbits, Str, radix);
 122 }
 123
 124 void APInt::reallocate(unsigned NewBitWidth) {
 125   // If the number of words is the same we can just change the width and stop.
 126   if (getNumWords() == getNumWords(NewBitWidth)) {
 127     BitWidth = NewBitWidth;
 128     return;
 129   }
 130
 131   // If we have an allocation, delete it.
 132   if (!isSingleWord())
 133     delete [] U.pVal;
 134
 135   // Update BitWidth.
 136   BitWidth = NewBitWidth;
 137
 138   // If we are supposed to have an allocation, create it.
 139   if (!isSingleWord())
 140     U.pVal = getMemory(getNumWords());
 141 }
 142
 143 void APInt::AssignSlowCase(const APInt& RHS) {
 144   // Don't do anything for X = X
 145   if (this == &RHS)
 146     return;
 147
 148   // Adjust the bit width and handle allocations as necessary.
 149   reallocate(RHS.getBitWidth());
 150
 151   // Copy the data.
 152   if (isSingleWord())
 153     U.VAL = RHS.U.VAL;
 154   else
 155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
 156 }
 157
 158 /// This method 'profiles' an APInt for use with FoldingSet.
 159 void APInt::Profile(FoldingSetNodeID& ID) const {
 160   ID.AddInteger(BitWidth);
 161
 162   if (isSingleWord()) {
 163     ID.AddInteger(U.VAL);
 164     return;
 165   }
 166
 167   unsigned NumWords = getNumWords();
 168   for (unsigned i = 0; i < NumWords; ++i)
 169     ID.AddInteger(U.pVal[i]);
 170 }
 171
 172 /// Prefix increment operator. Increments the APInt by one.
 173 APInt& APInt::operator++() {
 174   if (isSingleWord())
 175     ++U.VAL;
 176   else
 177     tcIncrement(U.pVal, getNumWords());
 178   return clearUnusedBits();
 179 }
 180
 181 /// Prefix decrement operator. Decrements the APInt by one.
 182 APInt& APInt::operator--() {
 183   if (isSingleWord())
 184     --U.VAL;
 185   else
 186     tcDecrement(U.pVal, getNumWords());
 187   return clearUnusedBits();
 188 }
 189
 190 /// Adds the RHS APint to this APInt.
 191 /// @returns this, after addition of RHS.
 192 /// Addition assignment operator.
 193 APInt& APInt::operator+=(const APInt& RHS) {
 194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 195   if (isSingleWord())
 196     U.VAL += RHS.U.VAL;
 197   else
 198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
 199   return clearUnusedBits();
 200 }
 201
 202 APInt& APInt::operator+=(uint64_t RHS) {
 203   if (isSingleWord())
 204     U.VAL += RHS;
 205   else
 206     tcAddPart(U.pVal, RHS, getNumWords());
 207   return clearUnusedBits();
 208 }
 209
 210 /// Subtracts the RHS APInt from this APInt
 211 /// @returns this, after subtraction
 212 /// Subtraction assignment operator.
 213 APInt& APInt::operator-=(const APInt& RHS) {
 214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 215   if (isSingleWord())
 216     U.VAL -= RHS.U.VAL;
 217   else
 218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
 219   return clearUnusedBits();
 220 }
 221
 222 APInt& APInt::operator-=(uint64_t RHS) {
 223   if (isSingleWord())
 224     U.VAL -= RHS;
 225   else
 226     tcSubtractPart(U.pVal, RHS, getNumWords());
 227   return clearUnusedBits();
 228 }
 229
 230 APInt APInt::operator*(const APInt& RHS) const {
 231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 232   if (isSingleWord())
 233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
 234
 235   APInt Result(getMemory(getNumWords()), getBitWidth());
 236
 237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
 238
 239   Result.clearUnusedBits();
 240   return Result;
 241 }
 242
 243 void APInt::AndAssignSlowCase(const APInt& RHS) {
 244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
 245 }
 246
 247 void APInt::OrAssignSlowCase(const APInt& RHS) {
 248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
 249 }
 250
 251 void APInt::XorAssignSlowCase(const APInt& RHS) {
 252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
 253 }
 254
 255 APInt& APInt::operator*=(const APInt& RHS) {
 256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
 257   *this = *this * RHS;
 258   return *this;
 259 }
 260
 261 APInt& APInt::operator*=(uint64_t RHS) {
 262   if (isSingleWord()) {
 263     U.VAL *= RHS;
 264   } else {
 265     unsigned NumWords = getNumWords();
 266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
 267   }
 268   return clearUnusedBits();
 269 }
 270
 271 bool APInt::EqualSlowCase(const APInt& RHS) const {
 272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
 273 }
 274
 275 int APInt::compare(const APInt& RHS) const {
 276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 277   if (isSingleWord())
 278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
 279
 280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 281 }
 282
 283 int APInt::compareSigned(const APInt& RHS) const {
 284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
 285   if (isSingleWord()) {
 286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
 287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
 288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
 289   }
 290
 291   bool lhsNeg = isNegative();
 292   bool rhsNeg = RHS.isNegative();
 293
 294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
 295   if (lhsNeg != rhsNeg)
 296     return lhsNeg ? -1 : 1;
 297
 298   // Otherwise we can just use an unsigned comparison, because even negative
 299   // numbers compare correctly this way if both have the same signed-ness.
 300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
 301 }
 302
 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
 304   unsigned loWord = whichWord(loBit);
 305   unsigned hiWord = whichWord(hiBit);
 306
 307   // Create an initial mask for the low word with zeros below loBit.
 308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
 309
 310   // If hiBit is not aligned, we need a high mask.
 311   unsigned hiShiftAmt = whichBit(hiBit);
 312   if (hiShiftAmt != 0) {
 313     // Create a high mask with zeros above hiBit.
 314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
 315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
 316     // set the bits in hiWord.
 317     if (hiWord == loWord)
 318       loMask &= hiMask;
 319     else
 320       U.pVal[hiWord] |= hiMask;
 321   }
 322   // Apply the mask to the low word.
 323   U.pVal[loWord] |= loMask;
 324
 325   // Fill any words between loWord and hiWord with all ones.
 326   for (unsigned word = loWord + 1; word < hiWord; ++word)
 327     U.pVal[word] = WORDTYPE_MAX;
 328 }
 329
 330 /// Toggle every bit to its opposite value.
 331 void APInt::flipAllBitsSlowCase() {
 332   tcComplement(U.pVal, getNumWords());
 333   clearUnusedBits();
 334 }
 335
 336 /// Toggle a given bit to its opposite value whose position is given
 337 /// as "bitPosition".
 338 /// Toggles a given bit to its opposite value.
 339 void APInt::flipBit(unsigned bitPosition) {
 340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
 341   if ((*this)[bitPosition]) clearBit(bitPosition);
 342   else setBit(bitPosition);
 343 }
 344
 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
 346   unsigned subBitWidth = subBits.getBitWidth();
 347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
 348          "Illegal bit insertion");
 349
 350   // Insertion is a direct copy.
 351   if (subBitWidth == BitWidth) {
 352     *this = subBits;
 353     return;
 354   }
 355
 356   // Single word result can be done as a direct bitmask.
 357   if (isSingleWord()) {
 358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 359     U.VAL &= ~(mask << bitPosition);
 360     U.VAL |= (subBits.U.VAL << bitPosition);
 361     return;
 362   }
 363
 364   unsigned loBit = whichBit(bitPosition);
 365   unsigned loWord = whichWord(bitPosition);
 366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
 367
 368   // Insertion within a single word can be done as a direct bitmask.
 369   if (loWord == hi1Word) {
 370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
 371     U.pVal[loWord] &= ~(mask << loBit);
 372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
 373     return;
 374   }
 375
 376   // Insert on word boundaries.
 377   if (loBit == 0) {
 378     // Direct copy whole words.
 379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
 380     memcpy(U.pVal + loWord, subBits.getRawData(),
 381            numWholeSubWords * APINT_WORD_SIZE);
 382
 383     // Mask+insert remaining bits.
 384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
 385     if (remainingBits != 0) {
 386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
 387       U.pVal[hi1Word] &= ~mask;
 388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
 389     }
 390     return;
 391   }
 392
 393   // General case - set/clear individual bits in dst based on src.
 394   // TODO - there is scope for optimization here, but at the moment this code
 395   // path is barely used so prefer readability over performance.
 396   for (unsigned i = 0; i != subBitWidth; ++i) {
 397     if (subBits[i])
 398       setBit(bitPosition + i);
 399     else
 400       clearBit(bitPosition + i);
 401   }
 402 }
 403
 404 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
 405   assert(numBits > 0 && "Can't extract zero bits");
 406   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
 407          "Illegal bit extraction");
 408
 409   if (isSingleWord())
 410     return APInt(numBits, U.VAL >> bitPosition);
 411
 412   unsigned loBit = whichBit(bitPosition);
 413   unsigned loWord = whichWord(bitPosition);
 414   unsigned hiWord = whichWord(bitPosition + numBits - 1);
 415
 416   // Single word result extracting bits from a single word source.
 417   if (loWord == hiWord)
 418     return APInt(numBits, U.pVal[loWord] >> loBit);
 419
 420   // Extracting bits that start on a source word boundary can be done
 421   // as a fast memory copy.
 422   if (loBit == 0)
 423     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
 424
 425   // General case - shift + copy source words directly into place.
 426   APInt Result(numBits, 0);
 427   unsigned NumSrcWords = getNumWords();
 428   unsigned NumDstWords = Result.getNumWords();
 429
 430   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
 431   for (unsigned word = 0; word < NumDstWords; ++word) {
 432     uint64_t w0 = U.pVal[loWord + word];
 433     uint64_t w1 =
 434         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
 435     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
 436   }
 437
 438   return Result.clearUnusedBits();
 439 }
 440
 441 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
 442   assert(!str.empty() && "Invalid string length");
 443   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
 444           radix == 36) &&
 445          "Radix should be 2, 8, 10, 16, or 36!");
 446
 447   size_t slen = str.size();
 448
 449   // Each computation below needs to know if it's negative.
 450   StringRef::iterator p = str.begin();
 451   unsigned isNegative = *p == '-';
 452   if (*p == '-' || *p == '+') {
 453     p++;
 454     slen--;
 455     assert(slen && "String is only a sign, needs a value.");
 456   }
 457
 458   // For radixes of power-of-two values, the bits required is accurately and
 459   // easily computed
 460   if (radix == 2)
 461     return slen + isNegative;
 462   if (radix == 8)
 463     return slen * 3 + isNegative;
 464   if (radix == 16)
 465     return slen * 4 + isNegative;
 466
 467   // FIXME: base 36
 468
 469   // This is grossly inefficient but accurate. We could probably do something
 470   // with a computation of roughly slen*64/20 and then adjust by the value of
 471   // the first few digits. But, I'm not sure how accurate that could be.
 472
 473   // Compute a sufficient number of bits that is always large enough but might
 474   // be too large. This avoids the assertion in the constructor. This
 475   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
 476   // bits in that case.
 477   unsigned sufficient
 478     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
 479                  : (slen == 1 ? 7 : slen * 16/3);
 480
 481   // Convert to the actual binary value.
 482   APInt tmp(sufficient, StringRef(p, slen), radix);
 483
 484   // Compute how many bits are required. If the log is infinite, assume we need
 485   // just bit. If the log is exact and value is negative, then the value is
 486   // MinSignedValue with (log + 1) bits.
 487   unsigned log = tmp.logBase2();
 488   if (log == (unsigned)-1) {
 489     return isNegative + 1;
 490   } else if (isNegative && tmp.isPowerOf2()) {
 491     return isNegative + log;
 492   } else {
 493     return isNegative + log + 1;
 494   }
 495 }
 496
 497 hash_code llvm::hash_value(const APInt &Arg) {
 498   if (Arg.isSingleWord())
 499     return hash_combine(Arg.U.VAL);
 500
 501   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
 502 }
 503
 504 bool APInt::isSplat(unsigned SplatSizeInBits) const {
 505   assert(getBitWidth() % SplatSizeInBits == 0 &&
 506          "SplatSizeInBits must divide width!");
 507   // We can check that all parts of an integer are equal by making use of a
 508   // little trick: rotate and check if it's still the same value.
 509   return *this == rotl(SplatSizeInBits);
 510 }
 511
 512 /// This function returns the high "numBits" bits of this APInt.
 513 APInt APInt::getHiBits(unsigned numBits) const {
 514   return this->lshr(BitWidth - numBits);
 515 }
 516
 517 /// This function returns the low "numBits" bits of this APInt.
 518 APInt APInt::getLoBits(unsigned numBits) const {
 519   APInt Result(getLowBitsSet(BitWidth, numBits));
 520   Result &= *this;
 521   return Result;
 522 }
 523
 524 /// Return a value containing V broadcasted over NewLen bits.
 525 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
 526   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
 527
 528   APInt Val = V.zextOrSelf(NewLen);
 529   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
 530     Val |= Val << I;
 531
 532   return Val;
 533 }
 534
 535 unsigned APInt::countLeadingZerosSlowCase() const {
 536   unsigned Count = 0;
 537   for (int i = getNumWords()-1; i >= 0; --i) {
 538     uint64_t V = U.pVal[i];
 539     if (V == 0)
 540       Count += APINT_BITS_PER_WORD;
 541     else {
 542       Count += llvm::countLeadingZeros(V);
 543       break;
 544     }
 545   }
 546   // Adjust for unused bits in the most significant word (they are zero).
 547   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
 548   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
 549   return Count;
 550 }
 551
 552 unsigned APInt::countLeadingOnesSlowCase() const {
 553   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
 554   unsigned shift;
 555   if (!highWordBits) {
 556     highWordBits = APINT_BITS_PER_WORD;
 557     shift = 0;
 558   } else {
 559     shift = APINT_BITS_PER_WORD - highWordBits;
 560   }
 561   int i = getNumWords() - 1;
 562   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
 563   if (Count == highWordBits) {
 564     for (i--; i >= 0; --i) {
 565       if (U.pVal[i] == WORDTYPE_MAX)
 566         Count += APINT_BITS_PER_WORD;
 567       else {
 568         Count += llvm::countLeadingOnes(U.pVal[i]);
 569         break;
 570       }
 571     }
 572   }
 573   return Count;
 574 }
 575
 576 unsigned APInt::countTrailingZerosSlowCase() const {
 577   unsigned Count = 0;
 578   unsigned i = 0;
 579   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
 580     Count += APINT_BITS_PER_WORD;
 581   if (i < getNumWords())
 582     Count += llvm::countTrailingZeros(U.pVal[i]);
 583   return std::min(Count, BitWidth);
 584 }
 585
 586 unsigned APInt::countTrailingOnesSlowCase() const {
 587   unsigned Count = 0;
 588   unsigned i = 0;
 589   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
 590     Count += APINT_BITS_PER_WORD;
 591   if (i < getNumWords())
 592     Count += llvm::countTrailingOnes(U.pVal[i]);
 593   assert(Count <= BitWidth);
 594   return Count;
 595 }
 596
 597 unsigned APInt::countPopulationSlowCase() const {
 598   unsigned Count = 0;
 599   for (unsigned i = 0; i < getNumWords(); ++i)
 600     Count += llvm::countPopulation(U.pVal[i]);
 601   return Count;
 602 }
 603
 604 bool APInt::intersectsSlowCase(const APInt &RHS) const {
 605   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 606     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
 607       return true;
 608
 609   return false;
 610 }
 611
 612 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
 613   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
 614     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
 615       return false;
 616
 617   return true;
 618 }
 619
 620 APInt APInt::byteSwap() const {
 621   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
 622   if (BitWidth == 16)
 623     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
 624   if (BitWidth == 32)
 625     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
 626   if (BitWidth == 48) {
 627     unsigned Tmp1 = unsigned(U.VAL >> 16);
 628     Tmp1 = ByteSwap_32(Tmp1);
 629     uint16_t Tmp2 = uint16_t(U.VAL);
 630     Tmp2 = ByteSwap_16(Tmp2);
 631     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
 632   }
 633   if (BitWidth == 64)
 634     return APInt(BitWidth, ByteSwap_64(U.VAL));
 635
 636   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
 637   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
 638     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
 639   if (Result.BitWidth != BitWidth) {
 640     Result.lshrInPlace(Result.BitWidth - BitWidth);
 641     Result.BitWidth = BitWidth;
 642   }
 643   return Result;
 644 }
 645
 646 APInt APInt::reverseBits() const {
 647   switch (BitWidth) {
 648   case 64:
 649     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
 650   case 32:
 651     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
 652   case 16:
 653     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
 654   case 8:
 655     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
 656   default:
 657     break;
 658   }
 659
 660   APInt Val(*this);
 661   APInt Reversed(BitWidth, 0);
 662   unsigned S = BitWidth;
 663
 664   for (; Val != 0; Val.lshrInPlace(1)) {
 665     Reversed <<= 1;
 666     Reversed |= Val[0];
 667     --S;
 668   }
 669
 670   Reversed <<= S;
 671   return Reversed;
 672 }
 673
 674 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
 675   // Fast-path a common case.
 676   if (A == B) return A;
 677
 678   // Corner cases: if either operand is zero, the other is the gcd.
 679   if (!A) return B;
 680   if (!B) return A;
 681
 682   // Count common powers of 2 and remove all other powers of 2.
 683   unsigned Pow2;
 684   {
 685     unsigned Pow2_A = A.countTrailingZeros();
 686     unsigned Pow2_B = B.countTrailingZeros();
 687     if (Pow2_A > Pow2_B) {
 688       A.lshrInPlace(Pow2_A - Pow2_B);
 689       Pow2 = Pow2_B;
 690     } else if (Pow2_B > Pow2_A) {
 691       B.lshrInPlace(Pow2_B - Pow2_A);
 692       Pow2 = Pow2_A;
 693     } else {
 694       Pow2 = Pow2_A;
 695     }
 696   }
 697
 698   // Both operands are odd multiples of 2^Pow_2:
 699   //
 700   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
 701   //
 702   // This is a modified version of Stein's algorithm, taking advantage of
 703   // efficient countTrailingZeros().
 704   while (A != B) {
 705     if (A.ugt(B)) {
 706       A -= B;
 707       A.lshrInPlace(A.countTrailingZeros() - Pow2);
 708     } else {
 709       B -= A;
 710       B.lshrInPlace(B.countTrailingZeros() - Pow2);
 711     }
 712   }
 713
 714   return A;
 715 }
 716
 717 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
 718   uint64_t I = bit_cast<uint64_t>(Double);
 719
 720   // Get the sign bit from the highest order bit
 721   bool isNeg = I >> 63;
 722
 723   // Get the 11-bit exponent and adjust for the 1023 bit bias
 724   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
 725
 726   // If the exponent is negative, the value is < 0 so just return 0.
 727   if (exp < 0)
 728     return APInt(width, 0u);
 729
 730   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
 731   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
 732
 733   // If the exponent doesn't shift all bits out of the mantissa
 734   if (exp < 52)
 735     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
 736                     APInt(width, mantissa >> (52 - exp));
 737
 738   // If the client didn't provide enough bits for us to shift the mantissa into
 739   // then the result is undefined, just return 0
 740   if (width <= exp - 52)
 741     return APInt(width, 0);
 742
 743   // Otherwise, we have to shift the mantissa bits up to the right location
 744   APInt Tmp(width, mantissa);
 745   Tmp <<= (unsigned)exp - 52;
 746   return isNeg ? -Tmp : Tmp;
 747 }
 748
 749 /// This function converts this APInt to a double.
 750 /// The layout for double is as following (IEEE Standard 754):
 751 ///  --------------------------------------
 752 /// |  Sign    Exponent    Fraction    Bias |
 753 /// |-------------------------------------- |
 754 /// |  1[63]   11[62-52]   52[51-00]   1023 |
 755 ///  --------------------------------------
 756 double APInt::roundToDouble(bool isSigned) const {
 757
 758   // Handle the simple case where the value is contained in one uint64_t.
 759   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
 760   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
 761     if (isSigned) {
 762       int64_t sext = SignExtend64(getWord(0), BitWidth);
 763       return double(sext);
 764     } else
 765       return double(getWord(0));
 766   }
 767
 768   // Determine if the value is negative.
 769   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
 770
 771   // Construct the absolute value if we're negative.
 772   APInt Tmp(isNeg ? -(*this) : (*this));
 773
 774   // Figure out how many bits we're using.
 775   unsigned n = Tmp.getActiveBits();
 776
 777   // The exponent (without bias normalization) is just the number of bits
 778   // we are using. Note that the sign bit is gone since we constructed the
 779   // absolute value.
 780   uint64_t exp = n;
 781
 782   // Return infinity for exponent overflow
 783   if (exp > 1023) {
 784     if (!isSigned || !isNeg)
 785       return std::numeric_limits<double>::infinity();
 786     else
 787       return -std::numeric_limits<double>::infinity();
 788   }
 789   exp += 1023; // Increment for 1023 bias
 790
 791   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
 792   // extract the high 52 bits from the correct words in pVal.
 793   uint64_t mantissa;
 794   unsigned hiWord = whichWord(n-1);
 795   if (hiWord == 0) {
 796     mantissa = Tmp.U.pVal[0];
 797     if (n > 52)
 798       mantissa >>= n - 52; // shift down, we want the top 52 bits.
 799   } else {
 800     assert(hiWord > 0 && "huh?");
 801     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
 802     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
 803     mantissa = hibits | lobits;
 804   }
 805
 806   // The leading bit of mantissa is implicit, so get rid of it.
 807   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
 808   uint64_t I = sign | (exp << 52) | mantissa;
 809   return bit_cast<double>(I);
 810 }
 811
 812 // Truncate to new width.
 813 APInt APInt::trunc(unsigned width) const {
 814   assert(width < BitWidth && "Invalid APInt Truncate request");
 815   assert(width && "Can't truncate to 0 bits");
 816
 817   if (width <= APINT_BITS_PER_WORD)
 818     return APInt(width, getRawData()[0]);
 819
 820   APInt Result(getMemory(getNumWords(width)), width);
 821
 822   // Copy full words.
 823   unsigned i;
 824   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
 825     Result.U.pVal[i] = U.pVal[i];
 826
 827   // Truncate and copy any partial word.
 828   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
 829   if (bits != 0)
 830     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
 831
 832   return Result;
 833 }
 834
 835 // Sign extend to a new width.
 836 APInt APInt::sext(unsigned Width) const {
 837   assert(Width > BitWidth && "Invalid APInt SignExtend request");
 838
 839   if (Width <= APINT_BITS_PER_WORD)
 840     return APInt(Width, SignExtend64(U.VAL, BitWidth));
 841
 842   APInt Result(getMemory(getNumWords(Width)), Width);
 843
 844   // Copy words.
 845   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 846
 847   // Sign extend the last word since there may be unused bits in the input.
 848   Result.U.pVal[getNumWords() - 1] =
 849       SignExtend64(Result.U.pVal[getNumWords() - 1],
 850                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 851
 852   // Fill with sign bits.
 853   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
 854               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 855   Result.clearUnusedBits();
 856   return Result;
 857 }
 858
 859 //  Zero extend to a new width.
 860 APInt APInt::zext(unsigned width) const {
 861   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
 862
 863   if (width <= APINT_BITS_PER_WORD)
 864     return APInt(width, U.VAL);
 865
 866   APInt Result(getMemory(getNumWords(width)), width);
 867
 868   // Copy words.
 869   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
 870
 871   // Zero remaining words.
 872   std::memset(Result.U.pVal + getNumWords(), 0,
 873               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
 874
 875   return Result;
 876 }
 877
 878 APInt APInt::zextOrTrunc(unsigned width) const {
 879   if (BitWidth < width)
 880     return zext(width);
 881   if (BitWidth > width)
 882     return trunc(width);
 883   return *this;
 884 }
 885
 886 APInt APInt::sextOrTrunc(unsigned width) const {
 887   if (BitWidth < width)
 888     return sext(width);
 889   if (BitWidth > width)
 890     return trunc(width);
 891   return *this;
 892 }
 893
 894 APInt APInt::zextOrSelf(unsigned width) const {
 895   if (BitWidth < width)
 896     return zext(width);
 897   return *this;
 898 }
 899
 900 APInt APInt::sextOrSelf(unsigned width) const {
 901   if (BitWidth < width)
 902     return sext(width);
 903   return *this;
 904 }
 905
 906 /// Arithmetic right-shift this APInt by shiftAmt.
 907 /// Arithmetic right-shift function.
 908 void APInt::ashrInPlace(const APInt &shiftAmt) {
 909   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 910 }
 911
 912 /// Arithmetic right-shift this APInt by shiftAmt.
 913 /// Arithmetic right-shift function.
 914 void APInt::ashrSlowCase(unsigned ShiftAmt) {
 915   // Don't bother performing a no-op shift.
 916   if (!ShiftAmt)
 917     return;
 918
 919   // Save the original sign bit for later.
 920   bool Negative = isNegative();
 921
 922   // WordShift is the inter-part shift; BitShift is intra-part shift.
 923   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
 924   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
 925
 926   unsigned WordsToMove = getNumWords() - WordShift;
 927   if (WordsToMove != 0) {
 928     // Sign extend the last word to fill in the unused bits.
 929     U.pVal[getNumWords() - 1] = SignExtend64(
 930         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
 931
 932     // Fastpath for moving by whole words.
 933     if (BitShift == 0) {
 934       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
 935     } else {
 936       // Move the words containing significant bits.
 937       for (unsigned i = 0; i != WordsToMove - 1; ++i)
 938         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
 939                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
 940
 941       // Handle the last word which has no high bits to copy.
 942       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
 943       // Sign extend one more time.
 944       U.pVal[WordsToMove - 1] =
 945           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
 946     }
 947   }
 948
 949   // Fill in the remainder based on the original sign.
 950   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
 951               WordShift * APINT_WORD_SIZE);
 952   clearUnusedBits();
 953 }
 954
 955 /// Logical right-shift this APInt by shiftAmt.
 956 /// Logical right-shift function.
 957 void APInt::lshrInPlace(const APInt &shiftAmt) {
 958   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
 959 }
 960
 961 /// Logical right-shift this APInt by shiftAmt.
 962 /// Logical right-shift function.
 963 void APInt::lshrSlowCase(unsigned ShiftAmt) {
 964   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
 965 }
 966
 967 /// Left-shift this APInt by shiftAmt.
 968 /// Left-shift function.
 969 APInt &APInt::operator<<=(const APInt &shiftAmt) {
 970   // It's undefined behavior in C to shift by BitWidth or greater.
 971   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
 972   return *this;
 973 }
 974
 975 void APInt::shlSlowCase(unsigned ShiftAmt) {
 976   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
 977   clearUnusedBits();
 978 }
 979
 980 // Calculate the rotate amount modulo the bit width.
 981 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
 982   unsigned rotBitWidth = rotateAmt.getBitWidth();
 983   APInt rot = rotateAmt;
 984   if (rotBitWidth < BitWidth) {
 985     // Extend the rotate APInt, so that the urem doesn't divide by 0.
 986     // e.g. APInt(1, 32) would give APInt(1, 0).
 987     rot = rotateAmt.zext(BitWidth);
 988   }
 989   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
 990   return rot.getLimitedValue(BitWidth);
 991 }
 992
 993 APInt APInt::rotl(const APInt &rotateAmt) const {
 994   return rotl(rotateModulo(BitWidth, rotateAmt));
 995 }
 996
 997 APInt APInt::rotl(unsigned rotateAmt) const {
 998   rotateAmt %= BitWidth;
 999   if (rotateAmt == 0)
1000     return *this;
1001   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002 }
1003
1004 APInt APInt::rotr(const APInt &rotateAmt) const {
1005   return rotr(rotateModulo(BitWidth, rotateAmt));
1006 }
1007
1008 APInt APInt::rotr(unsigned rotateAmt) const {
1009   rotateAmt %= BitWidth;
1010   if (rotateAmt == 0)
1011     return *this;
1012   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013 }
1014
1015 // Square Root - this method computes and returns the square root of "this".
1016 // Three mechanisms are used for computation. For small values (<= 5 bits),
1017 // a table lookup is done. This gets some performance for common cases. For
1018 // values using less than 52 bits, the value is converted to double and then
1019 // the libc sqrt function is called. The result is rounded and then converted
1020 // back to a uint64_t which is then used to construct the result. Finally,
1021 // the Babylonian method for computing square roots is used.
1022 APInt APInt::sqrt() const {
1023
1024   // Determine the magnitude of the value.
1025   unsigned magnitude = getActiveBits();
1026
1027   // Use a fast table for some small values. This also gets rid of some
1028   // rounding errors in libc sqrt for small values.
1029   if (magnitude <= 5) {
1030     static const uint8_t results[32] = {
1031       /*     0 */ 0,
1032       /*  1- 2 */ 1, 1,
1033       /*  3- 6 */ 2, 2, 2, 2,
1034       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1035       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037       /*    31 */ 6
1038     };
1039     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040   }
1041
1042   // If the magnitude of the value fits in less than 52 bits (the precision of
1043   // an IEEE double precision floating point value), then we can use the
1044   // libc sqrt function which will probably use a hardware sqrt computation.
1045   // This should be faster than the algorithm below.
1046   if (magnitude < 52) {
1047     return APInt(BitWidth,
1048                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1049                                                                : U.pVal[0])))));
1050   }
1051
1052   // Okay, all the short cuts are exhausted. We must compute it. The following
1053   // is a classical Babylonian method for computing the square root. This code
1054   // was adapted to APInt from a wikipedia article on such computations.
1055   // See http://www.wikipedia.org/ and go to the page named
1056   // Calculate_an_integer_square_root.
1057   unsigned nbits = BitWidth, i = 4;
1058   APInt testy(BitWidth, 16);
1059   APInt x_old(BitWidth, 1);
1060   APInt x_new(BitWidth, 0);
1061   APInt two(BitWidth, 2);
1062
1063   // Select a good starting value using binary logarithms.
1064   for (;; i += 2, testy = testy.shl(2))
1065     if (i >= nbits || this->ule(testy)) {
1066       x_old = x_old.shl(i / 2);
1067       break;
1068     }
1069
1070   // Use the Babylonian method to arrive at the integer square root:
1071   for (;;) {
1072     x_new = (this->udiv(x_old) + x_old).udiv(two);
1073     if (x_old.ule(x_new))
1074       break;
1075     x_old = x_new;
1076   }
1077
1078   // Make sure we return the closest approximation
1079   // NOTE: The rounding calculation below is correct. It will produce an
1080   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081   // determined to be a rounding issue with pari/gp as it begins to use a
1082   // floating point representation after 192 bits. There are no discrepancies
1083   // between this algorithm and pari/gp for bit widths < 192 bits.
1084   APInt square(x_old * x_old);
1085   APInt nextSquare((x_old + 1) * (x_old +1));
1086   if (this->ult(square))
1087     return x_old;
1088   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089   APInt midpoint((nextSquare - square).udiv(two));
1090   APInt offset(*this - square);
1091   if (offset.ult(midpoint))
1092     return x_old;
1093   return x_old + 1;
1094 }
1095
1096 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1097 /// iterative extended Euclidean algorithm is used to solve for this value,
1098 /// however we simplify it to speed up calculating only the inverse, and take
1099 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1100 /// (potentially large) APInts around.
1101 /// WARNING: a value of '0' may be returned,
1102 ///          signifying that no multiplicative inverse exists!
1103 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1105
1106   // Using the properties listed at the following web page (accessed 06/21/08):
1107   //   http://www.numbertheory.org/php/euclid.html
1108   // (especially the properties numbered 3, 4 and 9) it can be proved that
1109   // BitWidth bits suffice for all the computations in the algorithm implemented
1110   // below. More precisely, this number of bits suffice if the multiplicative
1111   // inverse exists, but may not suffice for the general extended Euclidean
1112   // algorithm.
1113
1114   APInt r[2] = { modulo, *this };
1115   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116   APInt q(BitWidth, 0);
1117
1118   unsigned i;
1119   for (i = 0; r[i^1] != 0; i ^= 1) {
1120     // An overview of the math without the confusing bit-flipping:
1121     // q = r[i-2] / r[i-1]
1122     // r[i] = r[i-2] % r[i-1]
1123     // t[i] = t[i-2] - t[i-1] * q
1124     udivrem(r[i], r[i^1], q, r[i]);
1125     t[i] -= t[i^1] * q;
1126   }
1127
1128   // If this APInt and the modulo are not coprime, there is no multiplicative
1129   // inverse, so return 0. We check this by looking at the next-to-last
1130   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131   // algorithm.
1132   if (r[i] != 1)
1133     return APInt(BitWidth, 0);
1134
1135   // The next-to-last t is the multiplicative inverse.  However, we are
1136   // interested in a positive inverse. Calculate a positive one from a negative
1137   // one if necessary. A simple addition of the modulo suffices because
1138   // abs(t[i]) is known to be less than *this/2 (see the link above).
1139   if (t[i].isNegative())
1140     t[i] += modulo;
1141
1142   return std::move(t[i]);
1143 }
1144
1145 /// Calculate the magic numbers required to implement a signed integer division
1146 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1147 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1148 /// Warren, Jr., chapter 10.
1149 APInt::ms APInt::magic() const {
1150   const APInt& d = *this;
1151   unsigned p;
1152   APInt ad, anc, delta, q1, r1, q2, r2, t;
1153   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154   struct ms mag;
1155
1156   ad = d.abs();
1157   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158   anc = t - 1 - t.urem(ad);   // absolute value of nc
1159   p = d.getBitWidth() - 1;    // initialize p
1160   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1161   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1162   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1163   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1164   do {
1165     p = p + 1;
1166     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1167     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1168     if (r1.uge(anc)) {  // must be unsigned comparison
1169       q1 = q1 + 1;
1170       r1 = r1 - anc;
1171     }
1172     q2 = q2<<1;          // update q2 = 2p/abs(d)
1173     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1174     if (r2.uge(ad)) {   // must be unsigned comparison
1175       q2 = q2 + 1;
1176       r2 = r2 - ad;
1177     }
1178     delta = ad - r2;
1179   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1180
1181   mag.m = q2 + 1;
1182   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1183   mag.s = p - d.getBitWidth();          // resulting shift
1184   return mag;
1185 }
1186
1187 /// Calculate the magic numbers required to implement an unsigned integer
1188 /// division by a constant as a sequence of multiplies, adds and shifts.
1189 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1190 /// S. Warren, Jr., chapter 10.
1191 /// LeadingZeros can be used to simplify the calculation if the upper bits
1192 /// of the divided value are known zero.
1193 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194   const APInt& d = *this;
1195   unsigned p;
1196   APInt nc, delta, q1, r1, q2, r2;
1197   struct mu magu;
1198   magu.a = 0;               // initialize "add" indicator
1199   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1202
1203   nc = allOnes - (allOnes - d).urem(d);
1204   p = d.getBitWidth() - 1;  // initialize p
1205   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1206   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1207   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1208   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1209   do {
1210     p = p + 1;
1211     if (r1.uge(nc - r1)) {
1212       q1 = q1 + q1 + 1;  // update q1
1213       r1 = r1 + r1 - nc; // update r1
1214     }
1215     else {
1216       q1 = q1+q1; // update q1
1217       r1 = r1+r1; // update r1
1218     }
1219     if ((r2 + 1).uge(d - r2)) {
1220       if (q2.uge(signedMax)) magu.a = 1;
1221       q2 = q2+q2 + 1;     // update q2
1222       r2 = r2+r2 + 1 - d; // update r2
1223     }
1224     else {
1225       if (q2.uge(signedMin)) magu.a = 1;
1226       q2 = q2+q2;     // update q2
1227       r2 = r2+r2 + 1; // update r2
1228     }
1229     delta = d - 1 - r2;
1230   } while (p < d.getBitWidth()*2 &&
1231            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1232   magu.m = q2 + 1; // resulting magic number
1233   magu.s = p - d.getBitWidth();  // resulting shift
1234   return magu;
1235 }
1236
1237 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239 /// variables here have the same names as in the algorithm. Comments explain
1240 /// the algorithm and any deviation from it.
1241 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242                      unsigned m, unsigned n) {
1243   assert(u && "Must provide dividend");
1244   assert(v && "Must provide divisor");
1245   assert(q && "Must provide quotient");
1246   assert(u != v && u != q && v != q && "Must use different memory");
1247   assert(n>1 && "n must be > 1");
1248
1249   // b denotes the base of the number system. In our case b is 2^32.
1250   const uint64_t b = uint64_t(1) << 32;
1251
1252 // The DEBUG macros here tend to be spam in the debug output if you're not
1253 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254 #ifdef KNUTH_DEBUG
1255 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256 #else
1257 #define DEBUG_KNUTH(X) do {} while(false)
1258 #endif
1259
1260   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263   DEBUG_KNUTH(dbgs() << " by");
1264   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265   DEBUG_KNUTH(dbgs() << '\n');
1266   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267   // u and v by d. Note that we have taken Knuth's advice here to use a power
1268   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269   // 2 allows us to shift instead of multiply and it is easy to determine the
1270   // shift amount from the leading zeros.  We are basically normalizing the u
1271   // and v so that its high bits are shifted to the top of v's range without
1272   // overflow. Note that this can require an extra word in u so that u must
1273   // be of length m+n+1.
1274   unsigned shift = countLeadingZeros(v[n-1]);
1275   uint32_t v_carry = 0;
1276   uint32_t u_carry = 0;
1277   if (shift) {
1278     for (unsigned i = 0; i < m+n; ++i) {
1279       uint32_t u_tmp = u[i] >> (32 - shift);
1280       u[i] = (u[i] << shift) | u_carry;
1281       u_carry = u_tmp;
1282     }
1283     for (unsigned i = 0; i < n; ++i) {
1284       uint32_t v_tmp = v[i] >> (32 - shift);
1285       v[i] = (v[i] << shift) | v_carry;
1286       v_carry = v_tmp;
1287     }
1288   }
1289   u[m+n] = u_carry;
1290
1291   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1292   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293   DEBUG_KNUTH(dbgs() << " by");
1294   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295   DEBUG_KNUTH(dbgs() << '\n');
1296
1297   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1298   int j = m;
1299   do {
1300     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301     // D3. [Calculate q'.].
1302     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306     // on v[n-2] determines at high speed most of the cases in which the trial
1307     // value qp is one too large, and it eliminates all cases where qp is two
1308     // too large.
1309     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311     uint64_t qp = dividend / v[n-1];
1312     uint64_t rp = dividend % v[n-1];
1313     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1314       qp--;
1315       rp += v[n-1];
1316       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1317         qp--;
1318     }
1319     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1320
1321     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323     // consists of a simple multiplication by a one-place number, combined with
1324     // a subtraction.
1325     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327     // true value plus b**(n+1), namely as the b's complement of
1328     // the true value, and a "borrow" to the left should be remembered.
1329     int64_t borrow = 0;
1330     for (unsigned i = 0; i < n; ++i) {
1331       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333       u[j+i] = Lo_32(subres);
1334       borrow = Hi_32(p) - Hi_32(subres);
1335       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336                         << ", borrow = " << borrow << '\n');
1337     }
1338     bool isNeg = u[j+n] < borrow;
1339     u[j+n] -= Lo_32(borrow);
1340
1341     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343     DEBUG_KNUTH(dbgs() << '\n');
1344
1345     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346     // negative, go to step D6; otherwise go on to step D7.
1347     q[j] = Lo_32(qp);
1348     if (isNeg) {
1349       // D6. [Add back]. The probability that this step is necessary is very
1350       // small, on the order of only 2/b. Make sure that test data accounts for
1351       // this possibility. Decrease q[j] by 1
1352       q[j]--;
1353       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354       // A carry will occur to the left of u[j+n], and it should be ignored
1355       // since it cancels with the borrow that occurred in D4.
1356       bool carry = false;
1357       for (unsigned i = 0; i < n; i++) {
1358         uint32_t limit = std::min(u[j+i],v[i]);
1359         u[j+i] += v[i] + carry;
1360         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1361       }
1362       u[j+n] += carry;
1363     }
1364     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1367
1368     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1369   } while (--j >= 0);
1370
1371   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373   DEBUG_KNUTH(dbgs() << '\n');
1374
1375   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377   // compute the remainder (urem uses this).
1378   if (r) {
1379     // The value d is expressed by the "shift" value above since we avoided
1380     // multiplication by d by using a shift left. So, all we have to do is
1381     // shift right here.
1382     if (shift) {
1383       uint32_t carry = 0;
1384       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385       for (int i = n-1; i >= 0; i--) {
1386         r[i] = (u[i] >> shift) | carry;
1387         carry = u[i] << (32 - shift);
1388         DEBUG_KNUTH(dbgs() << " " << r[i]);
1389       }
1390     } else {
1391       for (int i = n-1; i >= 0; i--) {
1392         r[i] = u[i];
1393         DEBUG_KNUTH(dbgs() << " " << r[i]);
1394       }
1395     }
1396     DEBUG_KNUTH(dbgs() << '\n');
1397   }
1398   DEBUG_KNUTH(dbgs() << '\n');
1399 }
1400
1401 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403   assert(lhsWords >= rhsWords && "Fractional result");
1404
1405   // First, compose the values into an array of 32-bit words instead of
1406   // 64-bit words. This is a necessity of both the "short division" algorithm
1407   // and the Knuth "classical algorithm" which requires there to be native
1408   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409   // can't use 64-bit operands here because we don't have native results of
1410   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411   // work on large-endian machines.
1412   unsigned n = rhsWords * 2;
1413   unsigned m = (lhsWords * 2) - n;
1414
1415   // Allocate space for the temporary values we need either on the stack, if
1416   // it will fit, or on the heap if it won't.
1417   uint32_t SPACE[128];
1418   uint32_t *U = nullptr;
1419   uint32_t *V = nullptr;
1420   uint32_t *Q = nullptr;
1421   uint32_t *R = nullptr;
1422   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1423     U = &SPACE[0];
1424     V = &SPACE[m+n+1];
1425     Q = &SPACE[(m+n+1) + n];
1426     if (Remainder)
1427       R = &SPACE[(m+n+1) + n + (m+n)];
1428   } else {
1429     U = new uint32_t[m + n + 1];
1430     V = new uint32_t[n];
1431     Q = new uint32_t[m+n];
1432     if (Remainder)
1433       R = new uint32_t[n];
1434   }
1435
1436   // Initialize the dividend
1437   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438   for (unsigned i = 0; i < lhsWords; ++i) {
1439     uint64_t tmp = LHS[i];
1440     U[i * 2] = Lo_32(tmp);
1441     U[i * 2 + 1] = Hi_32(tmp);
1442   }
1443   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1444
1445   // Initialize the divisor
1446   memset(V, 0, (n)*sizeof(uint32_t));
1447   for (unsigned i = 0; i < rhsWords; ++i) {
1448     uint64_t tmp = RHS[i];
1449     V[i * 2] = Lo_32(tmp);
1450     V[i * 2 + 1] = Hi_32(tmp);
1451   }
1452
1453   // initialize the quotient and remainder
1454   memset(Q, 0, (m+n) * sizeof(uint32_t));
1455   if (Remainder)
1456     memset(R, 0, n * sizeof(uint32_t));
1457
1458   // Now, adjust m and n for the Knuth division. n is the number of words in
1459   // the divisor. m is the number of words by which the dividend exceeds the
1460   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461   // contain any zero words or the Knuth algorithm fails.
1462   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1463     n--;
1464     m++;
1465   }
1466   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1467     m--;
1468
1469   // If we're left with only a single word for the divisor, Knuth doesn't work
1470   // so we implement the short division algorithm here. This is much simpler
1471   // and faster because we are certain that we can divide a 64-bit quantity
1472   // by a 32-bit quantity at hardware speed and short division is simply a
1473   // series of such operations. This is just like doing short division but we
1474   // are using base 2^32 instead of base 10.
1475   assert(n != 0 && "Divide by zero?");
1476   if (n == 1) {
1477     uint32_t divisor = V[0];
1478     uint32_t remainder = 0;
1479     for (int i = m; i >= 0; i--) {
1480       uint64_t partial_dividend = Make_64(remainder, U[i]);
1481       if (partial_dividend == 0) {
1482         Q[i] = 0;
1483         remainder = 0;
1484       } else if (partial_dividend < divisor) {
1485         Q[i] = 0;
1486         remainder = Lo_32(partial_dividend);
1487       } else if (partial_dividend == divisor) {
1488         Q[i] = 1;
1489         remainder = 0;
1490       } else {
1491         Q[i] = Lo_32(partial_dividend / divisor);
1492         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1493       }
1494     }
1495     if (R)
1496       R[0] = remainder;
1497   } else {
1498     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499     // case n > 1.
1500     KnuthDiv(U, V, Q, R, m, n);
1501   }
1502
1503   // If the caller wants the quotient
1504   if (Quotient) {
1505     for (unsigned i = 0; i < lhsWords; ++i)
1506       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1507   }
1508
1509   // If the caller wants the remainder
1510   if (Remainder) {
1511     for (unsigned i = 0; i < rhsWords; ++i)
1512       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1513   }
1514
1515   // Clean up the memory we allocated.
1516   if (U != &SPACE[0]) {
1517     delete [] U;
1518     delete [] V;
1519     delete [] Q;
1520     delete [] R;
1521   }
1522 }
1523
1524 APInt APInt::udiv(const APInt &RHS) const {
1525   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1526
1527   // First, deal with the easy case
1528   if (isSingleWord()) {
1529     assert(RHS.U.VAL != 0 && "Divide by zero?");
1530     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1531   }
1532
1533   // Get some facts about the LHS and RHS number of bits and words
1534   unsigned lhsWords = getNumWords(getActiveBits());
1535   unsigned rhsBits  = RHS.getActiveBits();
1536   unsigned rhsWords = getNumWords(rhsBits);
1537   assert(rhsWords && "Divided by zero???");
1538
1539   // Deal with some degenerate cases
1540   if (!lhsWords)
1541     // 0 / X ===> 0
1542     return APInt(BitWidth, 0);
1543   if (rhsBits == 1)
1544     // X / 1 ===> X
1545     return *this;
1546   if (lhsWords < rhsWords || this->ult(RHS))
1547     // X / Y ===> 0, iff X < Y
1548     return APInt(BitWidth, 0);
1549   if (*this == RHS)
1550     // X / X ===> 1
1551     return APInt(BitWidth, 1);
1552   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553     // All high words are zero, just use native divide
1554     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1555
1556   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557   APInt Quotient(BitWidth, 0); // to hold result.
1558   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559   return Quotient;
1560 }
1561
1562 APInt APInt::udiv(uint64_t RHS) const {
1563   assert(RHS != 0 && "Divide by zero?");
1564
1565   // First, deal with the easy case
1566   if (isSingleWord())
1567     return APInt(BitWidth, U.VAL / RHS);
1568
1569   // Get some facts about the LHS words.
1570   unsigned lhsWords = getNumWords(getActiveBits());
1571
1572   // Deal with some degenerate cases
1573   if (!lhsWords)
1574     // 0 / X ===> 0
1575     return APInt(BitWidth, 0);
1576   if (RHS == 1)
1577     // X / 1 ===> X
1578     return *this;
1579   if (this->ult(RHS))
1580     // X / Y ===> 0, iff X < Y
1581     return APInt(BitWidth, 0);
1582   if (*this == RHS)
1583     // X / X ===> 1
1584     return APInt(BitWidth, 1);
1585   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586     // All high words are zero, just use native divide
1587     return APInt(BitWidth, this->U.pVal[0] / RHS);
1588
1589   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590   APInt Quotient(BitWidth, 0); // to hold result.
1591   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592   return Quotient;
1593 }
1594
1595 APInt APInt::sdiv(const APInt &RHS) const {
1596   if (isNegative()) {
1597     if (RHS.isNegative())
1598       return (-(*this)).udiv(-RHS);
1599     return -((-(*this)).udiv(RHS));
1600   }
1601   if (RHS.isNegative())
1602     return -(this->udiv(-RHS));
1603   return this->udiv(RHS);
1604 }
1605
1606 APInt APInt::sdiv(int64_t RHS) const {
1607   if (isNegative()) {
1608     if (RHS < 0)
1609       return (-(*this)).udiv(-RHS);
1610     return -((-(*this)).udiv(RHS));
1611   }
1612   if (RHS < 0)
1613     return -(this->udiv(-RHS));
1614   return this->udiv(RHS);
1615 }
1616
1617 APInt APInt::urem(const APInt &RHS) const {
1618   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619   if (isSingleWord()) {
1620     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1622   }
1623
1624   // Get some facts about the LHS
1625   unsigned lhsWords = getNumWords(getActiveBits());
1626
1627   // Get some facts about the RHS
1628   unsigned rhsBits = RHS.getActiveBits();
1629   unsigned rhsWords = getNumWords(rhsBits);
1630   assert(rhsWords && "Performing remainder operation by zero ???");
1631
1632   // Check the degenerate cases
1633   if (lhsWords == 0)
1634     // 0 % Y ===> 0
1635     return APInt(BitWidth, 0);
1636   if (rhsBits == 1)
1637     // X % 1 ===> 0
1638     return APInt(BitWidth, 0);
1639   if (lhsWords < rhsWords || this->ult(RHS))
1640     // X % Y ===> X, iff X < Y
1641     return *this;
1642   if (*this == RHS)
1643     // X % X == 0;
1644     return APInt(BitWidth, 0);
1645   if (lhsWords == 1)
1646     // All high words are zero, just use native remainder
1647     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1648
1649   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650   APInt Remainder(BitWidth, 0);
1651   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652   return Remainder;
1653 }
1654
1655 uint64_t APInt::urem(uint64_t RHS) const {
1656   assert(RHS != 0 && "Remainder by zero?");
1657
1658   if (isSingleWord())
1659     return U.VAL % RHS;
1660
1661   // Get some facts about the LHS
1662   unsigned lhsWords = getNumWords(getActiveBits());
1663
1664   // Check the degenerate cases
1665   if (lhsWords == 0)
1666     // 0 % Y ===> 0
1667     return 0;
1668   if (RHS == 1)
1669     // X % 1 ===> 0
1670     return 0;
1671   if (this->ult(RHS))
1672     // X % Y ===> X, iff X < Y
1673     return getZExtValue();
1674   if (*this == RHS)
1675     // X % X == 0;
1676     return 0;
1677   if (lhsWords == 1)
1678     // All high words are zero, just use native remainder
1679     return U.pVal[0] % RHS;
1680
1681   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682   uint64_t Remainder;
1683   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684   return Remainder;
1685 }
1686
1687 APInt APInt::srem(const APInt &RHS) const {
1688   if (isNegative()) {
1689     if (RHS.isNegative())
1690       return -((-(*this)).urem(-RHS));
1691     return -((-(*this)).urem(RHS));
1692   }
1693   if (RHS.isNegative())
1694     return this->urem(-RHS);
1695   return this->urem(RHS);
1696 }
1697
1698 int64_t APInt::srem(int64_t RHS) const {
1699   if (isNegative()) {
1700     if (RHS < 0)
1701       return -((-(*this)).urem(-RHS));
1702     return -((-(*this)).urem(RHS));
1703   }
1704   if (RHS < 0)
1705     return this->urem(-RHS);
1706   return this->urem(RHS);
1707 }
1708
1709 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710                     APInt &Quotient, APInt &Remainder) {
1711   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712   unsigned BitWidth = LHS.BitWidth;
1713
1714   // First, deal with the easy case
1715   if (LHS.isSingleWord()) {
1716     assert(RHS.U.VAL != 0 && "Divide by zero?");
1717     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719     Quotient = APInt(BitWidth, QuotVal);
1720     Remainder = APInt(BitWidth, RemVal);
1721     return;
1722   }
1723
1724   // Get some size facts about the dividend and divisor
1725   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726   unsigned rhsBits  = RHS.getActiveBits();
1727   unsigned rhsWords = getNumWords(rhsBits);
1728   assert(rhsWords && "Performing divrem operation by zero ???");
1729
1730   // Check the degenerate cases
1731   if (lhsWords == 0) {
1732     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1733     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1734     return;
1735   }
1736
1737   if (rhsBits == 1) {
1738     Quotient = LHS;                   // X / 1 ===> X
1739     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1740   }
1741
1742   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1744     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1745     return;
1746   }
1747
1748   if (LHS == RHS) {
1749     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1750     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1751     return;
1752   }
1753
1754   // Make sure there is enough space to hold the results.
1755   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756   // change the size. This is necessary if Quotient or Remainder is aliased
1757   // with LHS or RHS.
1758   Quotient.reallocate(BitWidth);
1759   Remainder.reallocate(BitWidth);
1760
1761   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762     // There is only one word to consider so use the native versions.
1763     uint64_t lhsValue = LHS.U.pVal[0];
1764     uint64_t rhsValue = RHS.U.pVal[0];
1765     Quotient = lhsValue / rhsValue;
1766     Remainder = lhsValue % rhsValue;
1767     return;
1768   }
1769
1770   // Okay, lets do it the long way
1771   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772          Remainder.U.pVal);
1773   // Clear the rest of the Quotient and Remainder.
1774   std::memset(Quotient.U.pVal + lhsWords, 0,
1775               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776   std::memset(Remainder.U.pVal + rhsWords, 0,
1777               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1778 }
1779
1780 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781                     uint64_t &Remainder) {
1782   assert(RHS != 0 && "Divide by zero?");
1783   unsigned BitWidth = LHS.BitWidth;
1784
1785   // First, deal with the easy case
1786   if (LHS.isSingleWord()) {
1787     uint64_t QuotVal = LHS.U.VAL / RHS;
1788     Remainder = LHS.U.VAL % RHS;
1789     Quotient = APInt(BitWidth, QuotVal);
1790     return;
1791   }
1792
1793   // Get some size facts about the dividend and divisor
1794   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1795
1796   // Check the degenerate cases
1797   if (lhsWords == 0) {
1798     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1799     Remainder = 0;                    // 0 % Y ===> 0
1800     return;
1801   }
1802
1803   if (RHS == 1) {
1804     Quotient = LHS;                   // X / 1 ===> X
1805     Remainder = 0;                    // X % 1 ===> 0
1806     return;
1807   }
1808
1809   if (LHS.ult(RHS)) {
1810     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1811     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1812     return;
1813   }
1814
1815   if (LHS == RHS) {
1816     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1817     Remainder = 0;                    // X % X ===> 0;
1818     return;
1819   }
1820
1821   // Make sure there is enough space to hold the results.
1822   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823   // change the size. This is necessary if Quotient is aliased with LHS.
1824   Quotient.reallocate(BitWidth);
1825
1826   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827     // There is only one word to consider so use the native versions.
1828     uint64_t lhsValue = LHS.U.pVal[0];
1829     Quotient = lhsValue / RHS;
1830     Remainder = lhsValue % RHS;
1831     return;
1832   }
1833
1834   // Okay, lets do it the long way
1835   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836   // Clear the rest of the Quotient.
1837   std::memset(Quotient.U.pVal + lhsWords, 0,
1838               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1839 }
1840
1841 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842                     APInt &Quotient, APInt &Remainder) {
1843   if (LHS.isNegative()) {
1844     if (RHS.isNegative())
1845       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846     else {
1847       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848       Quotient.negate();
1849     }
1850     Remainder.negate();
1851   } else if (RHS.isNegative()) {
1852     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853     Quotient.negate();
1854   } else {
1855     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1856   }
1857 }
1858
1859 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860                     APInt &Quotient, int64_t &Remainder) {
1861   uint64_t R = Remainder;
1862   if (LHS.isNegative()) {
1863     if (RHS < 0)
1864       APInt::udivrem(-LHS, -RHS, Quotient, R);
1865     else {
1866       APInt::udivrem(-LHS, RHS, Quotient, R);
1867       Quotient.negate();
1868     }
1869     R = -R;
1870   } else if (RHS < 0) {
1871     APInt::udivrem(LHS, -RHS, Quotient, R);
1872     Quotient.negate();
1873   } else {
1874     APInt::udivrem(LHS, RHS, Quotient, R);
1875   }
1876   Remainder = R;
1877 }
1878
1879 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880   APInt Res = *this+RHS;
1881   Overflow = isNonNegative() == RHS.isNonNegative() &&
1882              Res.isNonNegative() != isNonNegative();
1883   return Res;
1884 }
1885
1886 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887   APInt Res = *this+RHS;
1888   Overflow = Res.ult(RHS);
1889   return Res;
1890 }
1891
1892 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893   APInt Res = *this - RHS;
1894   Overflow = isNonNegative() != RHS.isNonNegative() &&
1895              Res.isNonNegative() != isNonNegative();
1896   return Res;
1897 }
1898
1899 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900   APInt Res = *this-RHS;
1901   Overflow = Res.ugt(*this);
1902   return Res;
1903 }
1904
1905 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906   // MININT/-1  -->  overflow.
1907   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1908   return sdiv(RHS);
1909 }
1910
1911 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912   APInt Res = *this * RHS;
1913
1914   if (*this != 0 && RHS != 0)
1915     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1916   else
1917     Overflow = false;
1918   return Res;
1919 }
1920
1921 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923     Overflow = true;
1924     return *this * RHS;
1925   }
1926
1927   APInt Res = lshr(1) * RHS;
1928   Overflow = Res.isNegative();
1929   Res <<= 1;
1930   if ((*this)[0]) {
1931     Res += RHS;
1932     if (Res.ult(RHS))
1933       Overflow = true;
1934   }
1935   return Res;
1936 }
1937
1938 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939   Overflow = ShAmt.uge(getBitWidth());
1940   if (Overflow)
1941     return APInt(BitWidth, 0);
1942
1943   if (isNonNegative()) // Don't allow sign change.
1944     Overflow = ShAmt.uge(countLeadingZeros());
1945   else
1946     Overflow = ShAmt.uge(countLeadingOnes());
1947
1948   return *this << ShAmt;
1949 }
1950
1951 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952   Overflow = ShAmt.uge(getBitWidth());
1953   if (Overflow)
1954     return APInt(BitWidth, 0);
1955
1956   Overflow = ShAmt.ugt(countLeadingZeros());
1957
1958   return *this << ShAmt;
1959 }
1960
1961 APInt APInt::sadd_sat(const APInt &RHS) const {
1962   bool Overflow;
1963   APInt Res = sadd_ov(RHS, Overflow);
1964   if (!Overflow)
1965     return Res;
1966
1967   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1968                       : APInt::getSignedMaxValue(BitWidth);
1969 }
1970
1971 APInt APInt::uadd_sat(const APInt &RHS) const {
1972   bool Overflow;
1973   APInt Res = uadd_ov(RHS, Overflow);
1974   if (!Overflow)
1975     return Res;
1976
1977   return APInt::getMaxValue(BitWidth);
1978 }
1979
1980 APInt APInt::ssub_sat(const APInt &RHS) const {
1981   bool Overflow;
1982   APInt Res = ssub_ov(RHS, Overflow);
1983   if (!Overflow)
1984     return Res;
1985
1986   return isNegative() ? APInt::getSignedMinValue(BitWidth)
1987                       : APInt::getSignedMaxValue(BitWidth);
1988 }
1989
1990 APInt APInt::usub_sat(const APInt &RHS) const {
1991   bool Overflow;
1992   APInt Res = usub_ov(RHS, Overflow);
1993   if (!Overflow)
1994     return Res;
1995
1996   return APInt(BitWidth, 0);
1997 }
1998
1999
2000 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001   // Check our assumptions here
2002   assert(!str.empty() && "Invalid string length");
2003   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004           radix == 36) &&
2005          "Radix should be 2, 8, 10, 16, or 36!");
2006
2007   StringRef::iterator p = str.begin();
2008   size_t slen = str.size();
2009   bool isNeg = *p == '-';
2010   if (*p == '-' || *p == '+') {
2011     p++;
2012     slen--;
2013     assert(slen && "String is only a sign, needs a value.");
2014   }
2015   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019          "Insufficient bit width");
2020
2021   // Allocate memory if needed
2022   if (isSingleWord())
2023     U.VAL = 0;
2024   else
2025     U.pVal = getClearedMemory(getNumWords());
2026
2027   // Figure out if we can shift instead of multiply
2028   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2029
2030   // Enter digit traversal loop
2031   for (StringRef::iterator e = str.end(); p != e; ++p) {
2032     unsigned digit = getDigit(*p, radix);
2033     assert(digit < radix && "Invalid character in digit string");
2034
2035     // Shift or multiply the value by the radix
2036     if (slen > 1) {
2037       if (shift)
2038         *this <<= shift;
2039       else
2040         *this *= radix;
2041     }
2042
2043     // Add in the digit we just interpreted
2044     *this += digit;
2045   }
2046   // If its negative, put it in two's complement form
2047   if (isNeg)
2048     this->negate();
2049 }
2050
2051 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052                      bool Signed, bool formatAsCLiteral) const {
2053   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054           Radix == 36) &&
2055          "Radix should be 2, 8, 10, 16, or 36!");
2056
2057   const char *Prefix = "";
2058   if (formatAsCLiteral) {
2059     switch (Radix) {
2060       case 2:
2061         // Binary literals are a non-standard extension added in gcc 4.3:
2062         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063         Prefix = "0b";
2064         break;
2065       case 8:
2066         Prefix = "0";
2067         break;
2068       case 10:
2069         break; // No prefix
2070       case 16:
2071         Prefix = "0x";
2072         break;
2073       default:
2074         llvm_unreachable("Invalid radix!");
2075     }
2076   }
2077
2078   // First, check for a zero value and just short circuit the logic below.
2079   if (*this == 0) {
2080     while (*Prefix) {
2081       Str.push_back(*Prefix);
2082       ++Prefix;
2083     };
2084     Str.push_back('0');
2085     return;
2086   }
2087
2088   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2089
2090   if (isSingleWord()) {
2091     char Buffer[65];
2092     char *BufPtr = std::end(Buffer);
2093
2094     uint64_t N;
2095     if (!Signed) {
2096       N = getZExtValue();
2097     } else {
2098       int64_t I = getSExtValue();
2099       if (I >= 0) {
2100         N = I;
2101       } else {
2102         Str.push_back('-');
2103         N = -(uint64_t)I;
2104       }
2105     }
2106
2107     while (*Prefix) {
2108       Str.push_back(*Prefix);
2109       ++Prefix;
2110     };
2111
2112     while (N) {
2113       *--BufPtr = Digits[N % Radix];
2114       N /= Radix;
2115     }
2116     Str.append(BufPtr, std::end(Buffer));
2117     return;
2118   }
2119
2120   APInt Tmp(*this);
2121
2122   if (Signed && isNegative()) {
2123     // They want to print the signed version and it is a negative value
2124     // Flip the bits and add one to turn it into the equivalent positive
2125     // value and put a '-' in the result.
2126     Tmp.negate();
2127     Str.push_back('-');
2128   }
2129
2130   while (*Prefix) {
2131     Str.push_back(*Prefix);
2132     ++Prefix;
2133   };
2134
2135   // We insert the digits backward, then reverse them to get the right order.
2136   unsigned StartDig = Str.size();
2137
2138   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139   // because the number of bits per digit (1, 3 and 4 respectively) divides
2140   // equally.  We just shift until the value is zero.
2141   if (Radix == 2 || Radix == 8 || Radix == 16) {
2142     // Just shift tmp right for each digit width until it becomes zero
2143     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2144     unsigned MaskAmt = Radix - 1;
2145
2146     while (Tmp.getBoolValue()) {
2147       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148       Str.push_back(Digits[Digit]);
2149       Tmp.lshrInPlace(ShiftAmt);
2150     }
2151   } else {
2152     while (Tmp.getBoolValue()) {
2153       uint64_t Digit;
2154       udivrem(Tmp, Radix, Tmp, Digit);
2155       assert(Digit < Radix && "divide failed");
2156       Str.push_back(Digits[Digit]);
2157     }
2158   }
2159
2160   // Reverse the digits before returning.
2161   std::reverse(Str.begin()+StartDig, Str.end());
2162 }
2163
2164 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2165 /// It is better to pass in a SmallVector/SmallString to the methods above.
2166 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167   SmallString<40> S;
2168   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169   return S.str();
2170 }
2171
2172 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173 LLVM_DUMP_METHOD void APInt::dump() const {
2174   SmallString<40> S, U;
2175   this->toStringUnsigned(U);
2176   this->toStringSigned(S);
2177   dbgs() << "APInt(" << BitWidth << "b, "
2178          << U << "u " << S << "s)\n";
2179 }
2180 #endif
2181
2182 void APInt::print(raw_ostream &OS, bool isSigned) const {
2183   SmallString<40> S;
2184   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185   OS << S;
2186 }
2187
2188 // This implements a variety of operations on a representation of
2189 // arbitrary precision, two's-complement, bignum integer values.
2190
2191 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192 // and unrestricting assumption.
2193 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194               "Part width must be divisible by 2!");
2195
2196 /* Some handy functions local to this file.  */
2197
2198 /* Returns the integer part with the least significant BITS set.
2199    BITS cannot be zero.  */
2200 static inline APInt::WordType lowBitMask(unsigned bits) {
2201   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2202
2203   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2204 }
2205
2206 /* Returns the value of the lower half of PART.  */
2207 static inline APInt::WordType lowHalf(APInt::WordType part) {
2208   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2209 }
2210
2211 /* Returns the value of the upper half of PART.  */
2212 static inline APInt::WordType highHalf(APInt::WordType part) {
2213   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2214 }
2215
2216 /* Returns the bit number of the most significant set bit of a part.
2217    If the input number has no bits set -1U is returned.  */
2218 static unsigned partMSB(APInt::WordType value) {
2219   return findLastSet(value, ZB_Max);
2220 }
2221
2222 /* Returns the bit number of the least significant set bit of a
2223    part.  If the input number has no bits set -1U is returned.  */
2224 static unsigned partLSB(APInt::WordType value) {
2225   return findFirstSet(value, ZB_Max);
2226 }
2227
2228 /* Sets the least significant part of a bignum to the input value, and
2229    zeroes out higher parts.  */
2230 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231   assert(parts > 0);
2232
2233   dst[0] = part;
2234   for (unsigned i = 1; i < parts; i++)
2235     dst[i] = 0;
2236 }
2237
2238 /* Assign one bignum to another.  */
2239 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240   for (unsigned i = 0; i < parts; i++)
2241     dst[i] = src[i];
2242 }
2243
2244 /* Returns true if a bignum is zero, false otherwise.  */
2245 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246   for (unsigned i = 0; i < parts; i++)
2247     if (src[i])
2248       return false;
2249
2250   return true;
2251 }
2252
2253 /* Extract the given bit of a bignum; returns 0 or 1.  */
2254 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2256 }
2257
2258 /* Set the given bit of a bignum. */
2259 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260   parts[whichWord(bit)] |= maskBit(bit);
2261 }
2262
2263 /* Clears the given bit of a bignum. */
2264 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265   parts[whichWord(bit)] &= ~maskBit(bit);
2266 }
2267
2268 /* Returns the bit number of the least significant set bit of a
2269    number.  If the input number has no bits set -1U is returned.  */
2270 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271   for (unsigned i = 0; i < n; i++) {
2272     if (parts[i] != 0) {
2273       unsigned lsb = partLSB(parts[i]);
2274
2275       return lsb + i * APINT_BITS_PER_WORD;
2276     }
2277   }
2278
2279   return -1U;
2280 }
2281
2282 /* Returns the bit number of the most significant set bit of a number.
2283    If the input number has no bits set -1U is returned.  */
2284 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285   do {
2286     --n;
2287
2288     if (parts[n] != 0) {
2289       unsigned msb = partMSB(parts[n]);
2290
2291       return msb + n * APINT_BITS_PER_WORD;
2292     }
2293   } while (n);
2294
2295   return -1U;
2296 }
2297
2298 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2299    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300    the least significant bit of DST.  All high bits above srcBITS in
2301    DST are zero-filled.  */
2302 void
2303 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304                  unsigned srcBits, unsigned srcLSB) {
2305   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306   assert(dstParts <= dstCount);
2307
2308   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309   tcAssign (dst, src + firstSrcPart, dstParts);
2310
2311   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312   tcShiftRight (dst, dstParts, shift);
2313
2314   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315      in DST.  If this is less that srcBits, append the rest, else
2316      clear the high bits.  */
2317   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318   if (n < srcBits) {
2319     WordType mask = lowBitMask (srcBits - n);
2320     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321                           << n % APINT_BITS_PER_WORD);
2322   } else if (n > srcBits) {
2323     if (srcBits % APINT_BITS_PER_WORD)
2324       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2325   }
2326
2327   /* Clear high parts.  */
2328   while (dstParts < dstCount)
2329     dst[dstParts++] = 0;
2330 }
2331
2332 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2333 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334                              WordType c, unsigned parts) {
2335   assert(c <= 1);
2336
2337   for (unsigned i = 0; i < parts; i++) {
2338     WordType l = dst[i];
2339     if (c) {
2340       dst[i] += rhs[i] + 1;
2341       c = (dst[i] <= l);
2342     } else {
2343       dst[i] += rhs[i];
2344       c = (dst[i] < l);
2345     }
2346   }
2347
2348   return c;
2349 }
2350
2351 /// This function adds a single "word" integer, src, to the multiple
2352 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2354 /// @returns the carry of the addition.
2355 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356                                  unsigned parts) {
2357   for (unsigned i = 0; i < parts; ++i) {
2358     dst[i] += src;
2359     if (dst[i] >= src)
2360       return 0; // No need to carry so exit early.
2361     src = 1; // Carry one to next digit.
2362   }
2363
2364   return 1;
2365 }
2366
2367 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2368 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369                                   WordType c, unsigned parts) {
2370   assert(c <= 1);
2371
2372   for (unsigned i = 0; i < parts; i++) {
2373     WordType l = dst[i];
2374     if (c) {
2375       dst[i] -= rhs[i] + 1;
2376       c = (dst[i] >= l);
2377     } else {
2378       dst[i] -= rhs[i];
2379       c = (dst[i] > l);
2380     }
2381   }
2382
2383   return c;
2384 }
2385
2386 /// This function subtracts a single "word" (64-bit word), src, from
2387 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2389 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390 /// exhausted. In other words, if src > dst then this function returns 1,
2391 /// otherwise 0.
2392 /// @returns the borrow out of the subtraction
2393 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394                                       unsigned parts) {
2395   for (unsigned i = 0; i < parts; ++i) {
2396     WordType Dst = dst[i];
2397     dst[i] -= src;
2398     if (src <= Dst)
2399       return 0; // No need to borrow so exit early.
2400     src = 1; // We have to "borrow 1" from next "word"
2401   }
2402
2403   return 1;
2404 }
2405
2406 /* Negate a bignum in-place.  */
2407 void APInt::tcNegate(WordType *dst, unsigned parts) {
2408   tcComplement(dst, parts);
2409   tcIncrement(dst, parts);
2410 }
2411
2412 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2413     DST  = SRC * MULTIPLIER + CARRY   if add is false
2414
2415     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2416     they must start at the same point, i.e. DST == SRC.
2417
2418     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419     returned.  Otherwise DST is filled with the least significant
2420     DSTPARTS parts of the result, and if all of the omitted higher
2421     parts were zero return zero, otherwise overflow occurred and
2422     return one.  */
2423 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424                           WordType multiplier, WordType carry,
2425                           unsigned srcParts, unsigned dstParts,
2426                           bool add) {
2427   /* Otherwise our writes of DST kill our later reads of SRC.  */
2428   assert(dst <= src || dst >= src + srcParts);
2429   assert(dstParts <= srcParts + 1);
2430
2431   /* N loops; minimum of dstParts and srcParts.  */
2432   unsigned n = std::min(dstParts, srcParts);
2433
2434   for (unsigned i = 0; i < n; i++) {
2435     WordType low, mid, high, srcPart;
2436
2437       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2438
2439          This cannot overflow, because
2440
2441          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2442
2443          which is less than n^2.  */
2444
2445     srcPart = src[i];
2446
2447     if (multiplier == 0 || srcPart == 0) {
2448       low = carry;
2449       high = 0;
2450     } else {
2451       low = lowHalf(srcPart) * lowHalf(multiplier);
2452       high = highHalf(srcPart) * highHalf(multiplier);
2453
2454       mid = lowHalf(srcPart) * highHalf(multiplier);
2455       high += highHalf(mid);
2456       mid <<= APINT_BITS_PER_WORD / 2;
2457       if (low + mid < low)
2458         high++;
2459       low += mid;
2460
2461       mid = highHalf(srcPart) * lowHalf(multiplier);
2462       high += highHalf(mid);
2463       mid <<= APINT_BITS_PER_WORD / 2;
2464       if (low + mid < low)
2465         high++;
2466       low += mid;
2467
2468       /* Now add carry.  */
2469       if (low + carry < low)
2470         high++;
2471       low += carry;
2472     }
2473
2474     if (add) {
2475       /* And now DST[i], and store the new low part there.  */
2476       if (low + dst[i] < low)
2477         high++;
2478       dst[i] += low;
2479     } else
2480       dst[i] = low;
2481
2482     carry = high;
2483   }
2484
2485   if (srcParts < dstParts) {
2486     /* Full multiplication, there is no overflow.  */
2487     assert(srcParts + 1 == dstParts);
2488     dst[srcParts] = carry;
2489     return 0;
2490   }
2491
2492   /* We overflowed if there is carry.  */
2493   if (carry)
2494     return 1;
2495
2496   /* We would overflow if any significant unwritten parts would be
2497      non-zero.  This is true if any remaining src parts are non-zero
2498      and the multiplier is non-zero.  */
2499   if (multiplier)
2500     for (unsigned i = dstParts; i < srcParts; i++)
2501       if (src[i])
2502         return 1;
2503
2504   /* We fitted in the narrow destination.  */
2505   return 0;
2506 }
2507
2508 /* DST = LHS * RHS, where DST has the same width as the operands and
2509    is filled with the least significant parts of the result.  Returns
2510    one if overflow occurred, otherwise zero.  DST must be disjoint
2511    from both operands.  */
2512 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513                       const WordType *rhs, unsigned parts) {
2514   assert(dst != lhs && dst != rhs);
2515
2516   int overflow = 0;
2517   tcSet(dst, 0, parts);
2518
2519   for (unsigned i = 0; i < parts; i++)
2520     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521                                parts - i, true);
2522
2523   return overflow;
2524 }
2525
2526 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2527 /// operands. No overflow occurs. DST must be disjoint from both operands.
2528 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529                            const WordType *rhs, unsigned lhsParts,
2530                            unsigned rhsParts) {
2531   /* Put the narrower number on the LHS for less loops below.  */
2532   if (lhsParts > rhsParts)
2533     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2534
2535   assert(dst != lhs && dst != rhs);
2536
2537   tcSet(dst, 0, rhsParts);
2538
2539   for (unsigned i = 0; i < lhsParts; i++)
2540     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2541 }
2542
2543 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545    set REMAINDER to the remainder, return zero.  i.e.
2546
2547    OLD_LHS = RHS * LHS + REMAINDER
2548
2549    SCRATCH is a bignum of the same size as the operands and result for
2550    use by the routine; its contents need not be initialized and are
2551    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2552 */
2553 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554                     WordType *remainder, WordType *srhs,
2555                     unsigned parts) {
2556   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2557
2558   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559   if (shiftCount == 0)
2560     return true;
2561
2562   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2565
2566   tcAssign(srhs, rhs, parts);
2567   tcShiftLeft(srhs, parts, shiftCount);
2568   tcAssign(remainder, lhs, parts);
2569   tcSet(lhs, 0, parts);
2570
2571   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572      the total.  */
2573   for (;;) {
2574     int compare = tcCompare(remainder, srhs, parts);
2575     if (compare >= 0) {
2576       tcSubtract(remainder, srhs, 0, parts);
2577       lhs[n] |= mask;
2578     }
2579
2580     if (shiftCount == 0)
2581       break;
2582     shiftCount--;
2583     tcShiftRight(srhs, parts, 1);
2584     if ((mask >>= 1) == 0) {
2585       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586       n--;
2587     }
2588   }
2589
2590   return false;
2591 }
2592
2593 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594 /// no restrictions on Count.
2595 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596   // Don't bother performing a no-op shift.
2597   if (!Count)
2598     return;
2599
2600   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2603
2604   // Fastpath for moving by whole words.
2605   if (BitShift == 0) {
2606     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607   } else {
2608     while (Words-- > WordShift) {
2609       Dst[Words] = Dst[Words - WordShift] << BitShift;
2610       if (Words > WordShift)
2611         Dst[Words] |=
2612           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2613     }
2614   }
2615
2616   // Fill in the remainder with 0s.
2617   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2618 }
2619
2620 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621 /// are no restrictions on Count.
2622 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623   // Don't bother performing a no-op shift.
2624   if (!Count)
2625     return;
2626
2627   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2630
2631   unsigned WordsToMove = Words - WordShift;
2632   // Fastpath for moving by whole words.
2633   if (BitShift == 0) {
2634     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635   } else {
2636     for (unsigned i = 0; i != WordsToMove; ++i) {
2637       Dst[i] = Dst[i + WordShift] >> BitShift;
2638       if (i + 1 != WordsToMove)
2639         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2640     }
2641   }
2642
2643   // Fill in the remainder with 0s.
2644   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2645 }
2646
2647 /* Bitwise and of two bignums.  */
2648 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649   for (unsigned i = 0; i < parts; i++)
2650     dst[i] &= rhs[i];
2651 }
2652
2653 /* Bitwise inclusive or of two bignums.  */
2654 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655   for (unsigned i = 0; i < parts; i++)
2656     dst[i] |= rhs[i];
2657 }
2658
2659 /* Bitwise exclusive or of two bignums.  */
2660 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661   for (unsigned i = 0; i < parts; i++)
2662     dst[i] ^= rhs[i];
2663 }
2664
2665 /* Complement a bignum in-place.  */
2666 void APInt::tcComplement(WordType *dst, unsigned parts) {
2667   for (unsigned i = 0; i < parts; i++)
2668     dst[i] = ~dst[i];
2669 }
2670
2671 /* Comparison (unsigned) of two bignums.  */
2672 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673                      unsigned parts) {
2674   while (parts) {
2675     parts--;
2676     if (lhs[parts] != rhs[parts])
2677       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2678   }
2679
2680   return 0;
2681 }
2682
2683 /* Set the least significant BITS bits of a bignum, clear the
2684    rest.  */
2685 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686                                       unsigned bits) {
2687   unsigned i = 0;
2688   while (bits > APINT_BITS_PER_WORD) {
2689     dst[i++] = ~(WordType) 0;
2690     bits -= APINT_BITS_PER_WORD;
2691   }
2692
2693   if (bits)
2694     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2695
2696   while (i < parts)
2697     dst[i++] = 0;
2698 }
2699
2700 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701                                    APInt::Rounding RM) {
2702   // Currently udivrem always rounds down.
2703   switch (RM) {
2704   case APInt::Rounding::DOWN:
2705   case APInt::Rounding::TOWARD_ZERO:
2706     return A.udiv(B);
2707   case APInt::Rounding::UP: {
2708     APInt Quo, Rem;
2709     APInt::udivrem(A, B, Quo, Rem);
2710     if (Rem == 0)
2711       return Quo;
2712     return Quo + 1;
2713   }
2714   }
2715   llvm_unreachable("Unknown APInt::Rounding enum");
2716 }
2717
2718 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719                                    APInt::Rounding RM) {
2720   switch (RM) {
2721   case APInt::Rounding::DOWN:
2722   case APInt::Rounding::UP: {
2723     APInt Quo, Rem;
2724     APInt::sdivrem(A, B, Quo, Rem);
2725     if (Rem == 0)
2726       return Quo;
2727     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728     // We want to check whether the non-integer part of the mathematical value
2729     // is negative or not. If the non-integer part is negative, we need to round
2730     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731     // already rounded down.
2732     if (RM == APInt::Rounding::DOWN) {
2733       if (Rem.isNegative() != B.isNegative())
2734         return Quo - 1;
2735       return Quo;
2736     }
2737     if (Rem.isNegative() != B.isNegative())
2738       return Quo;
2739     return Quo + 1;
2740   }
2741   // Currently sdiv rounds twards zero.
2742   case APInt::Rounding::TOWARD_ZERO:
2743     return A.sdiv(B);
2744   }
2745   llvm_unreachable("Unknown APInt::Rounding enum");
2746 }
2747
2748 Optional<APInt>
2749 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750                                            unsigned RangeWidth) {
2751   unsigned CoeffWidth = A.getBitWidth();
2752   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753   assert(RangeWidth <= CoeffWidth &&
2754          "Value range width should be less than coefficient width");
2755   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2756
2757   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2759
2760   // Identify 0 as a (non)solution immediately.
2761   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763     return APInt(CoeffWidth, 0);
2764   }
2765
2766   // The result of APInt arithmetic has the same bit width as the operands,
2767   // so it can actually lose high bits. A product of two n-bit integers needs
2768   // 2n-1 bits to represent the full value.
2769   // The operation done below (on quadratic coefficients) that can produce
2770   // the largest value is the evaluation of the equation during bisection,
2771   // which needs 3 times the bitwidth of the coefficient, so the total number
2772   // of required bits is 3n.
2773   //
2774   // The purpose of this extension is to simulate the set Z of all integers,
2775   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776   // and negative numbers (not so much in a modulo arithmetic). The method
2777   // used to solve the equation is based on the standard formula for real
2778   // numbers, and uses the concepts of "positive" and "negative" with their
2779   // usual meanings.
2780   CoeffWidth *= 3;
2781   A = A.sext(CoeffWidth);
2782   B = B.sext(CoeffWidth);
2783   C = C.sext(CoeffWidth);
2784
2785   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786   // the bit width has increased.
2787   if (A.isNegative()) {
2788     A.negate();
2789     B.negate();
2790     C.negate();
2791   }
2792
2793   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795   // and R = 2^BitWidth.
2796   // Since we're trying not only to find exact solutions, but also values
2797   // that "wrap around", such a set will always have a solution, i.e. an x
2798   // that satisfies at least one of the equations, or such that |q(x)|
2799   // exceeds kR, while |q(x-1)| for the same k does not.
2800   //
2801   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802   // positive solution n (in the above sense), and also such that the n
2803   // will be the least among all solutions corresponding to k = 0, 1, ...
2804   // (more precisely, the least element in the set
2805   //   { n(k) | k is such that a solution n(k) exists }).
2806   //
2807   // Consider the parabola (over real numbers) that corresponds to the
2808   // quadratic equation. Since A > 0, the arms of the parabola will point
2809   // up. Picking different values of k will shift it up and down by R.
2810   //
2811   // We want to shift the parabola in such a way as to reduce the problem
2812   // of solving q(x) = kR to solving shifted_q(x) = 0.
2813   // (The interesting solutions are the ceilings of the real number
2814   // solutions.)
2815   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816   APInt TwoA = 2 * A;
2817   APInt SqrB = B * B;
2818   bool PickLow;
2819
2820   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821     assert(A.isStrictlyPositive());
2822     APInt T = V.abs().urem(A);
2823     if (T.isNullValue())
2824       return V;
2825     return V.isNegative() ? V+T : V+(A-T);
2826   };
2827
2828   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829   // iff B is positive.
2830   if (B.isNonNegative()) {
2831     // If B >= 0, the vertex it at a negative location (or at 0), so in
2832     // order to have a non-negative solution we need to pick k that makes
2833     // C-kR negative. To satisfy all the requirements for the solution
2834     // that we are looking for, it needs to be closest to 0 of all k.
2835     C = C.srem(R);
2836     if (C.isStrictlyPositive())
2837       C -= R;
2838     // Pick the greater solution.
2839     PickLow = false;
2840   } else {
2841     // If B < 0, the vertex is at a positive location. For any solution
2842     // to exist, the discriminant must be non-negative. This means that
2843     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844     // lower bound on values of k: kR >= C - B^2/4A.
2845     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846     // Round LowkR up (towards +inf) to the nearest kR.
2847     LowkR = RoundUp(LowkR, R);
2848
2849     // If there exists k meeting the condition above, and such that
2850     // C-kR > 0, there will be two positive real number solutions of
2851     // q(x) = kR. Out of all such values of k, pick the one that makes
2852     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853     // In other words, find maximum k such that LowkR <= kR < C.
2854     if (C.sgt(LowkR)) {
2855       // If LowkR < C, then such a k is guaranteed to exist because
2856       // LowkR itself is a multiple of R.
2857       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2858       // Pick the smaller solution.
2859       PickLow = true;
2860     } else {
2861       // If C-kR < 0 for all potential k's, it means that one solution
2862       // will be negative, while the other will be positive. The positive
2863       // solution will shift towards 0 if the parabola is moved up.
2864       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865       // to 0, or in other words, out of all parabolas that have solutions,
2866       // pick the one that is the farthest "up").
2867       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868       C -= LowkR;
2869       // Pick the greater solution.
2870       PickLow = false;
2871     }
2872   }
2873
2874   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2876
2877   APInt D = SqrB - 4*A*C;
2878   assert(D.isNonNegative() && "Negative discriminant");
2879   APInt SQ = D.sqrt();
2880
2881   APInt Q = SQ * SQ;
2882   bool InexactSQ = Q != D;
2883   // The calculated SQ may actually be greater than the exact (non-integer)
2884   // value. If that's the case, decremement SQ to get a value that is lower.
2885   if (Q.sgt(D))
2886     SQ -= 1;
2887
2888   APInt X;
2889   APInt Rem;
2890
2891   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892   // When using the quadratic formula directly, the calculated low root
2893   // may be greater than the exact one, since we would be subtracting SQ.
2894   // To make sure that the calculated root is not greater than the exact
2895   // one, subtract SQ+1 when calculating the low root (for inexact value
2896   // of SQ).
2897   if (PickLow)
2898     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899   else
2900     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2901
2902   // The updated coefficients should be such that the (exact) solution is
2903   // positive. Since APInt division rounds towards 0, the calculated one
2904   // can be 0, but cannot be negative.
2905   assert(X.isNonNegative() && "Solution should be non-negative");
2906
2907   if (!InexactSQ && Rem.isNullValue()) {
2908     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909     return X;
2910   }
2911
2912   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913   // The exact value of the square root of D should be between SQ and SQ+1.
2914   // This implies that the solution should be between that corresponding to
2915   // SQ (i.e. X) and that corresponding to SQ+1.
2916   //
2917   // The calculated X cannot be greater than the exact (real) solution.
2918   // Actually it must be strictly less than the exact solution, while
2919   // X+1 will be greater than or equal to it.
2920
2921   APInt VX = (A*X + B)*X + C;
2922   APInt VY = VX + TwoA*X + A + B;
2923   bool SignChange = VX.isNegative() != VY.isNegative() ||
2924                     VX.isNullValue() != VY.isNullValue();
2925   // If the sign did not change between X and X+1, X is not a valid solution.
2926   // This could happen when the actual (exact) roots don't have an integer
2927   // between them, so they would both be contained between X and X+1.
2928   if (!SignChange) {
2929     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930     return None;
2931   }
2932
2933   X += 1;
2934   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935   return X;
2936 }
2937
2938 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939 /// with the integer held in IntVal.
2940 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941                             unsigned StoreBytes) {
2942   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2944
2945   if (sys::IsLittleEndianHost) {
2946     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2947     // destination from LSB to MSB: Do a straight copy.
2948     memcpy(Dst, Src, StoreBytes);
2949   } else {
2950     // Big-endian host - the source is an array of 64 bit words ordered from
2951     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2952     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953     while (StoreBytes > sizeof(uint64_t)) {
2954       StoreBytes -= sizeof(uint64_t);
2955       // May not be aligned so use memcpy.
2956       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957       Src += sizeof(uint64_t);
2958     }
2959
2960     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2961   }
2962 }
2963
2964 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968   uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969                    const_cast<uint64_t *>(IntVal.getRawData()));
2970
2971   if (sys::IsLittleEndianHost)
2972     // Little-endian host - the destination must be ordered from LSB to MSB.
2973     // The source is ordered from LSB to MSB: Do a straight copy.
2974     memcpy(Dst, Src, LoadBytes);
2975   else {
2976     // Big-endian - the destination is an array of 64 bit words ordered from
2977     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2978     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979     // a word.
2980     while (LoadBytes > sizeof(uint64_t)) {
2981       LoadBytes -= sizeof(uint64_t);
2982       // May not be aligned so use memcpy.
2983       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984       Dst += sizeof(uint64_t);
2985     }
2986
2987     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
2988   }
2989 }